1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=FUNC,GCN,GCN-NOHSA,GCN-NOHSA-SI %s 3; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=FUNC,GCN,GCN-HSA %s 4; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=FUNC,GCN,GCN-NOHSA,GCN-NOHSA-VI %s 5; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=FUNC,EGCM,EG %s 6; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=FUNC,EGCM,CM %s 7 8; FIXME: r600 is broken because the bigger testcases spill and it's not implemented 9 10define amdgpu_kernel void @global_load_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) { 11; GCN-NOHSA-SI-LABEL: global_load_i16: 12; GCN-NOHSA-SI: ; %bb.0: ; %entry 13; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 14; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 15; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 16; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 17; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 18; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 19; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 20; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 21; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 22; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 23; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 24; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 25; GCN-NOHSA-SI-NEXT: buffer_store_short v0, off, s[4:7], 0 26; GCN-NOHSA-SI-NEXT: s_endpgm 27; 28; GCN-HSA-LABEL: global_load_i16: 29; GCN-HSA: ; %bb.0: ; %entry 30; GCN-HSA-NEXT: 
s_load_dwordx4 s[0:3], s[4:5], 0x0 31; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 32; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 33; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 34; GCN-HSA-NEXT: flat_load_ushort v2, v[2:3] 35; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 36; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 37; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 38; GCN-HSA-NEXT: flat_store_short v[0:1], v2 39; GCN-HSA-NEXT: s_endpgm 40; 41; GCN-NOHSA-VI-LABEL: global_load_i16: 42; GCN-NOHSA-VI: ; %bb.0: ; %entry 43; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 44; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 45; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 46; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 47; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 48; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 49; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s6 50; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s7 51; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s2 52; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, s3 53; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 54; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 55; GCN-NOHSA-VI-NEXT: buffer_store_short v0, off, s[0:3], 0 56; GCN-NOHSA-VI-NEXT: s_endpgm 57; 58; EG-LABEL: global_load_i16: 59; EG: ; %bb.0: ; %entry 60; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 61; EG-NEXT: TEX 0 @6 62; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[] 63; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X 64; EG-NEXT: CF_END 65; EG-NEXT: PAD 66; EG-NEXT: Fetch clause starting at 6: 67; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 68; EG-NEXT: ALU clause starting at 8: 69; EG-NEXT: MOV * T0.X, KC0[2].Z, 70; EG-NEXT: ALU clause starting at 9: 71; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x, 72; EG-NEXT: AND_INT * T1.W, T0.X, literal.y, 73; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41) 74; EG-NEXT: LSHL * T0.W, PV.W, literal.x, 75; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 76; EG-NEXT: LSHL T0.X, T1.W, PV.W, 77; EG-NEXT: LSHL * T0.W, literal.x, PV.W, 78; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 79; EG-NEXT: MOV T0.Y, 0.0, 80; EG-NEXT: MOV * T0.Z, 0.0, 81; EG-NEXT: LSHR * T1.X, KC0[2].Y, 
literal.x, 82; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 83; 84; CM-LABEL: global_load_i16: 85; CM: ; %bb.0: ; %entry 86; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 87; CM-NEXT: TEX 0 @6 88; CM-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[] 89; CM-NEXT: MEM_RAT MSKOR T0.XW, T1.X 90; CM-NEXT: CF_END 91; CM-NEXT: PAD 92; CM-NEXT: Fetch clause starting at 6: 93; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 94; CM-NEXT: ALU clause starting at 8: 95; CM-NEXT: MOV * T0.X, KC0[2].Z, 96; CM-NEXT: ALU clause starting at 9: 97; CM-NEXT: AND_INT * T0.W, KC0[2].Y, literal.x, 98; CM-NEXT: 3(4.203895e-45), 0(0.000000e+00) 99; CM-NEXT: AND_INT T0.Z, T0.X, literal.x, 100; CM-NEXT: LSHL * T0.W, PV.W, literal.y, 101; CM-NEXT: 65535(9.183409e-41), 3(4.203895e-45) 102; CM-NEXT: LSHL T0.X, PV.Z, PV.W, 103; CM-NEXT: LSHL * T0.W, literal.x, PV.W, 104; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 105; CM-NEXT: MOV T0.Y, 0.0, 106; CM-NEXT: MOV * T0.Z, 0.0, 107; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 108; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 109entry: 110 %ld = load i16, i16 addrspace(1)* %in 111 store i16 %ld, i16 addrspace(1)* %out 112 ret void 113} 114 115define amdgpu_kernel void @global_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { 116; GCN-NOHSA-SI-LABEL: global_load_v2i16: 117; GCN-NOHSA-SI: ; %bb.0: ; %entry 118; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 119; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 120; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 121; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 122; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 123; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 124; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 125; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 126; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[8:11], 0 127; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 128; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 129; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 130; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 131; GCN-NOHSA-SI-NEXT: s_endpgm 132; 133; GCN-HSA-LABEL: 
global_load_v2i16: 134; GCN-HSA: ; %bb.0: ; %entry 135; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 136; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 137; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 138; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 139; GCN-HSA-NEXT: flat_load_dword v2, v[2:3] 140; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 141; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 142; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 143; GCN-HSA-NEXT: flat_store_dword v[0:1], v2 144; GCN-HSA-NEXT: s_endpgm 145; 146; GCN-NOHSA-VI-LABEL: global_load_v2i16: 147; GCN-NOHSA-VI: ; %bb.0: ; %entry 148; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 149; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 150; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 151; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 152; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 153; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 154; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s6 155; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s7 156; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s2 157; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, s3 158; GCN-NOHSA-VI-NEXT: buffer_load_dword v0, off, s[4:7], 0 159; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 160; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 161; GCN-NOHSA-VI-NEXT: s_endpgm 162; 163; EG-LABEL: global_load_v2i16: 164; EG: ; %bb.0: ; %entry 165; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 166; EG-NEXT: TEX 0 @6 167; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 168; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 169; EG-NEXT: CF_END 170; EG-NEXT: PAD 171; EG-NEXT: Fetch clause starting at 6: 172; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 173; EG-NEXT: ALU clause starting at 8: 174; EG-NEXT: MOV * T0.X, KC0[2].Z, 175; EG-NEXT: ALU clause starting at 9: 176; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 177; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 178; 179; CM-LABEL: global_load_v2i16: 180; CM: ; %bb.0: ; %entry 181; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 182; CM-NEXT: TEX 0 @6 183; CM-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 184; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X 185; 
CM-NEXT: CF_END 186; CM-NEXT: PAD 187; CM-NEXT: Fetch clause starting at 6: 188; CM-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 189; CM-NEXT: ALU clause starting at 8: 190; CM-NEXT: MOV * T0.X, KC0[2].Z, 191; CM-NEXT: ALU clause starting at 9: 192; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 193; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 194entry: 195 %ld = load <2 x i16>, <2 x i16> addrspace(1)* %in 196 store <2 x i16> %ld, <2 x i16> addrspace(1)* %out 197 ret void 198} 199 200define amdgpu_kernel void @global_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) { 201; GCN-NOHSA-SI-LABEL: global_load_v3i16: 202; GCN-NOHSA-SI: ; %bb.0: ; %entry 203; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 204; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 205; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 206; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 207; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 208; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 209; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 210; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 211; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 212; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 213; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 214; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 215; GCN-NOHSA-SI-NEXT: buffer_store_short v1, off, s[4:7], 0 offset:4 216; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 217; GCN-NOHSA-SI-NEXT: s_endpgm 218; 219; GCN-HSA-LABEL: global_load_v3i16: 220; GCN-HSA: ; %bb.0: ; %entry 221; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 222; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 223; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 224; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 225; GCN-HSA-NEXT: flat_load_dwordx2 v[0:1], v[0:1] 226; GCN-HSA-NEXT: s_add_u32 s2, s0, 4 227; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 228; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 229; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1 230; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 231; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 232; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 233; GCN-HSA-NEXT: flat_store_short v[4:5], 
v1 234; GCN-HSA-NEXT: flat_store_dword v[2:3], v0 235; GCN-HSA-NEXT: s_endpgm 236; 237; GCN-NOHSA-VI-LABEL: global_load_v3i16: 238; GCN-NOHSA-VI: ; %bb.0: ; %entry 239; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 240; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 241; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 242; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 243; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 244; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 245; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 246; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 247; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 248; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 249; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 250; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 251; GCN-NOHSA-VI-NEXT: buffer_store_short v1, off, s[0:3], 0 offset:4 252; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 253; GCN-NOHSA-VI-NEXT: s_endpgm 254; 255; EG-LABEL: global_load_v3i16: 256; EG: ; %bb.0: ; %entry 257; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 258; EG-NEXT: TEX 2 @6 259; EG-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[] 260; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0 261; EG-NEXT: MEM_RAT MSKOR T5.XW, T8.X 262; EG-NEXT: CF_END 263; EG-NEXT: Fetch clause starting at 6: 264; EG-NEXT: VTX_READ_16 T6.X, T5.X, 0, #1 265; EG-NEXT: VTX_READ_16 T7.X, T5.X, 2, #1 266; EG-NEXT: VTX_READ_16 T5.X, T5.X, 4, #1 267; EG-NEXT: ALU clause starting at 12: 268; EG-NEXT: MOV * T5.X, KC0[2].Z, 269; EG-NEXT: ALU clause starting at 13: 270; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 271; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) 272; EG-NEXT: AND_INT T1.W, PV.W, literal.x, 273; EG-NEXT: AND_INT * T2.W, T5.X, literal.y, 274; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41) 275; EG-NEXT: LSHL * T1.W, PV.W, literal.x, 276; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 277; EG-NEXT: LSHL T5.X, T2.W, PV.W, 278; EG-NEXT: LSHL * T5.W, literal.x, PV.W, 279; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 280; EG-NEXT: MOV T5.Y, 0.0, 281; EG-NEXT: MOV * T5.Z, 0.0, 
282; EG-NEXT: LSHR T8.X, T0.W, literal.x, 283; EG-NEXT: LSHL T0.W, T7.X, literal.y, 284; EG-NEXT: AND_INT * T1.W, T6.X, literal.z, 285; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 286; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 287; EG-NEXT: OR_INT T6.X, PV.W, PS, 288; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x, 289; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 290; 291; CM-LABEL: global_load_v3i16: 292; CM: ; %bb.0: ; %entry 293; CM-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 294; CM-NEXT: TEX 2 @6 295; CM-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[] 296; CM-NEXT: MEM_RAT MSKOR T5.XW, T8.X 297; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T6.X, T7.X 298; CM-NEXT: CF_END 299; CM-NEXT: Fetch clause starting at 6: 300; CM-NEXT: VTX_READ_16 T6.X, T5.X, 0, #1 301; CM-NEXT: VTX_READ_16 T7.X, T5.X, 2, #1 302; CM-NEXT: VTX_READ_16 T5.X, T5.X, 4, #1 303; CM-NEXT: ALU clause starting at 12: 304; CM-NEXT: MOV * T5.X, KC0[2].Z, 305; CM-NEXT: ALU clause starting at 13: 306; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 307; CM-NEXT: 4(5.605194e-45), 0(0.000000e+00) 308; CM-NEXT: AND_INT * T1.W, PV.W, literal.x, 309; CM-NEXT: 3(4.203895e-45), 0(0.000000e+00) 310; CM-NEXT: AND_INT T0.Z, T5.X, literal.x, 311; CM-NEXT: LSHL * T1.W, PV.W, literal.y, 312; CM-NEXT: 65535(9.183409e-41), 3(4.203895e-45) 313; CM-NEXT: LSHL T5.X, PV.Z, PV.W, 314; CM-NEXT: LSHL * T5.W, literal.x, PV.W, 315; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 316; CM-NEXT: MOV T5.Y, 0.0, 317; CM-NEXT: MOV * T5.Z, 0.0, 318; CM-NEXT: LSHL T0.Z, T7.X, literal.x, 319; CM-NEXT: AND_INT * T1.W, T6.X, literal.y, BS:VEC_120/SCL_212 320; CM-NEXT: 16(2.242078e-44), 65535(9.183409e-41) 321; CM-NEXT: OR_INT * T6.X, PV.Z, PV.W, 322; CM-NEXT: LSHR * T7.X, KC0[2].Y, literal.x, 323; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 324; CM-NEXT: LSHR * T8.X, T0.W, literal.x, 325; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 326entry: 327 %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in 328 store <3 x i16> %ld, <3 x i16> addrspace(1)* %out 329 ret void 
330} 331 332define amdgpu_kernel void @global_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) { 333; GCN-NOHSA-SI-LABEL: global_load_v4i16: 334; GCN-NOHSA-SI: ; %bb.0: ; %entry 335; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 336; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 337; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 338; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 339; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 340; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 341; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 342; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 343; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 344; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 345; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 346; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 347; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 348; GCN-NOHSA-SI-NEXT: s_endpgm 349; 350; GCN-HSA-LABEL: global_load_v4i16: 351; GCN-HSA: ; %bb.0: ; %entry 352; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 353; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 354; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 355; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 356; GCN-HSA-NEXT: flat_load_dwordx2 v[2:3], v[2:3] 357; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 358; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 359; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 360; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 361; GCN-HSA-NEXT: s_endpgm 362; 363; GCN-NOHSA-VI-LABEL: global_load_v4i16: 364; GCN-NOHSA-VI: ; %bb.0: ; %entry 365; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 366; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 367; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 368; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 369; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 370; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 371; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s6 372; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s7 373; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s2 374; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, s3 375; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 376; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 377; 
GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 378; GCN-NOHSA-VI-NEXT: s_endpgm 379; 380; EG-LABEL: global_load_v4i16: 381; EG: ; %bb.0: ; %entry 382; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 383; EG-NEXT: TEX 0 @6 384; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 385; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 386; EG-NEXT: CF_END 387; EG-NEXT: PAD 388; EG-NEXT: Fetch clause starting at 6: 389; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1 390; EG-NEXT: ALU clause starting at 8: 391; EG-NEXT: MOV * T0.X, KC0[2].Z, 392; EG-NEXT: ALU clause starting at 9: 393; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 394; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 395; 396; CM-LABEL: global_load_v4i16: 397; CM: ; %bb.0: ; %entry 398; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 399; CM-NEXT: TEX 0 @6 400; CM-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 401; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X 402; CM-NEXT: CF_END 403; CM-NEXT: PAD 404; CM-NEXT: Fetch clause starting at 6: 405; CM-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1 406; CM-NEXT: ALU clause starting at 8: 407; CM-NEXT: MOV * T0.X, KC0[2].Z, 408; CM-NEXT: ALU clause starting at 9: 409; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 410; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 411entry: 412 %ld = load <4 x i16>, <4 x i16> addrspace(1)* %in 413 store <4 x i16> %ld, <4 x i16> addrspace(1)* %out 414 ret void 415} 416 417define amdgpu_kernel void @global_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) { 418; GCN-NOHSA-SI-LABEL: global_load_v8i16: 419; GCN-NOHSA-SI: ; %bb.0: ; %entry 420; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 421; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 422; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 423; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 424; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 425; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 426; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 427; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 428; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 
429; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 430; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 431; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 432; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 433; GCN-NOHSA-SI-NEXT: s_endpgm 434; 435; GCN-HSA-LABEL: global_load_v8i16: 436; GCN-HSA: ; %bb.0: ; %entry 437; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 438; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 439; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 440; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 441; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 442; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 443; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 444; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 445; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 446; GCN-HSA-NEXT: s_endpgm 447; 448; GCN-NOHSA-VI-LABEL: global_load_v8i16: 449; GCN-NOHSA-VI: ; %bb.0: ; %entry 450; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 451; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 452; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 453; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 454; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 455; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 456; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s6 457; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s7 458; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s2 459; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, s3 460; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 461; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 462; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 463; GCN-NOHSA-VI-NEXT: s_endpgm 464; 465; EG-LABEL: global_load_v8i16: 466; EG: ; %bb.0: ; %entry 467; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 468; EG-NEXT: TEX 0 @6 469; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 470; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 471; EG-NEXT: CF_END 472; EG-NEXT: PAD 473; EG-NEXT: Fetch clause starting at 6: 474; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 475; EG-NEXT: ALU clause starting at 8: 476; EG-NEXT: MOV * T0.X, KC0[2].Z, 477; EG-NEXT: ALU clause starting at 9: 478; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 479; 
EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 480; 481; CM-LABEL: global_load_v8i16: 482; CM: ; %bb.0: ; %entry 483; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 484; CM-NEXT: TEX 0 @6 485; CM-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 486; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X 487; CM-NEXT: CF_END 488; CM-NEXT: PAD 489; CM-NEXT: Fetch clause starting at 6: 490; CM-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 491; CM-NEXT: ALU clause starting at 8: 492; CM-NEXT: MOV * T0.X, KC0[2].Z, 493; CM-NEXT: ALU clause starting at 9: 494; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 495; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 496entry: 497 %ld = load <8 x i16>, <8 x i16> addrspace(1)* %in 498 store <8 x i16> %ld, <8 x i16> addrspace(1)* %out 499 ret void 500} 501 502define amdgpu_kernel void @global_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) { 503; GCN-NOHSA-SI-LABEL: global_load_v16i16: 504; GCN-NOHSA-SI: ; %bb.0: ; %entry 505; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 506; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 507; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 508; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 509; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 510; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 511; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 512; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 513; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 514; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 515; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 516; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 517; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1) 518; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16 519; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1) 520; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 521; GCN-NOHSA-SI-NEXT: s_endpgm 522; 523; GCN-HSA-LABEL: global_load_v16i16: 524; GCN-HSA: ; %bb.0: ; %entry 525; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 526; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 527; GCN-HSA-NEXT: s_add_u32 
s4, s0, 16 528; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 529; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 530; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 531; GCN-HSA-NEXT: s_add_u32 s2, s2, 16 532; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 533; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 534; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 535; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 536; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] 537; GCN-HSA-NEXT: v_mov_b32_e32 v11, s5 538; GCN-HSA-NEXT: v_mov_b32_e32 v9, s1 539; GCN-HSA-NEXT: v_mov_b32_e32 v10, s4 540; GCN-HSA-NEXT: v_mov_b32_e32 v8, s0 541; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 542; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7] 543; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 544; GCN-HSA-NEXT: s_endpgm 545; 546; GCN-NOHSA-VI-LABEL: global_load_v16i16: 547; GCN-NOHSA-VI: ; %bb.0: ; %entry 548; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 549; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 550; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 551; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 552; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 553; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 554; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 555; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 556; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 557; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 558; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 559; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 560; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 561; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 562; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 563; GCN-NOHSA-VI-NEXT: s_endpgm 564; 565; EG-LABEL: global_load_v16i16: 566; EG: ; %bb.0: ; %entry 567; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 568; EG-NEXT: TEX 0 @8 569; EG-NEXT: ALU 1, @13, KC0[CB0:0-32], KC1[] 570; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0 571; EG-NEXT: TEX 0 @10 572; EG-NEXT: ALU 3, @15, KC0[CB0:0-32], KC1[] 573; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 
574; EG-NEXT: CF_END 575; EG-NEXT: Fetch clause starting at 8: 576; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 0, #1 577; EG-NEXT: Fetch clause starting at 10: 578; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 16, #1 579; EG-NEXT: ALU clause starting at 12: 580; EG-NEXT: MOV * T0.X, KC0[2].Z, 581; EG-NEXT: ALU clause starting at 13: 582; EG-NEXT: LSHR * T2.X, KC0[2].Y, literal.x, 583; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 584; EG-NEXT: ALU clause starting at 15: 585; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.x, 586; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 587; EG-NEXT: LSHR * T1.X, PV.W, literal.x, 588; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 589; 590; CM-LABEL: global_load_v16i16: 591; CM: ; %bb.0: ; %entry 592; CM-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 593; CM-NEXT: TEX 0 @8 594; CM-NEXT: ALU 1, @13, KC0[CB0:0-32], KC1[] 595; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T2.X 596; CM-NEXT: TEX 0 @10 597; CM-NEXT: ALU 3, @15, KC0[CB0:0-32], KC1[] 598; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X 599; CM-NEXT: CF_END 600; CM-NEXT: Fetch clause starting at 8: 601; CM-NEXT: VTX_READ_128 T1.XYZW, T0.X, 0, #1 602; CM-NEXT: Fetch clause starting at 10: 603; CM-NEXT: VTX_READ_128 T0.XYZW, T0.X, 16, #1 604; CM-NEXT: ALU clause starting at 12: 605; CM-NEXT: MOV * T0.X, KC0[2].Z, 606; CM-NEXT: ALU clause starting at 13: 607; CM-NEXT: LSHR * T2.X, KC0[2].Y, literal.x, 608; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 609; CM-NEXT: ALU clause starting at 15: 610; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.x, 611; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 612; CM-NEXT: LSHR * T1.X, PV.W, literal.x, 613; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 614entry: 615 %ld = load <16 x i16>, <16 x i16> addrspace(1)* %in 616 store <16 x i16> %ld, <16 x i16> addrspace(1)* %out 617 ret void 618} 619 620define amdgpu_kernel void @global_load_v16i16_align2(<16 x i16> addrspace(1)* %in, <16 x i16> addrspace(1)* %out) #0 { 621; GCN-NOHSA-SI-LABEL: global_load_v16i16_align2: 622; GCN-NOHSA-SI: ; 
%bb.0: ; %entry 623; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 624; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 625; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 626; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 627; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 628; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 629; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 630; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 631; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 632; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 633; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 634; GCN-NOHSA-SI-NEXT: buffer_load_ushort v1, off, s[4:7], 0 offset:2 635; GCN-NOHSA-SI-NEXT: buffer_load_ushort v4, off, s[4:7], 0 offset:4 636; GCN-NOHSA-SI-NEXT: buffer_load_ushort v2, off, s[4:7], 0 offset:6 637; GCN-NOHSA-SI-NEXT: buffer_load_ushort v5, off, s[4:7], 0 offset:8 638; GCN-NOHSA-SI-NEXT: buffer_load_ushort v3, off, s[4:7], 0 offset:10 639; GCN-NOHSA-SI-NEXT: buffer_load_ushort v6, off, s[4:7], 0 offset:12 640; GCN-NOHSA-SI-NEXT: buffer_load_ushort v7, off, s[4:7], 0 offset:14 641; GCN-NOHSA-SI-NEXT: buffer_load_ushort v8, off, s[4:7], 0 offset:16 642; GCN-NOHSA-SI-NEXT: buffer_load_ushort v9, off, s[4:7], 0 offset:18 643; GCN-NOHSA-SI-NEXT: buffer_load_ushort v10, off, s[4:7], 0 offset:20 644; GCN-NOHSA-SI-NEXT: buffer_load_ushort v11, off, s[4:7], 0 offset:22 645; GCN-NOHSA-SI-NEXT: buffer_load_ushort v12, off, s[4:7], 0 offset:24 646; GCN-NOHSA-SI-NEXT: buffer_load_ushort v13, off, s[4:7], 0 offset:26 647; GCN-NOHSA-SI-NEXT: buffer_load_ushort v14, off, s[4:7], 0 offset:28 648; GCN-NOHSA-SI-NEXT: buffer_load_ushort v15, off, s[4:7], 0 offset:30 649; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(8) 650; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v7, 16, v7 651; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v16, 16, v3 652; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v17, 16, v2 653; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v18, 16, v1 654; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 655; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v15, 16, v15 656; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 
v13, 16, v13 657; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v11, 16, v11 658; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v9, 16, v9 659; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v3, v7, v6 660; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v2, v16, v5 661; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v1, v17, v4 662; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v0, v18, v0 663; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v7, v15, v14 664; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v6, v13, v12 665; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v5, v11, v10 666; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v4, v9, v8 667; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[8:11], 0 offset:16 668; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 669; GCN-NOHSA-SI-NEXT: s_endpgm 670; 671; GCN-HSA-LABEL: global_load_v16i16_align2: 672; GCN-HSA: ; %bb.0: ; %entry 673; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 674; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 675; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 676; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 677; GCN-HSA-NEXT: s_add_u32 s0, s0, 16 678; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 679; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 680; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 681; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 682; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] 683; GCN-HSA-NEXT: s_add_u32 s0, s2, 16 684; GCN-HSA-NEXT: s_addc_u32 s1, s3, 0 685; GCN-HSA-NEXT: v_mov_b32_e32 v11, s1 686; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 687; GCN-HSA-NEXT: v_mov_b32_e32 v10, s0 688; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 689; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 690; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7] 691; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 692; GCN-HSA-NEXT: s_endpgm 693; 694; GCN-NOHSA-VI-LABEL: global_load_v16i16_align2: 695; GCN-NOHSA-VI: ; %bb.0: ; %entry 696; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 697; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 698; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 699; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 700; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 701; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 
702; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 703; GCN-NOHSA-VI-NEXT: buffer_load_ushort v1, off, s[0:3], 0 offset:2 704; GCN-NOHSA-VI-NEXT: buffer_load_ushort v4, off, s[0:3], 0 offset:4 705; GCN-NOHSA-VI-NEXT: buffer_load_ushort v2, off, s[0:3], 0 offset:6 706; GCN-NOHSA-VI-NEXT: buffer_load_ushort v5, off, s[0:3], 0 offset:8 707; GCN-NOHSA-VI-NEXT: buffer_load_ushort v3, off, s[0:3], 0 offset:10 708; GCN-NOHSA-VI-NEXT: buffer_load_ushort v6, off, s[0:3], 0 offset:12 709; GCN-NOHSA-VI-NEXT: buffer_load_ushort v7, off, s[0:3], 0 offset:14 710; GCN-NOHSA-VI-NEXT: buffer_load_ushort v8, off, s[0:3], 0 offset:16 711; GCN-NOHSA-VI-NEXT: buffer_load_ushort v9, off, s[0:3], 0 offset:18 712; GCN-NOHSA-VI-NEXT: buffer_load_ushort v10, off, s[0:3], 0 offset:20 713; GCN-NOHSA-VI-NEXT: buffer_load_ushort v11, off, s[0:3], 0 offset:22 714; GCN-NOHSA-VI-NEXT: buffer_load_ushort v12, off, s[0:3], 0 offset:24 715; GCN-NOHSA-VI-NEXT: buffer_load_ushort v13, off, s[0:3], 0 offset:26 716; GCN-NOHSA-VI-NEXT: buffer_load_ushort v14, off, s[0:3], 0 offset:28 717; GCN-NOHSA-VI-NEXT: buffer_load_ushort v15, off, s[0:3], 0 offset:30 718; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s6 719; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s7 720; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(14) 721; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v18, 16, v1 722; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v0, v18, v0 723; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(12) 724; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v17, 16, v2 725; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v1, v17, v4 726; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(10) 727; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v16, 16, v3 728; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v2, v16, v5 729; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(8) 730; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v7, 16, v7 731; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v3, v7, v6 732; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(6) 733; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v9, 16, v9 734; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v4, v9, v8 735; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(4) 
736; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v11, 16, v11 737; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v5, v11, v10 738; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(2) 739; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v13, 16, v13 740; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v6, v13, v12 741; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 742; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v15, 16, v15 743; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v7, v15, v14 744; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 745; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 746; GCN-NOHSA-VI-NEXT: s_endpgm 747; 748; EG-LABEL: global_load_v16i16_align2: 749; EG: ; %bb.0: ; %entry 750; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 751; EG-NEXT: TEX 1 @6 752; EG-NEXT: ALU 4, @11, KC0[CB0:0-32], KC1[] 753; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T3.X, 0 754; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T2.X, 1 755; EG-NEXT: CF_END 756; EG-NEXT: Fetch clause starting at 6: 757; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 16, #1 758; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 759; EG-NEXT: ALU clause starting at 10: 760; EG-NEXT: MOV * T0.X, KC0[2].Y, 761; EG-NEXT: ALU clause starting at 11: 762; EG-NEXT: LSHR T2.X, KC0[2].Z, literal.x, 763; EG-NEXT: ADD_INT * T2.W, KC0[2].Z, literal.y, 764; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 765; EG-NEXT: LSHR * T3.X, PV.W, literal.x, 766; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 767; 768; CM-LABEL: global_load_v16i16_align2: 769; CM: ; %bb.0: ; %entry 770; CM-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 771; CM-NEXT: TEX 1 @6 772; CM-NEXT: ALU 5, @11, KC0[CB0:0-32], KC1[] 773; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T3.X 774; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T2.X 775; CM-NEXT: CF_END 776; CM-NEXT: Fetch clause starting at 6: 777; CM-NEXT: VTX_READ_128 T1.XYZW, T0.X, 16, #1 778; CM-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 779; CM-NEXT: ALU clause starting at 10: 780; CM-NEXT: MOV * T0.X, KC0[2].Y, 781; CM-NEXT: ALU clause starting at 11: 782; CM-NEXT: ADD_INT 
* T2.W, KC0[2].Z, literal.x, 783; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 784; CM-NEXT: LSHR * T2.X, PV.W, literal.x, 785; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 786; CM-NEXT: LSHR * T3.X, KC0[2].Z, literal.x, 787; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 788entry: 789 %ld = load <16 x i16>, <16 x i16> addrspace(1)* %in, align 2 790 store <16 x i16> %ld, <16 x i16> addrspace(1)* %out, align 32 791 ret void 792} 793 794define amdgpu_kernel void @global_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 { 795; GCN-NOHSA-SI-LABEL: global_zextload_i16_to_i32: 796; GCN-NOHSA-SI: ; %bb.0: 797; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 798; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 799; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 800; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 801; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 802; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 803; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 804; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 805; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 806; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 807; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 808; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 809; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 810; GCN-NOHSA-SI-NEXT: s_endpgm 811; 812; GCN-HSA-LABEL: global_zextload_i16_to_i32: 813; GCN-HSA: ; %bb.0: 814; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 815; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 816; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 817; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 818; GCN-HSA-NEXT: flat_load_ushort v2, v[2:3] 819; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 820; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 821; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 822; GCN-HSA-NEXT: flat_store_dword v[0:1], v2 823; GCN-HSA-NEXT: s_endpgm 824; 825; GCN-NOHSA-VI-LABEL: global_zextload_i16_to_i32: 826; GCN-NOHSA-VI: ; %bb.0: 827; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 828; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 829; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 830; 
GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 831; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 832; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 833; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s6 834; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s7 835; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s2 836; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, s3 837; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 838; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 839; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 840; GCN-NOHSA-VI-NEXT: s_endpgm 841; 842; EG-LABEL: global_zextload_i16_to_i32: 843; EG: ; %bb.0: 844; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 845; EG-NEXT: TEX 0 @6 846; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 847; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 848; EG-NEXT: CF_END 849; EG-NEXT: PAD 850; EG-NEXT: Fetch clause starting at 6: 851; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 852; EG-NEXT: ALU clause starting at 8: 853; EG-NEXT: MOV * T0.X, KC0[2].Z, 854; EG-NEXT: ALU clause starting at 9: 855; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 856; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 857; 858; CM-LABEL: global_zextload_i16_to_i32: 859; CM: ; %bb.0: 860; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 861; CM-NEXT: TEX 0 @6 862; CM-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 863; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X 864; CM-NEXT: CF_END 865; CM-NEXT: PAD 866; CM-NEXT: Fetch clause starting at 6: 867; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 868; CM-NEXT: ALU clause starting at 8: 869; CM-NEXT: MOV * T0.X, KC0[2].Z, 870; CM-NEXT: ALU clause starting at 9: 871; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 872; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 873 %a = load i16, i16 addrspace(1)* %in 874 %ext = zext i16 %a to i32 875 store i32 %ext, i32 addrspace(1)* %out 876 ret void 877} 878 879define amdgpu_kernel void @global_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 { 880; GCN-NOHSA-SI-LABEL: global_sextload_i16_to_i32: 881; GCN-NOHSA-SI: ; %bb.0: 882; GCN-NOHSA-SI-NEXT: s_load_dwordx4 
s[0:3], s[0:1], 0x9 883; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 884; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 885; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 886; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 887; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 888; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 889; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 890; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 891; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 892; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 893; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 894; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 895; GCN-NOHSA-SI-NEXT: s_endpgm 896; 897; GCN-HSA-LABEL: global_sextload_i16_to_i32: 898; GCN-HSA: ; %bb.0: 899; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 900; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 901; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 902; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 903; GCN-HSA-NEXT: flat_load_sshort v2, v[2:3] 904; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 905; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 906; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 907; GCN-HSA-NEXT: flat_store_dword v[0:1], v2 908; GCN-HSA-NEXT: s_endpgm 909; 910; GCN-NOHSA-VI-LABEL: global_sextload_i16_to_i32: 911; GCN-NOHSA-VI: ; %bb.0: 912; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 913; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 914; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 915; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 916; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 917; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 918; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s6 919; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s7 920; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s2 921; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, s3 922; GCN-NOHSA-VI-NEXT: buffer_load_sshort v0, off, s[4:7], 0 923; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 924; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 925; GCN-NOHSA-VI-NEXT: s_endpgm 926; 927; EG-LABEL: global_sextload_i16_to_i32: 928; EG: ; %bb.0: 929; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 930; EG-NEXT: TEX 0 @6 931; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 932; 
EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 933; EG-NEXT: CF_END 934; EG-NEXT: PAD 935; EG-NEXT: Fetch clause starting at 6: 936; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 937; EG-NEXT: ALU clause starting at 8: 938; EG-NEXT: MOV * T0.X, KC0[2].Z, 939; EG-NEXT: ALU clause starting at 9: 940; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 941; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 942; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 943; 944; CM-LABEL: global_sextload_i16_to_i32: 945; CM: ; %bb.0: 946; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 947; CM-NEXT: TEX 0 @6 948; CM-NEXT: ALU 3, @9, KC0[CB0:0-32], KC1[] 949; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X 950; CM-NEXT: CF_END 951; CM-NEXT: PAD 952; CM-NEXT: Fetch clause starting at 6: 953; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 954; CM-NEXT: ALU clause starting at 8: 955; CM-NEXT: MOV * T0.X, KC0[2].Z, 956; CM-NEXT: ALU clause starting at 9: 957; CM-NEXT: BFE_INT * T0.X, T0.X, 0.0, literal.x, 958; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 959; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 960; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 961 %a = load i16, i16 addrspace(1)* %in 962 %ext = sext i16 %a to i32 963 store i32 %ext, i32 addrspace(1)* %out 964 ret void 965} 966 967define amdgpu_kernel void @global_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 { 968; GCN-NOHSA-SI-LABEL: global_zextload_v1i16_to_v1i32: 969; GCN-NOHSA-SI: ; %bb.0: 970; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 971; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 972; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 973; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 974; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 975; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 976; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 977; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 978; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 979; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 980; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 981; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 
982; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 983; GCN-NOHSA-SI-NEXT: s_endpgm 984; 985; GCN-HSA-LABEL: global_zextload_v1i16_to_v1i32: 986; GCN-HSA: ; %bb.0: 987; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 988; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 989; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 990; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 991; GCN-HSA-NEXT: flat_load_ushort v2, v[2:3] 992; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 993; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 994; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 995; GCN-HSA-NEXT: flat_store_dword v[0:1], v2 996; GCN-HSA-NEXT: s_endpgm 997; 998; GCN-NOHSA-VI-LABEL: global_zextload_v1i16_to_v1i32: 999; GCN-NOHSA-VI: ; %bb.0: 1000; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1001; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1002; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 1003; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1004; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 1005; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 1006; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s6 1007; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s7 1008; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s2 1009; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, s3 1010; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 1011; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 1012; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 1013; GCN-NOHSA-VI-NEXT: s_endpgm 1014; 1015; EG-LABEL: global_zextload_v1i16_to_v1i32: 1016; EG: ; %bb.0: 1017; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1018; EG-NEXT: TEX 0 @6 1019; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 1020; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 1021; EG-NEXT: CF_END 1022; EG-NEXT: PAD 1023; EG-NEXT: Fetch clause starting at 6: 1024; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 1025; EG-NEXT: ALU clause starting at 8: 1026; EG-NEXT: MOV * T0.X, KC0[2].Z, 1027; EG-NEXT: ALU clause starting at 9: 1028; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1029; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1030; 1031; CM-LABEL: global_zextload_v1i16_to_v1i32: 1032; CM: ; %bb.0: 
1033; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1034; CM-NEXT: TEX 0 @6 1035; CM-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 1036; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X 1037; CM-NEXT: CF_END 1038; CM-NEXT: PAD 1039; CM-NEXT: Fetch clause starting at 6: 1040; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 1041; CM-NEXT: ALU clause starting at 8: 1042; CM-NEXT: MOV * T0.X, KC0[2].Z, 1043; CM-NEXT: ALU clause starting at 9: 1044; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1045; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1046 %load = load <1 x i16>, <1 x i16> addrspace(1)* %in 1047 %ext = zext <1 x i16> %load to <1 x i32> 1048 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out 1049 ret void 1050} 1051 1052define amdgpu_kernel void @global_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 { 1053; GCN-NOHSA-SI-LABEL: global_sextload_v1i16_to_v1i32: 1054; GCN-NOHSA-SI: ; %bb.0: 1055; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1056; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 1057; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 1058; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 1059; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 1060; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1061; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 1062; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 1063; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 1064; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 1065; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 1066; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 1067; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 1068; GCN-NOHSA-SI-NEXT: s_endpgm 1069; 1070; GCN-HSA-LABEL: global_sextload_v1i16_to_v1i32: 1071; GCN-HSA: ; %bb.0: 1072; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1073; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1074; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 1075; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 1076; GCN-HSA-NEXT: flat_load_sshort v2, v[2:3] 1077; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 1078; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 1079; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 
1080; GCN-HSA-NEXT: flat_store_dword v[0:1], v2 1081; GCN-HSA-NEXT: s_endpgm 1082; 1083; GCN-NOHSA-VI-LABEL: global_sextload_v1i16_to_v1i32: 1084; GCN-NOHSA-VI: ; %bb.0: 1085; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1086; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1087; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 1088; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1089; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 1090; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 1091; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s6 1092; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s7 1093; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s2 1094; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, s3 1095; GCN-NOHSA-VI-NEXT: buffer_load_sshort v0, off, s[4:7], 0 1096; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 1097; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 1098; GCN-NOHSA-VI-NEXT: s_endpgm 1099; 1100; EG-LABEL: global_sextload_v1i16_to_v1i32: 1101; EG: ; %bb.0: 1102; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1103; EG-NEXT: TEX 0 @6 1104; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 1105; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 1106; EG-NEXT: CF_END 1107; EG-NEXT: PAD 1108; EG-NEXT: Fetch clause starting at 6: 1109; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 1110; EG-NEXT: ALU clause starting at 8: 1111; EG-NEXT: MOV * T0.X, KC0[2].Z, 1112; EG-NEXT: ALU clause starting at 9: 1113; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 1114; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 1115; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 1116; 1117; CM-LABEL: global_sextload_v1i16_to_v1i32: 1118; CM: ; %bb.0: 1119; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1120; CM-NEXT: TEX 0 @6 1121; CM-NEXT: ALU 3, @9, KC0[CB0:0-32], KC1[] 1122; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X 1123; CM-NEXT: CF_END 1124; CM-NEXT: PAD 1125; CM-NEXT: Fetch clause starting at 6: 1126; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 1127; CM-NEXT: ALU clause starting at 8: 1128; CM-NEXT: MOV * T0.X, KC0[2].Z, 1129; CM-NEXT: ALU clause starting at 9: 1130; CM-NEXT: BFE_INT * T0.X, T0.X, 
0.0, literal.x, 1131; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1132; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1133; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1134 %load = load <1 x i16>, <1 x i16> addrspace(1)* %in 1135 %ext = sext <1 x i16> %load to <1 x i32> 1136 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out 1137 ret void 1138} 1139 1140define amdgpu_kernel void @global_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { 1141; GCN-NOHSA-SI-LABEL: global_zextload_v2i16_to_v2i32: 1142; GCN-NOHSA-SI: ; %bb.0: 1143; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1144; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 1145; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 1146; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 1147; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 1148; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1149; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 1150; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 1151; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[8:11], 0 1152; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 1153; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 1154; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 1155; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 1156; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v0 1157; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1158; GCN-NOHSA-SI-NEXT: s_endpgm 1159; 1160; GCN-HSA-LABEL: global_zextload_v2i16_to_v2i32: 1161; GCN-HSA: ; %bb.0: 1162; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1163; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1164; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1165; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1166; GCN-HSA-NEXT: flat_load_dword v2, v[0:1] 1167; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 1168; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 1169; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 1170; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v2 1171; GCN-HSA-NEXT: v_and_b32_e32 v2, 0xffff, v2 1172; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 1173; GCN-HSA-NEXT: s_endpgm 1174; 1175; GCN-NOHSA-VI-LABEL: global_zextload_v2i16_to_v2i32: 
1176; GCN-NOHSA-VI: ; %bb.0: 1177; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1178; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1179; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 1180; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 1181; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 1182; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1183; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 1184; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 1185; GCN-NOHSA-VI-NEXT: buffer_load_dword v0, off, s[8:11], 0 1186; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 1187; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 1188; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 1189; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 1190; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0 1191; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1192; GCN-NOHSA-VI-NEXT: s_endpgm 1193; 1194; EG-LABEL: global_zextload_v2i16_to_v2i32: 1195; EG: ; %bb.0: 1196; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1197; EG-NEXT: TEX 0 @6 1198; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[] 1199; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XY, T5.X, 1 1200; EG-NEXT: CF_END 1201; EG-NEXT: PAD 1202; EG-NEXT: Fetch clause starting at 6: 1203; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 1204; EG-NEXT: ALU clause starting at 8: 1205; EG-NEXT: MOV * T4.X, KC0[2].Z, 1206; EG-NEXT: ALU clause starting at 9: 1207; EG-NEXT: LSHR * T4.Y, T4.X, literal.x, 1208; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1209; EG-NEXT: AND_INT T4.X, T4.X, literal.x, 1210; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y, 1211; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 1212; 1213; CM-LABEL: global_zextload_v2i16_to_v2i32: 1214; CM: ; %bb.0: 1215; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1216; CM-NEXT: TEX 0 @6 1217; CM-NEXT: ALU 5, @9, KC0[CB0:0-32], KC1[] 1218; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T4, T5.X 1219; CM-NEXT: CF_END 1220; CM-NEXT: PAD 1221; CM-NEXT: Fetch clause starting at 6: 1222; CM-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 1223; CM-NEXT: ALU clause starting at 8: 1224; CM-NEXT: MOV * T4.X, KC0[2].Z, 1225; 
CM-NEXT: ALU clause starting at 9: 1226; CM-NEXT: LSHR * T4.Y, T4.X, literal.x, 1227; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1228; CM-NEXT: AND_INT * T4.X, T4.X, literal.x, 1229; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1230; CM-NEXT: LSHR * T5.X, KC0[2].Y, literal.x, 1231; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1232 %load = load <2 x i16>, <2 x i16> addrspace(1)* %in 1233 %ext = zext <2 x i16> %load to <2 x i32> 1234 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out 1235 ret void 1236} 1237 1238; TODO: The r600 EG and CM outputs should use ASHR instead of LSHR + BFE_INT 1239define amdgpu_kernel void @global_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { 1240; GCN-NOHSA-SI-LABEL: global_sextload_v2i16_to_v2i32: 1241; GCN-NOHSA-SI: ; %bb.0: 1242; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1243; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 1244; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 1245; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 1246; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 1247; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1248; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 1249; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 1250; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[8:11], 0 1251; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 1252; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 1253; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 1254; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 16, v0 1255; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v0, 0, 16 1256; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1257; GCN-NOHSA-SI-NEXT: s_endpgm 1258; 1259; GCN-HSA-LABEL: global_sextload_v2i16_to_v2i32: 1260; GCN-HSA: ; %bb.0: 1261; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1262; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1263; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1264; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1265; GCN-HSA-NEXT: flat_load_dword v2, v[0:1] 1266; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 1267; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 1268; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 1269; GCN-HSA-NEXT:
v_ashrrev_i32_e32 v3, 16, v2 1270; GCN-HSA-NEXT: v_bfe_i32 v2, v2, 0, 16 1271; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 1272; GCN-HSA-NEXT: s_endpgm 1273; 1274; GCN-NOHSA-VI-LABEL: global_sextload_v2i16_to_v2i32: 1275; GCN-NOHSA-VI: ; %bb.0: 1276; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1277; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1278; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 1279; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 1280; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 1281; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1282; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 1283; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 1284; GCN-NOHSA-VI-NEXT: buffer_load_dword v0, off, s[8:11], 0 1285; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 1286; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 1287; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 1288; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 16, v0 1289; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v0, 0, 16 1290; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1291; GCN-NOHSA-VI-NEXT: s_endpgm 1292; 1293; EG-LABEL: global_sextload_v2i16_to_v2i32: 1294; EG: ; %bb.0: 1295; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1296; EG-NEXT: TEX 0 @6 1297; EG-NEXT: ALU 5, @9, KC0[CB0:0-32], KC1[] 1298; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XY, T4.X, 1 1299; EG-NEXT: CF_END 1300; EG-NEXT: PAD 1301; EG-NEXT: Fetch clause starting at 6: 1302; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 1303; EG-NEXT: ALU clause starting at 8: 1304; EG-NEXT: MOV * T4.X, KC0[2].Z, 1305; EG-NEXT: ALU clause starting at 9: 1306; EG-NEXT: BFE_INT T5.X, T4.X, 0.0, literal.x, 1307; EG-NEXT: LSHR T0.W, T4.X, literal.x, 1308; EG-NEXT: LSHR * T4.X, KC0[2].Y, literal.y, 1309; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 1310; EG-NEXT: BFE_INT * T5.Y, PV.W, 0.0, literal.x, 1311; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1312; 1313; CM-LABEL: global_sextload_v2i16_to_v2i32: 1314; CM: ; %bb.0: 1315; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1316; CM-NEXT: TEX 0 @6 1317; CM-NEXT: ALU 5, @9, KC0[CB0:0-32], KC1[] 1318; 
CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T5, T4.X 1319; CM-NEXT: CF_END 1320; CM-NEXT: PAD 1321; CM-NEXT: Fetch clause starting at 6: 1322; CM-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 1323; CM-NEXT: ALU clause starting at 8: 1324; CM-NEXT: MOV * T4.X, KC0[2].Z, 1325; CM-NEXT: ALU clause starting at 9: 1326; CM-NEXT: BFE_INT T5.X, T4.X, 0.0, literal.x, 1327; CM-NEXT: LSHR * T0.W, T4.X, literal.x, 1328; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1329; CM-NEXT: LSHR T4.X, KC0[2].Y, literal.x, 1330; CM-NEXT: BFE_INT * T5.Y, PV.W, 0.0, literal.y, 1331; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1332 %load = load <2 x i16>, <2 x i16> addrspace(1)* %in 1333 %ext = sext <2 x i16> %load to <2 x i32> 1334 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out 1335 ret void 1336} 1337 1338define amdgpu_kernel void @global_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) { 1339; GCN-NOHSA-SI-LABEL: global_zextload_v3i16_to_v3i32: 1340; GCN-NOHSA-SI: ; %bb.0: ; %entry 1341; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1342; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 1343; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 1344; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 1345; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 1346; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1347; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 1348; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 1349; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 1350; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, 0xffff 1351; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 1352; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 1353; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 1354; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v0 1355; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v2, s2, v0 1356; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, s2, v1 1357; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:8 1358; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[2:3], off, s[4:7], 0 1359; GCN-NOHSA-SI-NEXT: s_endpgm 1360; 1361; GCN-HSA-LABEL: global_zextload_v3i16_to_v3i32: 1362; 
GCN-HSA: ; %bb.0: ; %entry 1363; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1364; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1365; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1366; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1367; GCN-HSA-NEXT: flat_load_dwordx2 v[3:4], v[0:1] 1368; GCN-HSA-NEXT: s_mov_b32 s2, 0xffff 1369; GCN-HSA-NEXT: v_mov_b32_e32 v5, s0 1370; GCN-HSA-NEXT: v_mov_b32_e32 v6, s1 1371; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 1372; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v3 1373; GCN-HSA-NEXT: v_and_b32_e32 v2, s2, v4 1374; GCN-HSA-NEXT: v_and_b32_e32 v0, s2, v3 1375; GCN-HSA-NEXT: flat_store_dwordx3 v[5:6], v[0:2] 1376; GCN-HSA-NEXT: s_endpgm 1377; 1378; GCN-NOHSA-VI-LABEL: global_zextload_v3i16_to_v3i32: 1379; GCN-NOHSA-VI: ; %bb.0: ; %entry 1380; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1381; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1382; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 1383; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 1384; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 1385; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1386; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 1387; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 1388; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 1389; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, 0xffff 1390; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 1391; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 1392; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 1393; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v2, s6, v1 1394; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 1395; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, s6, v0 1396; GCN-NOHSA-VI-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 1397; GCN-NOHSA-VI-NEXT: s_endpgm 1398; 1399; EG-LABEL: global_zextload_v3i16_to_v3i32: 1400; EG: ; %bb.0: ; %entry 1401; EG-NEXT: ALU 4, @12, KC0[CB0:0-32], KC1[] 1402; EG-NEXT: TEX 2 @6 1403; EG-NEXT: ALU 2, @17, KC0[], KC1[] 1404; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T4.X, 0 1405; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XY, T0.X, 1 1406; EG-NEXT: CF_END 1407; EG-NEXT: Fetch clause starting at 6: 1408; EG-NEXT: 
VTX_READ_16 T2.X, T1.X, 4, #1 1409; EG-NEXT: VTX_READ_16 T3.X, T1.X, 0, #1 1410; EG-NEXT: VTX_READ_16 T1.X, T1.X, 2, #1 1411; EG-NEXT: ALU clause starting at 12: 1412; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x, 1413; EG-NEXT: MOV * T1.X, KC0[2].Z, 1414; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1415; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 1416; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 1417; EG-NEXT: ALU clause starting at 17: 1418; EG-NEXT: LSHR T4.X, T0.W, literal.x, 1419; EG-NEXT: MOV * T3.Y, T1.X, 1420; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1421; 1422; CM-LABEL: global_zextload_v3i16_to_v3i32: 1423; CM: ; %bb.0: ; %entry 1424; CM-NEXT: ALU 4, @12, KC0[CB0:0-32], KC1[] 1425; CM-NEXT: TEX 2 @6 1426; CM-NEXT: ALU 2, @17, KC0[CB0:0-32], KC1[] 1427; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T3, T4.X 1428; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2.X, T0.X 1429; CM-NEXT: CF_END 1430; CM-NEXT: Fetch clause starting at 6: 1431; CM-NEXT: VTX_READ_16 T2.X, T1.X, 4, #1 1432; CM-NEXT: VTX_READ_16 T3.X, T1.X, 0, #1 1433; CM-NEXT: VTX_READ_16 T1.X, T1.X, 2, #1 1434; CM-NEXT: ALU clause starting at 12: 1435; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 1436; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00) 1437; CM-NEXT: LSHR * T0.X, PV.W, literal.x, 1438; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1439; CM-NEXT: MOV * T1.X, KC0[2].Z, 1440; CM-NEXT: ALU clause starting at 17: 1441; CM-NEXT: LSHR T4.X, KC0[2].Y, literal.x, 1442; CM-NEXT: MOV * T3.Y, T1.X, 1443; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1444entry: 1445 %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in 1446 %ext = zext <3 x i16> %ld to <3 x i32> 1447 store <3 x i32> %ext, <3 x i32> addrspace(1)* %out 1448 ret void 1449} 1450 1451define amdgpu_kernel void @global_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) { 1452; GCN-NOHSA-SI-LABEL: global_sextload_v3i16_to_v3i32: 1453; GCN-NOHSA-SI: ; %bb.0: ; %entry 1454; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1455; 
GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 1456; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 1457; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 1458; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 1459; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1460; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 1461; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 1462; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 1463; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 1464; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 1465; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 1466; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 16, v0 1467; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v0, 0, 16 1468; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v1, 0, 16 1469; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:8 1470; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[2:3], off, s[4:7], 0 1471; GCN-NOHSA-SI-NEXT: s_endpgm 1472; 1473; GCN-HSA-LABEL: global_sextload_v3i16_to_v3i32: 1474; GCN-HSA: ; %bb.0: ; %entry 1475; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1476; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1477; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1478; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1479; GCN-HSA-NEXT: flat_load_dwordx2 v[3:4], v[0:1] 1480; GCN-HSA-NEXT: v_mov_b32_e32 v5, s0 1481; GCN-HSA-NEXT: v_mov_b32_e32 v6, s1 1482; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 1483; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v3 1484; GCN-HSA-NEXT: v_bfe_i32 v2, v4, 0, 16 1485; GCN-HSA-NEXT: v_bfe_i32 v0, v3, 0, 16 1486; GCN-HSA-NEXT: flat_store_dwordx3 v[5:6], v[0:2] 1487; GCN-HSA-NEXT: s_endpgm 1488; 1489; GCN-NOHSA-VI-LABEL: global_sextload_v3i16_to_v3i32: 1490; GCN-NOHSA-VI: ; %bb.0: ; %entry 1491; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1492; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1493; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 1494; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 1495; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 1496; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1497; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 1498; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 1499; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 
v[3:4], off, s[8:11], 0 1500; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 1501; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 1502; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 1503; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 16, v3 1504; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v4, 0, 16 1505; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v3, 0, 16 1506; GCN-NOHSA-VI-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 1507; GCN-NOHSA-VI-NEXT: s_endpgm 1508; 1509; EG-LABEL: global_sextload_v3i16_to_v3i32: 1510; EG: ; %bb.0: ; %entry 1511; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 1512; EG-NEXT: TEX 2 @6 1513; EG-NEXT: ALU 9, @13, KC0[CB0:0-32], KC1[] 1514; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0 1515; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1516; EG-NEXT: CF_END 1517; EG-NEXT: Fetch clause starting at 6: 1518; EG-NEXT: VTX_READ_16 T1.X, T0.X, 2, #1 1519; EG-NEXT: VTX_READ_16 T2.X, T0.X, 4, #1 1520; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 1521; EG-NEXT: ALU clause starting at 12: 1522; EG-NEXT: MOV * T0.X, KC0[2].Z, 1523; EG-NEXT: ALU clause starting at 13: 1524; EG-NEXT: BFE_INT * T0.Y, T1.X, 0.0, literal.x, 1525; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1526; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 1527; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 1528; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 1529; EG-NEXT: BFE_INT T2.X, T2.X, 0.0, literal.x, 1530; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1531; EG-NEXT: 16(2.242078e-44), 8(1.121039e-44) 1532; EG-NEXT: LSHR * T3.X, PV.W, literal.x, 1533; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1534; 1535; CM-LABEL: global_sextload_v3i16_to_v3i32: 1536; CM: ; %bb.0: ; %entry 1537; CM-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 1538; CM-NEXT: TEX 2 @6 1539; CM-NEXT: ALU 9, @13, KC0[CB0:0-32], KC1[] 1540; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T2.X 1541; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T3.X 1542; CM-NEXT: CF_END 1543; CM-NEXT: Fetch clause starting at 6: 1544; CM-NEXT: VTX_READ_16 T1.X, T0.X, 4, #1 1545; CM-NEXT: VTX_READ_16 T2.X, 
T0.X, 0, #1 1546; CM-NEXT: VTX_READ_16 T0.X, T0.X, 2, #1 1547; CM-NEXT: ALU clause starting at 12: 1548; CM-NEXT: MOV * T0.X, KC0[2].Z, 1549; CM-NEXT: ALU clause starting at 13: 1550; CM-NEXT: BFE_INT T1.X, T1.X, 0.0, literal.x, 1551; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1552; CM-NEXT: 16(2.242078e-44), 8(1.121039e-44) 1553; CM-NEXT: LSHR T3.X, PV.W, literal.x, 1554; CM-NEXT: BFE_INT * T0.Y, T0.X, 0.0, literal.y, 1555; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1556; CM-NEXT: BFE_INT * T0.X, T2.X, 0.0, literal.x, 1557; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1558; CM-NEXT: LSHR * T2.X, KC0[2].Y, literal.x, 1559; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1560entry: 1561 %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in 1562 %ext = sext <3 x i16> %ld to <3 x i32> 1563 store <3 x i32> %ext, <3 x i32> addrspace(1)* %out 1564 ret void 1565} 1566 1567; TODO: This should use DST, but for some reason there are redundant MOVs 1568define amdgpu_kernel void @global_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 { 1569; GCN-NOHSA-SI-LABEL: global_zextload_v4i16_to_v4i32: 1570; GCN-NOHSA-SI: ; %bb.0: 1571; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1572; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 1573; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 1574; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 1575; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 1576; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1577; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 1578; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 1579; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[4:5], off, s[8:11], 0 1580; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, 0xffff 1581; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 1582; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 1583; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 1584; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v5 1585; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v4 1586; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v2, s2, v5 1587; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, s2, v4 1588; GCN-NOHSA-SI-NEXT:
buffer_store_dwordx4 v[0:3], off, s[4:7], 0 1589; GCN-NOHSA-SI-NEXT: s_endpgm 1590; 1591; GCN-HSA-LABEL: global_zextload_v4i16_to_v4i32: 1592; GCN-HSA: ; %bb.0: 1593; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1594; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1595; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1596; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1597; GCN-HSA-NEXT: flat_load_dwordx2 v[4:5], v[0:1] 1598; GCN-HSA-NEXT: s_mov_b32 s2, 0xffff 1599; GCN-HSA-NEXT: v_mov_b32_e32 v6, s0 1600; GCN-HSA-NEXT: v_mov_b32_e32 v7, s1 1601; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 1602; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v5 1603; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v4 1604; GCN-HSA-NEXT: v_and_b32_e32 v2, s2, v5 1605; GCN-HSA-NEXT: v_and_b32_e32 v0, s2, v4 1606; GCN-HSA-NEXT: flat_store_dwordx4 v[6:7], v[0:3] 1607; GCN-HSA-NEXT: s_endpgm 1608; 1609; GCN-NOHSA-VI-LABEL: global_zextload_v4i16_to_v4i32: 1610; GCN-NOHSA-VI: ; %bb.0: 1611; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1612; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1613; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 1614; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 1615; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 1616; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1617; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 1618; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 1619; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 1620; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, 0xffff 1621; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 1622; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 1623; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 1624; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v1 1625; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v2, s6, v1 1626; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 1627; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, s6, v0 1628; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1629; GCN-NOHSA-VI-NEXT: s_endpgm 1630; 1631; EG-LABEL: global_zextload_v4i16_to_v4i32: 1632; EG: ; %bb.0: 1633; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1634; EG-NEXT: TEX 0 @6 1635; EG-NEXT: ALU 
12, @9, KC0[CB0:0-32], KC1[] 1636; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1 1637; EG-NEXT: CF_END 1638; EG-NEXT: PAD 1639; EG-NEXT: Fetch clause starting at 6: 1640; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 1641; EG-NEXT: ALU clause starting at 8: 1642; EG-NEXT: MOV * T5.X, KC0[2].Z, 1643; EG-NEXT: ALU clause starting at 9: 1644; EG-NEXT: MOV T2.X, T5.X, 1645; EG-NEXT: MOV * T3.X, T5.Y, 1646; EG-NEXT: MOV T0.Y, PV.X, 1647; EG-NEXT: MOV * T0.Z, PS, 1648; EG-NEXT: LSHR * T5.W, PV.Z, literal.x, 1649; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1650; EG-NEXT: AND_INT * T5.Z, T0.Z, literal.x, 1651; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1652; EG-NEXT: LSHR * T5.Y, T0.Y, literal.x, 1653; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1654; EG-NEXT: AND_INT T5.X, T0.Y, literal.x, 1655; EG-NEXT: LSHR * T6.X, KC0[2].Y, literal.y, 1656; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 1657; 1658; CM-LABEL: global_zextload_v4i16_to_v4i32: 1659; CM: ; %bb.0: 1660; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1661; CM-NEXT: TEX 0 @6 1662; CM-NEXT: ALU 13, @9, KC0[CB0:0-32], KC1[] 1663; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T5, T6.X 1664; CM-NEXT: CF_END 1665; CM-NEXT: PAD 1666; CM-NEXT: Fetch clause starting at 6: 1667; CM-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 1668; CM-NEXT: ALU clause starting at 8: 1669; CM-NEXT: MOV * T5.X, KC0[2].Z, 1670; CM-NEXT: ALU clause starting at 9: 1671; CM-NEXT: MOV * T2.X, T5.X, 1672; CM-NEXT: MOV T3.X, T5.Y, 1673; CM-NEXT: MOV * T0.Y, PV.X, 1674; CM-NEXT: MOV * T0.Z, PV.X, 1675; CM-NEXT: LSHR * T5.W, PV.Z, literal.x, 1676; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1677; CM-NEXT: AND_INT * T5.Z, T0.Z, literal.x, 1678; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1679; CM-NEXT: LSHR * T5.Y, T0.Y, literal.x, 1680; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1681; CM-NEXT: AND_INT * T5.X, T0.Y, literal.x, 1682; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1683; CM-NEXT: LSHR * T6.X, KC0[2].Y, literal.x, 1684; CM-NEXT: 2(2.802597e-45), 
0(0.000000e+00) 1685 %load = load <4 x i16>, <4 x i16> addrspace(1)* %in 1686 %ext = zext <4 x i16> %load to <4 x i32> 1687 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out 1688 ret void 1689} 1690 1691; TODO: We should use ASHR instead of LSHR + BFE 1692; TODO: This should use DST, but for some reason there are redundant MOVs 1693define amdgpu_kernel void @global_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 { 1694; GCN-NOHSA-SI-LABEL: global_sextload_v4i16_to_v4i32: 1695; GCN-NOHSA-SI: ; %bb.0: 1696; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1697; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 1698; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 1699; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 1700; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 1701; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1702; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 1703; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 1704; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[3:4], off, s[8:11], 0 1705; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 1706; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 1707; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 1708; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 16, v3 1709; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[5:6], v[3:4], 48 1710; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v4, 0, 16 1711; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v3, 0, 16 1712; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v5 1713; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 1714; GCN-NOHSA-SI-NEXT: s_endpgm 1715; 1716; GCN-HSA-LABEL: global_sextload_v4i16_to_v4i32: 1717; GCN-HSA: ; %bb.0: 1718; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1719; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1720; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1721; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1722; GCN-HSA-NEXT: flat_load_dwordx2 v[3:4], v[0:1] 1723; GCN-HSA-NEXT: v_mov_b32_e32 v5, s0 1724; GCN-HSA-NEXT: v_mov_b32_e32 v6, s1 1725; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 1726; GCN-HSA-NEXT: v_ashr_i64 v[7:8], v[3:4], 48 1727; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v3 1728; 
GCN-HSA-NEXT: v_bfe_i32 v0, v3, 0, 16 1729; GCN-HSA-NEXT: v_bfe_i32 v2, v4, 0, 16 1730; GCN-HSA-NEXT: v_mov_b32_e32 v3, v7 1731; GCN-HSA-NEXT: flat_store_dwordx4 v[5:6], v[0:3] 1732; GCN-HSA-NEXT: s_endpgm 1733; 1734; GCN-NOHSA-VI-LABEL: global_sextload_v4i16_to_v4i32: 1735; GCN-NOHSA-VI: ; %bb.0: 1736; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1737; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1738; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 1739; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 1740; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 1741; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1742; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 1743; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 1744; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[4:5], off, s[8:11], 0 1745; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 1746; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 1747; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 1748; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 16, v5 1749; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 16, v4 1750; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v5, 0, 16 1751; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v4, 0, 16 1752; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1753; GCN-NOHSA-VI-NEXT: s_endpgm 1754; 1755; EG-LABEL: global_sextload_v4i16_to_v4i32: 1756; EG: ; %bb.0: 1757; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1758; EG-NEXT: TEX 0 @6 1759; EG-NEXT: ALU 14, @9, KC0[CB0:0-32], KC1[] 1760; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1 1761; EG-NEXT: CF_END 1762; EG-NEXT: PAD 1763; EG-NEXT: Fetch clause starting at 6: 1764; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 1765; EG-NEXT: ALU clause starting at 8: 1766; EG-NEXT: MOV * T5.X, KC0[2].Z, 1767; EG-NEXT: ALU clause starting at 9: 1768; EG-NEXT: MOV T2.X, T5.X, 1769; EG-NEXT: MOV * T3.X, T5.Y, 1770; EG-NEXT: MOV T0.Y, PV.X, 1771; EG-NEXT: MOV * T0.Z, PS, 1772; EG-NEXT: BFE_INT * T5.Z, PV.Z, 0.0, literal.x, 1773; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1774; EG-NEXT: BFE_INT T5.X, T0.Y, 0.0, literal.x, 1775; EG-NEXT: LSHR * T0.W, T0.Z, literal.x, 
1776; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1777; EG-NEXT: BFE_INT T5.W, PV.W, 0.0, literal.x, 1778; EG-NEXT: LSHR * T0.W, T0.Y, literal.x, 1779; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1780; EG-NEXT: LSHR T6.X, KC0[2].Y, literal.x, 1781; EG-NEXT: BFE_INT * T5.Y, PS, 0.0, literal.y, 1782; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1783; 1784; CM-LABEL: global_sextload_v4i16_to_v4i32: 1785; CM: ; %bb.0: 1786; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1787; CM-NEXT: TEX 0 @6 1788; CM-NEXT: ALU 14, @9, KC0[CB0:0-32], KC1[] 1789; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T5, T6.X 1790; CM-NEXT: CF_END 1791; CM-NEXT: PAD 1792; CM-NEXT: Fetch clause starting at 6: 1793; CM-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 1794; CM-NEXT: ALU clause starting at 8: 1795; CM-NEXT: MOV * T5.X, KC0[2].Z, 1796; CM-NEXT: ALU clause starting at 9: 1797; CM-NEXT: MOV * T2.X, T5.X, 1798; CM-NEXT: MOV T3.X, T5.Y, 1799; CM-NEXT: MOV * T0.Y, PV.X, 1800; CM-NEXT: MOV * T0.Z, PV.X, 1801; CM-NEXT: BFE_INT * T5.Z, PV.Z, 0.0, literal.x, 1802; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1803; CM-NEXT: BFE_INT T5.X, T0.Y, 0.0, literal.x, 1804; CM-NEXT: LSHR * T0.W, T0.Z, literal.x, 1805; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1806; CM-NEXT: LSHR T0.Z, T0.Y, literal.x, 1807; CM-NEXT: BFE_INT * T5.W, PV.W, 0.0, literal.x, 1808; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1809; CM-NEXT: LSHR T6.X, KC0[2].Y, literal.x, 1810; CM-NEXT: BFE_INT * T5.Y, PV.Z, 0.0, literal.y, 1811; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1812 %load = load <4 x i16>, <4 x i16> addrspace(1)* %in 1813 %ext = sext <4 x i16> %load to <4 x i32> 1814 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out 1815 ret void 1816} 1817 1818; TODO: These should use LSHR instead of BFE_UINT 1819define amdgpu_kernel void @global_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 { 1820; GCN-NOHSA-SI-LABEL: global_zextload_v8i16_to_v8i32: 1821; GCN-NOHSA-SI: ; %bb.0: 1822; GCN-NOHSA-SI-NEXT: s_load_dwordx4 
s[0:3], s[0:1], 0x9 1823; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 1824; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 1825; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 1826; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 1827; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1828; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 1829; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 1830; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 1831; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, 0xffff 1832; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 1833; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 1834; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 1835; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v7, 16, v1 1836; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v5, 16, v0 1837; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v11, 16, v3 1838; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v9, 16, v2 1839; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v6, s2, v1 1840; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, s2, v0 1841; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v10, s2, v3 1842; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, s2, v2 1843; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:16 1844; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 1845; GCN-NOHSA-SI-NEXT: s_endpgm 1846; 1847; GCN-HSA-LABEL: global_zextload_v8i16_to_v8i32: 1848; GCN-HSA: ; %bb.0: 1849; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1850; GCN-HSA-NEXT: s_mov_b32 s4, 0xffff 1851; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1852; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1853; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1854; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 1855; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 1856; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 1857; GCN-HSA-NEXT: v_mov_b32_e32 v15, s3 1858; GCN-HSA-NEXT: v_mov_b32_e32 v13, s1 1859; GCN-HSA-NEXT: v_mov_b32_e32 v14, s2 1860; GCN-HSA-NEXT: v_mov_b32_e32 v12, s0 1861; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 1862; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v3 1863; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v2 1864; GCN-HSA-NEXT: v_and_b32_e32 v10, s4, v3 1865; GCN-HSA-NEXT: v_and_b32_e32 v8, s4, 
v2 1866; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v1 1867; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v0 1868; GCN-HSA-NEXT: v_and_b32_e32 v6, s4, v1 1869; GCN-HSA-NEXT: v_and_b32_e32 v4, s4, v0 1870; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[8:11] 1871; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[4:7] 1872; GCN-HSA-NEXT: s_endpgm 1873; 1874; GCN-NOHSA-VI-LABEL: global_zextload_v8i16_to_v8i32: 1875; GCN-NOHSA-VI: ; %bb.0: 1876; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1877; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1878; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 1879; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 1880; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 1881; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1882; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 1883; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 1884; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 1885; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, 0xffff 1886; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 1887; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 1888; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 1889; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v11, 16, v3 1890; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v10, s6, v3 1891; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v2 1892; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v8, s6, v2 1893; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v1 1894; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v6, s6, v1 1895; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v0 1896; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v4, s6, v0 1897; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16 1898; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 1899; GCN-NOHSA-VI-NEXT: s_endpgm 1900; 1901; EG-LABEL: global_zextload_v8i16_to_v8i32: 1902; EG: ; %bb.0: 1903; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1904; EG-NEXT: TEX 0 @6 1905; EG-NEXT: ALU 17, @9, KC0[CB0:0-32], KC1[] 1906; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0 1907; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1 1908; EG-NEXT: CF_END 1909; EG-NEXT: Fetch clause 
starting at 6: 1910; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 1911; EG-NEXT: ALU clause starting at 8: 1912; EG-NEXT: MOV * T7.X, KC0[2].Z, 1913; EG-NEXT: ALU clause starting at 9: 1914; EG-NEXT: LSHR * T8.W, T7.Y, literal.x, 1915; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1916; EG-NEXT: AND_INT * T8.Z, T7.Y, literal.x, 1917; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1918; EG-NEXT: LSHR T8.Y, T7.X, literal.x, 1919; EG-NEXT: LSHR * T9.W, T7.W, literal.x, 1920; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1921; EG-NEXT: AND_INT T8.X, T7.X, literal.x, 1922; EG-NEXT: AND_INT T9.Z, T7.W, literal.x, 1923; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.y, 1924; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 1925; EG-NEXT: LSHR * T9.Y, T7.Z, literal.x, 1926; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1927; EG-NEXT: AND_INT T9.X, T7.Z, literal.x, 1928; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1929; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 1930; EG-NEXT: LSHR * T10.X, PV.W, literal.x, 1931; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1932; 1933; CM-LABEL: global_zextload_v8i16_to_v8i32: 1934; CM: ; %bb.0: 1935; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1936; CM-NEXT: TEX 0 @6 1937; CM-NEXT: ALU 17, @9, KC0[CB0:0-32], KC1[] 1938; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T7, T10.X 1939; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T8, T9.X 1940; CM-NEXT: CF_END 1941; CM-NEXT: Fetch clause starting at 6: 1942; CM-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 1943; CM-NEXT: ALU clause starting at 8: 1944; CM-NEXT: MOV * T7.X, KC0[2].Z, 1945; CM-NEXT: ALU clause starting at 9: 1946; CM-NEXT: LSHR * T8.W, T7.W, literal.x, 1947; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1948; CM-NEXT: AND_INT * T8.Z, T7.W, literal.x, 1949; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1950; CM-NEXT: LSHR T8.Y, T7.Z, literal.x, 1951; CM-NEXT: LSHR * T7.W, T7.Y, literal.x, 1952; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1953; CM-NEXT: AND_INT T8.X, T7.Z, literal.x, 1954; CM-NEXT: AND_INT T7.Z, T7.Y, 
literal.x, 1955; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1956; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 1957; CM-NEXT: LSHR T9.X, PV.W, literal.x, 1958; CM-NEXT: LSHR * T7.Y, T7.X, literal.y, 1959; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1960; CM-NEXT: AND_INT * T7.X, T7.X, literal.x, 1961; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1962; CM-NEXT: LSHR * T10.X, KC0[2].Y, literal.x, 1963; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1964 %load = load <8 x i16>, <8 x i16> addrspace(1)* %in 1965 %ext = zext <8 x i16> %load to <8 x i32> 1966 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out 1967 ret void 1968} 1969 1970; TODO: These should use ASHR instead of LSHR + BFE_INT 1971define amdgpu_kernel void @global_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 { 1972; GCN-NOHSA-SI-LABEL: global_sextload_v8i16_to_v8i32: 1973; GCN-NOHSA-SI: ; %bb.0: 1974; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1975; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 1976; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 1977; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 1978; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 1979; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1980; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 1981; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 1982; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 1983; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 1984; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 1985; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 1986; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v7, 16, v1 1987; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v5, 16, v0 1988; GCN-NOHSA-SI-NEXT: v_bfe_i32 v6, v1, 0, 16 1989; GCN-NOHSA-SI-NEXT: v_bfe_i32 v4, v0, 0, 16 1990; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v11, 16, v3 1991; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v9, 16, v2 1992; GCN-NOHSA-SI-NEXT: v_bfe_i32 v10, v3, 0, 16 1993; GCN-NOHSA-SI-NEXT: v_bfe_i32 v8, v2, 0, 16 1994; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:16 1995; GCN-NOHSA-SI-NEXT: 
buffer_store_dwordx4 v[4:7], off, s[4:7], 0 1996; GCN-NOHSA-SI-NEXT: s_endpgm 1997; 1998; GCN-HSA-LABEL: global_sextload_v8i16_to_v8i32: 1999; GCN-HSA: ; %bb.0: 2000; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2001; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 2002; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 2003; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 2004; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 2005; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 2006; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2007; GCN-HSA-NEXT: v_mov_b32_e32 v15, s3 2008; GCN-HSA-NEXT: v_mov_b32_e32 v13, s1 2009; GCN-HSA-NEXT: v_mov_b32_e32 v14, s2 2010; GCN-HSA-NEXT: v_mov_b32_e32 v12, s0 2011; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 2012; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 16, v3 2013; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 16, v2 2014; GCN-HSA-NEXT: v_bfe_i32 v10, v3, 0, 16 2015; GCN-HSA-NEXT: v_bfe_i32 v8, v2, 0, 16 2016; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 16, v1 2017; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 16, v0 2018; GCN-HSA-NEXT: v_bfe_i32 v6, v1, 0, 16 2019; GCN-HSA-NEXT: v_bfe_i32 v4, v0, 0, 16 2020; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[8:11] 2021; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[4:7] 2022; GCN-HSA-NEXT: s_endpgm 2023; 2024; GCN-NOHSA-VI-LABEL: global_sextload_v8i16_to_v8i32: 2025; GCN-NOHSA-VI: ; %bb.0: 2026; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 2027; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 2028; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 2029; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 2030; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 2031; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2032; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 2033; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 2034; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 2035; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 2036; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 2037; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 2038; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v11, 16, v3 2039; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 16, v2 2040; GCN-NOHSA-VI-NEXT: v_bfe_i32 
v10, v3, 0, 16 2041; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v2, 0, 16 2042; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 16, v1 2043; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v5, 16, v0 2044; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v1, 0, 16 2045; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v0, 0, 16 2046; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16 2047; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 2048; GCN-NOHSA-VI-NEXT: s_endpgm 2049; 2050; EG-LABEL: global_sextload_v8i16_to_v8i32: 2051; EG: ; %bb.0: 2052; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 2053; EG-NEXT: TEX 0 @6 2054; EG-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[] 2055; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0 2056; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1 2057; EG-NEXT: CF_END 2058; EG-NEXT: Fetch clause starting at 6: 2059; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 2060; EG-NEXT: ALU clause starting at 8: 2061; EG-NEXT: MOV * T7.X, KC0[2].Z, 2062; EG-NEXT: ALU clause starting at 9: 2063; EG-NEXT: BFE_INT * T8.Z, T7.Y, 0.0, literal.x, 2064; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2065; EG-NEXT: BFE_INT T8.X, T7.X, 0.0, literal.x, 2066; EG-NEXT: BFE_INT T9.Z, T7.W, 0.0, literal.x, 2067; EG-NEXT: LSHR * T0.W, T7.Y, literal.x, 2068; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2069; EG-NEXT: BFE_INT T9.X, T7.Z, 0.0, literal.x, 2070; EG-NEXT: LSHR T0.Z, T7.W, literal.x, 2071; EG-NEXT: BFE_INT T8.W, PV.W, 0.0, literal.x, 2072; EG-NEXT: LSHR * T0.W, T7.X, literal.x, 2073; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2074; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x, 2075; EG-NEXT: BFE_INT T8.Y, PS, 0.0, literal.y, 2076; EG-NEXT: LSHR T1.Z, T7.Z, literal.y, 2077; EG-NEXT: BFE_INT T9.W, PV.Z, 0.0, literal.y, 2078; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2079; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2080; EG-NEXT: LSHR T10.X, PS, literal.x, 2081; EG-NEXT: BFE_INT * T9.Y, PV.Z, 0.0, literal.y, 2082; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2083; 2084; CM-LABEL: 
global_sextload_v8i16_to_v8i32: 2085; CM: ; %bb.0: 2086; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 2087; CM-NEXT: TEX 0 @6 2088; CM-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[] 2089; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T9, T7.X 2090; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T8, T10.X 2091; CM-NEXT: CF_END 2092; CM-NEXT: Fetch clause starting at 6: 2093; CM-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 2094; CM-NEXT: ALU clause starting at 8: 2095; CM-NEXT: MOV * T7.X, KC0[2].Z, 2096; CM-NEXT: ALU clause starting at 9: 2097; CM-NEXT: BFE_INT * T8.Z, T7.W, 0.0, literal.x, 2098; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2099; CM-NEXT: BFE_INT T8.X, T7.Z, 0.0, literal.x, 2100; CM-NEXT: LSHR T0.Y, T7.Y, literal.x, 2101; CM-NEXT: BFE_INT T9.Z, T7.Y, 0.0, literal.x, 2102; CM-NEXT: LSHR * T0.W, T7.W, literal.x, 2103; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2104; CM-NEXT: BFE_INT T9.X, T7.X, 0.0, literal.x, 2105; CM-NEXT: LSHR T1.Y, T7.Z, literal.x, 2106; CM-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.x, 2107; CM-NEXT: BFE_INT * T8.W, PV.W, 0.0, literal.x, 2108; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2109; CM-NEXT: LSHR T10.X, PV.Z, literal.x, 2110; CM-NEXT: BFE_INT T8.Y, PV.Y, 0.0, literal.y, 2111; CM-NEXT: LSHR T0.Z, T7.X, literal.y, 2112; CM-NEXT: BFE_INT * T9.W, T0.Y, 0.0, literal.y, 2113; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2114; CM-NEXT: LSHR T7.X, KC0[2].Y, literal.x, 2115; CM-NEXT: BFE_INT * T9.Y, PV.Z, 0.0, literal.y, 2116; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2117 %load = load <8 x i16>, <8 x i16> addrspace(1)* %in 2118 %ext = sext <8 x i16> %load to <8 x i32> 2119 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out 2120 ret void 2121} 2122 2123define amdgpu_kernel void @global_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 { 2124; GCN-NOHSA-SI-LABEL: global_zextload_v16i16_to_v16i32: 2125; GCN-NOHSA-SI: ; %bb.0: 2126; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 2127; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 
2128; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 2129; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 2130; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 2131; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2132; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 2133; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 2134; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 2135; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, 0xffff 2136; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 2137; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 2138; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 2139; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1) 2140; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v11, 16, v1 2141; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v9, 16, v0 2142; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v15, 16, v3 2143; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v13, 16, v2 2144; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 2145; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v19, 16, v5 2146; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v17, 16, v4 2147; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v10, s2, v1 2148; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, s2, v0 2149; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v14, s2, v3 2150; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v12, s2, v2 2151; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v7 2152; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v6 2153; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v18, s2, v5 2154; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v16, s2, v4 2155; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v2, s2, v7 2156; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, s2, v6 2157; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:48 2158; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 offset:32 2159; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 offset:16 2160; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 2161; GCN-NOHSA-SI-NEXT: s_endpgm 2162; 2163; GCN-HSA-LABEL: global_zextload_v16i16_to_v16i32: 2164; GCN-HSA: ; %bb.0: 2165; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2166; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 2167; 
GCN-HSA-NEXT: s_add_u32 s4, s2, 16 2168; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2169; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 2170; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 2171; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2172; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 2173; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 2174; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] 2175; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 2176; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2177; GCN-HSA-NEXT: v_mov_b32_e32 v12, s3 2178; GCN-HSA-NEXT: v_mov_b32_e32 v11, s2 2179; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 2180; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2181; GCN-HSA-NEXT: v_mov_b32_e32 v15, s1 2182; GCN-HSA-NEXT: s_mov_b32 s4, 0xffff 2183; GCN-HSA-NEXT: v_mov_b32_e32 v14, s0 2184; GCN-HSA-NEXT: s_add_u32 s0, s0, 32 2185; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 2186; GCN-HSA-NEXT: v_mov_b32_e32 v17, s3 2187; GCN-HSA-NEXT: v_mov_b32_e32 v16, s2 2188; GCN-HSA-NEXT: s_waitcnt vmcnt(1) 2189; GCN-HSA-NEXT: v_lshrrev_b32_e32 v13, 16, v3 2190; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 2191; GCN-HSA-NEXT: v_lshrrev_b32_e32 v10, 16, v7 2192; GCN-HSA-NEXT: v_and_b32_e32 v9, s4, v7 2193; GCN-HSA-NEXT: v_lshrrev_b32_e32 v8, 16, v6 2194; GCN-HSA-NEXT: v_and_b32_e32 v7, s4, v6 2195; GCN-HSA-NEXT: flat_store_dwordx4 v[11:12], v[7:10] 2196; GCN-HSA-NEXT: v_and_b32_e32 v12, s4, v3 2197; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v1 2198; GCN-HSA-NEXT: v_and_b32_e32 v8, s4, v1 2199; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v0 2200; GCN-HSA-NEXT: v_and_b32_e32 v6, s4, v0 2201; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v2 2202; GCN-HSA-NEXT: v_and_b32_e32 v10, s4, v2 2203; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v5 2204; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v4 2205; GCN-HSA-NEXT: v_and_b32_e32 v2, s4, v5 2206; GCN-HSA-NEXT: v_and_b32_e32 v0, s4, v4 2207; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 2208; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 2209; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[0:3] 2210; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[10:13] 2211; GCN-HSA-NEXT: 
flat_store_dwordx4 v[4:5], v[6:9] 2212; GCN-HSA-NEXT: s_endpgm 2213; 2214; GCN-NOHSA-VI-LABEL: global_zextload_v16i16_to_v16i32: 2215; GCN-NOHSA-VI: ; %bb.0: 2216; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 2217; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 2218; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 2219; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 2220; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 2221; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2222; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 2223; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 2224; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 2225; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 2226; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, 0xffff 2227; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 2228; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 2229; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1) 2230; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v11, 16, v1 2231; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 2232; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v19, 16, v7 2233; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v18, s6, v7 2234; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v17, 16, v6 2235; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v16, s6, v6 2236; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v10, s6, v1 2237; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v0 2238; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v8, s6, v0 2239; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v15, 16, v3 2240; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v14, s6, v3 2241; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v13, 16, v2 2242; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v12, s6, v2 2243; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v5 2244; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v2, s6, v5 2245; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v4 2246; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, s6, v4 2247; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48 2248; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2249; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16 2250; GCN-NOHSA-VI-NEXT: 
buffer_store_dwordx4 v[8:11], off, s[0:3], 0 2251; GCN-NOHSA-VI-NEXT: s_endpgm 2252; 2253; EG-LABEL: global_zextload_v16i16_to_v16i32: 2254; EG: ; %bb.0: 2255; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 2256; EG-NEXT: TEX 1 @8 2257; EG-NEXT: ALU 35, @13, KC0[CB0:0-32], KC1[] 2258; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T18.X, 0 2259; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T11.X, 0 2260; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0 2261; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T12.X, 1 2262; EG-NEXT: CF_END 2263; EG-NEXT: Fetch clause starting at 8: 2264; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 0, #1 2265; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 16, #1 2266; EG-NEXT: ALU clause starting at 12: 2267; EG-NEXT: MOV * T11.X, KC0[2].Z, 2268; EG-NEXT: ALU clause starting at 13: 2269; EG-NEXT: LSHR * T13.W, T12.Y, literal.x, 2270; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2271; EG-NEXT: AND_INT * T13.Z, T12.Y, literal.x, 2272; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2273; EG-NEXT: LSHR T13.Y, T12.X, literal.x, 2274; EG-NEXT: LSHR * T14.W, T12.W, literal.x, 2275; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2276; EG-NEXT: AND_INT T13.X, T12.X, literal.x, 2277; EG-NEXT: AND_INT T14.Z, T12.W, literal.x, 2278; EG-NEXT: LSHR * T12.X, KC0[2].Y, literal.y, 2279; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 2280; EG-NEXT: LSHR T14.Y, T12.Z, literal.x, 2281; EG-NEXT: LSHR * T15.W, T11.Y, literal.x, 2282; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2283; EG-NEXT: AND_INT T14.X, T12.Z, literal.x, 2284; EG-NEXT: AND_INT T15.Z, T11.Y, literal.x, 2285; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2286; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 2287; EG-NEXT: LSHR T16.X, PV.W, literal.x, 2288; EG-NEXT: LSHR T15.Y, T11.X, literal.y, 2289; EG-NEXT: LSHR T17.W, T11.W, literal.y, 2290; EG-NEXT: AND_INT * T15.X, T11.X, literal.z, 2291; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2292; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2293; EG-NEXT: AND_INT 
T17.Z, T11.W, literal.x, 2294; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2295; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 2296; EG-NEXT: LSHR T11.X, PV.W, literal.x, 2297; EG-NEXT: LSHR T17.Y, T11.Z, literal.y, 2298; EG-NEXT: AND_INT * T17.X, T11.Z, literal.z, 2299; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2300; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2301; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 2302; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 2303; EG-NEXT: LSHR * T18.X, PV.W, literal.x, 2304; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2305; 2306; CM-LABEL: global_zextload_v16i16_to_v16i32: 2307; CM: ; %bb.0: 2308; CM-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 2309; CM-NEXT: TEX 1 @8 2310; CM-NEXT: ALU 33, @13, KC0[CB0:0-32], KC1[] 2311; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T11, T18.X 2312; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T15, T17.X 2313; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T12, T16.X 2314; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T13, T14.X 2315; CM-NEXT: CF_END 2316; CM-NEXT: Fetch clause starting at 8: 2317; CM-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1 2318; CM-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1 2319; CM-NEXT: ALU clause starting at 12: 2320; CM-NEXT: MOV * T11.X, KC0[2].Z, 2321; CM-NEXT: ALU clause starting at 13: 2322; CM-NEXT: LSHR * T13.W, T12.W, literal.x, 2323; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2324; CM-NEXT: AND_INT * T13.Z, T12.W, literal.x, 2325; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2326; CM-NEXT: LSHR T13.Y, T12.Z, literal.x, 2327; CM-NEXT: LSHR * T12.W, T12.Y, literal.x, 2328; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2329; CM-NEXT: AND_INT T13.X, T12.Z, literal.x, 2330; CM-NEXT: AND_INT T12.Z, T12.Y, literal.x, 2331; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2332; CM-NEXT: 65535(9.183409e-41), 48(6.726233e-44) 2333; CM-NEXT: LSHR T14.X, PV.W, literal.x, 2334; CM-NEXT: LSHR T12.Y, T12.X, literal.y, 2335; CM-NEXT: LSHR * T15.W, T11.W, literal.y, 2336; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 
2337; CM-NEXT: AND_INT T12.X, T12.X, literal.x, 2338; CM-NEXT: AND_INT T15.Z, T11.W, literal.x, 2339; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2340; CM-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 2341; CM-NEXT: LSHR T16.X, PV.W, literal.x, 2342; CM-NEXT: LSHR T15.Y, T11.Z, literal.y, 2343; CM-NEXT: LSHR * T11.W, T11.Y, literal.y, 2344; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2345; CM-NEXT: AND_INT T15.X, T11.Z, literal.x, 2346; CM-NEXT: AND_INT T11.Z, T11.Y, literal.x, 2347; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2348; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 2349; CM-NEXT: LSHR T17.X, PV.W, literal.x, 2350; CM-NEXT: LSHR * T11.Y, T11.X, literal.y, 2351; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2352; CM-NEXT: AND_INT * T11.X, T11.X, literal.x, 2353; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2354; CM-NEXT: LSHR * T18.X, KC0[2].Y, literal.x, 2355; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2356 %load = load <16 x i16>, <16 x i16> addrspace(1)* %in 2357 %ext = zext <16 x i16> %load to <16 x i32> 2358 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out 2359 ret void 2360} 2361

; Test: sign-extending load of <16 x i16> from global memory (addrspace(1)),
; widened to <16 x i32> and stored back to global memory.
;
; The assertion blocks inside this function are autogenerated (see the NOTE on
; the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand. There is one
; block per FileCheck prefix selected by the RUN lines: GCN-NOHSA-SI, GCN-HSA,
; GCN-NOHSA-VI, EG and CM.
;
; What the expected output shows: every block contains two loads
; (*_load_dwordx4 / VTX_READ_128) and four stores. The three GCN blocks widen
; each loaded dword with v_bfe_i32 (operands 0, 16) and v_ashrrev_i32 by 16;
; the EG and CM blocks use BFE_INT, fed by LSHR by 16 where a shifted source
; is needed.
define amdgpu_kernel void @global_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
; GCN-NOHSA-SI-LABEL: global_sextload_v16i16_to_v16i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1)
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v11, 16, v1
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v9, 16, v0
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v10, v1, 0, 16
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v8, v0, 0, 16
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v15, 16, v3
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v13, 16, v2
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v14, v3, 0, 16
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v12, v2, 0, 16
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 16, v5
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 16, v4
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v5, 0, 16
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v4, 0, 16
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v19, 16, v7
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v17, 16, v6
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v18, v7, 0, 16
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v16, v6, 0, 16
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 offset:48
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:32
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 offset:16
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GCN-NOHSA-SI-NEXT: s_endpgm
;
; GCN-HSA-LABEL: global_sextload_v16i16_to_v16i32:
; GCN-HSA: ; %bb.0:
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
; GCN-HSA-NEXT: s_add_u32 s2, s2, 16
; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v19, s3
; GCN-HSA-NEXT: v_mov_b32_e32 v18, s2
; GCN-HSA-NEXT: s_add_u32 s2, s0, 48
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v17, s1
; GCN-HSA-NEXT: v_mov_b32_e32 v16, s0
; GCN-HSA-NEXT: s_add_u32 s0, s0, 32
; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v21, s3
; GCN-HSA-NEXT: v_mov_b32_e32 v23, s1
; GCN-HSA-NEXT: v_mov_b32_e32 v20, s2
; GCN-HSA-NEXT: v_mov_b32_e32 v22, s0
; GCN-HSA-NEXT: s_waitcnt vmcnt(1)
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 16, v0
; GCN-HSA-NEXT: v_bfe_i32 v8, v0, 0, 16
; GCN-HSA-NEXT: v_bfe_i32 v10, v1, 0, 16
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v15, 16, v3
; GCN-HSA-NEXT: v_bfe_i32 v14, v3, 0, 16
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 16, v2
; GCN-HSA-NEXT: v_bfe_i32 v12, v2, 0, 16
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 16, v1
; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[12:15]
; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11]
; GCN-HSA-NEXT: s_waitcnt vmcnt(2)
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v5
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v10, 16, v7
; GCN-HSA-NEXT: v_bfe_i32 v9, v7, 0, 16
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v8, 16, v6
; GCN-HSA-NEXT: v_bfe_i32 v7, v6, 0, 16
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v4
; GCN-HSA-NEXT: v_bfe_i32 v2, v5, 0, 16
; GCN-HSA-NEXT: v_bfe_i32 v0, v4, 0, 16
; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[7:10]
; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[0:3]
; GCN-HSA-NEXT: s_endpgm
;
; GCN-NOHSA-VI-LABEL: global_sextload_v16i16_to_v16i32:
; GCN-NOHSA-VI: ; %bb.0:
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000
; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1
; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2
; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6
; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7
; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1)
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v11, 16, v1
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v19, 16, v7
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v17, 16, v6
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v18, v7, 0, 16
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v16, v6, 0, 16
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v10, v1, 0, 16
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 16, v0
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v0, 0, 16
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v15, 16, v3
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v14, v3, 0, 16
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v13, 16, v2
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v12, v2, 0, 16
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 16, v5
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 16, v4
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v5, 0, 16
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v4, 0, 16
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0
; GCN-NOHSA-VI-NEXT: s_endpgm
;
; EG-LABEL: global_sextload_v16i16_to_v16i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 1 @8
; EG-NEXT: ALU 39, @13, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T12.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T11.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T14.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T13.X, 1
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 8:
; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1
; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1
; EG-NEXT: ALU clause starting at 12:
; EG-NEXT: MOV * T11.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 13:
; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: LSHR T14.X, PV.W, literal.x,
; EG-NEXT: BFE_INT * T15.Z, T11.Y, 0.0, literal.y,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: BFE_INT T15.X, T11.X, 0.0, literal.x,
; EG-NEXT: LSHR T0.Y, T12.W, literal.x,
; EG-NEXT: BFE_INT T16.Z, T11.W, 0.0, literal.x, BS:VEC_120/SCL_212
; EG-NEXT: LSHR T0.W, T12.Y, literal.x,
; EG-NEXT: LSHR * T1.W, T11.Y, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T16.X, T11.Z, 0.0, literal.x,
; EG-NEXT: LSHR T1.Y, T11.W, literal.x,
; EG-NEXT: BFE_INT T17.Z, T12.Y, 0.0, literal.x,
; EG-NEXT: BFE_INT T15.W, PS, 0.0, literal.x,
; EG-NEXT: LSHR * T1.W, T11.X, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T17.X, T12.X, 0.0, literal.x,
; EG-NEXT: BFE_INT T15.Y, PS, 0.0, literal.x,
; EG-NEXT: BFE_INT T18.Z, T12.W, 0.0, literal.x,
; EG-NEXT: BFE_INT T16.W, PV.Y, 0.0, literal.x,
; EG-NEXT: LSHR * T1.W, T11.Z, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T18.X, T12.Z, 0.0, literal.x,
; EG-NEXT: BFE_INT T16.Y, PS, 0.0, literal.x,
; EG-NEXT: LSHR T0.Z, T12.X, literal.x,
; EG-NEXT: BFE_INT T17.W, T0.W, 0.0, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 16(2.242078e-44), 32(4.484155e-44)
; EG-NEXT: LSHR T11.X, PS, literal.x,
; EG-NEXT: BFE_INT T17.Y, PV.Z, 0.0, literal.y,
; EG-NEXT: LSHR T0.Z, T12.Z, literal.y,
; EG-NEXT: BFE_INT T18.W, T0.Y, 0.0, literal.y,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T12.X, PS, literal.x,
; EG-NEXT: BFE_INT * T18.Y, PV.Z, 0.0, literal.y,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
;
; CM-LABEL: global_sextload_v16i16_to_v16i32:
; CM: ; %bb.0:
; CM-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
; CM-NEXT: TEX 1 @8
; CM-NEXT: ALU 40, @13, KC0[CB0:0-32], KC1[]
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T17, T11.X
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T12, T18.X
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T16, T14.X
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T15, T13.X
; CM-NEXT: CF_END
; CM-NEXT: Fetch clause starting at 8:
; CM-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1
; CM-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1
; CM-NEXT: ALU clause starting at 12:
; CM-NEXT: MOV * T11.X, KC0[2].Z,
; CM-NEXT: ALU clause starting at 13:
; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; CM-NEXT: 48(6.726233e-44), 0(0.000000e+00)
; CM-NEXT: LSHR T13.X, PV.W, literal.x,
; CM-NEXT: LSHR T0.Y, T11.Y, literal.y,
; CM-NEXT: LSHR T0.Z, T11.Z, literal.y,
; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; CM-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; CM-NEXT: LSHR T14.X, PV.W, literal.x,
; CM-NEXT: LSHR T1.Y, T11.W, literal.y,
; CM-NEXT: BFE_INT T15.Z, T12.W, 0.0, literal.y, BS:VEC_120/SCL_212
; CM-NEXT: LSHR * T0.W, T12.X, literal.y,
; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; CM-NEXT: BFE_INT T15.X, T12.Z, 0.0, literal.x,
; CM-NEXT: LSHR T2.Y, T12.Y, literal.x,
; CM-NEXT: BFE_INT T16.Z, T12.Y, 0.0, literal.x,
; CM-NEXT: LSHR * T1.W, T12.W, literal.x,
; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; CM-NEXT: BFE_INT T16.X, T12.X, 0.0, literal.x,
; CM-NEXT: LSHR T3.Y, T12.Z, literal.x,
; CM-NEXT: BFE_INT T12.Z, T11.W, 0.0, literal.x,
; CM-NEXT: BFE_INT * T15.W, PV.W, 0.0, literal.x,
; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; CM-NEXT: BFE_INT T12.X, T11.Z, 0.0, literal.x,
; CM-NEXT: BFE_INT T15.Y, PV.Y, 0.0, literal.x,
; CM-NEXT: BFE_INT T17.Z, T11.Y, 0.0, literal.x,
; CM-NEXT: BFE_INT * T16.W, T2.Y, 0.0, literal.x, BS:VEC_120/SCL_212
; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; CM-NEXT: BFE_INT T17.X, T11.X, 0.0, literal.x,
; CM-NEXT: BFE_INT T16.Y, T0.W, 0.0, literal.x,
; CM-NEXT: ADD_INT T1.Z, KC0[2].Y, literal.x,
; CM-NEXT: BFE_INT * T12.W, T1.Y, 0.0, literal.x,
; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; CM-NEXT: LSHR T18.X, PV.Z, literal.x,
; CM-NEXT: BFE_INT T12.Y, T0.Z, 0.0, literal.y,
; CM-NEXT: LSHR T0.Z, T11.X, literal.y,
; CM-NEXT: BFE_INT * T17.W, T0.Y, 0.0, literal.y,
; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; CM-NEXT: LSHR T11.X, KC0[2].Y, literal.x,
; CM-NEXT: BFE_INT * T17.Y, PV.Z, 0.0, literal.y,
; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; IR under test: one vector load from %in, a sext of every lane from i16 to
; i32, and one vector store of the widened value to %out.
  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
  %ext = sext <16 x i16> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
  ret void
}
2608 2609define amdgpu_kernel void @global_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 { 2610; GCN-NOHSA-SI-LABEL: global_zextload_v32i16_to_v32i32: 2611; GCN-NOHSA-SI: ; %bb.0: 2612; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 2613; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 2614; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 2615; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, 0xffff 2616; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 2617; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 2618; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2619; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 2620; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 2621; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 2622; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 2623; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32 2624; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[12:15],
off, s[8:11], 0 offset:48 2625; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(3) 2626; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v19, 16, v3 2627; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v17, 16, v2 2628; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v23, 16, v1 2629; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v21, 16, v0 2630; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(2) 2631; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v27, 16, v7 2632; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v25, 16, v6 2633; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v18, s0, v3 2634; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v16, s0, v2 2635; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v22, s0, v1 2636; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v20, s0, v0 2637; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v5 2638; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v4 2639; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v26, s0, v7 2640; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v24, s0, v6 2641; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v2, s0, v5 2642; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, s0, v4 2643; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1) 2644; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v7, 16, v11 2645; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v5, 16, v10 2646; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v31, 16, v9 2647; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v29, 16, v8 2648; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v6, s0, v11 2649; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, s0, v10 2650; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v30, s0, v9 2651; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v28, s0, v8 2652; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 2653; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v11, 16, v15 2654; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v9, 16, v14 2655; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v35, 16, v13 2656; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v33, 16, v12 2657; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v10, s0, v15 2658; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, s0, v14 2659; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v34, s0, v13 2660; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v32, s0, v12 2661; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 2662; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 2663; 
GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:96 2664; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112 2665; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:64 2666; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80 2667; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2668; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:48 2669; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 2670; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16 2671; GCN-NOHSA-SI-NEXT: s_endpgm 2672; 2673; GCN-HSA-LABEL: global_zextload_v32i16_to_v32i32: 2674; GCN-HSA: ; %bb.0: 2675; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2676; GCN-HSA-NEXT: s_mov_b32 s14, 0xffff 2677; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 2678; GCN-HSA-NEXT: s_add_u32 s4, s2, 16 2679; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 2680; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 2681; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 2682; GCN-HSA-NEXT: s_add_u32 s4, s2, 32 2683; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 2684; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 2685; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 2686; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 2687; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 2688; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] 2689; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 2690; GCN-HSA-NEXT: s_add_u32 s2, s2, 48 2691; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 2692; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 2693; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 2694; GCN-HSA-NEXT: flat_load_dwordx4 v[8:11], v[8:9] 2695; GCN-HSA-NEXT: flat_load_dwordx4 v[12:15], v[12:13] 2696; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 2697; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2698; GCN-HSA-NEXT: s_add_u32 s4, s0, 0x60 2699; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 2700; GCN-HSA-NEXT: s_add_u32 s6, s0, 0x70 2701; GCN-HSA-NEXT: s_addc_u32 s7, s1, 0 2702; GCN-HSA-NEXT: s_add_u32 s8, s0, 64 2703; GCN-HSA-NEXT: s_addc_u32 
s9, s1, 0 2704; GCN-HSA-NEXT: s_add_u32 s10, s0, 0x50 2705; GCN-HSA-NEXT: s_addc_u32 s11, s1, 0 2706; GCN-HSA-NEXT: s_add_u32 s12, s0, 32 2707; GCN-HSA-NEXT: s_addc_u32 s13, s1, 0 2708; GCN-HSA-NEXT: v_mov_b32_e32 v22, s7 2709; GCN-HSA-NEXT: v_mov_b32_e32 v21, s6 2710; GCN-HSA-NEXT: s_waitcnt vmcnt(3) 2711; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v1 2712; GCN-HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v0 2713; GCN-HSA-NEXT: v_and_b32_e32 v18, s14, v1 2714; GCN-HSA-NEXT: v_and_b32_e32 v16, s14, v0 2715; GCN-HSA-NEXT: v_mov_b32_e32 v0, s12 2716; GCN-HSA-NEXT: v_mov_b32_e32 v1, s13 2717; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[16:19] 2718; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 2719; GCN-HSA-NEXT: s_waitcnt vmcnt(3) 2720; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v5 2721; GCN-HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v4 2722; GCN-HSA-NEXT: v_and_b32_e32 v18, s14, v5 2723; GCN-HSA-NEXT: v_and_b32_e32 v16, s14, v4 2724; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9 2725; GCN-HSA-NEXT: v_mov_b32_e32 v4, s10 2726; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[16:19] 2727; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 2728; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v7 2729; GCN-HSA-NEXT: v_and_b32_e32 v18, s14, v7 2730; GCN-HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v6 2731; GCN-HSA-NEXT: v_and_b32_e32 v16, s14, v6 2732; GCN-HSA-NEXT: v_mov_b32_e32 v5, s11 2733; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[16:19] 2734; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 2735; GCN-HSA-NEXT: s_add_u32 s0, s0, 48 2736; GCN-HSA-NEXT: v_mov_b32_e32 v20, s3 2737; GCN-HSA-NEXT: s_waitcnt vmcnt(3) 2738; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v13 2739; GCN-HSA-NEXT: v_and_b32_e32 v6, s14, v13 2740; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v12 2741; GCN-HSA-NEXT: v_and_b32_e32 v4, s14, v12 2742; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[4:7] 2743; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 2744; GCN-HSA-NEXT: v_mov_b32_e32 v19, s2 2745; GCN-HSA-NEXT: v_lshrrev_b32_e32 v18, 16, v15 2746; GCN-HSA-NEXT: v_and_b32_e32 v17, s14, v15 2747; GCN-HSA-NEXT: 
v_lshrrev_b32_e32 v16, 16, v14 2748; GCN-HSA-NEXT: v_and_b32_e32 v15, s14, v14 2749; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 2750; GCN-HSA-NEXT: flat_store_dwordx4 v[19:20], v[15:18] 2751; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 2752; GCN-HSA-NEXT: v_lshrrev_b32_e32 v18, 16, v9 2753; GCN-HSA-NEXT: v_lshrrev_b32_e32 v16, 16, v8 2754; GCN-HSA-NEXT: v_and_b32_e32 v15, s14, v8 2755; GCN-HSA-NEXT: v_mov_b32_e32 v8, s1 2756; GCN-HSA-NEXT: v_and_b32_e32 v17, s14, v9 2757; GCN-HSA-NEXT: v_lshrrev_b32_e32 v6, 16, v3 2758; GCN-HSA-NEXT: v_and_b32_e32 v5, s14, v3 2759; GCN-HSA-NEXT: v_lshrrev_b32_e32 v4, 16, v2 2760; GCN-HSA-NEXT: v_and_b32_e32 v3, s14, v2 2761; GCN-HSA-NEXT: v_lshrrev_b32_e32 v14, 16, v11 2762; GCN-HSA-NEXT: v_and_b32_e32 v13, s14, v11 2763; GCN-HSA-NEXT: v_lshrrev_b32_e32 v12, 16, v10 2764; GCN-HSA-NEXT: v_and_b32_e32 v11, s14, v10 2765; GCN-HSA-NEXT: v_mov_b32_e32 v7, s0 2766; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[15:18] 2767; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[11:14] 2768; GCN-HSA-NEXT: flat_store_dwordx4 v[7:8], v[3:6] 2769; GCN-HSA-NEXT: s_endpgm 2770; 2771; GCN-NOHSA-VI-LABEL: global_zextload_v32i16_to_v32i32: 2772; GCN-NOHSA-VI: ; %bb.0: 2773; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 2774; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 2775; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 2776; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 2777; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 2778; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2779; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 2780; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 2781; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 2782; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 2783; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32 2784; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48 2785; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, 0xffff 2786; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 2787; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(3) 2788; 
GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v19, 16, v3 2789; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v18, s0, v3 2790; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v17, 16, v2 2791; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v16, s0, v2 2792; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v1 2793; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v2, s0, v1 2794; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 2795; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, s0, v0 2796; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(2) 2797; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v23, 16, v7 2798; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v22, s0, v7 2799; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v21, 16, v6 2800; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v20, s0, v6 2801; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v5 2802; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v6, s0, v5 2803; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v4 2804; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v4, s0, v4 2805; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1) 2806; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v27, 16, v11 2807; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v26, s0, v11 2808; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v25, 16, v10 2809; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v24, s0, v10 2810; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v11, 16, v9 2811; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v10, s0, v9 2812; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v8 2813; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v8, s0, v8 2814; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 2815; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v31, 16, v15 2816; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v30, s0, v15 2817; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v29, 16, v14 2818; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v28, s0, v14 2819; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v15, 16, v13 2820; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v14, s0, v13 2821; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v13, 16, v12 2822; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v12, s0, v12 2823; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 2824; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:96 2825; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 
v[28:31], off, s[0:3], 0 offset:112 2826; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:64 2827; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:80 2828; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:32 2829; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:48 2830; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 2831; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16 2832; GCN-NOHSA-VI-NEXT: s_endpgm 2833; 2834; EG-LABEL: global_zextload_v32i16_to_v32i32: 2835; EG: ; %bb.0: 2836; EG-NEXT: ALU 0, @20, KC0[CB0:0-32], KC1[] 2837; EG-NEXT: TEX 3 @12 2838; EG-NEXT: ALU 72, @21, KC0[CB0:0-32], KC1[] 2839; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T34.X, 0 2840; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T33.X, 0 2841; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T32.X, 0 2842; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T30.X, 0 2843; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T29.X, 0 2844; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T27.X, 0 2845; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T26.X, 0 2846; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T24.X, 1 2847; EG-NEXT: CF_END 2848; EG-NEXT: Fetch clause starting at 12: 2849; EG-NEXT: VTX_READ_128 T20.XYZW, T19.X, 0, #1 2850; EG-NEXT: VTX_READ_128 T21.XYZW, T19.X, 48, #1 2851; EG-NEXT: VTX_READ_128 T22.XYZW, T19.X, 32, #1 2852; EG-NEXT: VTX_READ_128 T19.XYZW, T19.X, 16, #1 2853; EG-NEXT: ALU clause starting at 20: 2854; EG-NEXT: MOV * T19.X, KC0[2].Z, 2855; EG-NEXT: ALU clause starting at 21: 2856; EG-NEXT: LSHR * T23.W, T20.W, literal.x, 2857; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2858; EG-NEXT: AND_INT * T23.Z, T20.W, literal.x, 2859; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2860; EG-NEXT: LSHR T23.Y, T20.Z, literal.x, 2861; EG-NEXT: LSHR * T20.W, T20.Y, literal.x, 2862; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2863; EG-NEXT: AND_INT T23.X, 
T20.Z, literal.x, 2864; EG-NEXT: AND_INT T20.Z, T20.Y, literal.x, 2865; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2866; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 2867; EG-NEXT: LSHR T24.X, PV.W, literal.x, 2868; EG-NEXT: LSHR T20.Y, T20.X, literal.y, 2869; EG-NEXT: LSHR T25.W, T19.W, literal.y, 2870; EG-NEXT: AND_INT * T20.X, T20.X, literal.z, 2871; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2872; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2873; EG-NEXT: AND_INT * T25.Z, T19.W, literal.x, 2874; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2875; EG-NEXT: LSHR T26.X, KC0[2].Y, literal.x, 2876; EG-NEXT: LSHR T25.Y, T19.Z, literal.y, 2877; EG-NEXT: LSHR T19.W, T19.Y, literal.y, 2878; EG-NEXT: AND_INT * T25.X, T19.Z, literal.z, 2879; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2880; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2881; EG-NEXT: AND_INT T19.Z, T19.Y, literal.x, 2882; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2883; EG-NEXT: 65535(9.183409e-41), 48(6.726233e-44) 2884; EG-NEXT: LSHR T27.X, PV.W, literal.x, 2885; EG-NEXT: LSHR T19.Y, T19.X, literal.y, 2886; EG-NEXT: LSHR T28.W, T22.W, literal.y, 2887; EG-NEXT: AND_INT * T19.X, T19.X, literal.z, 2888; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2889; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2890; EG-NEXT: AND_INT T28.Z, T22.W, literal.x, 2891; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2892; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 2893; EG-NEXT: LSHR T29.X, PV.W, literal.x, 2894; EG-NEXT: LSHR T28.Y, T22.Z, literal.y, 2895; EG-NEXT: LSHR T22.W, T22.Y, literal.y, 2896; EG-NEXT: AND_INT * T28.X, T22.Z, literal.z, 2897; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2898; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2899; EG-NEXT: AND_INT T22.Z, T22.Y, literal.x, 2900; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2901; EG-NEXT: 65535(9.183409e-41), 80(1.121039e-43) 2902; EG-NEXT: LSHR T30.X, PV.W, literal.x, 2903; EG-NEXT: LSHR T22.Y, T22.X, literal.y, 2904; EG-NEXT: LSHR T31.W, T21.W, 
literal.y, 2905; EG-NEXT: AND_INT * T22.X, T22.X, literal.z, 2906; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2907; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2908; EG-NEXT: AND_INT T31.Z, T21.W, literal.x, 2909; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2910; EG-NEXT: 65535(9.183409e-41), 64(8.968310e-44) 2911; EG-NEXT: LSHR T32.X, PV.W, literal.x, 2912; EG-NEXT: LSHR T31.Y, T21.Z, literal.y, 2913; EG-NEXT: LSHR T21.W, T21.Y, literal.y, 2914; EG-NEXT: AND_INT * T31.X, T21.Z, literal.z, 2915; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2916; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2917; EG-NEXT: AND_INT T21.Z, T21.Y, literal.x, 2918; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2919; EG-NEXT: 65535(9.183409e-41), 112(1.569454e-43) 2920; EG-NEXT: LSHR T33.X, PV.W, literal.x, 2921; EG-NEXT: LSHR T21.Y, T21.X, literal.y, 2922; EG-NEXT: AND_INT * T21.X, T21.X, literal.z, 2923; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2924; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2925; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 2926; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00) 2927; EG-NEXT: LSHR * T34.X, PV.W, literal.x, 2928; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2929; 2930; CM-LABEL: global_zextload_v32i16_to_v32i32: 2931; CM: ; %bb.0: 2932; CM-NEXT: ALU 0, @20, KC0[CB0:0-32], KC1[] 2933; CM-NEXT: TEX 3 @12 2934; CM-NEXT: ALU 65, @21, KC0[CB0:0-32], KC1[] 2935; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T33, T34.X 2936; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T31, T21.X 2937; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T30, T32.X 2938; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T28, T22.X 2939; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T27, T29.X 2940; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T25, T19.X 2941; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T24, T26.X 2942; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T23, T20.X 2943; CM-NEXT: CF_END 2944; CM-NEXT: Fetch clause starting at 12: 2945; CM-NEXT: VTX_READ_128 T20.XYZW, T19.X, 48, #1 2946; CM-NEXT: VTX_READ_128 T21.XYZW, T19.X, 0, 
#1 2947; CM-NEXT: VTX_READ_128 T22.XYZW, T19.X, 16, #1 2948; CM-NEXT: VTX_READ_128 T19.XYZW, T19.X, 32, #1 2949; CM-NEXT: ALU clause starting at 20: 2950; CM-NEXT: MOV * T19.X, KC0[2].Z, 2951; CM-NEXT: ALU clause starting at 21: 2952; CM-NEXT: LSHR * T23.W, T20.Y, literal.x, 2953; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2954; CM-NEXT: AND_INT * T23.Z, T20.Y, literal.x, 2955; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2956; CM-NEXT: LSHR T23.Y, T20.X, literal.x, 2957; CM-NEXT: LSHR * T24.W, T20.W, literal.x, 2958; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2959; CM-NEXT: AND_INT T23.X, T20.X, literal.x, 2960; CM-NEXT: AND_INT T24.Z, T20.W, literal.x, 2961; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2962; CM-NEXT: 65535(9.183409e-41), 96(1.345247e-43) 2963; CM-NEXT: LSHR T20.X, PV.W, literal.x, 2964; CM-NEXT: LSHR T24.Y, T20.Z, literal.y, 2965; CM-NEXT: LSHR * T25.W, T19.Y, literal.y, 2966; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2967; CM-NEXT: AND_INT T24.X, T20.Z, literal.x, 2968; CM-NEXT: AND_INT T25.Z, T19.Y, literal.x, 2969; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2970; CM-NEXT: 65535(9.183409e-41), 112(1.569454e-43) 2971; CM-NEXT: LSHR T26.X, PV.W, literal.x, 2972; CM-NEXT: LSHR T25.Y, T19.X, literal.y, 2973; CM-NEXT: LSHR * T27.W, T19.W, literal.y, 2974; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2975; CM-NEXT: AND_INT T25.X, T19.X, literal.x, 2976; CM-NEXT: AND_INT T27.Z, T19.W, literal.x, 2977; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2978; CM-NEXT: 65535(9.183409e-41), 64(8.968310e-44) 2979; CM-NEXT: LSHR T19.X, PV.W, literal.x, 2980; CM-NEXT: LSHR T27.Y, T19.Z, literal.y, 2981; CM-NEXT: LSHR * T28.W, T22.Y, literal.y, 2982; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2983; CM-NEXT: AND_INT T27.X, T19.Z, literal.x, 2984; CM-NEXT: AND_INT T28.Z, T22.Y, literal.x, 2985; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2986; CM-NEXT: 65535(9.183409e-41), 80(1.121039e-43) 2987; CM-NEXT: LSHR T29.X, PV.W, literal.x, 2988; CM-NEXT: LSHR T28.Y, 
T22.X, literal.y, 2989; CM-NEXT: LSHR * T30.W, T22.W, literal.y, 2990; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2991; CM-NEXT: AND_INT T28.X, T22.X, literal.x, 2992; CM-NEXT: AND_INT T30.Z, T22.W, literal.x, 2993; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2994; CM-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 2995; CM-NEXT: LSHR T22.X, PV.W, literal.x, 2996; CM-NEXT: LSHR T30.Y, T22.Z, literal.y, 2997; CM-NEXT: LSHR * T31.W, T21.Y, literal.y, 2998; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2999; CM-NEXT: AND_INT T30.X, T22.Z, literal.x, 3000; CM-NEXT: AND_INT T31.Z, T21.Y, literal.x, 3001; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3002; CM-NEXT: 65535(9.183409e-41), 48(6.726233e-44) 3003; CM-NEXT: LSHR T32.X, PV.W, literal.x, 3004; CM-NEXT: LSHR T31.Y, T21.X, literal.y, 3005; CM-NEXT: LSHR * T33.W, T21.W, literal.y, 3006; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3007; CM-NEXT: AND_INT T31.X, T21.X, literal.x, 3008; CM-NEXT: AND_INT * T33.Z, T21.W, literal.x, 3009; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3010; CM-NEXT: LSHR T21.X, KC0[2].Y, literal.x, 3011; CM-NEXT: LSHR * T33.Y, T21.Z, literal.y, 3012; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3013; CM-NEXT: AND_INT T33.X, T21.Z, literal.x, 3014; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3015; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 3016; CM-NEXT: LSHR * T34.X, PV.W, literal.x, 3017; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 3018 %load = load <32 x i16>, <32 x i16> addrspace(1)* %in 3019 %ext = zext <32 x i16> %load to <32 x i32> 3020 store <32 x i32> %ext, <32 x i32> addrspace(1)* %out 3021 ret void 3022} 3023 ; Test case - loads a <32 x i16> vector through the addrspace(1) pointer %in, sign-extends every element to i32 (sext) and stores the <32 x i32> result through the addrspace(1) pointer %out. The check lines inside the function are autogenerated (utils/update_llc_test_checks.py, see the note at the top of the file) and hold the expected llc output for each RUN configuration - regenerate them with that script rather than editing them by hand. 3024define amdgpu_kernel void @global_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 { 3025; GCN-NOHSA-SI-LABEL: global_sextload_v32i16_to_v32i32: 3026; GCN-NOHSA-SI: ; %bb.0: 3027; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 3028; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 3029; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 3030; 
GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 3031; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 3032; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 3033; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 3034; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 3035; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 3036; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 3037; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32 3038; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48 3039; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(3) 3040; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v19, 16, v3 3041; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v17, 16, v2 3042; GCN-NOHSA-SI-NEXT: v_bfe_i32 v18, v3, 0, 16 3043; GCN-NOHSA-SI-NEXT: v_bfe_i32 v16, v2, 0, 16 3044; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v23, 16, v1 3045; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v21, 16, v0 3046; GCN-NOHSA-SI-NEXT: v_bfe_i32 v22, v1, 0, 16 3047; GCN-NOHSA-SI-NEXT: v_bfe_i32 v20, v0, 0, 16 3048; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(2) 3049; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 16, v7 3050; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 16, v6 3051; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v7, 0, 16 3052; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v6, 0, 16 3053; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v27, 16, v5 3054; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v25, 16, v4 3055; GCN-NOHSA-SI-NEXT: v_bfe_i32 v26, v5, 0, 16 3056; GCN-NOHSA-SI-NEXT: v_bfe_i32 v24, v4, 0, 16 3057; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1) 3058; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v7, 16, v11 3059; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v5, 16, v10 3060; GCN-NOHSA-SI-NEXT: v_bfe_i32 v6, v11, 0, 16 3061; GCN-NOHSA-SI-NEXT: v_bfe_i32 v4, v10, 0, 16 3062; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v31, 16, v9 3063; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v29, 16, v8 3064; GCN-NOHSA-SI-NEXT: v_bfe_i32 v30, v9, 0, 16 3065; GCN-NOHSA-SI-NEXT: v_bfe_i32 v28, v8, 0, 16 3066; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 3067; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v11, 
16, v15 3068; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v9, 16, v14 3069; GCN-NOHSA-SI-NEXT: v_bfe_i32 v10, v15, 0, 16 3070; GCN-NOHSA-SI-NEXT: v_bfe_i32 v8, v14, 0, 16 3071; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v35, 16, v13 3072; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v33, 16, v12 3073; GCN-NOHSA-SI-NEXT: v_bfe_i32 v34, v13, 0, 16 3074; GCN-NOHSA-SI-NEXT: v_bfe_i32 v32, v12, 0, 16 3075; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 3076; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 3077; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:96 3078; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112 3079; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:64 3080; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80 3081; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:32 3082; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 3083; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 3084; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16 3085; GCN-NOHSA-SI-NEXT: s_endpgm 3086; 3087; GCN-HSA-LABEL: global_sextload_v32i16_to_v32i32: 3088; GCN-HSA: ; %bb.0: 3089; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3090; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 3091; GCN-HSA-NEXT: s_add_u32 s4, s2, 48 3092; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 3093; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 3094; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 3095; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 3096; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 3097; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 3098; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] 3099; GCN-HSA-NEXT: s_add_u32 s4, s2, 32 3100; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 3101; GCN-HSA-NEXT: s_add_u32 s2, s2, 16 3102; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 3103; GCN-HSA-NEXT: v_mov_b32_e32 v9, s5 3104; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 3105; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 3106; GCN-HSA-NEXT: v_mov_b32_e32 v8, s4 3107; 
GCN-HSA-NEXT: flat_load_dwordx4 v[8:11], v[8:9] 3108; GCN-HSA-NEXT: flat_load_dwordx4 v[12:15], v[12:13] 3109; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 3110; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3111; GCN-HSA-NEXT: s_waitcnt vmcnt(3) 3112; GCN-HSA-NEXT: v_ashrrev_i32_e32 v19, 16, v1 3113; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 16, v0 3114; GCN-HSA-NEXT: v_bfe_i32 v18, v1, 0, 16 3115; GCN-HSA-NEXT: v_bfe_i32 v16, v0, 0, 16 3116; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 3117; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 3118; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[16:19] 3119; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 3120; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 3121; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 3122; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3123; GCN-HSA-NEXT: v_mov_b32_e32 v21, s3 3124; GCN-HSA-NEXT: v_mov_b32_e32 v20, s2 3125; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70 3126; GCN-HSA-NEXT: v_ashrrev_i32_e32 v19, 16, v3 3127; GCN-HSA-NEXT: v_bfe_i32 v18, v3, 0, 16 3128; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 16, v2 3129; GCN-HSA-NEXT: v_bfe_i32 v16, v2, 0, 16 3130; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3131; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[16:19] 3132; GCN-HSA-NEXT: s_waitcnt vmcnt(4) 3133; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v5 3134; GCN-HSA-NEXT: v_mov_b32_e32 v17, s3 3135; GCN-HSA-NEXT: v_mov_b32_e32 v16, s2 3136; GCN-HSA-NEXT: s_add_u32 s2, s0, 64 3137; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3138; GCN-HSA-NEXT: v_mov_b32_e32 v19, s3 3139; GCN-HSA-NEXT: v_mov_b32_e32 v18, s2 3140; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50 3141; GCN-HSA-NEXT: v_bfe_i32 v2, v5, 0, 16 3142; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v4 3143; GCN-HSA-NEXT: v_bfe_i32 v0, v4, 0, 16 3144; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3145; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[0:3] 3146; GCN-HSA-NEXT: v_mov_b32_e32 v21, s3 3147; GCN-HSA-NEXT: v_mov_b32_e32 v20, s2 3148; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 3149; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3150; GCN-HSA-NEXT: s_add_u32 s0, s0, 48 3151; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 
16, v7 3152; GCN-HSA-NEXT: v_bfe_i32 v2, v7, 0, 16 3153; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v6 3154; GCN-HSA-NEXT: v_bfe_i32 v0, v6, 0, 16 3155; GCN-HSA-NEXT: s_waitcnt vmcnt(4) 3156; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 16, v9 3157; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 16, v8 3158; GCN-HSA-NEXT: v_bfe_i32 v6, v9, 0, 16 3159; GCN-HSA-NEXT: v_bfe_i32 v4, v8, 0, 16 3160; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 3161; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 3162; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[0:3] 3163; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 3164; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v11 3165; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v10 3166; GCN-HSA-NEXT: v_bfe_i32 v2, v11, 0, 16 3167; GCN-HSA-NEXT: v_bfe_i32 v0, v10, 0, 16 3168; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[4:7] 3169; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[0:3] 3170; GCN-HSA-NEXT: v_mov_b32_e32 v11, s1 3171; GCN-HSA-NEXT: s_waitcnt vmcnt(6) 3172; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 16, v13 3173; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 16, v12 3174; GCN-HSA-NEXT: v_bfe_i32 v6, v13, 0, 16 3175; GCN-HSA-NEXT: v_bfe_i32 v4, v12, 0, 16 3176; GCN-HSA-NEXT: v_mov_b32_e32 v10, s0 3177; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v15 3178; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v14 3179; GCN-HSA-NEXT: v_bfe_i32 v2, v15, 0, 16 3180; GCN-HSA-NEXT: v_bfe_i32 v0, v14, 0, 16 3181; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 3182; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[0:3] 3183; GCN-HSA-NEXT: s_endpgm 3184; 3185; GCN-NOHSA-VI-LABEL: global_sextload_v32i16_to_v32i32: 3186; GCN-NOHSA-VI: ; %bb.0: 3187; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 3188; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 3189; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 3190; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 3191; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 3192; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 3193; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 3194; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 3195; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 
v[0:3], off, s[8:11], 0 3196; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 3197; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32 3198; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48 3199; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 3200; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 3201; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(3) 3202; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v19, 16, v3 3203; GCN-NOHSA-VI-NEXT: v_bfe_i32 v18, v3, 0, 16 3204; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v17, 16, v2 3205; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 3206; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v35, 16, v13 3207; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v33, 16, v12 3208; GCN-NOHSA-VI-NEXT: v_bfe_i32 v34, v13, 0, 16 3209; GCN-NOHSA-VI-NEXT: v_bfe_i32 v32, v12, 0, 16 3210; GCN-NOHSA-VI-NEXT: v_bfe_i32 v16, v2, 0, 16 3211; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v23, 16, v1 3212; GCN-NOHSA-VI-NEXT: v_bfe_i32 v22, v1, 0, 16 3213; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v21, 16, v0 3214; GCN-NOHSA-VI-NEXT: v_bfe_i32 v20, v0, 0, 16 3215; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 16, v7 3216; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v7, 0, 16 3217; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 16, v6 3218; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v6, 0, 16 3219; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v27, 16, v5 3220; GCN-NOHSA-VI-NEXT: v_bfe_i32 v26, v5, 0, 16 3221; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v25, 16, v4 3222; GCN-NOHSA-VI-NEXT: v_bfe_i32 v24, v4, 0, 16 3223; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 16, v11 3224; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v11, 0, 16 3225; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v5, 16, v10 3226; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v10, 0, 16 3227; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v31, 16, v9 3228; GCN-NOHSA-VI-NEXT: v_bfe_i32 v30, v9, 0, 16 3229; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v29, 16, v8 3230; GCN-NOHSA-VI-NEXT: v_bfe_i32 v28, v8, 0, 16 3231; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v11, 16, v15 3232; GCN-NOHSA-VI-NEXT: 
v_ashrrev_i32_e32 v9, 16, v14 3233; GCN-NOHSA-VI-NEXT: v_bfe_i32 v10, v15, 0, 16 3234; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v14, 0, 16 3235; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:96 3236; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112 3237; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:64 3238; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80 3239; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:32 3240; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 3241; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 3242; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16 3243; GCN-NOHSA-VI-NEXT: s_endpgm 3244; 3245; EG-LABEL: global_sextload_v32i16_to_v32i32: 3246; EG: ; %bb.0: 3247; EG-NEXT: ALU 9, @20, KC0[CB0:0-32], KC1[] 3248; EG-NEXT: TEX 3 @12 3249; EG-NEXT: ALU 73, @30, KC0[CB0:0-32], KC1[] 3250; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T22.X, 0 3251; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T34.X, 0 3252; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T28.X, 0 3253; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T27.X, 0 3254; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T26.X, 0 3255; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T21.X, 0 3256; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T20.X, 0 3257; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T19.X, 1 3258; EG-NEXT: CF_END 3259; EG-NEXT: Fetch clause starting at 12: 3260; EG-NEXT: VTX_READ_128 T23.XYZW, T22.X, 16, #1 3261; EG-NEXT: VTX_READ_128 T24.XYZW, T22.X, 32, #1 3262; EG-NEXT: VTX_READ_128 T25.XYZW, T22.X, 0, #1 3263; EG-NEXT: VTX_READ_128 T22.XYZW, T22.X, 48, #1 3264; EG-NEXT: ALU clause starting at 20: 3265; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 3266; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3267; EG-NEXT: LSHR T19.X, PV.W, literal.x, 3268; EG-NEXT: LSHR * T20.X, KC0[2].Y, 
literal.x, 3269; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 3270; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 3271; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 3272; EG-NEXT: LSHR T21.X, PV.W, literal.x, 3273; EG-NEXT: MOV * T22.X, KC0[2].Z, 3274; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 3275; EG-NEXT: ALU clause starting at 30: 3276; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 3277; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) 3278; EG-NEXT: LSHR T26.X, PV.W, literal.x, 3279; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3280; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 3281; EG-NEXT: LSHR T27.X, PV.W, literal.x, 3282; EG-NEXT: LSHR T0.W, T22.Y, literal.y, 3283; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 3284; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3285; EG-NEXT: 64(8.968310e-44), 0(0.000000e+00) 3286; EG-NEXT: LSHR T28.X, PS, literal.x, 3287; EG-NEXT: LSHR T0.Y, T22.W, literal.y, 3288; EG-NEXT: BFE_INT T29.Z, T25.W, 0.0, literal.y, BS:VEC_120/SCL_212 3289; EG-NEXT: LSHR T1.W, T24.Y, literal.y, 3290; EG-NEXT: LSHR * T2.W, T24.W, literal.y, 3291; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3292; EG-NEXT: BFE_INT T29.X, T25.Z, 0.0, literal.x, 3293; EG-NEXT: LSHR T1.Y, T23.Y, literal.x, 3294; EG-NEXT: BFE_INT T30.Z, T25.Y, 0.0, literal.x, BS:VEC_120/SCL_212 3295; EG-NEXT: LSHR T3.W, T23.W, literal.x, 3296; EG-NEXT: LSHR * T4.W, T25.W, literal.x, 3297; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3298; EG-NEXT: BFE_INT T30.X, T25.X, 0.0, literal.x, 3299; EG-NEXT: LSHR T2.Y, T25.Y, literal.x, 3300; EG-NEXT: BFE_INT T31.Z, T23.W, 0.0, literal.x, 3301; EG-NEXT: BFE_INT T29.W, PS, 0.0, literal.x, 3302; EG-NEXT: LSHR * T4.W, T25.Z, literal.x, 3303; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3304; EG-NEXT: BFE_INT T31.X, T23.Z, 0.0, literal.x, 3305; EG-NEXT: BFE_INT T29.Y, PS, 0.0, literal.x, 3306; EG-NEXT: BFE_INT T25.Z, T23.Y, 0.0, literal.x, 3307; EG-NEXT: BFE_INT T30.W, PV.Y, 0.0, literal.x, 3308; EG-NEXT: LSHR * T4.W, T25.X, literal.x, 3309; EG-NEXT: 
16(2.242078e-44), 0(0.000000e+00) 3310; EG-NEXT: BFE_INT T25.X, T23.X, 0.0, literal.x, 3311; EG-NEXT: BFE_INT T30.Y, PS, 0.0, literal.x, 3312; EG-NEXT: BFE_INT T32.Z, T24.W, 0.0, literal.x, 3313; EG-NEXT: BFE_INT T31.W, T3.W, 0.0, literal.x, BS:VEC_120/SCL_212 3314; EG-NEXT: LSHR * T3.W, T23.Z, literal.x, 3315; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3316; EG-NEXT: BFE_INT T32.X, T24.Z, 0.0, literal.x, 3317; EG-NEXT: BFE_INT T31.Y, PS, 0.0, literal.x, 3318; EG-NEXT: BFE_INT T23.Z, T24.Y, 0.0, literal.x, 3319; EG-NEXT: BFE_INT T25.W, T1.Y, 0.0, literal.x, BS:VEC_120/SCL_212 3320; EG-NEXT: LSHR * T3.W, T23.X, literal.x, 3321; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3322; EG-NEXT: BFE_INT T23.X, T24.X, 0.0, literal.x, 3323; EG-NEXT: BFE_INT T25.Y, PS, 0.0, literal.x, 3324; EG-NEXT: BFE_INT T33.Z, T22.W, 0.0, literal.x, 3325; EG-NEXT: BFE_INT T32.W, T2.W, 0.0, literal.x, BS:VEC_120/SCL_212 3326; EG-NEXT: LSHR * T2.W, T24.Z, literal.x, 3327; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3328; EG-NEXT: BFE_INT T33.X, T22.Z, 0.0, literal.x, 3329; EG-NEXT: BFE_INT T32.Y, PS, 0.0, literal.x, 3330; EG-NEXT: BFE_INT T24.Z, T22.Y, 0.0, literal.x, 3331; EG-NEXT: BFE_INT T23.W, T1.W, 0.0, literal.x, 3332; EG-NEXT: LSHR * T1.W, T24.X, literal.x, 3333; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3334; EG-NEXT: BFE_INT T24.X, T22.X, 0.0, literal.x, 3335; EG-NEXT: BFE_INT T23.Y, PS, 0.0, literal.x, 3336; EG-NEXT: LSHR T0.Z, T22.Z, literal.x, 3337; EG-NEXT: BFE_INT T33.W, T0.Y, 0.0, literal.x, 3338; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 3339; EG-NEXT: 16(2.242078e-44), 112(1.569454e-43) 3340; EG-NEXT: LSHR T34.X, PS, literal.x, 3341; EG-NEXT: BFE_INT T33.Y, PV.Z, 0.0, literal.y, 3342; EG-NEXT: LSHR T0.Z, T22.X, literal.y, 3343; EG-NEXT: BFE_INT T24.W, T0.W, 0.0, literal.y, 3344; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3345; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3346; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00) 3347; EG-NEXT: LSHR T22.X, PS, literal.x, 3348; 
EG-NEXT: BFE_INT * T24.Y, PV.Z, 0.0, literal.y, 3349; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3350; 3351; CM-LABEL: global_sextload_v32i16_to_v32i32: 3352; CM: ; %bb.0: 3353; CM-NEXT: ALU 0, @22, KC0[CB0:0-32], KC1[] 3354; CM-NEXT: TEX 0 @14 3355; CM-NEXT: ALU 7, @23, KC0[CB0:0-32], KC1[] 3356; CM-NEXT: TEX 2 @16 3357; CM-NEXT: ALU 76, @31, KC0[CB0:0-32], KC1[] 3358; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T33, T34.X 3359; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T19, T20.X 3360; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T32, T28.X 3361; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T23, T27.X 3362; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T31, T26.X 3363; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T22, T25.X 3364; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T30, T24.X 3365; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T29, T21.X 3366; CM-NEXT: CF_END 3367; CM-NEXT: Fetch clause starting at 14: 3368; CM-NEXT: VTX_READ_128 T20.XYZW, T19.X, 0, #1 3369; CM-NEXT: Fetch clause starting at 16: 3370; CM-NEXT: VTX_READ_128 T22.XYZW, T19.X, 48, #1 3371; CM-NEXT: VTX_READ_128 T23.XYZW, T19.X, 32, #1 3372; CM-NEXT: VTX_READ_128 T19.XYZW, T19.X, 16, #1 3373; CM-NEXT: ALU clause starting at 22: 3374; CM-NEXT: MOV * T19.X, KC0[2].Z, 3375; CM-NEXT: ALU clause starting at 23: 3376; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 3377; CM-NEXT: 96(1.345247e-43), 0(0.000000e+00) 3378; CM-NEXT: LSHR T21.X, PV.W, literal.x, 3379; CM-NEXT: LSHR T0.Y, T20.Z, literal.y, 3380; CM-NEXT: LSHR T0.Z, T20.W, literal.y, 3381; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3382; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3383; CM-NEXT: 112(1.569454e-43), 0(0.000000e+00) 3384; CM-NEXT: ALU clause starting at 31: 3385; CM-NEXT: LSHR T24.X, T0.W, literal.x, 3386; CM-NEXT: LSHR T1.Y, T20.Y, literal.y, 3387; CM-NEXT: LSHR T1.Z, T19.Z, literal.y, 3388; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3389; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3390; CM-NEXT: 64(8.968310e-44), 0(0.000000e+00) 3391; CM-NEXT: LSHR T25.X, PV.W, 
literal.x, 3392; CM-NEXT: LSHR T2.Y, T19.W, literal.y, 3393; CM-NEXT: LSHR T2.Z, T19.X, literal.y, 3394; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3395; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3396; CM-NEXT: 80(1.121039e-43), 0(0.000000e+00) 3397; CM-NEXT: LSHR T26.X, PV.W, literal.x, 3398; CM-NEXT: LSHR T3.Y, T19.Y, literal.y, 3399; CM-NEXT: LSHR T3.Z, T23.Z, literal.y, 3400; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3401; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3402; CM-NEXT: 32(4.484155e-44), 0(0.000000e+00) 3403; CM-NEXT: LSHR T27.X, PV.W, literal.x, 3404; CM-NEXT: LSHR T4.Y, T23.W, literal.y, 3405; CM-NEXT: LSHR T4.Z, T23.X, literal.y, 3406; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3407; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3408; CM-NEXT: 48(6.726233e-44), 0(0.000000e+00) 3409; CM-NEXT: LSHR T28.X, PV.W, literal.x, 3410; CM-NEXT: LSHR T5.Y, T23.Y, literal.y, 3411; CM-NEXT: BFE_INT T29.Z, T22.Y, 0.0, literal.y, BS:VEC_120/SCL_212 3412; CM-NEXT: LSHR * T0.W, T22.Z, literal.y, 3413; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3414; CM-NEXT: BFE_INT T29.X, T22.X, 0.0, literal.x, 3415; CM-NEXT: LSHR T6.Y, T22.W, literal.x, 3416; CM-NEXT: BFE_INT T30.Z, T22.W, 0.0, literal.x, 3417; CM-NEXT: LSHR * T1.W, T22.Y, literal.x, 3418; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3419; CM-NEXT: BFE_INT T30.X, T22.Z, 0.0, literal.x, 3420; CM-NEXT: LSHR T7.Y, T22.X, literal.x, 3421; CM-NEXT: BFE_INT T22.Z, T23.Y, 0.0, literal.x, 3422; CM-NEXT: BFE_INT * T29.W, PV.W, 0.0, literal.x, 3423; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3424; CM-NEXT: BFE_INT T22.X, T23.X, 0.0, literal.x, 3425; CM-NEXT: BFE_INT T29.Y, PV.Y, 0.0, literal.x, 3426; CM-NEXT: BFE_INT T31.Z, T23.W, 0.0, literal.x, 3427; CM-NEXT: BFE_INT * T30.W, T6.Y, 0.0, literal.x, 3428; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3429; CM-NEXT: BFE_INT T31.X, T23.Z, 0.0, literal.x, 3430; CM-NEXT: BFE_INT T30.Y, T0.W, 0.0, literal.x, 3431; CM-NEXT: BFE_INT T23.Z, T19.Y, 0.0, literal.x, 3432; CM-NEXT: 
BFE_INT * T22.W, T5.Y, 0.0, literal.x, BS:VEC_120/SCL_212 3433; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3434; CM-NEXT: BFE_INT T23.X, T19.X, 0.0, literal.x, 3435; CM-NEXT: BFE_INT T22.Y, T4.Z, 0.0, literal.x, 3436; CM-NEXT: BFE_INT T32.Z, T19.W, 0.0, literal.x, 3437; CM-NEXT: BFE_INT * T31.W, T4.Y, 0.0, literal.x, 3438; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3439; CM-NEXT: BFE_INT T32.X, T19.Z, 0.0, literal.x, 3440; CM-NEXT: BFE_INT T31.Y, T3.Z, 0.0, literal.x, BS:VEC_120/SCL_212 3441; CM-NEXT: BFE_INT T19.Z, T20.Y, 0.0, literal.x, 3442; CM-NEXT: BFE_INT * T23.W, T3.Y, 0.0, literal.x, BS:VEC_120/SCL_212 3443; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3444; CM-NEXT: BFE_INT T19.X, T20.X, 0.0, literal.x, 3445; CM-NEXT: BFE_INT T23.Y, T2.Z, 0.0, literal.x, 3446; CM-NEXT: BFE_INT T33.Z, T20.W, 0.0, literal.x, 3447; CM-NEXT: BFE_INT * T32.W, T2.Y, 0.0, literal.x, 3448; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3449; CM-NEXT: BFE_INT T33.X, T20.Z, 0.0, literal.x, 3450; CM-NEXT: BFE_INT T32.Y, T1.Z, 0.0, literal.x, BS:VEC_120/SCL_212 3451; CM-NEXT: LSHR T1.Z, T20.X, literal.x, 3452; CM-NEXT: BFE_INT * T19.W, T1.Y, 0.0, literal.x, 3453; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3454; CM-NEXT: LSHR T20.X, KC0[2].Y, literal.x, 3455; CM-NEXT: BFE_INT T19.Y, PV.Z, 0.0, literal.y, 3456; CM-NEXT: ADD_INT T1.Z, KC0[2].Y, literal.y, 3457; CM-NEXT: BFE_INT * T33.W, T0.Z, 0.0, literal.y, 3458; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3459; CM-NEXT: LSHR T34.X, PV.Z, literal.x, 3460; CM-NEXT: BFE_INT * T33.Y, T0.Y, 0.0, literal.y, 3461; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3462 %load = load <32 x i16>, <32 x i16> addrspace(1)* %in 3463 %ext = sext <32 x i16> %load to <32 x i32> 3464 store <32 x i32> %ext, <32 x i32> addrspace(1)* %out 3465 ret void 3466} 3467 3468define amdgpu_kernel void @global_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 { 3469; GCN-NOHSA-SI-LABEL: global_zextload_v64i16_to_v64i32: 3470; GCN-NOHSA-SI: 
; %bb.0: 3471; GCN-NOHSA-SI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 3472; GCN-NOHSA-SI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 3473; GCN-NOHSA-SI-NEXT: s_mov_b32 s14, -1 3474; GCN-NOHSA-SI-NEXT: s_mov_b32 s15, 0xe8f000 3475; GCN-NOHSA-SI-NEXT: s_add_u32 s12, s12, s3 3476; GCN-NOHSA-SI-NEXT: s_addc_u32 s13, s13, 0 3477; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 3478; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 3479; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 3480; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, 0xffff 3481; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 3482; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 3483; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 3484; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 3485; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 3486; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 3487; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 3488; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32 3489; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48 3490; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:64 3491; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:80 3492; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:96 3493; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:112 3494; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(7) 3495; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v35, 16, v3 3496; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v33, 16, v2 3497; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v39, 16, v1 3498; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v37, 16, v0 3499; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(6) 3500; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v43, 16, v7 3501; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v41, 16, v6 3502; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v34, s0, v3 3503; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v32, s0, v2 3504; GCN-NOHSA-SI-NEXT: buffer_store_dword v32, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill 3505; 
GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 3506; GCN-NOHSA-SI-NEXT: buffer_store_dword v33, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill 3507; GCN-NOHSA-SI-NEXT: buffer_store_dword v34, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill 3508; GCN-NOHSA-SI-NEXT: buffer_store_dword v35, off, s[12:15], 0 offset:16 ; 4-byte Folded Spill 3509; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v38, s0, v1 3510; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v36, s0, v0 3511; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3512; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v35, 16, v5 3513; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v33, 16, v4 3514; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v42, s0, v7 3515; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v40, s0, v6 3516; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v34, s0, v5 3517; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v32, s0, v4 3518; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v7, 16, v11 3519; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v5, 16, v10 3520; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v47, 16, v9 3521; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v45, 16, v8 3522; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v6, s0, v11 3523; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, s0, v10 3524; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v46, s0, v9 3525; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v44, s0, v8 3526; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v11, 16, v15 3527; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v9, 16, v14 3528; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v51, 16, v13 3529; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v49, 16, v12 3530; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v10, s0, v15 3531; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, s0, v14 3532; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v50, s0, v13 3533; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v48, s0, v12 3534; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v15, 16, v19 3535; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v13, 16, v18 3536; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v55, 16, v17 3537; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v53, 16, v16 3538; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v14, s0, v19 3539; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v12, s0, v18 3540; GCN-NOHSA-SI-NEXT: 
v_and_b32_e32 v54, s0, v17 3541; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v52, s0, v16 3542; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v19, 16, v23 3543; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v17, 16, v22 3544; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v59, 16, v21 3545; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v57, 16, v20 3546; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v18, s0, v23 3547; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v16, s0, v22 3548; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v58, s0, v21 3549; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v56, s0, v20 3550; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v23, 16, v27 3551; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v21, 16, v26 3552; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v63, 16, v25 3553; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v61, 16, v24 3554; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v22, s0, v27 3555; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v20, s0, v26 3556; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v62, s0, v25 3557; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v60, s0, v24 3558; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v27, 16, v31 3559; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v25, 16, v30 3560; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v29 3561; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v28 3562; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v26, s0, v31 3563; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v24, s0, v30 3564; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v2, s0, v29 3565; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, s0, v28 3566; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 3567; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 3568; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 3569; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:240 3570; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[60:63], off, s[0:3], 0 offset:192 3571; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:208 3572; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[56:59], off, s[0:3], 0 offset:160 3573; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176 3574; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 
v[52:55], off, s[0:3], 0 offset:128 3575; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:144 3576; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[48:51], off, s[0:3], 0 offset:96 3577; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112 3578; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[44:47], off, s[0:3], 0 offset:64 3579; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80 3580; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:32 3581; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[40:43], off, s[0:3], 0 offset:48 3582; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[36:39], off, s[0:3], 0 3583; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload 3584; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload 3585; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload 3586; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:16 ; 4-byte Folded Reload 3587; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 3588; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 3589; GCN-NOHSA-SI-NEXT: s_endpgm 3590; 3591; GCN-HSA-LABEL: global_zextload_v64i16_to_v64i32: 3592; GCN-HSA: ; %bb.0: 3593; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3594; GCN-HSA-NEXT: s_movk_i32 s14, 0x50 3595; GCN-HSA-NEXT: s_movk_i32 s15, 0x60 3596; GCN-HSA-NEXT: s_movk_i32 s16, 0x70 3597; GCN-HSA-NEXT: s_mov_b32 s17, 0xffff 3598; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 3599; GCN-HSA-NEXT: s_add_u32 s4, s2, s14 3600; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 3601; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 3602; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 3603; GCN-HSA-NEXT: s_add_u32 s4, s2, s15 3604; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 3605; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 3606; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 3607; GCN-HSA-NEXT: s_add_u32 s4, s2, s16 3608; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 3609; GCN-HSA-NEXT: 
v_mov_b32_e32 v9, s5 3610; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 3611; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] 3612; GCN-HSA-NEXT: v_mov_b32_e32 v8, s4 3613; GCN-HSA-NEXT: s_add_u32 s4, s2, 16 3614; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 3615; GCN-HSA-NEXT: v_mov_b32_e32 v13, s5 3616; GCN-HSA-NEXT: v_mov_b32_e32 v12, s4 3617; GCN-HSA-NEXT: s_add_u32 s4, s2, 32 3618; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 3619; GCN-HSA-NEXT: s_add_u32 s6, s2, 48 3620; GCN-HSA-NEXT: s_addc_u32 s7, s3, 0 3621; GCN-HSA-NEXT: v_mov_b32_e32 v33, s3 3622; GCN-HSA-NEXT: v_mov_b32_e32 v32, s2 3623; GCN-HSA-NEXT: s_add_u32 s2, s2, 64 3624; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 3625; GCN-HSA-NEXT: v_mov_b32_e32 v17, s5 3626; GCN-HSA-NEXT: v_mov_b32_e32 v21, s7 3627; GCN-HSA-NEXT: v_mov_b32_e32 v29, s3 3628; GCN-HSA-NEXT: flat_load_dwordx4 v[8:11], v[8:9] 3629; GCN-HSA-NEXT: flat_load_dwordx4 v[12:15], v[12:13] 3630; GCN-HSA-NEXT: v_mov_b32_e32 v16, s4 3631; GCN-HSA-NEXT: v_mov_b32_e32 v20, s6 3632; GCN-HSA-NEXT: v_mov_b32_e32 v28, s2 3633; GCN-HSA-NEXT: flat_load_dwordx4 v[16:19], v[16:17] 3634; GCN-HSA-NEXT: flat_load_dwordx4 v[20:23], v[20:21] 3635; GCN-HSA-NEXT: flat_load_dwordx4 v[28:31], v[28:29] 3636; GCN-HSA-NEXT: flat_load_dwordx4 v[32:35], v[32:33] 3637; GCN-HSA-NEXT: s_add_u32 s4, s0, 16 3638; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 3639; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xe0 3640; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3641; GCN-HSA-NEXT: s_add_u32 s6, s0, 0xf0 3642; GCN-HSA-NEXT: s_addc_u32 s7, s1, 0 3643; GCN-HSA-NEXT: s_add_u32 s8, s0, 0xc0 3644; GCN-HSA-NEXT: s_addc_u32 s9, s1, 0 3645; GCN-HSA-NEXT: s_add_u32 s10, s0, 0xd0 3646; GCN-HSA-NEXT: s_addc_u32 s11, s1, 0 3647; GCN-HSA-NEXT: s_add_u32 s12, s0, 0xa0 3648; GCN-HSA-NEXT: s_addc_u32 s13, s1, 0 3649; GCN-HSA-NEXT: s_waitcnt vmcnt(7) 3650; GCN-HSA-NEXT: v_lshrrev_b32_e32 v27, 16, v1 3651; GCN-HSA-NEXT: v_lshrrev_b32_e32 v25, 16, v0 3652; GCN-HSA-NEXT: v_and_b32_e32 v26, s17, v1 3653; GCN-HSA-NEXT: v_and_b32_e32 v24, s17, v0 3654; 
GCN-HSA-NEXT: v_mov_b32_e32 v0, s12 3655; GCN-HSA-NEXT: v_mov_b32_e32 v1, s13 3656; GCN-HSA-NEXT: s_add_u32 s12, s0, 0xb0 3657; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[24:27] 3658; GCN-HSA-NEXT: s_addc_u32 s13, s1, 0 3659; GCN-HSA-NEXT: v_mov_b32_e32 v0, s12 3660; GCN-HSA-NEXT: v_lshrrev_b32_e32 v27, 16, v3 3661; GCN-HSA-NEXT: v_and_b32_e32 v26, s17, v3 3662; GCN-HSA-NEXT: v_lshrrev_b32_e32 v25, 16, v2 3663; GCN-HSA-NEXT: v_and_b32_e32 v24, s17, v2 3664; GCN-HSA-NEXT: v_mov_b32_e32 v1, s13 3665; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[24:27] 3666; GCN-HSA-NEXT: s_waitcnt vmcnt(8) 3667; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v5 3668; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v4 3669; GCN-HSA-NEXT: v_and_b32_e32 v2, s17, v5 3670; GCN-HSA-NEXT: v_and_b32_e32 v0, s17, v4 3671; GCN-HSA-NEXT: v_mov_b32_e32 v4, s8 3672; GCN-HSA-NEXT: v_mov_b32_e32 v5, s9 3673; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3674; GCN-HSA-NEXT: v_mov_b32_e32 v4, s10 3675; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v7 3676; GCN-HSA-NEXT: v_and_b32_e32 v2, s17, v7 3677; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v6 3678; GCN-HSA-NEXT: v_and_b32_e32 v0, s17, v6 3679; GCN-HSA-NEXT: v_mov_b32_e32 v5, s11 3680; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3681; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 3682; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 3683; GCN-HSA-NEXT: s_waitcnt vmcnt(9) 3684; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v9 3685; GCN-HSA-NEXT: v_and_b32_e32 v2, s17, v9 3686; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v8 3687; GCN-HSA-NEXT: v_and_b32_e32 v0, s17, v8 3688; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3689; GCN-HSA-NEXT: v_mov_b32_e32 v4, s6 3690; GCN-HSA-NEXT: v_mov_b32_e32 v25, s1 3691; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v11 3692; GCN-HSA-NEXT: v_and_b32_e32 v2, s17, v11 3693; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v10 3694; GCN-HSA-NEXT: v_and_b32_e32 v0, s17, v10 3695; GCN-HSA-NEXT: v_mov_b32_e32 v5, s7 3696; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3697; 
GCN-HSA-NEXT: s_add_u32 s2, s0, 0x80 3698; GCN-HSA-NEXT: v_mov_b32_e32 v27, s5 3699; GCN-HSA-NEXT: v_mov_b32_e32 v24, s0 3700; GCN-HSA-NEXT: s_waitcnt vmcnt(6) 3701; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v33 3702; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v32 3703; GCN-HSA-NEXT: v_and_b32_e32 v2, s17, v33 3704; GCN-HSA-NEXT: v_and_b32_e32 v0, s17, v32 3705; GCN-HSA-NEXT: v_mov_b32_e32 v26, s4 3706; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v35 3707; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v34 3708; GCN-HSA-NEXT: v_and_b32_e32 v6, s17, v35 3709; GCN-HSA-NEXT: v_and_b32_e32 v4, s17, v34 3710; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[0:3] 3711; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[4:7] 3712; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3713; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 3714; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 3715; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x90 3716; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3717; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 3718; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 3719; GCN-HSA-NEXT: s_add_u32 s2, s0, s15 3720; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v29 3721; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v28 3722; GCN-HSA-NEXT: v_and_b32_e32 v10, s17, v29 3723; GCN-HSA-NEXT: v_and_b32_e32 v8, s17, v28 3724; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[8:11] 3725; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v31 3726; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v30 3727; GCN-HSA-NEXT: v_and_b32_e32 v2, s17, v31 3728; GCN-HSA-NEXT: v_and_b32_e32 v0, s17, v30 3729; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3730; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3731; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 3732; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 3733; GCN-HSA-NEXT: s_add_u32 s2, s0, s16 3734; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3735; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 3736; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 3737; GCN-HSA-NEXT: s_add_u32 s2, s0, 64 3738; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v21 3739; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v20 3740; GCN-HSA-NEXT: v_and_b32_e32 v2, 
s17, v21 3741; GCN-HSA-NEXT: v_and_b32_e32 v0, s17, v20 3742; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3743; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v23 3744; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v22 3745; GCN-HSA-NEXT: v_and_b32_e32 v6, s17, v23 3746; GCN-HSA-NEXT: v_and_b32_e32 v4, s17, v22 3747; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3748; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 3749; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v15 3750; GCN-HSA-NEXT: v_and_b32_e32 v2, s17, v15 3751; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v14 3752; GCN-HSA-NEXT: v_and_b32_e32 v0, s17, v14 3753; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v13 3754; GCN-HSA-NEXT: v_and_b32_e32 v6, s17, v13 3755; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v12 3756; GCN-HSA-NEXT: v_and_b32_e32 v4, s17, v12 3757; GCN-HSA-NEXT: v_lshrrev_b32_e32 v15, 16, v17 3758; GCN-HSA-NEXT: v_lshrrev_b32_e32 v13, 16, v16 3759; GCN-HSA-NEXT: v_and_b32_e32 v14, s17, v17 3760; GCN-HSA-NEXT: v_and_b32_e32 v12, s17, v16 3761; GCN-HSA-NEXT: v_mov_b32_e32 v17, s3 3762; GCN-HSA-NEXT: v_mov_b32_e32 v16, s2 3763; GCN-HSA-NEXT: s_add_u32 s2, s0, s14 3764; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3765; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[12:15] 3766; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v19 3767; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 3768; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 3769; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 3770; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3771; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v18 3772; GCN-HSA-NEXT: v_and_b32_e32 v10, s17, v19 3773; GCN-HSA-NEXT: v_and_b32_e32 v8, s17, v18 3774; GCN-HSA-NEXT: s_add_u32 s0, s0, 48 3775; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[8:11] 3776; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 3777; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 3778; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 3779; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 3780; GCN-HSA-NEXT: s_nop 0 3781; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 3782; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 3783; GCN-HSA-NEXT: flat_store_dwordx4 
v[4:5], v[0:3] 3784; GCN-HSA-NEXT: s_endpgm 3785; 3786; GCN-NOHSA-VI-LABEL: global_zextload_v64i16_to_v64i32: 3787; GCN-NOHSA-VI: ; %bb.0: 3788; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 3789; GCN-NOHSA-VI-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0 3790; GCN-NOHSA-VI-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1 3791; GCN-NOHSA-VI-NEXT: s_mov_b32 s90, -1 3792; GCN-NOHSA-VI-NEXT: s_mov_b32 s91, 0xe80000 3793; GCN-NOHSA-VI-NEXT: s_add_u32 s88, s88, s3 3794; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 3795; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 3796; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 3797; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 3798; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 3799; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 3800; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 3801; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:96 3802; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:80 3803; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:64 3804; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 3805; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:16 3806; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:32 3807; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:48 3808; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, 0xffff 3809; GCN-NOHSA-VI-NEXT: s_addc_u32 s89, s89, 0 3810; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 3811; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(3) 3812; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v31, 16, v15 3813; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v30, s0, v15 3814; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v29, 16, v14 3815; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v28, s0, v14 3816; GCN-NOHSA-VI-NEXT: buffer_store_dword v28, off, s[88:91], 0 offset:4 ; 4-byte Folded Spill 3817; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 3818; GCN-NOHSA-VI-NEXT: buffer_store_dword v29, off, s[88:91], 0 offset:8 ; 4-byte Folded Spill 3819; GCN-NOHSA-VI-NEXT: buffer_store_dword v30, 
off, s[88:91], 0 offset:12 ; 4-byte Folded Spill 3820; GCN-NOHSA-VI-NEXT: buffer_store_dword v31, off, s[88:91], 0 offset:16 ; 4-byte Folded Spill 3821; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[56:59], off, s[8:11], 0 offset:112 3822; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v31, 16, v13 3823; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v30, s0, v13 3824; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v29, 16, v12 3825; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v28, s0, v12 3826; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v35, 16, v19 3827; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v34, s0, v19 3828; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v33, 16, v18 3829; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v32, s0, v18 3830; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v19, 16, v17 3831; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v18, s0, v17 3832; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v17, 16, v16 3833; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v16, s0, v16 3834; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v39, 16, v23 3835; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v38, s0, v23 3836; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v37, 16, v22 3837; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v36, s0, v22 3838; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v23, 16, v21 3839; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v22, s0, v21 3840; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v21, 16, v20 3841; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v20, s0, v20 3842; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v43, 16, v27 3843; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v42, s0, v27 3844; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v41, 16, v26 3845; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v40, s0, v26 3846; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v27, 16, v25 3847; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v26, s0, v25 3848; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v25, 16, v24 3849; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v24, s0, v24 3850; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v47, 16, v11 3851; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v46, s0, v11 3852; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v45, 16, v10 3853; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v44, s0, v10 3854; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 
v15, 16, v9 3855; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v14, s0, v9 3856; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v13, 16, v8 3857; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v12, s0, v8 3858; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v51, 16, v7 3859; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v50, s0, v7 3860; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v49, 16, v6 3861; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v48, s0, v6 3862; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v11, 16, v5 3863; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v10, s0, v5 3864; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v4 3865; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v8, s0, v4 3866; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v55, 16, v3 3867; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v54, s0, v3 3868; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v53, 16, v2 3869; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v52, s0, v2 3870; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v1 3871; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v6, s0, v1 3872; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v0 3873; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v4, s0, v0 3874; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 3875; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v61, 16, v59 3876; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v60, s0, v59 3877; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v59, 16, v58 3878; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v58, s0, v58 3879; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v57 3880; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v2, s0, v57 3881; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, s0, v56 3882; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v56 3883; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 3884; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 3885; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[58:61], off, s[0:3], 0 offset:240 3886; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:192 3887; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[52:55], off, s[0:3], 0 offset:208 3888; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:160 3889; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[48:51], off, s[0:3], 
0 offset:176 3890; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:128 3891; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[44:47], off, s[0:3], 0 offset:144 3892; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:96 3893; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[40:43], off, s[0:3], 0 offset:112 3894; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:64 3895; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[36:39], off, s[0:3], 0 offset:80 3896; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:32 3897; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:48 3898; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 3899; GCN-NOHSA-VI-NEXT: buffer_load_dword v0, off, s[88:91], 0 offset:4 ; 4-byte Folded Reload 3900; GCN-NOHSA-VI-NEXT: buffer_load_dword v1, off, s[88:91], 0 offset:8 ; 4-byte Folded Reload 3901; GCN-NOHSA-VI-NEXT: buffer_load_dword v2, off, s[88:91], 0 offset:12 ; 4-byte Folded Reload 3902; GCN-NOHSA-VI-NEXT: buffer_load_dword v3, off, s[88:91], 0 offset:16 ; 4-byte Folded Reload 3903; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 3904; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 3905; GCN-NOHSA-VI-NEXT: s_endpgm 3906; 3907; EG-LABEL: global_zextload_v64i16_to_v64i32: 3908; EG: ; %bb.0: 3909; EG-NEXT: ALU 0, @38, KC0[CB0:0-32], KC1[] 3910; EG-NEXT: TEX 3 @22 3911; EG-NEXT: ALU 56, @39, KC0[CB0:0-32], KC1[] 3912; EG-NEXT: TEX 3 @30 3913; EG-NEXT: ALU 87, @96, KC0[CB0:0-32], KC1[] 3914; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T49.XYZW, T66.X, 0 3915; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T65.X, 0 3916; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T64.X, 0 3917; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T62.X, 0 3918; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T51.XYZW, T61.X, 0 3919; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T59.X, 0 3920; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T52.XYZW, T58.X, 0 3921; EG-NEXT: 
MEM_RAT_CACHELESS STORE_RAW T54.XYZW, T56.X, 0 3922; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T55.X, 0 3923; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T53.X, 0 3924; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T48.X, 0 3925; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T47.X, 0 3926; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T46.X, 0 3927; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T44.X, 0 3928; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T43.X, 0 3929; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T41.X, 1 3930; EG-NEXT: CF_END 3931; EG-NEXT: Fetch clause starting at 22: 3932; EG-NEXT: VTX_READ_128 T36.XYZW, T35.X, 0, #1 3933; EG-NEXT: VTX_READ_128 T37.XYZW, T35.X, 48, #1 3934; EG-NEXT: VTX_READ_128 T38.XYZW, T35.X, 32, #1 3935; EG-NEXT: VTX_READ_128 T39.XYZW, T35.X, 16, #1 3936; EG-NEXT: Fetch clause starting at 30: 3937; EG-NEXT: VTX_READ_128 T49.XYZW, T35.X, 112, #1 3938; EG-NEXT: VTX_READ_128 T50.XYZW, T35.X, 96, #1 3939; EG-NEXT: VTX_READ_128 T51.XYZW, T35.X, 80, #1 3940; EG-NEXT: VTX_READ_128 T52.XYZW, T35.X, 64, #1 3941; EG-NEXT: ALU clause starting at 38: 3942; EG-NEXT: MOV * T35.X, KC0[2].Z, 3943; EG-NEXT: ALU clause starting at 39: 3944; EG-NEXT: LSHR * T40.W, T36.W, literal.x, 3945; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3946; EG-NEXT: AND_INT * T40.Z, T36.W, literal.x, 3947; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3948; EG-NEXT: LSHR T40.Y, T36.Z, literal.x, 3949; EG-NEXT: LSHR * T36.W, T36.Y, literal.x, 3950; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3951; EG-NEXT: AND_INT T40.X, T36.Z, literal.x, 3952; EG-NEXT: AND_INT T36.Z, T36.Y, literal.x, 3953; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3954; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 3955; EG-NEXT: LSHR T41.X, PV.W, literal.x, 3956; EG-NEXT: LSHR T36.Y, T36.X, literal.y, 3957; EG-NEXT: LSHR T42.W, T39.W, literal.y, 3958; EG-NEXT: AND_INT * T36.X, T36.X, literal.z, 3959; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3960; EG-NEXT: 65535(9.183409e-41), 
0(0.000000e+00) 3961; EG-NEXT: AND_INT * T42.Z, T39.W, literal.x, 3962; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3963; EG-NEXT: LSHR T43.X, KC0[2].Y, literal.x, 3964; EG-NEXT: LSHR T42.Y, T39.Z, literal.y, 3965; EG-NEXT: LSHR T39.W, T39.Y, literal.y, 3966; EG-NEXT: AND_INT * T42.X, T39.Z, literal.z, 3967; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3968; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3969; EG-NEXT: AND_INT T39.Z, T39.Y, literal.x, 3970; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3971; EG-NEXT: 65535(9.183409e-41), 48(6.726233e-44) 3972; EG-NEXT: LSHR T44.X, PV.W, literal.x, 3973; EG-NEXT: LSHR T39.Y, T39.X, literal.y, 3974; EG-NEXT: LSHR T45.W, T38.W, literal.y, 3975; EG-NEXT: AND_INT * T39.X, T39.X, literal.z, 3976; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3977; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3978; EG-NEXT: AND_INT T45.Z, T38.W, literal.x, 3979; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3980; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 3981; EG-NEXT: LSHR T46.X, PV.W, literal.x, 3982; EG-NEXT: LSHR T45.Y, T38.Z, literal.y, 3983; EG-NEXT: LSHR T38.W, T38.Y, literal.y, 3984; EG-NEXT: AND_INT * T45.X, T38.Z, literal.z, 3985; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3986; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3987; EG-NEXT: AND_INT T38.Z, T38.Y, literal.x, 3988; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3989; EG-NEXT: 65535(9.183409e-41), 80(1.121039e-43) 3990; EG-NEXT: LSHR T47.X, PV.W, literal.x, 3991; EG-NEXT: LSHR T38.Y, T38.X, literal.y, 3992; EG-NEXT: AND_INT * T38.X, T38.X, literal.z, 3993; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3994; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3995; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.x, 3996; EG-NEXT: LSHR * T35.W, T37.W, literal.y, 3997; EG-NEXT: 64(8.968310e-44), 16(2.242078e-44) 3998; EG-NEXT: LSHR T48.X, PV.W, literal.x, 3999; EG-NEXT: AND_INT * T35.Z, T37.W, literal.y, 4000; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41) 4001; EG-NEXT: ALU clause 
starting at 96: 4002; EG-NEXT: LSHR T35.Y, T37.Z, literal.x, 4003; EG-NEXT: LSHR * T37.W, T37.Y, literal.x, 4004; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4005; EG-NEXT: AND_INT T35.X, T37.Z, literal.x, 4006; EG-NEXT: AND_INT T37.Z, T37.Y, literal.x, 4007; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4008; EG-NEXT: 65535(9.183409e-41), 112(1.569454e-43) 4009; EG-NEXT: LSHR T53.X, PV.W, literal.x, 4010; EG-NEXT: LSHR T37.Y, T37.X, literal.y, 4011; EG-NEXT: LSHR T54.W, T52.W, literal.y, 4012; EG-NEXT: AND_INT * T37.X, T37.X, literal.z, 4013; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4014; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4015; EG-NEXT: AND_INT T54.Z, T52.W, literal.x, 4016; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4017; EG-NEXT: 65535(9.183409e-41), 96(1.345247e-43) 4018; EG-NEXT: LSHR T55.X, PV.W, literal.x, 4019; EG-NEXT: LSHR T54.Y, T52.Z, literal.y, 4020; EG-NEXT: LSHR T52.W, T52.Y, literal.y, 4021; EG-NEXT: AND_INT * T54.X, T52.Z, literal.z, 4022; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4023; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4024; EG-NEXT: AND_INT T52.Z, T52.Y, literal.x, 4025; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4026; EG-NEXT: 65535(9.183409e-41), 144(2.017870e-43) 4027; EG-NEXT: LSHR T56.X, PV.W, literal.x, 4028; EG-NEXT: LSHR T52.Y, T52.X, literal.y, 4029; EG-NEXT: LSHR T57.W, T51.W, literal.y, 4030; EG-NEXT: AND_INT * T52.X, T52.X, literal.z, 4031; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4032; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4033; EG-NEXT: AND_INT T57.Z, T51.W, literal.x, 4034; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4035; EG-NEXT: 65535(9.183409e-41), 128(1.793662e-43) 4036; EG-NEXT: LSHR T58.X, PV.W, literal.x, 4037; EG-NEXT: LSHR T57.Y, T51.Z, literal.y, 4038; EG-NEXT: LSHR T51.W, T51.Y, literal.y, 4039; EG-NEXT: AND_INT * T57.X, T51.Z, literal.z, 4040; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4041; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4042; EG-NEXT: AND_INT T51.Z, T51.Y, 
literal.x, 4043; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4044; EG-NEXT: 65535(9.183409e-41), 176(2.466285e-43) 4045; EG-NEXT: LSHR T59.X, PV.W, literal.x, 4046; EG-NEXT: LSHR T51.Y, T51.X, literal.y, 4047; EG-NEXT: LSHR T60.W, T50.W, literal.y, 4048; EG-NEXT: AND_INT * T51.X, T51.X, literal.z, 4049; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4050; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4051; EG-NEXT: AND_INT T60.Z, T50.W, literal.x, 4052; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4053; EG-NEXT: 65535(9.183409e-41), 160(2.242078e-43) 4054; EG-NEXT: LSHR T61.X, PV.W, literal.x, 4055; EG-NEXT: LSHR T60.Y, T50.Z, literal.y, 4056; EG-NEXT: LSHR T50.W, T50.Y, literal.y, 4057; EG-NEXT: AND_INT * T60.X, T50.Z, literal.z, 4058; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4059; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4060; EG-NEXT: AND_INT T50.Z, T50.Y, literal.x, 4061; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4062; EG-NEXT: 65535(9.183409e-41), 208(2.914701e-43) 4063; EG-NEXT: LSHR T62.X, PV.W, literal.x, 4064; EG-NEXT: LSHR T50.Y, T50.X, literal.y, 4065; EG-NEXT: LSHR T63.W, T49.W, literal.y, 4066; EG-NEXT: AND_INT * T50.X, T50.X, literal.z, 4067; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4068; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4069; EG-NEXT: AND_INT T63.Z, T49.W, literal.x, 4070; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4071; EG-NEXT: 65535(9.183409e-41), 192(2.690493e-43) 4072; EG-NEXT: LSHR T64.X, PV.W, literal.x, 4073; EG-NEXT: LSHR T63.Y, T49.Z, literal.y, 4074; EG-NEXT: LSHR T49.W, T49.Y, literal.y, 4075; EG-NEXT: AND_INT * T63.X, T49.Z, literal.z, 4076; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4077; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4078; EG-NEXT: AND_INT T49.Z, T49.Y, literal.x, 4079; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4080; EG-NEXT: 65535(9.183409e-41), 240(3.363116e-43) 4081; EG-NEXT: LSHR T65.X, PV.W, literal.x, 4082; EG-NEXT: LSHR T49.Y, T49.X, literal.y, 4083; EG-NEXT: AND_INT * T49.X, 
T49.X, literal.z, 4084; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4085; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4086; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 4087; EG-NEXT: 224(3.138909e-43), 0(0.000000e+00) 4088; EG-NEXT: LSHR * T66.X, PV.W, literal.x, 4089; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4090; 4091; CM-LABEL: global_zextload_v64i16_to_v64i32: 4092; CM: ; %bb.0: 4093; CM-NEXT: ALU 0, @38, KC0[CB0:0-32], KC1[] 4094; CM-NEXT: TEX 3 @22 4095; CM-NEXT: ALU 50, @39, KC0[CB0:0-32], KC1[] 4096; CM-NEXT: TEX 3 @30 4097; CM-NEXT: ALU 78, @90, KC0[CB0:0-32], KC1[] 4098; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T65, T66.X 4099; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T63, T48.X 4100; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T62, T64.X 4101; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T60, T49.X 4102; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T59, T61.X 4103; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T57, T50.X 4104; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T56, T58.X 4105; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T54, T51.X 4106; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T53, T55.X 4107; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T35, T37.X 4108; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T47, T52.X 4109; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T45, T38.X 4110; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T44, T46.X 4111; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T42, T39.X 4112; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T41, T43.X 4113; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T40, T36.X 4114; CM-NEXT: CF_END 4115; CM-NEXT: Fetch clause starting at 22: 4116; CM-NEXT: VTX_READ_128 T36.XYZW, T35.X, 112, #1 4117; CM-NEXT: VTX_READ_128 T37.XYZW, T35.X, 64, #1 4118; CM-NEXT: VTX_READ_128 T38.XYZW, T35.X, 80, #1 4119; CM-NEXT: VTX_READ_128 T39.XYZW, T35.X, 96, #1 4120; CM-NEXT: Fetch clause starting at 30: 4121; CM-NEXT: VTX_READ_128 T48.XYZW, T35.X, 0, #1 4122; CM-NEXT: VTX_READ_128 T49.XYZW, T35.X, 16, #1 4123; CM-NEXT: VTX_READ_128 T50.XYZW, T35.X, 32, #1 4124; CM-NEXT: VTX_READ_128 T51.XYZW, 
T35.X, 48, #1 4125; CM-NEXT: ALU clause starting at 38: 4126; CM-NEXT: MOV * T35.X, KC0[2].Z, 4127; CM-NEXT: ALU clause starting at 39: 4128; CM-NEXT: LSHR * T40.W, T36.Y, literal.x, 4129; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4130; CM-NEXT: AND_INT * T40.Z, T36.Y, literal.x, 4131; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4132; CM-NEXT: LSHR T40.Y, T36.X, literal.x, 4133; CM-NEXT: LSHR * T41.W, T36.W, literal.x, 4134; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4135; CM-NEXT: AND_INT T40.X, T36.X, literal.x, 4136; CM-NEXT: AND_INT T41.Z, T36.W, literal.x, 4137; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4138; CM-NEXT: 65535(9.183409e-41), 224(3.138909e-43) 4139; CM-NEXT: LSHR T36.X, PV.W, literal.x, 4140; CM-NEXT: LSHR T41.Y, T36.Z, literal.y, 4141; CM-NEXT: LSHR * T42.W, T39.Y, literal.y, 4142; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4143; CM-NEXT: AND_INT T41.X, T36.Z, literal.x, 4144; CM-NEXT: AND_INT T42.Z, T39.Y, literal.x, 4145; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4146; CM-NEXT: 65535(9.183409e-41), 240(3.363116e-43) 4147; CM-NEXT: LSHR T43.X, PV.W, literal.x, 4148; CM-NEXT: LSHR T42.Y, T39.X, literal.y, 4149; CM-NEXT: LSHR * T44.W, T39.W, literal.y, 4150; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4151; CM-NEXT: AND_INT T42.X, T39.X, literal.x, 4152; CM-NEXT: AND_INT T44.Z, T39.W, literal.x, 4153; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4154; CM-NEXT: 65535(9.183409e-41), 192(2.690493e-43) 4155; CM-NEXT: LSHR T39.X, PV.W, literal.x, 4156; CM-NEXT: LSHR T44.Y, T39.Z, literal.y, 4157; CM-NEXT: LSHR * T45.W, T38.Y, literal.y, 4158; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4159; CM-NEXT: AND_INT T44.X, T39.Z, literal.x, 4160; CM-NEXT: AND_INT T45.Z, T38.Y, literal.x, 4161; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4162; CM-NEXT: 65535(9.183409e-41), 208(2.914701e-43) 4163; CM-NEXT: LSHR T46.X, PV.W, literal.x, 4164; CM-NEXT: LSHR T45.Y, T38.X, literal.y, 4165; CM-NEXT: LSHR * T47.W, T38.W, literal.y, 4166; CM-NEXT: 
2(2.802597e-45), 16(2.242078e-44) 4167; CM-NEXT: AND_INT T45.X, T38.X, literal.x, 4168; CM-NEXT: AND_INT T47.Z, T38.W, literal.x, 4169; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4170; CM-NEXT: 65535(9.183409e-41), 160(2.242078e-43) 4171; CM-NEXT: LSHR T38.X, PV.W, literal.x, 4172; CM-NEXT: LSHR T47.Y, T38.Z, literal.y, 4173; CM-NEXT: LSHR * T35.W, T37.Y, literal.y, 4174; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4175; CM-NEXT: AND_INT T47.X, T38.Z, literal.x, 4176; CM-NEXT: AND_INT T35.Z, T37.Y, literal.x, 4177; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4178; CM-NEXT: 65535(9.183409e-41), 176(2.466285e-43) 4179; CM-NEXT: ALU clause starting at 90: 4180; CM-NEXT: LSHR T52.X, T0.W, literal.x, 4181; CM-NEXT: LSHR T35.Y, T37.X, literal.y, 4182; CM-NEXT: LSHR * T53.W, T37.W, literal.y, BS:VEC_120/SCL_212 4183; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4184; CM-NEXT: AND_INT T35.X, T37.X, literal.x, 4185; CM-NEXT: AND_INT T53.Z, T37.W, literal.x, 4186; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4187; CM-NEXT: 65535(9.183409e-41), 128(1.793662e-43) 4188; CM-NEXT: LSHR T37.X, PV.W, literal.x, 4189; CM-NEXT: LSHR T53.Y, T37.Z, literal.y, 4190; CM-NEXT: LSHR * T54.W, T51.Y, literal.y, 4191; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4192; CM-NEXT: AND_INT T53.X, T37.Z, literal.x, 4193; CM-NEXT: AND_INT T54.Z, T51.Y, literal.x, 4194; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4195; CM-NEXT: 65535(9.183409e-41), 144(2.017870e-43) 4196; CM-NEXT: LSHR T55.X, PV.W, literal.x, 4197; CM-NEXT: LSHR T54.Y, T51.X, literal.y, 4198; CM-NEXT: LSHR * T56.W, T51.W, literal.y, 4199; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4200; CM-NEXT: AND_INT T54.X, T51.X, literal.x, 4201; CM-NEXT: AND_INT T56.Z, T51.W, literal.x, 4202; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4203; CM-NEXT: 65535(9.183409e-41), 96(1.345247e-43) 4204; CM-NEXT: LSHR T51.X, PV.W, literal.x, 4205; CM-NEXT: LSHR T56.Y, T51.Z, literal.y, 4206; CM-NEXT: LSHR * T57.W, T50.Y, literal.y, 4207; CM-NEXT: 
2(2.802597e-45), 16(2.242078e-44) 4208; CM-NEXT: AND_INT T56.X, T51.Z, literal.x, 4209; CM-NEXT: AND_INT T57.Z, T50.Y, literal.x, 4210; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4211; CM-NEXT: 65535(9.183409e-41), 112(1.569454e-43) 4212; CM-NEXT: LSHR T58.X, PV.W, literal.x, 4213; CM-NEXT: LSHR T57.Y, T50.X, literal.y, 4214; CM-NEXT: LSHR * T59.W, T50.W, literal.y, 4215; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4216; CM-NEXT: AND_INT T57.X, T50.X, literal.x, 4217; CM-NEXT: AND_INT T59.Z, T50.W, literal.x, 4218; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4219; CM-NEXT: 65535(9.183409e-41), 64(8.968310e-44) 4220; CM-NEXT: LSHR T50.X, PV.W, literal.x, 4221; CM-NEXT: LSHR T59.Y, T50.Z, literal.y, 4222; CM-NEXT: LSHR * T60.W, T49.Y, literal.y, 4223; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4224; CM-NEXT: AND_INT T59.X, T50.Z, literal.x, 4225; CM-NEXT: AND_INT T60.Z, T49.Y, literal.x, 4226; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4227; CM-NEXT: 65535(9.183409e-41), 80(1.121039e-43) 4228; CM-NEXT: LSHR T61.X, PV.W, literal.x, 4229; CM-NEXT: LSHR T60.Y, T49.X, literal.y, 4230; CM-NEXT: LSHR * T62.W, T49.W, literal.y, 4231; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4232; CM-NEXT: AND_INT T60.X, T49.X, literal.x, 4233; CM-NEXT: AND_INT T62.Z, T49.W, literal.x, 4234; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4235; CM-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 4236; CM-NEXT: LSHR T49.X, PV.W, literal.x, 4237; CM-NEXT: LSHR T62.Y, T49.Z, literal.y, 4238; CM-NEXT: LSHR * T63.W, T48.Y, literal.y, 4239; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4240; CM-NEXT: AND_INT T62.X, T49.Z, literal.x, 4241; CM-NEXT: AND_INT T63.Z, T48.Y, literal.x, 4242; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4243; CM-NEXT: 65535(9.183409e-41), 48(6.726233e-44) 4244; CM-NEXT: LSHR T64.X, PV.W, literal.x, 4245; CM-NEXT: LSHR T63.Y, T48.X, literal.y, 4246; CM-NEXT: LSHR * T65.W, T48.W, literal.y, 4247; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4248; CM-NEXT: AND_INT T63.X, 
T48.X, literal.x, 4249; CM-NEXT: AND_INT * T65.Z, T48.W, literal.x, 4250; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4251; CM-NEXT: LSHR T48.X, KC0[2].Y, literal.x, 4252; CM-NEXT: LSHR * T65.Y, T48.Z, literal.y, 4253; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4254; CM-NEXT: AND_INT T65.X, T48.Z, literal.x, 4255; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4256; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 4257; CM-NEXT: LSHR * T66.X, PV.W, literal.x, 4258; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4259 %load = load <64 x i16>, <64 x i16> addrspace(1)* %in 4260 %ext = zext <64 x i16> %load to <64 x i32> 4261 store <64 x i32> %ext, <64 x i32> addrspace(1)* %out 4262 ret void 4263} 4264 4265define amdgpu_kernel void @global_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 { 4266; GCN-NOHSA-SI-LABEL: global_sextload_v64i16_to_v64i32: 4267; GCN-NOHSA-SI: ; %bb.0: 4268; GCN-NOHSA-SI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 4269; GCN-NOHSA-SI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 4270; GCN-NOHSA-SI-NEXT: s_mov_b32 s14, -1 4271; GCN-NOHSA-SI-NEXT: s_mov_b32 s15, 0xe8f000 4272; GCN-NOHSA-SI-NEXT: s_add_u32 s12, s12, s3 4273; GCN-NOHSA-SI-NEXT: s_addc_u32 s13, s13, 0 4274; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 4275; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 4276; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 4277; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 4278; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 4279; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4280; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 4281; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 4282; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[32:35], off, s[8:11], 0 offset:112 4283; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:96 4284; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:80 4285; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:64 4286; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[16:19], off, s[8:11], 0 4287; 
GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:16 4288; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:32 4289; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:48 4290; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(3) 4291; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 16, v19 4292; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 16, v18 4293; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v19, 0, 16 4294; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v18, 0, 16 4295; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill 4296; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 4297; GCN-NOHSA-SI-NEXT: buffer_store_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill 4298; GCN-NOHSA-SI-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill 4299; GCN-NOHSA-SI-NEXT: buffer_store_dword v3, off, s[12:15], 0 offset:16 ; 4-byte Folded Spill 4300; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v39, 16, v17 4301; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v37, 16, v16 4302; GCN-NOHSA-SI-NEXT: v_bfe_i32 v38, v17, 0, 16 4303; GCN-NOHSA-SI-NEXT: v_bfe_i32 v36, v16, 0, 16 4304; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v19, 16, v23 4305; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v17, 16, v22 4306; GCN-NOHSA-SI-NEXT: v_bfe_i32 v18, v23, 0, 16 4307; GCN-NOHSA-SI-NEXT: v_bfe_i32 v16, v22, 0, 16 4308; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v43, 16, v21 4309; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v41, 16, v20 4310; GCN-NOHSA-SI-NEXT: v_bfe_i32 v42, v21, 0, 16 4311; GCN-NOHSA-SI-NEXT: v_bfe_i32 v40, v20, 0, 16 4312; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v23, 16, v27 4313; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v21, 16, v26 4314; GCN-NOHSA-SI-NEXT: v_bfe_i32 v22, v27, 0, 16 4315; GCN-NOHSA-SI-NEXT: v_bfe_i32 v20, v26, 0, 16 4316; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v47, 16, v25 4317; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v45, 16, v24 4318; GCN-NOHSA-SI-NEXT: v_bfe_i32 v46, v25, 0, 16 4319; GCN-NOHSA-SI-NEXT: v_bfe_i32 v44, v24, 0, 16 4320; 
GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v27, 16, v31 4321; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v25, 16, v30 4322; GCN-NOHSA-SI-NEXT: v_bfe_i32 v26, v31, 0, 16 4323; GCN-NOHSA-SI-NEXT: v_bfe_i32 v24, v30, 0, 16 4324; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v51, 16, v29 4325; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v49, 16, v28 4326; GCN-NOHSA-SI-NEXT: v_bfe_i32 v50, v29, 0, 16 4327; GCN-NOHSA-SI-NEXT: v_bfe_i32 v48, v28, 0, 16 4328; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v31, 16, v15 4329; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v29, 16, v14 4330; GCN-NOHSA-SI-NEXT: v_bfe_i32 v30, v15, 0, 16 4331; GCN-NOHSA-SI-NEXT: v_bfe_i32 v28, v14, 0, 16 4332; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v55, 16, v13 4333; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v53, 16, v12 4334; GCN-NOHSA-SI-NEXT: v_bfe_i32 v54, v13, 0, 16 4335; GCN-NOHSA-SI-NEXT: v_bfe_i32 v52, v12, 0, 16 4336; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v15, 16, v11 4337; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v13, 16, v10 4338; GCN-NOHSA-SI-NEXT: v_bfe_i32 v14, v11, 0, 16 4339; GCN-NOHSA-SI-NEXT: v_bfe_i32 v12, v10, 0, 16 4340; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v59, 16, v9 4341; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v57, 16, v8 4342; GCN-NOHSA-SI-NEXT: v_bfe_i32 v58, v9, 0, 16 4343; GCN-NOHSA-SI-NEXT: v_bfe_i32 v56, v8, 0, 16 4344; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v11, 16, v7 4345; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v9, 16, v6 4346; GCN-NOHSA-SI-NEXT: v_bfe_i32 v10, v7, 0, 16 4347; GCN-NOHSA-SI-NEXT: v_bfe_i32 v8, v6, 0, 16 4348; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v63, 16, v5 4349; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v61, 16, v4 4350; GCN-NOHSA-SI-NEXT: v_bfe_i32 v62, v5, 0, 16 4351; GCN-NOHSA-SI-NEXT: v_bfe_i32 v60, v4, 0, 16 4352; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v7, 16, v35 4353; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v5, 16, v34 4354; GCN-NOHSA-SI-NEXT: v_bfe_i32 v6, v35, 0, 16 4355; GCN-NOHSA-SI-NEXT: v_bfe_i32 v4, v34, 0, 16 4356; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 4357; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 16, 
v33 4358; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 16, v32 4359; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v33, 0, 16 4360; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v32, 0, 16 4361; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 4362; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 4363; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 4364; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:240 4365; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[60:63], off, s[0:3], 0 offset:192 4366; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208 4367; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[56:59], off, s[0:3], 0 offset:160 4368; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:176 4369; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[52:55], off, s[0:3], 0 offset:128 4370; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:144 4371; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[48:51], off, s[0:3], 0 offset:96 4372; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:112 4373; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[44:47], off, s[0:3], 0 offset:64 4374; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:80 4375; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[40:43], off, s[0:3], 0 offset:32 4376; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48 4377; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[36:39], off, s[0:3], 0 4378; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload 4379; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload 4380; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload 4381; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:16 ; 4-byte Folded Reload 4382; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 4383; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 4384; GCN-NOHSA-SI-NEXT: s_endpgm 
4385; 4386; GCN-HSA-LABEL: global_sextload_v64i16_to_v64i32: 4387; GCN-HSA: ; %bb.0: 4388; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4389; GCN-HSA-NEXT: s_movk_i32 s8, 0x70 4390; GCN-HSA-NEXT: s_movk_i32 s9, 0x60 4391; GCN-HSA-NEXT: s_movk_i32 s10, 0x50 4392; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4393; GCN-HSA-NEXT: s_add_u32 s4, s2, s8 4394; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 4395; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 4396; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 4397; GCN-HSA-NEXT: s_add_u32 s4, s2, s9 4398; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 4399; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 4400; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 4401; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 4402; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] 4403; GCN-HSA-NEXT: s_add_u32 s4, s2, s10 4404; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 4405; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 4406; GCN-HSA-NEXT: v_mov_b32_e32 v9, s5 4407; GCN-HSA-NEXT: v_mov_b32_e32 v8, s4 4408; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 4409; GCN-HSA-NEXT: flat_load_dwordx4 v[8:11], v[8:9] 4410; GCN-HSA-NEXT: flat_load_dwordx4 v[12:15], v[12:13] 4411; GCN-HSA-NEXT: s_add_u32 s4, s2, 64 4412; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 4413; GCN-HSA-NEXT: s_add_u32 s6, s2, 48 4414; GCN-HSA-NEXT: s_addc_u32 s7, s3, 0 4415; GCN-HSA-NEXT: v_mov_b32_e32 v17, s5 4416; GCN-HSA-NEXT: v_mov_b32_e32 v21, s7 4417; GCN-HSA-NEXT: v_mov_b32_e32 v16, s4 4418; GCN-HSA-NEXT: v_mov_b32_e32 v20, s6 4419; GCN-HSA-NEXT: flat_load_dwordx4 v[16:19], v[16:17] 4420; GCN-HSA-NEXT: flat_load_dwordx4 v[20:23], v[20:21] 4421; GCN-HSA-NEXT: s_add_u32 s4, s2, 32 4422; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 4423; GCN-HSA-NEXT: s_add_u32 s2, s2, 16 4424; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 4425; GCN-HSA-NEXT: v_mov_b32_e32 v29, s5 4426; GCN-HSA-NEXT: v_mov_b32_e32 v33, s3 4427; GCN-HSA-NEXT: v_mov_b32_e32 v28, s4 4428; GCN-HSA-NEXT: v_mov_b32_e32 v32, s2 4429; GCN-HSA-NEXT: flat_load_dwordx4 v[28:31], v[28:29] 4430; GCN-HSA-NEXT: flat_load_dwordx4 v[32:35], v[32:33] 4431; 
GCN-HSA-NEXT: s_add_u32 s4, s0, 16 4432; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 4433; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xe0 4434; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4435; GCN-HSA-NEXT: s_waitcnt vmcnt(7) 4436; GCN-HSA-NEXT: v_ashrrev_i32_e32 v27, 16, v1 4437; GCN-HSA-NEXT: v_ashrrev_i32_e32 v25, 16, v0 4438; GCN-HSA-NEXT: v_bfe_i32 v26, v1, 0, 16 4439; GCN-HSA-NEXT: v_bfe_i32 v24, v0, 0, 16 4440; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 4441; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 4442; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xf0 4443; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[24:27] 4444; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4445; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 4446; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 4447; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xc0 4448; GCN-HSA-NEXT: v_ashrrev_i32_e32 v27, 16, v3 4449; GCN-HSA-NEXT: v_bfe_i32 v26, v3, 0, 16 4450; GCN-HSA-NEXT: v_ashrrev_i32_e32 v25, 16, v2 4451; GCN-HSA-NEXT: v_bfe_i32 v24, v2, 0, 16 4452; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4453; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[24:27] 4454; GCN-HSA-NEXT: s_waitcnt vmcnt(8) 4455; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v5 4456; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v4 4457; GCN-HSA-NEXT: v_bfe_i32 v2, v5, 0, 16 4458; GCN-HSA-NEXT: v_bfe_i32 v0, v4, 0, 16 4459; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 4460; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 4461; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xd0 4462; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4463; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4464; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 4465; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 4466; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v7 4467; GCN-HSA-NEXT: v_bfe_i32 v2, v7, 0, 16 4468; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v6 4469; GCN-HSA-NEXT: v_bfe_i32 v0, v6, 0, 16 4470; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4471; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xa0 4472; GCN-HSA-NEXT: s_waitcnt vmcnt(8) 4473; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v13 4474; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v12 4475; GCN-HSA-NEXT: 
v_bfe_i32 v2, v13, 0, 16 4476; GCN-HSA-NEXT: v_bfe_i32 v0, v12, 0, 16 4477; GCN-HSA-NEXT: v_mov_b32_e32 v13, s1 4478; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4479; GCN-HSA-NEXT: v_mov_b32_e32 v12, s0 4480; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[0:3] 4481; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3 4482; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v9 4483; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v8 4484; GCN-HSA-NEXT: v_bfe_i32 v2, v9, 0, 16 4485; GCN-HSA-NEXT: v_bfe_i32 v0, v8, 0, 16 4486; GCN-HSA-NEXT: v_mov_b32_e32 v9, s5 4487; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2 4488; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xb0 4489; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 16, v15 4490; GCN-HSA-NEXT: v_bfe_i32 v6, v15, 0, 16 4491; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 16, v14 4492; GCN-HSA-NEXT: v_bfe_i32 v4, v14, 0, 16 4493; GCN-HSA-NEXT: v_mov_b32_e32 v8, s4 4494; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 4495; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[0:3] 4496; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4497; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 4498; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 4499; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x80 4500; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4501; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 4502; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 4503; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x90 4504; GCN-HSA-NEXT: v_ashrrev_i32_e32 v14, 16, v11 4505; GCN-HSA-NEXT: v_bfe_i32 v13, v11, 0, 16 4506; GCN-HSA-NEXT: v_ashrrev_i32_e32 v12, 16, v10 4507; GCN-HSA-NEXT: v_bfe_i32 v11, v10, 0, 16 4508; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[11:14] 4509; GCN-HSA-NEXT: s_waitcnt vmcnt(11) 4510; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v17 4511; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v16 4512; GCN-HSA-NEXT: v_bfe_i32 v2, v17, 0, 16 4513; GCN-HSA-NEXT: v_bfe_i32 v0, v16, 0, 16 4514; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4515; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4516; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 4517; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 4518; GCN-HSA-NEXT: s_add_u32 s2, s0, s9 4519; GCN-HSA-NEXT: 
v_ashrrev_i32_e32 v3, 16, v19 4520; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v18 4521; GCN-HSA-NEXT: v_bfe_i32 v2, v19, 0, 16 4522; GCN-HSA-NEXT: v_bfe_i32 v0, v18, 0, 16 4523; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4524; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4525; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 4526; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 4527; GCN-HSA-NEXT: s_add_u32 s2, s0, s8 4528; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4529; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 4530; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 4531; GCN-HSA-NEXT: s_add_u32 s2, s0, 64 4532; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4533; GCN-HSA-NEXT: v_mov_b32_e32 v17, s3 4534; GCN-HSA-NEXT: v_mov_b32_e32 v16, s2 4535; GCN-HSA-NEXT: s_add_u32 s2, s0, s10 4536; GCN-HSA-NEXT: s_waitcnt vmcnt(12) 4537; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 16, v23 4538; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 16, v22 4539; GCN-HSA-NEXT: v_bfe_i32 v10, v23, 0, 16 4540; GCN-HSA-NEXT: v_bfe_i32 v8, v22, 0, 16 4541; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[8:11] 4542; GCN-HSA-NEXT: s_waitcnt vmcnt(12) 4543; GCN-HSA-NEXT: v_ashrrev_i32_e32 v15, 16, v29 4544; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 16, v28 4545; GCN-HSA-NEXT: v_bfe_i32 v14, v29, 0, 16 4546; GCN-HSA-NEXT: v_bfe_i32 v12, v28, 0, 16 4547; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4548; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[12:15] 4549; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 16, v31 4550; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 4551; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 4552; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 4553; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4554; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 16, v30 4555; GCN-HSA-NEXT: v_bfe_i32 v10, v31, 0, 16 4556; GCN-HSA-NEXT: v_bfe_i32 v8, v30, 0, 16 4557; GCN-HSA-NEXT: s_add_u32 s0, s0, 48 4558; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[8:11] 4559; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v21 4560; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 4561; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v20 4562; GCN-HSA-NEXT: v_bfe_i32 v2, v21, 0, 16 4563; 
GCN-HSA-NEXT: v_bfe_i32 v0, v20, 0, 16 4564; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4565; GCN-HSA-NEXT: s_waitcnt vmcnt(14) 4566; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 16, v33 4567; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 16, v32 4568; GCN-HSA-NEXT: v_bfe_i32 v6, v33, 0, 16 4569; GCN-HSA-NEXT: v_bfe_i32 v4, v32, 0, 16 4570; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 4571; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 4572; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 4573; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v35 4574; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 4575; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v34 4576; GCN-HSA-NEXT: v_bfe_i32 v2, v35, 0, 16 4577; GCN-HSA-NEXT: v_bfe_i32 v0, v34, 0, 16 4578; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 4579; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4580; GCN-HSA-NEXT: s_endpgm 4581; 4582; GCN-NOHSA-VI-LABEL: global_sextload_v64i16_to_v64i32: 4583; GCN-NOHSA-VI: ; %bb.0: 4584; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 4585; GCN-NOHSA-VI-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0 4586; GCN-NOHSA-VI-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1 4587; GCN-NOHSA-VI-NEXT: s_mov_b32 s90, -1 4588; GCN-NOHSA-VI-NEXT: s_mov_b32 s91, 0xe80000 4589; GCN-NOHSA-VI-NEXT: s_add_u32 s88, s88, s3 4590; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 4591; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 4592; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 4593; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4594; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 4595; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 4596; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 4597; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:96 4598; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:80 4599; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:64 4600; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 4601; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:16 4602; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[20:23], off, s[8:11], 0 
offset:32 4603; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:48 4604; GCN-NOHSA-VI-NEXT: s_addc_u32 s89, s89, 0 4605; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 4606; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 4607; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(6) 4608; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v59, 16, v1 4609; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v57, 16, v0 4610; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(4) 4611; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v51, 16, v9 4612; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(3) 4613; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v31, 16, v15 4614; GCN-NOHSA-VI-NEXT: v_bfe_i32 v30, v15, 0, 16 4615; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v29, 16, v14 4616; GCN-NOHSA-VI-NEXT: v_bfe_i32 v28, v14, 0, 16 4617; GCN-NOHSA-VI-NEXT: buffer_store_dword v28, off, s[88:91], 0 offset:4 ; 4-byte Folded Spill 4618; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 4619; GCN-NOHSA-VI-NEXT: buffer_store_dword v29, off, s[88:91], 0 offset:8 ; 4-byte Folded Spill 4620; GCN-NOHSA-VI-NEXT: buffer_store_dword v30, off, s[88:91], 0 offset:12 ; 4-byte Folded Spill 4621; GCN-NOHSA-VI-NEXT: buffer_store_dword v31, off, s[88:91], 0 offset:16 ; 4-byte Folded Spill 4622; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[60:63], off, s[8:11], 0 offset:112 4623; GCN-NOHSA-VI-NEXT: v_bfe_i32 v50, v9, 0, 16 4624; GCN-NOHSA-VI-NEXT: v_bfe_i32 v58, v1, 0, 16 4625; GCN-NOHSA-VI-NEXT: v_bfe_i32 v56, v0, 0, 16 4626; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v31, 16, v19 4627; GCN-NOHSA-VI-NEXT: v_bfe_i32 v30, v19, 0, 16 4628; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v29, 16, v18 4629; GCN-NOHSA-VI-NEXT: v_bfe_i32 v28, v18, 0, 16 4630; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v39, 16, v17 4631; GCN-NOHSA-VI-NEXT: v_bfe_i32 v38, v17, 0, 16 4632; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v37, 16, v16 4633; GCN-NOHSA-VI-NEXT: v_bfe_i32 v36, v16, 0, 16 4634; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v19, 16, v23 4635; GCN-NOHSA-VI-NEXT: v_bfe_i32 v18, v23, 0, 16 4636; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v17, 16, v22 4637; 
GCN-NOHSA-VI-NEXT: v_bfe_i32 v16, v22, 0, 16 4638; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v43, 16, v21 4639; GCN-NOHSA-VI-NEXT: v_bfe_i32 v42, v21, 0, 16 4640; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v41, 16, v20 4641; GCN-NOHSA-VI-NEXT: v_bfe_i32 v40, v20, 0, 16 4642; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v23, 16, v27 4643; GCN-NOHSA-VI-NEXT: v_bfe_i32 v22, v27, 0, 16 4644; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v21, 16, v26 4645; GCN-NOHSA-VI-NEXT: v_bfe_i32 v20, v26, 0, 16 4646; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v47, 16, v25 4647; GCN-NOHSA-VI-NEXT: v_bfe_i32 v46, v25, 0, 16 4648; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v45, 16, v24 4649; GCN-NOHSA-VI-NEXT: v_bfe_i32 v44, v24, 0, 16 4650; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v27, 16, v11 4651; GCN-NOHSA-VI-NEXT: v_bfe_i32 v26, v11, 0, 16 4652; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v25, 16, v10 4653; GCN-NOHSA-VI-NEXT: v_bfe_i32 v24, v10, 0, 16 4654; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v49, 16, v8 4655; GCN-NOHSA-VI-NEXT: v_bfe_i32 v48, v8, 0, 16 4656; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v11, 16, v3 4657; GCN-NOHSA-VI-NEXT: v_bfe_i32 v10, v3, 0, 16 4658; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 16, v2 4659; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v2, 0, 16 4660; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v35, 16, v13 4661; GCN-NOHSA-VI-NEXT: v_bfe_i32 v34, v13, 0, 16 4662; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v33, 16, v12 4663; GCN-NOHSA-VI-NEXT: v_bfe_i32 v32, v12, 0, 16 4664; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v15, 16, v7 4665; GCN-NOHSA-VI-NEXT: v_bfe_i32 v14, v7, 0, 16 4666; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v13, 16, v6 4667; GCN-NOHSA-VI-NEXT: v_bfe_i32 v12, v6, 0, 16 4668; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v55, 16, v5 4669; GCN-NOHSA-VI-NEXT: v_bfe_i32 v54, v5, 0, 16 4670; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v53, 16, v4 4671; GCN-NOHSA-VI-NEXT: v_bfe_i32 v52, v4, 0, 16 4672; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 4673; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 16, v61 4674; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 16, 
v60 4675; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v61, 0, 16 4676; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v60, 0, 16 4677; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 16, v63 4678; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v5, 16, v62 4679; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v63, 0, 16 4680; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v62, 0, 16 4681; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 4682; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:240 4683; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[56:59], off, s[0:3], 0 offset:192 4684; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208 4685; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[52:55], off, s[0:3], 0 offset:160 4686; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:176 4687; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[48:51], off, s[0:3], 0 offset:128 4688; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:144 4689; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[44:47], off, s[0:3], 0 offset:96 4690; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:112 4691; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[40:43], off, s[0:3], 0 offset:64 4692; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:80 4693; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[36:39], off, s[0:3], 0 offset:32 4694; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:48 4695; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 4696; GCN-NOHSA-VI-NEXT: buffer_load_dword v0, off, s[88:91], 0 offset:4 ; 4-byte Folded Reload 4697; GCN-NOHSA-VI-NEXT: buffer_load_dword v1, off, s[88:91], 0 offset:8 ; 4-byte Folded Reload 4698; GCN-NOHSA-VI-NEXT: buffer_load_dword v2, off, s[88:91], 0 offset:12 ; 4-byte Folded Reload 4699; GCN-NOHSA-VI-NEXT: buffer_load_dword v3, off, s[88:91], 0 offset:16 ; 4-byte Folded Reload 4700; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 4701; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 
v[0:3], off, s[0:3], 0 offset:16 4702; GCN-NOHSA-VI-NEXT: s_endpgm 4703; 4704; EG-LABEL: global_sextload_v64i16_to_v64i32: 4705; EG: ; %bb.0: 4706; EG-NEXT: ALU 18, @38, KC0[CB0:0-32], KC1[] 4707; EG-NEXT: TEX 7 @22 4708; EG-NEXT: ALU 75, @57, KC0[CB0:0-32], KC1[] 4709; EG-NEXT: ALU 71, @133, KC0[CB0:0-32], KC1[] 4710; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T41.X, 0 4711; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T65.XYZW, T66.X, 0 4712; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T56.X, 0 4713; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T64.XYZW, T55.X, 0 4714; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T54.X, 0 4715; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T53.X, 0 4716; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T52.X, 0 4717; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T51.X, 0 4718; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T50.X, 0 4719; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T61.XYZW, T49.X, 0 4720; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T40.X, 0 4721; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T39.X, 0 4722; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T38.X, 0 4723; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T59.XYZW, T37.X, 0 4724; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T58.XYZW, T36.X, 0 4725; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T35.X, 1 4726; EG-NEXT: CF_END 4727; EG-NEXT: PAD 4728; EG-NEXT: Fetch clause starting at 22: 4729; EG-NEXT: VTX_READ_128 T42.XYZW, T41.X, 16, #1 4730; EG-NEXT: VTX_READ_128 T43.XYZW, T41.X, 32, #1 4731; EG-NEXT: VTX_READ_128 T44.XYZW, T41.X, 0, #1 4732; EG-NEXT: VTX_READ_128 T45.XYZW, T41.X, 48, #1 4733; EG-NEXT: VTX_READ_128 T46.XYZW, T41.X, 64, #1 4734; EG-NEXT: VTX_READ_128 T47.XYZW, T41.X, 80, #1 4735; EG-NEXT: VTX_READ_128 T48.XYZW, T41.X, 96, #1 4736; EG-NEXT: VTX_READ_128 T41.XYZW, T41.X, 112, #1 4737; EG-NEXT: ALU clause starting at 38: 4738; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 4739; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4740; EG-NEXT: LSHR T35.X, PV.W, literal.x, 
4741; EG-NEXT: LSHR * T36.X, KC0[2].Y, literal.x, 4742; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4743; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 4744; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 4745; EG-NEXT: LSHR T37.X, PV.W, literal.x, 4746; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4747; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 4748; EG-NEXT: LSHR T38.X, PV.W, literal.x, 4749; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4750; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 4751; EG-NEXT: LSHR T39.X, PV.W, literal.x, 4752; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4753; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 4754; EG-NEXT: LSHR T40.X, PV.W, literal.x, 4755; EG-NEXT: MOV * T41.X, KC0[2].Z, 4756; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4757; EG-NEXT: ALU clause starting at 57: 4758; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 4759; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00) 4760; EG-NEXT: LSHR T49.X, PV.W, literal.x, 4761; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4762; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 4763; EG-NEXT: LSHR T50.X, PV.W, literal.x, 4764; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4765; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43) 4766; EG-NEXT: LSHR T51.X, PV.W, literal.x, 4767; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4768; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43) 4769; EG-NEXT: LSHR T52.X, PV.W, literal.x, 4770; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4771; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43) 4772; EG-NEXT: LSHR T53.X, PV.W, literal.x, 4773; EG-NEXT: LSHR T0.Y, T41.Y, literal.y, 4774; EG-NEXT: LSHR T0.Z, T41.W, literal.y, 4775; EG-NEXT: LSHR T0.W, T48.Y, literal.y, BS:VEC_120/SCL_212 4776; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 4777; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4778; EG-NEXT: 160(2.242078e-43), 0(0.000000e+00) 4779; EG-NEXT: LSHR T54.X, PS, literal.x, 4780; EG-NEXT: LSHR T1.Y, T48.W, literal.y, 4781; EG-NEXT: LSHR T1.Z, T47.Y, literal.y, 4782; EG-NEXT: LSHR T1.W, T47.W, 
literal.y, BS:VEC_120/SCL_212 4783; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.z, 4784; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4785; EG-NEXT: 208(2.914701e-43), 0(0.000000e+00) 4786; EG-NEXT: LSHR T55.X, PS, literal.x, 4787; EG-NEXT: LSHR T2.Y, T46.Y, literal.y, 4788; EG-NEXT: LSHR T2.Z, T46.W, literal.y, 4789; EG-NEXT: LSHR T2.W, T45.Y, literal.y, BS:VEC_120/SCL_212 4790; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.z, 4791; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4792; EG-NEXT: 192(2.690493e-43), 0(0.000000e+00) 4793; EG-NEXT: LSHR T56.X, PS, literal.x, 4794; EG-NEXT: LSHR T3.Y, T45.W, literal.y, 4795; EG-NEXT: BFE_INT T57.Z, T44.W, 0.0, literal.y, BS:VEC_120/SCL_212 4796; EG-NEXT: LSHR T3.W, T43.Y, literal.y, 4797; EG-NEXT: LSHR * T4.W, T43.W, literal.y, 4798; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4799; EG-NEXT: BFE_INT T57.X, T44.Z, 0.0, literal.x, 4800; EG-NEXT: LSHR T4.Y, T42.Y, literal.x, 4801; EG-NEXT: BFE_INT T58.Z, T44.Y, 0.0, literal.x, BS:VEC_120/SCL_212 4802; EG-NEXT: LSHR T5.W, T42.W, literal.x, 4803; EG-NEXT: LSHR * T6.W, T44.W, literal.x, 4804; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4805; EG-NEXT: BFE_INT T58.X, T44.X, 0.0, literal.x, 4806; EG-NEXT: LSHR T5.Y, T44.Y, literal.x, 4807; EG-NEXT: BFE_INT T59.Z, T42.W, 0.0, literal.x, 4808; EG-NEXT: BFE_INT T57.W, PS, 0.0, literal.x, 4809; EG-NEXT: LSHR * T6.W, T44.Z, literal.x, 4810; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4811; EG-NEXT: BFE_INT T59.X, T42.Z, 0.0, literal.x, 4812; EG-NEXT: BFE_INT T57.Y, PS, 0.0, literal.x, 4813; EG-NEXT: BFE_INT T44.Z, T42.Y, 0.0, literal.x, 4814; EG-NEXT: BFE_INT T58.W, PV.Y, 0.0, literal.x, 4815; EG-NEXT: LSHR * T6.W, T44.X, literal.x, 4816; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4817; EG-NEXT: BFE_INT T44.X, T42.X, 0.0, literal.x, 4818; EG-NEXT: BFE_INT T58.Y, PS, 0.0, literal.x, 4819; EG-NEXT: BFE_INT T60.Z, T43.W, 0.0, literal.x, 4820; EG-NEXT: BFE_INT T59.W, T5.W, 0.0, literal.x, BS:VEC_120/SCL_212 4821; EG-NEXT: LSHR * T5.W, T42.Z, 
literal.x, 4822; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4823; EG-NEXT: BFE_INT T60.X, T43.Z, 0.0, literal.x, 4824; EG-NEXT: BFE_INT T59.Y, PS, 0.0, literal.x, 4825; EG-NEXT: BFE_INT T42.Z, T43.Y, 0.0, literal.x, 4826; EG-NEXT: BFE_INT T44.W, T4.Y, 0.0, literal.x, BS:VEC_120/SCL_212 4827; EG-NEXT: LSHR * T5.W, T42.X, literal.x, 4828; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4829; EG-NEXT: BFE_INT T42.X, T43.X, 0.0, literal.x, 4830; EG-NEXT: BFE_INT T44.Y, PS, 0.0, literal.x, 4831; EG-NEXT: BFE_INT T61.Z, T45.W, 0.0, literal.x, 4832; EG-NEXT: BFE_INT * T60.W, T4.W, 0.0, literal.x, BS:VEC_120/SCL_212 4833; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4834; EG-NEXT: ALU clause starting at 133: 4835; EG-NEXT: LSHR * T4.W, T43.Z, literal.x, 4836; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4837; EG-NEXT: BFE_INT T61.X, T45.Z, 0.0, literal.x, 4838; EG-NEXT: BFE_INT T60.Y, PV.W, 0.0, literal.x, 4839; EG-NEXT: BFE_INT T43.Z, T45.Y, 0.0, literal.x, 4840; EG-NEXT: BFE_INT T42.W, T3.W, 0.0, literal.x, 4841; EG-NEXT: LSHR * T3.W, T43.X, literal.x, 4842; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4843; EG-NEXT: BFE_INT T43.X, T45.X, 0.0, literal.x, 4844; EG-NEXT: BFE_INT T42.Y, PS, 0.0, literal.x, 4845; EG-NEXT: BFE_INT T62.Z, T46.W, 0.0, literal.x, 4846; EG-NEXT: BFE_INT T61.W, T3.Y, 0.0, literal.x, 4847; EG-NEXT: LSHR * T3.W, T45.Z, literal.x, 4848; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4849; EG-NEXT: BFE_INT T62.X, T46.Z, 0.0, literal.x, 4850; EG-NEXT: BFE_INT T61.Y, PS, 0.0, literal.x, 4851; EG-NEXT: BFE_INT T45.Z, T46.Y, 0.0, literal.x, 4852; EG-NEXT: BFE_INT T43.W, T2.W, 0.0, literal.x, 4853; EG-NEXT: LSHR * T2.W, T45.X, literal.x, 4854; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4855; EG-NEXT: BFE_INT T45.X, T46.X, 0.0, literal.x, 4856; EG-NEXT: BFE_INT T43.Y, PS, 0.0, literal.x, 4857; EG-NEXT: BFE_INT T63.Z, T47.W, 0.0, literal.x, 4858; EG-NEXT: BFE_INT T62.W, T2.Z, 0.0, literal.x, 4859; EG-NEXT: LSHR * T2.W, T46.Z, literal.x, 4860; EG-NEXT: 16(2.242078e-44), 
0(0.000000e+00) 4861; EG-NEXT: BFE_INT T63.X, T47.Z, 0.0, literal.x, 4862; EG-NEXT: BFE_INT T62.Y, PS, 0.0, literal.x, 4863; EG-NEXT: BFE_INT T46.Z, T47.Y, 0.0, literal.x, 4864; EG-NEXT: BFE_INT T45.W, T2.Y, 0.0, literal.x, BS:VEC_120/SCL_212 4865; EG-NEXT: LSHR * T2.W, T46.X, literal.x, 4866; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4867; EG-NEXT: BFE_INT T46.X, T47.X, 0.0, literal.x, 4868; EG-NEXT: BFE_INT T45.Y, PS, 0.0, literal.x, 4869; EG-NEXT: BFE_INT T64.Z, T48.W, 0.0, literal.x, 4870; EG-NEXT: BFE_INT T63.W, T1.W, 0.0, literal.x, BS:VEC_120/SCL_212 4871; EG-NEXT: LSHR * T1.W, T47.Z, literal.x, 4872; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4873; EG-NEXT: BFE_INT T64.X, T48.Z, 0.0, literal.x, 4874; EG-NEXT: BFE_INT T63.Y, PS, 0.0, literal.x, 4875; EG-NEXT: BFE_INT T47.Z, T48.Y, 0.0, literal.x, 4876; EG-NEXT: BFE_INT T46.W, T1.Z, 0.0, literal.x, BS:VEC_120/SCL_212 4877; EG-NEXT: LSHR * T1.W, T47.X, literal.x, 4878; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4879; EG-NEXT: BFE_INT T47.X, T48.X, 0.0, literal.x, 4880; EG-NEXT: BFE_INT T46.Y, PS, 0.0, literal.x, 4881; EG-NEXT: BFE_INT T65.Z, T41.W, 0.0, literal.x, 4882; EG-NEXT: BFE_INT T64.W, T1.Y, 0.0, literal.x, 4883; EG-NEXT: LSHR * T1.W, T48.Z, literal.x, 4884; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4885; EG-NEXT: BFE_INT T65.X, T41.Z, 0.0, literal.x, 4886; EG-NEXT: BFE_INT T64.Y, PS, 0.0, literal.x, 4887; EG-NEXT: BFE_INT T48.Z, T41.Y, 0.0, literal.x, 4888; EG-NEXT: BFE_INT T47.W, T0.W, 0.0, literal.x, 4889; EG-NEXT: LSHR * T0.W, T48.X, literal.x, 4890; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4891; EG-NEXT: BFE_INT T48.X, T41.X, 0.0, literal.x, 4892; EG-NEXT: BFE_INT T47.Y, PS, 0.0, literal.x, 4893; EG-NEXT: LSHR T1.Z, T41.Z, literal.x, 4894; EG-NEXT: BFE_INT T65.W, T0.Z, 0.0, literal.x, BS:VEC_120/SCL_212 4895; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4896; EG-NEXT: 16(2.242078e-44), 240(3.363116e-43) 4897; EG-NEXT: LSHR T66.X, PS, literal.x, 4898; EG-NEXT: BFE_INT T65.Y, PV.Z, 0.0, 
literal.y, 4899; EG-NEXT: LSHR T0.Z, T41.X, literal.y, 4900; EG-NEXT: BFE_INT T48.W, T0.Y, 0.0, literal.y, 4901; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 4902; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4903; EG-NEXT: 224(3.138909e-43), 0(0.000000e+00) 4904; EG-NEXT: LSHR T41.X, PS, literal.x, 4905; EG-NEXT: BFE_INT * T48.Y, PV.Z, 0.0, literal.y, 4906; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4907; 4908; CM-LABEL: global_sextload_v64i16_to_v64i32: 4909; CM: ; %bb.0: 4910; CM-NEXT: ALU 0, @40, KC0[CB0:0-32], KC1[] 4911; CM-NEXT: TEX 1 @24 4912; CM-NEXT: ALU 15, @41, KC0[CB0:0-32], KC1[] 4913; CM-NEXT: TEX 5 @28 4914; CM-NEXT: ALU 82, @57, KC0[CB0:0-32], KC1[] 4915; CM-NEXT: ALU 72, @140, KC0[CB0:0-32], KC1[] 4916; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T65, T66.X 4917; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T36, T37.X 4918; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T64, T56.X 4919; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T35, T55.X 4920; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T63, T54.X 4921; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T45, T53.X 4922; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T62, T52.X 4923; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T44, T51.X 4924; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T61, T50.X 4925; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T43, T49.X 4926; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T60, T48.X 4927; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T42, T47.X 4928; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T59, T46.X 4929; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T41, T40.X 4930; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T58, T39.X 4931; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T57, T38.X 4932; CM-NEXT: CF_END 4933; CM-NEXT: PAD 4934; CM-NEXT: Fetch clause starting at 24: 4935; CM-NEXT: VTX_READ_128 T36.XYZW, T35.X, 16, #1 4936; CM-NEXT: VTX_READ_128 T37.XYZW, T35.X, 0, #1 4937; CM-NEXT: Fetch clause starting at 28: 4938; CM-NEXT: VTX_READ_128 T41.XYZW, T35.X, 112, #1 4939; CM-NEXT: VTX_READ_128 T42.XYZW, T35.X, 96, #1 4940; CM-NEXT: VTX_READ_128 T43.XYZW, 
T35.X, 80, #1 4941; CM-NEXT: VTX_READ_128 T44.XYZW, T35.X, 64, #1 4942; CM-NEXT: VTX_READ_128 T45.XYZW, T35.X, 48, #1 4943; CM-NEXT: VTX_READ_128 T35.XYZW, T35.X, 32, #1 4944; CM-NEXT: ALU clause starting at 40: 4945; CM-NEXT: MOV * T35.X, KC0[2].Z, 4946; CM-NEXT: ALU clause starting at 41: 4947; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 4948; CM-NEXT: 224(3.138909e-43), 0(0.000000e+00) 4949; CM-NEXT: LSHR T38.X, PV.W, literal.x, 4950; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4951; CM-NEXT: 2(2.802597e-45), 240(3.363116e-43) 4952; CM-NEXT: LSHR T39.X, PV.W, literal.x, 4953; CM-NEXT: LSHR T0.Y, T37.Z, literal.y, 4954; CM-NEXT: LSHR T0.Z, T37.W, literal.y, 4955; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 4956; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4957; CM-NEXT: 192(2.690493e-43), 0(0.000000e+00) 4958; CM-NEXT: LSHR T40.X, PV.W, literal.x, 4959; CM-NEXT: LSHR T1.Y, T37.Y, literal.y, 4960; CM-NEXT: LSHR T1.Z, T36.Z, literal.y, 4961; CM-NEXT: LSHR * T0.W, T36.W, literal.y, 4962; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4963; CM-NEXT: ALU clause starting at 57: 4964; CM-NEXT: LSHR T2.Z, T36.X, literal.x, 4965; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 4966; CM-NEXT: 16(2.242078e-44), 208(2.914701e-43) 4967; CM-NEXT: LSHR T46.X, PV.W, literal.x, 4968; CM-NEXT: LSHR T2.Y, T36.Y, literal.y, 4969; CM-NEXT: LSHR T3.Z, T35.Z, literal.y, 4970; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 4971; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4972; CM-NEXT: 160(2.242078e-43), 0(0.000000e+00) 4973; CM-NEXT: LSHR T47.X, PV.W, literal.x, 4974; CM-NEXT: LSHR T3.Y, T35.W, literal.y, 4975; CM-NEXT: LSHR T4.Z, T35.X, literal.y, 4976; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 4977; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4978; CM-NEXT: 176(2.466285e-43), 0(0.000000e+00) 4979; CM-NEXT: LSHR T48.X, PV.W, literal.x, 4980; CM-NEXT: LSHR T4.Y, T35.Y, literal.y, 4981; CM-NEXT: LSHR T5.Z, T45.Z, literal.y, 4982; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 4983; 
CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4984; CM-NEXT: 128(1.793662e-43), 0(0.000000e+00) 4985; CM-NEXT: LSHR T49.X, PV.W, literal.x, 4986; CM-NEXT: LSHR T5.Y, T45.W, literal.y, 4987; CM-NEXT: LSHR T6.Z, T45.X, literal.y, 4988; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 4989; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4990; CM-NEXT: 144(2.017870e-43), 0(0.000000e+00) 4991; CM-NEXT: LSHR T50.X, PV.W, literal.x, 4992; CM-NEXT: LSHR T6.Y, T45.Y, literal.y, 4993; CM-NEXT: LSHR T7.Z, T44.Z, literal.y, 4994; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 4995; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4996; CM-NEXT: 96(1.345247e-43), 0(0.000000e+00) 4997; CM-NEXT: LSHR T51.X, PV.W, literal.x, 4998; CM-NEXT: LSHR T7.Y, T44.W, literal.y, 4999; CM-NEXT: LSHR T8.Z, T44.X, literal.y, 5000; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 5001; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5002; CM-NEXT: 112(1.569454e-43), 0(0.000000e+00) 5003; CM-NEXT: LSHR T52.X, PV.W, literal.x, 5004; CM-NEXT: LSHR T8.Y, T44.Y, literal.y, 5005; CM-NEXT: LSHR T9.Z, T43.Z, literal.y, 5006; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 5007; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5008; CM-NEXT: 64(8.968310e-44), 0(0.000000e+00) 5009; CM-NEXT: LSHR T53.X, PV.W, literal.x, 5010; CM-NEXT: LSHR T9.Y, T43.W, literal.y, 5011; CM-NEXT: LSHR T10.Z, T43.X, literal.y, 5012; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 5013; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5014; CM-NEXT: 80(1.121039e-43), 0(0.000000e+00) 5015; CM-NEXT: LSHR T54.X, PV.W, literal.x, 5016; CM-NEXT: LSHR T10.Y, T43.Y, literal.y, 5017; CM-NEXT: LSHR T11.Z, T42.Z, literal.y, 5018; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 5019; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5020; CM-NEXT: 32(4.484155e-44), 0(0.000000e+00) 5021; CM-NEXT: LSHR T55.X, PV.W, literal.x, 5022; CM-NEXT: LSHR T11.Y, T42.W, literal.y, 5023; CM-NEXT: LSHR T12.Z, T42.X, literal.y, 5024; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 5025; CM-NEXT: 
2(2.802597e-45), 16(2.242078e-44) 5026; CM-NEXT: 48(6.726233e-44), 0(0.000000e+00) 5027; CM-NEXT: LSHR T56.X, PV.W, literal.x, 5028; CM-NEXT: LSHR T12.Y, T42.Y, literal.y, 5029; CM-NEXT: BFE_INT T57.Z, T41.Y, 0.0, literal.y, BS:VEC_120/SCL_212 5030; CM-NEXT: LSHR * T1.W, T41.Z, literal.y, 5031; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5032; CM-NEXT: BFE_INT T57.X, T41.X, 0.0, literal.x, 5033; CM-NEXT: LSHR T13.Y, T41.W, literal.x, 5034; CM-NEXT: BFE_INT T58.Z, T41.W, 0.0, literal.x, 5035; CM-NEXT: LSHR * T2.W, T41.Y, literal.x, 5036; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5037; CM-NEXT: BFE_INT T58.X, T41.Z, 0.0, literal.x, 5038; CM-NEXT: LSHR T14.Y, T41.X, literal.x, 5039; CM-NEXT: BFE_INT T41.Z, T42.Y, 0.0, literal.x, 5040; CM-NEXT: BFE_INT * T57.W, PV.W, 0.0, literal.x, 5041; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5042; CM-NEXT: BFE_INT T41.X, T42.X, 0.0, literal.x, 5043; CM-NEXT: BFE_INT T57.Y, PV.Y, 0.0, literal.x, 5044; CM-NEXT: BFE_INT T59.Z, T42.W, 0.0, literal.x, 5045; CM-NEXT: BFE_INT * T58.W, T13.Y, 0.0, literal.x, 5046; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5047; CM-NEXT: ALU clause starting at 140: 5048; CM-NEXT: BFE_INT T59.X, T42.Z, 0.0, literal.x, 5049; CM-NEXT: BFE_INT T58.Y, T1.W, 0.0, literal.x, 5050; CM-NEXT: BFE_INT T42.Z, T43.Y, 0.0, literal.x, 5051; CM-NEXT: BFE_INT * T41.W, T12.Y, 0.0, literal.x, BS:VEC_120/SCL_212 5052; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5053; CM-NEXT: BFE_INT T42.X, T43.X, 0.0, literal.x, 5054; CM-NEXT: BFE_INT T41.Y, T12.Z, 0.0, literal.x, 5055; CM-NEXT: BFE_INT T60.Z, T43.W, 0.0, literal.x, 5056; CM-NEXT: BFE_INT * T59.W, T11.Y, 0.0, literal.x, 5057; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5058; CM-NEXT: BFE_INT T60.X, T43.Z, 0.0, literal.x, 5059; CM-NEXT: BFE_INT T59.Y, T11.Z, 0.0, literal.x, BS:VEC_120/SCL_212 5060; CM-NEXT: BFE_INT T43.Z, T44.Y, 0.0, literal.x, 5061; CM-NEXT: BFE_INT * T42.W, T10.Y, 0.0, literal.x, BS:VEC_120/SCL_212 5062; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5063; 
CM-NEXT: BFE_INT T43.X, T44.X, 0.0, literal.x, 5064; CM-NEXT: BFE_INT T42.Y, T10.Z, 0.0, literal.x, 5065; CM-NEXT: BFE_INT T61.Z, T44.W, 0.0, literal.x, 5066; CM-NEXT: BFE_INT * T60.W, T9.Y, 0.0, literal.x, 5067; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5068; CM-NEXT: BFE_INT T61.X, T44.Z, 0.0, literal.x, 5069; CM-NEXT: BFE_INT T60.Y, T9.Z, 0.0, literal.x, BS:VEC_120/SCL_212 5070; CM-NEXT: BFE_INT T44.Z, T45.Y, 0.0, literal.x, 5071; CM-NEXT: BFE_INT * T43.W, T8.Y, 0.0, literal.x, BS:VEC_120/SCL_212 5072; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5073; CM-NEXT: BFE_INT T44.X, T45.X, 0.0, literal.x, 5074; CM-NEXT: BFE_INT T43.Y, T8.Z, 0.0, literal.x, 5075; CM-NEXT: BFE_INT T62.Z, T45.W, 0.0, literal.x, 5076; CM-NEXT: BFE_INT * T61.W, T7.Y, 0.0, literal.x, 5077; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5078; CM-NEXT: BFE_INT T62.X, T45.Z, 0.0, literal.x, 5079; CM-NEXT: BFE_INT T61.Y, T7.Z, 0.0, literal.x, BS:VEC_120/SCL_212 5080; CM-NEXT: BFE_INT T45.Z, T35.Y, 0.0, literal.x, 5081; CM-NEXT: BFE_INT * T44.W, T6.Y, 0.0, literal.x, BS:VEC_120/SCL_212 5082; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5083; CM-NEXT: BFE_INT T45.X, T35.X, 0.0, literal.x, 5084; CM-NEXT: BFE_INT T44.Y, T6.Z, 0.0, literal.x, 5085; CM-NEXT: BFE_INT T63.Z, T35.W, 0.0, literal.x, 5086; CM-NEXT: BFE_INT * T62.W, T5.Y, 0.0, literal.x, 5087; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5088; CM-NEXT: BFE_INT T63.X, T35.Z, 0.0, literal.x, 5089; CM-NEXT: BFE_INT T62.Y, T5.Z, 0.0, literal.x, BS:VEC_120/SCL_212 5090; CM-NEXT: BFE_INT T35.Z, T36.Y, 0.0, literal.x, 5091; CM-NEXT: BFE_INT * T45.W, T4.Y, 0.0, literal.x, BS:VEC_120/SCL_212 5092; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5093; CM-NEXT: BFE_INT T35.X, T36.X, 0.0, literal.x, 5094; CM-NEXT: BFE_INT T45.Y, T4.Z, 0.0, literal.x, 5095; CM-NEXT: BFE_INT T64.Z, T36.W, 0.0, literal.x, 5096; CM-NEXT: BFE_INT * T63.W, T3.Y, 0.0, literal.x, 5097; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5098; CM-NEXT: BFE_INT T64.X, T36.Z, 0.0, literal.x, 5099; 
CM-NEXT: BFE_INT T63.Y, T3.Z, 0.0, literal.x, BS:VEC_120/SCL_212 5100; CM-NEXT: BFE_INT T36.Z, T37.Y, 0.0, literal.x, 5101; CM-NEXT: BFE_INT * T35.W, T2.Y, 0.0, literal.x, BS:VEC_120/SCL_212 5102; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5103; CM-NEXT: BFE_INT T36.X, T37.X, 0.0, literal.x, 5104; CM-NEXT: BFE_INT T35.Y, T2.Z, 0.0, literal.x, 5105; CM-NEXT: BFE_INT T65.Z, T37.W, 0.0, literal.x, 5106; CM-NEXT: BFE_INT * T64.W, T0.W, 0.0, literal.x, BS:VEC_120/SCL_212 5107; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5108; CM-NEXT: BFE_INT T65.X, T37.Z, 0.0, literal.x, 5109; CM-NEXT: BFE_INT T64.Y, T1.Z, 0.0, literal.x, BS:VEC_120/SCL_212 5110; CM-NEXT: LSHR T1.Z, T37.X, literal.x, 5111; CM-NEXT: BFE_INT * T36.W, T1.Y, 0.0, literal.x, 5112; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5113; CM-NEXT: LSHR T37.X, KC0[2].Y, literal.x, 5114; CM-NEXT: BFE_INT T36.Y, PV.Z, 0.0, literal.y, 5115; CM-NEXT: ADD_INT T1.Z, KC0[2].Y, literal.y, 5116; CM-NEXT: BFE_INT * T65.W, T0.Z, 0.0, literal.y, 5117; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5118; CM-NEXT: LSHR T66.X, PV.Z, literal.x, 5119; CM-NEXT: BFE_INT * T65.Y, T0.Y, 0.0, literal.y, 5120; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5121 %load = load <64 x i16>, <64 x i16> addrspace(1)* %in 5122 %ext = sext <64 x i16> %load to <64 x i32> 5123 store <64 x i32> %ext, <64 x i32> addrspace(1)* %out 5124 ret void 5125} 5126 5127define amdgpu_kernel void @global_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) #0 { 5128; GCN-NOHSA-SI-LABEL: global_zextload_i16_to_i64: 5129; GCN-NOHSA-SI: ; %bb.0: 5130; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 5131; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 5132; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 5133; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 5134; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 5135; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5136; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 5137; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 5138; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 
0 5139; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 5140; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 5141; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 5142; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 5143; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 5144; GCN-NOHSA-SI-NEXT: s_endpgm 5145; 5146; GCN-HSA-LABEL: global_zextload_i16_to_i64: 5147; GCN-HSA: ; %bb.0: 5148; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5149; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5150; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 5151; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 5152; GCN-HSA-NEXT: flat_load_ushort v2, v[2:3] 5153; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 5154; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 5155; GCN-HSA-NEXT: v_mov_b32_e32 v3, 0 5156; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 5157; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 5158; GCN-HSA-NEXT: s_endpgm 5159; 5160; GCN-NOHSA-VI-LABEL: global_zextload_i16_to_i64: 5161; GCN-NOHSA-VI: ; %bb.0: 5162; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 5163; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 5164; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 5165; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 5166; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 5167; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5168; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 5169; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 5170; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 5171; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 5172; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 5173; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 5174; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 5175; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0 5176; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 5177; GCN-NOHSA-VI-NEXT: s_endpgm 5178; 5179; EG-LABEL: global_zextload_i16_to_i64: 5180; EG: ; %bb.0: 5181; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5182; EG-NEXT: TEX 0 @6 5183; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 5184; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 5185; EG-NEXT: CF_END 5186; EG-NEXT: PAD 5187; EG-NEXT: Fetch clause starting 
at 6: 5188; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 5189; EG-NEXT: ALU clause starting at 8: 5190; EG-NEXT: MOV * T0.X, KC0[2].Z, 5191; EG-NEXT: ALU clause starting at 9: 5192; EG-NEXT: MOV * T0.Y, 0.0, 5193; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 5194; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5195; 5196; CM-LABEL: global_zextload_i16_to_i64: 5197; CM: ; %bb.0: 5198; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5199; CM-NEXT: TEX 0 @6 5200; CM-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 5201; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X 5202; CM-NEXT: CF_END 5203; CM-NEXT: PAD 5204; CM-NEXT: Fetch clause starting at 6: 5205; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 5206; CM-NEXT: ALU clause starting at 8: 5207; CM-NEXT: MOV * T0.X, KC0[2].Z, 5208; CM-NEXT: ALU clause starting at 9: 5209; CM-NEXT: MOV * T0.Y, 0.0, 5210; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 5211; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5212 %a = load i16, i16 addrspace(1)* %in 5213 %ext = zext i16 %a to i64 5214 store i64 %ext, i64 addrspace(1)* %out 5215 ret void 5216} 5217 5218; FIXME: Need to optimize this sequence to avoid extra bfe: 5219; t28: i32,ch = load<LD2[%in(addrspace=1)], anyext from i16> t12, t27, undef:i64 5220; t31: i64 = any_extend t28 5221; t33: i64 = sign_extend_inreg t31, ValueType:ch:i16 5222 5223; TODO: These could be expanded earlier using ASHR 15 5224define amdgpu_kernel void @global_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) #0 { 5225; GCN-NOHSA-SI-LABEL: global_sextload_i16_to_i64: 5226; GCN-NOHSA-SI: ; %bb.0: 5227; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 5228; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 5229; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 5230; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 5231; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 5232; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5233; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 5234; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 5235; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 5236; 
GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 5237; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 5238; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 5239; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5240; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 5241; GCN-NOHSA-SI-NEXT: s_endpgm 5242; 5243; GCN-HSA-LABEL: global_sextload_i16_to_i64: 5244; GCN-HSA: ; %bb.0: 5245; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5246; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5247; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 5248; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 5249; GCN-HSA-NEXT: flat_load_sshort v2, v[2:3] 5250; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 5251; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 5252; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 5253; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5254; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 5255; GCN-HSA-NEXT: s_endpgm 5256; 5257; GCN-NOHSA-VI-LABEL: global_sextload_i16_to_i64: 5258; GCN-NOHSA-VI: ; %bb.0: 5259; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 5260; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 5261; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 5262; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5263; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 5264; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 5265; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s6 5266; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s7 5267; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s2 5268; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, s3 5269; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 5270; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 5271; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v0, 0, 16 5272; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5273; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 5274; GCN-NOHSA-VI-NEXT: s_endpgm 5275; 5276; EG-LABEL: global_sextload_i16_to_i64: 5277; EG: ; %bb.0: 5278; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5279; EG-NEXT: TEX 0 @6 5280; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[] 5281; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 5282; EG-NEXT: CF_END 5283; EG-NEXT: PAD 5284; EG-NEXT: Fetch clause 
starting at 6: 5285; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 5286; EG-NEXT: ALU clause starting at 8: 5287; EG-NEXT: MOV * T0.X, KC0[2].Z, 5288; EG-NEXT: ALU clause starting at 9: 5289; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 5290; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 5291; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 5292; EG-NEXT: ASHR * T0.Y, PV.X, literal.x, 5293; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5294; 5295; CM-LABEL: global_sextload_i16_to_i64: 5296; CM: ; %bb.0: 5297; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5298; CM-NEXT: TEX 0 @6 5299; CM-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[] 5300; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X 5301; CM-NEXT: CF_END 5302; CM-NEXT: PAD 5303; CM-NEXT: Fetch clause starting at 6: 5304; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 5305; CM-NEXT: ALU clause starting at 8: 5306; CM-NEXT: MOV * T0.X, KC0[2].Z, 5307; CM-NEXT: ALU clause starting at 9: 5308; CM-NEXT: BFE_INT * T0.X, T0.X, 0.0, literal.x, 5309; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5310; CM-NEXT: LSHR T1.X, KC0[2].Y, literal.x, 5311; CM-NEXT: ASHR * T0.Y, PV.X, literal.y, 5312; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44) 5313 %a = load i16, i16 addrspace(1)* %in 5314 %ext = sext i16 %a to i64 5315 store i64 %ext, i64 addrspace(1)* %out 5316 ret void 5317} 5318 5319define amdgpu_kernel void @global_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 { 5320; GCN-NOHSA-SI-LABEL: global_zextload_v1i16_to_v1i64: 5321; GCN-NOHSA-SI: ; %bb.0: 5322; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 5323; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 5324; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 5325; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 5326; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 5327; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5328; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 5329; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 5330; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 5331; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 5332; 
GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 5333; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 5334; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 5335; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 5336; GCN-NOHSA-SI-NEXT: s_endpgm 5337; 5338; GCN-HSA-LABEL: global_zextload_v1i16_to_v1i64: 5339; GCN-HSA: ; %bb.0: 5340; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5341; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5342; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 5343; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 5344; GCN-HSA-NEXT: flat_load_ushort v2, v[2:3] 5345; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 5346; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 5347; GCN-HSA-NEXT: v_mov_b32_e32 v3, 0 5348; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 5349; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 5350; GCN-HSA-NEXT: s_endpgm 5351; 5352; GCN-NOHSA-VI-LABEL: global_zextload_v1i16_to_v1i64: 5353; GCN-NOHSA-VI: ; %bb.0: 5354; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 5355; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 5356; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 5357; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 5358; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 5359; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5360; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 5361; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 5362; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 5363; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 5364; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 5365; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 5366; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 5367; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0 5368; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 5369; GCN-NOHSA-VI-NEXT: s_endpgm 5370; 5371; EG-LABEL: global_zextload_v1i16_to_v1i64: 5372; EG: ; %bb.0: 5373; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5374; EG-NEXT: TEX 0 @6 5375; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 5376; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 5377; EG-NEXT: CF_END 5378; EG-NEXT: PAD 5379; EG-NEXT: Fetch clause starting at 6: 5380; EG-NEXT: VTX_READ_16 T0.X, 
T0.X, 0, #1 5381; EG-NEXT: ALU clause starting at 8: 5382; EG-NEXT: MOV * T0.X, KC0[2].Z, 5383; EG-NEXT: ALU clause starting at 9: 5384; EG-NEXT: MOV * T0.Y, 0.0, 5385; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 5386; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5387; 5388; CM-LABEL: global_zextload_v1i16_to_v1i64: 5389; CM: ; %bb.0: 5390; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5391; CM-NEXT: TEX 0 @6 5392; CM-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 5393; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X 5394; CM-NEXT: CF_END 5395; CM-NEXT: PAD 5396; CM-NEXT: Fetch clause starting at 6: 5397; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 5398; CM-NEXT: ALU clause starting at 8: 5399; CM-NEXT: MOV * T0.X, KC0[2].Z, 5400; CM-NEXT: ALU clause starting at 9: 5401; CM-NEXT: MOV * T0.Y, 0.0, 5402; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 5403; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5404 %load = load <1 x i16>, <1 x i16> addrspace(1)* %in 5405 %ext = zext <1 x i16> %load to <1 x i64> 5406 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out 5407 ret void 5408} 5409 5410; TODO: These could be expanded earlier using ASHR 15 5411define amdgpu_kernel void @global_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 { 5412; GCN-NOHSA-SI-LABEL: global_sextload_v1i16_to_v1i64: 5413; GCN-NOHSA-SI: ; %bb.0: 5414; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 5415; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 5416; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 5417; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 5418; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 5419; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5420; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 5421; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 5422; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 5423; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 5424; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 5425; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 5426; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5427; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 
v[0:1], off, s[4:7], 0 5428; GCN-NOHSA-SI-NEXT: s_endpgm 5429; 5430; GCN-HSA-LABEL: global_sextload_v1i16_to_v1i64: 5431; GCN-HSA: ; %bb.0: 5432; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5433; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5434; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 5435; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 5436; GCN-HSA-NEXT: flat_load_sshort v2, v[2:3] 5437; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 5438; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 5439; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 5440; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5441; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 5442; GCN-HSA-NEXT: s_endpgm 5443; 5444; GCN-NOHSA-VI-LABEL: global_sextload_v1i16_to_v1i64: 5445; GCN-NOHSA-VI: ; %bb.0: 5446; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 5447; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 5448; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 5449; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5450; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 5451; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 5452; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s6 5453; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s7 5454; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s2 5455; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, s3 5456; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 5457; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 5458; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v0, 0, 16 5459; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5460; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 5461; GCN-NOHSA-VI-NEXT: s_endpgm 5462; 5463; EG-LABEL: global_sextload_v1i16_to_v1i64: 5464; EG: ; %bb.0: 5465; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5466; EG-NEXT: TEX 0 @6 5467; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[] 5468; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 5469; EG-NEXT: CF_END 5470; EG-NEXT: PAD 5471; EG-NEXT: Fetch clause starting at 6: 5472; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 5473; EG-NEXT: ALU clause starting at 8: 5474; EG-NEXT: MOV * T0.X, KC0[2].Z, 5475; EG-NEXT: ALU clause starting at 9: 5476; EG-NEXT: BFE_INT T0.X, 
T0.X, 0.0, literal.x, 5477; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 5478; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 5479; EG-NEXT: ASHR * T0.Y, PV.X, literal.x, 5480; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5481; 5482; CM-LABEL: global_sextload_v1i16_to_v1i64: 5483; CM: ; %bb.0: 5484; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5485; CM-NEXT: TEX 0 @6 5486; CM-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[] 5487; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X 5488; CM-NEXT: CF_END 5489; CM-NEXT: PAD 5490; CM-NEXT: Fetch clause starting at 6: 5491; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 5492; CM-NEXT: ALU clause starting at 8: 5493; CM-NEXT: MOV * T0.X, KC0[2].Z, 5494; CM-NEXT: ALU clause starting at 9: 5495; CM-NEXT: BFE_INT * T0.X, T0.X, 0.0, literal.x, 5496; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5497; CM-NEXT: LSHR T1.X, KC0[2].Y, literal.x, 5498; CM-NEXT: ASHR * T0.Y, PV.X, literal.y, 5499; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44) 5500 %load = load <1 x i16>, <1 x i16> addrspace(1)* %in 5501 %ext = sext <1 x i16> %load to <1 x i64> 5502 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out 5503 ret void 5504} 5505 5506define amdgpu_kernel void @global_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { 5507; GCN-NOHSA-SI-LABEL: global_zextload_v2i16_to_v2i64: 5508; GCN-NOHSA-SI: ; %bb.0: 5509; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 5510; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 5511; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 5512; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 5513; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 5514; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5515; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 5516; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 5517; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[8:11], 0 5518; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 5519; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 5520; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 5521; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 5522; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, 
v0 5523; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v0 5524; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1 5525; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 5526; GCN-NOHSA-SI-NEXT: s_endpgm 5527; 5528; GCN-HSA-LABEL: global_zextload_v2i16_to_v2i64: 5529; GCN-HSA: ; %bb.0: 5530; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5531; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5532; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5533; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5534; GCN-HSA-NEXT: flat_load_dword v0, v[0:1] 5535; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 5536; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 5537; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 5538; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 5539; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 5540; GCN-HSA-NEXT: v_lshrrev_b32_e32 v2, 16, v0 5541; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v0 5542; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5543; GCN-HSA-NEXT: s_endpgm 5544; 5545; GCN-NOHSA-VI-LABEL: global_zextload_v2i16_to_v2i64: 5546; GCN-NOHSA-VI: ; %bb.0: 5547; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 5548; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 5549; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 5550; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 5551; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 5552; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5553; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 5554; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 5555; GCN-NOHSA-VI-NEXT: buffer_load_dword v2, off, s[8:11], 0 5556; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 5557; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 5558; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 5559; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1 5560; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 5561; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v2 5562; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 5563; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 5564; GCN-NOHSA-VI-NEXT: s_endpgm 5565; 5566; EG-LABEL: global_zextload_v2i16_to_v2i64: 5567; EG: ; %bb.0: 5568; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5569; EG-NEXT: TEX 0 @6 5570; 
EG-NEXT: ALU 6, @9, KC0[CB0:0-32], KC1[] 5571; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1 5572; EG-NEXT: CF_END 5573; EG-NEXT: PAD 5574; EG-NEXT: Fetch clause starting at 6: 5575; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 5576; EG-NEXT: ALU clause starting at 8: 5577; EG-NEXT: MOV * T4.X, KC0[2].Z, 5578; EG-NEXT: ALU clause starting at 9: 5579; EG-NEXT: LSHR * T4.Z, T4.X, literal.x, 5580; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5581; EG-NEXT: AND_INT T4.X, T4.X, literal.x, 5582; EG-NEXT: MOV T4.Y, 0.0, 5583; EG-NEXT: MOV T4.W, 0.0, 5584; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y, 5585; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 5586; 5587; CM-LABEL: global_zextload_v2i16_to_v2i64: 5588; CM: ; %bb.0: 5589; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5590; CM-NEXT: TEX 0 @6 5591; CM-NEXT: ALU 7, @9, KC0[CB0:0-32], KC1[] 5592; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T4, T5.X 5593; CM-NEXT: CF_END 5594; CM-NEXT: PAD 5595; CM-NEXT: Fetch clause starting at 6: 5596; CM-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 5597; CM-NEXT: ALU clause starting at 8: 5598; CM-NEXT: MOV * T4.X, KC0[2].Z, 5599; CM-NEXT: ALU clause starting at 9: 5600; CM-NEXT: LSHR * T4.Z, T4.X, literal.x, 5601; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5602; CM-NEXT: AND_INT T4.X, T4.X, literal.x, 5603; CM-NEXT: MOV T4.Y, 0.0, 5604; CM-NEXT: MOV * T4.W, 0.0, 5605; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 5606; CM-NEXT: LSHR * T5.X, KC0[2].Y, literal.x, 5607; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5608 %load = load <2 x i16>, <2 x i16> addrspace(1)* %in 5609 %ext = zext <2 x i16> %load to <2 x i64> 5610 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out 5611 ret void 5612} 5613 5614define amdgpu_kernel void @global_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { 5615; GCN-NOHSA-SI-LABEL: global_sextload_v2i16_to_v2i64: 5616; GCN-NOHSA-SI: ; %bb.0: 5617; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 5618; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 
5619; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 5620; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 5621; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 5622; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5623; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 5624; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 5625; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[8:11], 0 5626; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 5627; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 5628; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 5629; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v0 5630; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v0, 0, 16 5631; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5632; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v2, 0, 16 5633; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5634; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 5635; GCN-NOHSA-SI-NEXT: s_endpgm 5636; 5637; GCN-HSA-LABEL: global_sextload_v2i16_to_v2i64: 5638; GCN-HSA: ; %bb.0: 5639; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5640; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5641; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5642; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5643; GCN-HSA-NEXT: flat_load_dword v0, v[0:1] 5644; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 5645; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 5646; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 5647; GCN-HSA-NEXT: v_lshrrev_b32_e32 v2, 16, v0 5648; GCN-HSA-NEXT: v_bfe_i32 v0, v0, 0, 16 5649; GCN-HSA-NEXT: v_bfe_i32 v2, v2, 0, 16 5650; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5651; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5652; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5653; GCN-HSA-NEXT: s_endpgm 5654; 5655; GCN-NOHSA-VI-LABEL: global_sextload_v2i16_to_v2i64: 5656; GCN-NOHSA-VI: ; %bb.0: 5657; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 5658; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 5659; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 5660; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 5661; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 5662; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5663; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 5664; GCN-NOHSA-VI-NEXT: 
s_mov_b32 s9, s7 5665; GCN-NOHSA-VI-NEXT: buffer_load_dword v1, off, s[8:11], 0 5666; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 5667; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 5668; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 5669; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v1 5670; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v1, 0, 16 5671; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v2, 0, 16 5672; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5673; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5674; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 5675; GCN-NOHSA-VI-NEXT: s_endpgm 5676; 5677; EG-LABEL: global_sextload_v2i16_to_v2i64: 5678; EG: ; %bb.0: 5679; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5680; EG-NEXT: TEX 0 @6 5681; EG-NEXT: ALU 8, @9, KC0[CB0:0-32], KC1[] 5682; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1 5683; EG-NEXT: CF_END 5684; EG-NEXT: PAD 5685; EG-NEXT: Fetch clause starting at 6: 5686; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 5687; EG-NEXT: ALU clause starting at 8: 5688; EG-NEXT: MOV * T4.X, KC0[2].Z, 5689; EG-NEXT: ALU clause starting at 9: 5690; EG-NEXT: ASHR * T4.W, T4.X, literal.x, 5691; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5692; EG-NEXT: ASHR * T4.Z, T4.X, literal.x, 5693; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5694; EG-NEXT: BFE_INT T4.X, T4.X, 0.0, literal.x, 5695; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y, 5696; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 5697; EG-NEXT: ASHR * T4.Y, PV.X, literal.x, 5698; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5699; 5700; CM-LABEL: global_sextload_v2i16_to_v2i64: 5701; CM: ; %bb.0: 5702; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5703; CM-NEXT: TEX 0 @6 5704; CM-NEXT: ALU 8, @9, KC0[CB0:0-32], KC1[] 5705; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T4, T5.X 5706; CM-NEXT: CF_END 5707; CM-NEXT: PAD 5708; CM-NEXT: Fetch clause starting at 6: 5709; CM-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 5710; CM-NEXT: ALU clause starting at 8: 5711; CM-NEXT: MOV * T4.X, KC0[2].Z, 5712; CM-NEXT: ALU clause starting at 9: 5713; 
CM-NEXT: ASHR * T4.W, T4.X, literal.x, 5714; CM-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5715; CM-NEXT: ASHR * T4.Z, T4.X, literal.x, 5716; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5717; CM-NEXT: BFE_INT * T4.X, T4.X, 0.0, literal.x, 5718; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5719; CM-NEXT: LSHR T5.X, KC0[2].Y, literal.x, 5720; CM-NEXT: ASHR * T4.Y, PV.X, literal.y, 5721; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44) 5722 %load = load <2 x i16>, <2 x i16> addrspace(1)* %in 5723 %ext = sext <2 x i16> %load to <2 x i64> 5724 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out 5725 ret void 5726} 5727 5728define amdgpu_kernel void @global_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 { 5729; GCN-NOHSA-SI-LABEL: global_zextload_v4i16_to_v4i64: 5730; GCN-NOHSA-SI: ; %bb.0: 5731; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 5732; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 5733; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 5734; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 5735; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 5736; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5737; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 5738; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 5739; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[8:9], off, s[8:11], 0 5740; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 5741; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, 0xffff 5742; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1 5743; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, v1 5744; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, v1 5745; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 5746; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 5747; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 5748; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v9 5749; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v6, 16, v8 5750; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, s2, v8 5751; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, s2, v9 5752; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16 5753; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 5754; GCN-NOHSA-SI-NEXT: 
s_endpgm 5755; 5756; GCN-HSA-LABEL: global_zextload_v4i16_to_v4i64: 5757; GCN-HSA: ; %bb.0: 5758; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5759; GCN-HSA-NEXT: s_mov_b32 s4, 0xffff 5760; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5761; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5762; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5763; GCN-HSA-NEXT: flat_load_dwordx2 v[8:9], v[0:1] 5764; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 5765; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5766; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 5767; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 5768; GCN-HSA-NEXT: v_mov_b32_e32 v11, s1 5769; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 5770; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 5771; GCN-HSA-NEXT: v_mov_b32_e32 v5, v1 5772; GCN-HSA-NEXT: v_mov_b32_e32 v7, v1 5773; GCN-HSA-NEXT: v_mov_b32_e32 v10, s0 5774; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 5775; GCN-HSA-NEXT: v_lshrrev_b32_e32 v2, 16, v9 5776; GCN-HSA-NEXT: v_and_b32_e32 v0, s4, v9 5777; GCN-HSA-NEXT: v_lshrrev_b32_e32 v6, 16, v8 5778; GCN-HSA-NEXT: v_and_b32_e32 v4, s4, v8 5779; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[0:3] 5780; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7] 5781; GCN-HSA-NEXT: s_endpgm 5782; 5783; GCN-NOHSA-VI-LABEL: global_zextload_v4i16_to_v4i64: 5784; GCN-NOHSA-VI: ; %bb.0: 5785; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 5786; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 5787; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 5788; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 5789; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 5790; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5791; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 5792; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 5793; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[8:9], off, s[8:11], 0 5794; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, 0xffff 5795; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, 0 5796; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v7, 0 5797; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 5798; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 5799; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, 0 5800; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, v5 5801; 
GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 5802; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v4, s6, v9 5803; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v6, 16, v9 5804; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, s6, v8 5805; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v8 5806; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 5807; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 5808; GCN-NOHSA-VI-NEXT: s_endpgm 5809; 5810; EG-LABEL: global_zextload_v4i16_to_v4i64: 5811; EG: ; %bb.0: 5812; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5813; EG-NEXT: TEX 0 @6 5814; EG-NEXT: ALU 18, @9, KC0[CB0:0-32], KC1[] 5815; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T8.X, 0 5816; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T7.X, 1 5817; EG-NEXT: CF_END 5818; EG-NEXT: Fetch clause starting at 6: 5819; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 5820; EG-NEXT: ALU clause starting at 8: 5821; EG-NEXT: MOV * T5.X, KC0[2].Z, 5822; EG-NEXT: ALU clause starting at 9: 5823; EG-NEXT: MOV T2.X, T5.X, 5824; EG-NEXT: MOV * T3.X, T5.Y, 5825; EG-NEXT: MOV T0.Y, PV.X, 5826; EG-NEXT: MOV * T0.Z, PS, 5827; EG-NEXT: LSHR * T5.Z, PV.Z, literal.x, 5828; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5829; EG-NEXT: AND_INT T5.X, T0.Z, literal.x, 5830; EG-NEXT: MOV T5.Y, 0.0, 5831; EG-NEXT: LSHR T6.Z, T0.Y, literal.y, 5832; EG-NEXT: AND_INT * T6.X, T0.Y, literal.x, 5833; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 5834; EG-NEXT: MOV T6.Y, 0.0, 5835; EG-NEXT: MOV T5.W, 0.0, 5836; EG-NEXT: MOV * T6.W, 0.0, 5837; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x, 5838; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5839; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5840; EG-NEXT: LSHR * T8.X, PV.W, literal.x, 5841; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5842; 5843; CM-LABEL: global_zextload_v4i16_to_v4i64: 5844; CM: ; %bb.0: 5845; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5846; CM-NEXT: TEX 0 @6 5847; CM-NEXT: ALU 20, @9, KC0[CB0:0-32], KC1[] 5848; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T5, T8.X 5849; 
CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T6, T7.X 5850; CM-NEXT: CF_END 5851; CM-NEXT: Fetch clause starting at 6: 5852; CM-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 5853; CM-NEXT: ALU clause starting at 8: 5854; CM-NEXT: MOV * T5.X, KC0[2].Z, 5855; CM-NEXT: ALU clause starting at 9: 5856; CM-NEXT: MOV * T2.X, T5.X, 5857; CM-NEXT: MOV * T3.X, T5.Y, 5858; CM-NEXT: MOV T0.Y, PV.X, 5859; CM-NEXT: MOV * T0.Z, T2.X, 5860; CM-NEXT: LSHR * T5.Z, PV.Z, literal.x, 5861; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5862; CM-NEXT: AND_INT T5.X, T0.Z, literal.x, 5863; CM-NEXT: MOV T5.Y, 0.0, 5864; CM-NEXT: LSHR * T6.Z, T0.Y, literal.y, 5865; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 5866; CM-NEXT: AND_INT T6.X, T0.Y, literal.x, 5867; CM-NEXT: MOV T6.Y, 0.0, 5868; CM-NEXT: MOV * T5.W, 0.0, 5869; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 5870; CM-NEXT: MOV * T6.W, 0.0, 5871; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 5872; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5873; CM-NEXT: LSHR * T7.X, PV.W, literal.x, 5874; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5875; CM-NEXT: LSHR * T8.X, KC0[2].Y, literal.x, 5876; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5877 %load = load <4 x i16>, <4 x i16> addrspace(1)* %in 5878 %ext = zext <4 x i16> %load to <4 x i64> 5879 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out 5880 ret void 5881} 5882 5883define amdgpu_kernel void @global_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 { 5884; GCN-NOHSA-SI-LABEL: global_sextload_v4i16_to_v4i64: 5885; GCN-NOHSA-SI: ; %bb.0: 5886; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 5887; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 5888; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 5889; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 5890; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 5891; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5892; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 5893; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 5894; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[1:2], off, s[8:11], 0 5895; 
GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 5896; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 5897; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 5898; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v2 5899; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v5, 16, v1 5900; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v1, 0, 16 5901; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[6:7], v[1:2], 48 5902; GCN-NOHSA-SI-NEXT: v_bfe_i32 v4, v3, 0, 16 5903; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5904; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v5, 0, 16 5905; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v5, 31, v4 5906; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5907; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16 5908; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 5909; GCN-NOHSA-SI-NEXT: s_endpgm 5910; 5911; GCN-HSA-LABEL: global_sextload_v4i16_to_v4i64: 5912; GCN-HSA: ; %bb.0: 5913; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5914; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5915; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5916; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5917; GCN-HSA-NEXT: flat_load_dwordx2 v[1:2], v[0:1] 5918; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 5919; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5920; GCN-HSA-NEXT: v_mov_b32_e32 v11, s3 5921; GCN-HSA-NEXT: v_mov_b32_e32 v9, s1 5922; GCN-HSA-NEXT: v_mov_b32_e32 v10, s2 5923; GCN-HSA-NEXT: v_mov_b32_e32 v8, s0 5924; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 5925; GCN-HSA-NEXT: v_mov_b32_e32 v3, v2 5926; GCN-HSA-NEXT: v_lshrrev_b32_e32 v4, 16, v1 5927; GCN-HSA-NEXT: v_ashr_i64 v[6:7], v[1:2], 48 5928; GCN-HSA-NEXT: v_bfe_i32 v2, v4, 0, 16 5929; GCN-HSA-NEXT: v_bfe_i32 v4, v3, 0, 16 5930; GCN-HSA-NEXT: v_bfe_i32 v0, v1, 0, 16 5931; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 31, v4 5932; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5933; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5934; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7] 5935; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 5936; GCN-HSA-NEXT: s_endpgm 5937; 5938; GCN-NOHSA-VI-LABEL: global_sextload_v4i16_to_v4i64: 5939; 
GCN-NOHSA-VI: ; %bb.0: 5940; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 5941; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 5942; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 5943; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 5944; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 5945; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5946; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 5947; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 5948; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[1:2], off, s[8:11], 0 5949; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 5950; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 5951; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 5952; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, v2 5953; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v2 5954; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v1 5955; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v5, 0, 16 5956; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v4, 0, 16 5957; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v1, 0, 16 5958; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v3, 0, 16 5959; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v5, 31, v4 5960; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 31, v6 5961; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5962; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5963; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 5964; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 5965; GCN-NOHSA-VI-NEXT: s_endpgm 5966; 5967; EG-LABEL: global_sextload_v4i16_to_v4i64: 5968; EG: ; %bb.0: 5969; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5970; EG-NEXT: TEX 0 @6 5971; EG-NEXT: ALU 20, @9, KC0[CB0:0-32], KC1[] 5972; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 0 5973; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1 5974; EG-NEXT: CF_END 5975; EG-NEXT: Fetch clause starting at 6: 5976; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 5977; EG-NEXT: ALU clause starting at 8: 5978; EG-NEXT: MOV * T5.X, KC0[2].Z, 5979; EG-NEXT: ALU clause starting at 9: 5980; EG-NEXT: MOV T2.X, T5.X, 5981; EG-NEXT: MOV * T3.X, T5.Y, 5982; EG-NEXT: MOV T0.Y, PS, 5983; EG-NEXT: MOV * T0.Z, 
PV.X, 5984; EG-NEXT: ASHR * T5.W, PV.Z, literal.x, 5985; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5986; EG-NEXT: LSHR T6.X, KC0[2].Y, literal.x, 5987; EG-NEXT: ASHR T5.Z, T0.Z, literal.y, 5988; EG-NEXT: ASHR * T7.W, T0.Y, literal.z, 5989; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5990; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5991; EG-NEXT: BFE_INT T5.X, T0.Z, 0.0, literal.x, 5992; EG-NEXT: ASHR * T7.Z, T0.Y, literal.x, 5993; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5994; EG-NEXT: BFE_INT T7.X, T0.Y, 0.0, literal.x, 5995; EG-NEXT: ASHR T5.Y, PV.X, literal.y, 5996; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 5997; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 5998; EG-NEXT: LSHR T8.X, PV.W, literal.x, 5999; EG-NEXT: ASHR * T7.Y, PV.X, literal.y, 6000; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 6001; 6002; CM-LABEL: global_sextload_v4i16_to_v4i64: 6003; CM: ; %bb.0: 6004; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 6005; CM-NEXT: TEX 0 @6 6006; CM-NEXT: ALU 20, @9, KC0[CB0:0-32], KC1[] 6007; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T7, T8.X 6008; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T5, T6.X 6009; CM-NEXT: CF_END 6010; CM-NEXT: Fetch clause starting at 6: 6011; CM-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 6012; CM-NEXT: ALU clause starting at 8: 6013; CM-NEXT: MOV * T5.X, KC0[2].Z, 6014; CM-NEXT: ALU clause starting at 9: 6015; CM-NEXT: MOV * T2.X, T5.X, 6016; CM-NEXT: MOV T3.X, T5.Y, 6017; CM-NEXT: MOV * T0.Y, PV.X, 6018; CM-NEXT: MOV * T0.Z, PV.X, 6019; CM-NEXT: ADD_INT T1.Z, KC0[2].Y, literal.x, 6020; CM-NEXT: ASHR * T5.W, PV.Z, literal.y, 6021; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6022; CM-NEXT: LSHR T6.X, PV.Z, literal.x, 6023; CM-NEXT: ASHR T5.Z, T0.Z, literal.y, 6024; CM-NEXT: ASHR * T7.W, T0.Y, literal.z, 6025; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6026; CM-NEXT: 31(4.344025e-44), 0(0.000000e+00) 6027; CM-NEXT: BFE_INT T5.X, T0.Z, 0.0, literal.x, 6028; CM-NEXT: ASHR * T7.Z, T0.Y, literal.x, 6029; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6030; 
CM-NEXT: BFE_INT T7.X, T0.Y, 0.0, literal.x, 6031; CM-NEXT: ASHR * T5.Y, PV.X, literal.y, 6032; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6033; CM-NEXT: LSHR T8.X, KC0[2].Y, literal.x, 6034; CM-NEXT: ASHR * T7.Y, PV.X, literal.y, 6035; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44) 6036 %load = load <4 x i16>, <4 x i16> addrspace(1)* %in 6037 %ext = sext <4 x i16> %load to <4 x i64> 6038 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out 6039 ret void 6040} 6041 6042define amdgpu_kernel void @global_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 { 6043; GCN-NOHSA-SI-LABEL: global_zextload_v8i16_to_v8i64: 6044; GCN-NOHSA-SI: ; %bb.0: 6045; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 6046; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 6047; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 6048; GCN-NOHSA-SI-NEXT: s_mov_b32 s12, 0xffff 6049; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 6050; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 6051; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 6052; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 6053; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 6054; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 6055; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, 0 6056; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, 0 6057; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, 0 6058; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, v9 6059; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, v9 6060; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, v9 6061; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, v9 6062; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, v9 6063; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 6064; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 6065; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 6066; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v18, 16, v1 6067; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v10, 16, v3 6068; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v14, 16, v2 6069; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v6, 16, v0 6070; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, s12, v0 6071; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v12, s12, v2 6072; 
GCN-NOHSA-SI-NEXT: v_and_b32_e32 v16, s12, v1 6073; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, s12, v3 6074; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:48 6075; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 offset:16 6076; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 offset:32 6077; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 6078; GCN-NOHSA-SI-NEXT: s_endpgm 6079; 6080; GCN-HSA-LABEL: global_zextload_v8i16_to_v8i64: 6081; GCN-HSA: ; %bb.0: 6082; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 6083; GCN-HSA-NEXT: v_mov_b32_e32 v12, 0 6084; GCN-HSA-NEXT: s_mov_b32 s4, 0xffff 6085; GCN-HSA-NEXT: v_mov_b32_e32 v14, v12 6086; GCN-HSA-NEXT: v_mov_b32_e32 v15, v12 6087; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 6088; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 6089; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 6090; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 6091; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 6092; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6093; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 6094; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 6095; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 6096; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6097; GCN-HSA-NEXT: v_mov_b32_e32 v17, s1 6098; GCN-HSA-NEXT: v_mov_b32_e32 v16, s0 6099; GCN-HSA-NEXT: s_add_u32 s0, s0, 32 6100; GCN-HSA-NEXT: v_mov_b32_e32 v19, s3 6101; GCN-HSA-NEXT: v_mov_b32_e32 v8, v12 6102; GCN-HSA-NEXT: v_mov_b32_e32 v18, s2 6103; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 6104; GCN-HSA-NEXT: v_mov_b32_e32 v6, 0 6105; GCN-HSA-NEXT: v_mov_b32_e32 v10, 0 6106; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 6107; GCN-HSA-NEXT: v_lshrrev_b32_e32 v13, 16, v3 6108; GCN-HSA-NEXT: v_and_b32_e32 v11, s4, v3 6109; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[11:14] 6110; GCN-HSA-NEXT: v_mov_b32_e32 v4, v12 6111; GCN-HSA-NEXT: v_mov_b32_e32 v13, v12 6112; GCN-HSA-NEXT: v_lshrrev_b32_e32 v14, 16, v1 6113; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v0 6114; GCN-HSA-NEXT: v_and_b32_e32 v3, s4, v0 6115; GCN-HSA-NEXT: v_and_b32_e32 
v12, s4, v1 6116; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 6117; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v2 6118; GCN-HSA-NEXT: v_and_b32_e32 v7, s4, v2 6119; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 6120; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[12:15] 6121; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[7:10] 6122; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[3:6] 6123; GCN-HSA-NEXT: s_endpgm 6124; 6125; GCN-NOHSA-VI-LABEL: global_zextload_v8i16_to_v8i64: 6126; GCN-NOHSA-VI: ; %bb.0: 6127; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 6128; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 6129; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 6130; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 6131; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 6132; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 6133; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 6134; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 6135; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 6136; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, 0xffff 6137; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v17, 0 6138; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v19, 0 6139; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 6140; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 6141; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v7, 0 6142; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v11, 0 6143; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v15, 0 6144; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v13, v17 6145; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v9, v17 6146; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, v17 6147; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 6148; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v16, s6, v3 6149; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v18, 16, v3 6150; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v4, s6, v0 6151; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v6, 16, v0 6152; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v8, s6, v1 6153; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v10, 16, v1 6154; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v12, s6, v2 6155; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v14, 16, v2 6156; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48 6157; GCN-NOHSA-VI-NEXT: 
buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32 6158; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16 6159; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 6160; GCN-NOHSA-VI-NEXT: s_endpgm 6161; 6162; EG-LABEL: global_zextload_v8i16_to_v8i64: 6163; EG: ; %bb.0: 6164; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 6165; EG-NEXT: TEX 0 @8 6166; EG-NEXT: ALU 30, @11, KC0[CB0:0-32], KC1[] 6167; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T14.X, 0 6168; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T13.X, 0 6169; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T12.X, 0 6170; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T11.X, 1 6171; EG-NEXT: CF_END 6172; EG-NEXT: Fetch clause starting at 8: 6173; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 6174; EG-NEXT: ALU clause starting at 10: 6175; EG-NEXT: MOV * T7.X, KC0[2].Z, 6176; EG-NEXT: ALU clause starting at 11: 6177; EG-NEXT: LSHR * T8.Z, T7.W, literal.x, 6178; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6179; EG-NEXT: AND_INT T8.X, T7.W, literal.x, 6180; EG-NEXT: MOV T8.Y, 0.0, 6181; EG-NEXT: LSHR T9.Z, T7.Z, literal.y, 6182; EG-NEXT: AND_INT * T9.X, T7.Z, literal.x, 6183; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6184; EG-NEXT: MOV T9.Y, 0.0, 6185; EG-NEXT: LSHR * T10.Z, T7.Y, literal.x, 6186; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6187; EG-NEXT: AND_INT T10.X, T7.Y, literal.x, 6188; EG-NEXT: MOV T10.Y, 0.0, 6189; EG-NEXT: LSHR T7.Z, T7.X, literal.y, 6190; EG-NEXT: AND_INT * T7.X, T7.X, literal.x, 6191; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6192; EG-NEXT: MOV T7.Y, 0.0, 6193; EG-NEXT: MOV T8.W, 0.0, 6194; EG-NEXT: MOV * T9.W, 0.0, 6195; EG-NEXT: MOV T10.W, 0.0, 6196; EG-NEXT: MOV * T7.W, 0.0, 6197; EG-NEXT: LSHR T11.X, KC0[2].Y, literal.x, 6198; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6199; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6200; EG-NEXT: LSHR T12.X, PV.W, literal.x, 6201; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6202; EG-NEXT: 
2(2.802597e-45), 32(4.484155e-44) 6203; EG-NEXT: LSHR T13.X, PV.W, literal.x, 6204; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6205; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 6206; EG-NEXT: LSHR * T14.X, PV.W, literal.x, 6207; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 6208; 6209; CM-LABEL: global_zextload_v8i16_to_v8i64: 6210; CM: ; %bb.0: 6211; CM-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 6212; CM-NEXT: TEX 0 @8 6213; CM-NEXT: ALU 32, @11, KC0[CB0:0-32], KC1[] 6214; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T8, T14.X 6215; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T9, T13.X 6216; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T10, T12.X 6217; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T7, T11.X 6218; CM-NEXT: CF_END 6219; CM-NEXT: Fetch clause starting at 8: 6220; CM-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 6221; CM-NEXT: ALU clause starting at 10: 6222; CM-NEXT: MOV * T7.X, KC0[2].Z, 6223; CM-NEXT: ALU clause starting at 11: 6224; CM-NEXT: LSHR * T8.Z, T7.X, literal.x, 6225; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6226; CM-NEXT: AND_INT T8.X, T7.X, literal.x, 6227; CM-NEXT: MOV T8.Y, 0.0, 6228; CM-NEXT: LSHR * T9.Z, T7.Y, literal.y, 6229; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6230; CM-NEXT: AND_INT T9.X, T7.Y, literal.x, 6231; CM-NEXT: MOV T9.Y, 0.0, 6232; CM-NEXT: LSHR * T10.Z, T7.Z, literal.y, 6233; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6234; CM-NEXT: AND_INT T10.X, T7.Z, literal.x, 6235; CM-NEXT: MOV T10.Y, 0.0, 6236; CM-NEXT: LSHR * T7.Z, T7.W, literal.y, 6237; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6238; CM-NEXT: AND_INT T7.X, T7.W, literal.x, 6239; CM-NEXT: MOV T7.Y, 0.0, 6240; CM-NEXT: MOV * T8.W, 0.0, 6241; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 6242; CM-NEXT: MOV * T9.W, 0.0, 6243; CM-NEXT: MOV * T10.W, 0.0, 6244; CM-NEXT: MOV * T7.W, 0.0, 6245; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 6246; CM-NEXT: 48(6.726233e-44), 0(0.000000e+00) 6247; CM-NEXT: LSHR T11.X, PV.W, literal.x, 6248; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 
6249; CM-NEXT: 2(2.802597e-45), 32(4.484155e-44) 6250; CM-NEXT: LSHR T12.X, PV.W, literal.x, 6251; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6252; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6253; CM-NEXT: LSHR * T13.X, PV.W, literal.x, 6254; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 6255; CM-NEXT: LSHR * T14.X, KC0[2].Y, literal.x, 6256; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 6257 %load = load <8 x i16>, <8 x i16> addrspace(1)* %in 6258 %ext = zext <8 x i16> %load to <8 x i64> 6259 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out 6260 ret void 6261} 6262 6263define amdgpu_kernel void @global_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 { 6264; GCN-NOHSA-SI-LABEL: global_sextload_v8i16_to_v8i64: 6265; GCN-NOHSA-SI: ; %bb.0: 6266; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 6267; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 6268; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 6269; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 6270; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 6271; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 6272; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 6273; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 6274; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 6275; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 6276; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 6277; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 6278; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, v3 6279; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v7, 16, v2 6280; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v6, 16, v0 6281; GCN-NOHSA-SI-NEXT: v_bfe_i32 v4, v0, 0, 16 6282; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[10:11], v[0:1], 48 6283; GCN-NOHSA-SI-NEXT: v_bfe_i32 v12, v5, 0, 16 6284; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[14:15], v[2:3], 48 6285; GCN-NOHSA-SI-NEXT: v_bfe_i32 v8, v1, 0, 16 6286; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v2, 0, 16 6287; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v5, 31, v4 6288; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 6289; GCN-NOHSA-SI-NEXT: v_bfe_i32 v6, v6, 0, 16 6290; GCN-NOHSA-SI-NEXT: 
v_bfe_i32 v2, v7, 0, 16 6291; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v9, 31, v8 6292; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v13, 31, v12 6293; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v7, 31, v6 6294; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 31, v2 6295; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:48 6296; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16 6297; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 6298; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 6299; GCN-NOHSA-SI-NEXT: s_endpgm 6300; 6301; GCN-HSA-LABEL: global_sextload_v8i16_to_v8i64: 6302; GCN-HSA: ; %bb.0: 6303; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 6304; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 6305; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 6306; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 6307; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 6308; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 6309; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6310; GCN-HSA-NEXT: v_mov_b32_e32 v15, s3 6311; GCN-HSA-NEXT: v_mov_b32_e32 v14, s2 6312; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 6313; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6314; GCN-HSA-NEXT: v_mov_b32_e32 v13, s1 6315; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 6316; GCN-HSA-NEXT: v_mov_b32_e32 v12, s0 6317; GCN-HSA-NEXT: s_add_u32 s0, s0, 32 6318; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 6319; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 6320; GCN-HSA-NEXT: v_mov_b32_e32 v17, s1 6321; GCN-HSA-NEXT: v_mov_b32_e32 v16, s0 6322; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 6323; GCN-HSA-NEXT: v_bfe_i32 v4, v1, 0, 16 6324; GCN-HSA-NEXT: v_ashr_i64 v[6:7], v[0:1], 48 6325; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 31, v4 6326; GCN-HSA-NEXT: v_mov_b32_e32 v11, v3 6327; GCN-HSA-NEXT: v_lshrrev_b32_e32 v10, 16, v2 6328; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 6329; GCN-HSA-NEXT: v_bfe_i32 v8, v2, 0, 16 6330; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v0 6331; GCN-HSA-NEXT: v_bfe_i32 v4, v0, 0, 16 6332; GCN-HSA-NEXT: v_ashr_i64 v[2:3], v[2:3], 48 6333; 
GCN-HSA-NEXT: v_bfe_i32 v0, v11, 0, 16 6334; GCN-HSA-NEXT: v_bfe_i32 v6, v1, 0, 16 6335; GCN-HSA-NEXT: v_bfe_i32 v10, v10, 0, 16 6336; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 6337; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 31, v4 6338; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8 6339; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 31, v6 6340; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v10 6341; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[0:3] 6342; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11] 6343; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[4:7] 6344; GCN-HSA-NEXT: s_endpgm 6345; 6346; GCN-NOHSA-VI-LABEL: global_sextload_v8i16_to_v8i64: 6347; GCN-NOHSA-VI: ; %bb.0: 6348; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 6349; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 6350; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 6351; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 6352; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 6353; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 6354; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 6355; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 6356; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 6357; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 6358; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 6359; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 6360; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v11, v3 6361; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v3 6362; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v10, 16, v2 6363; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v0 6364; GCN-NOHSA-VI-NEXT: v_bfe_i32 v12, v11, 0, 16 6365; GCN-NOHSA-VI-NEXT: v_bfe_i32 v14, v3, 0, 16 6366; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v1 6367; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v2, 0, 16 6368; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v0, 0, 16 6369; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v1, 0, 16 6370; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v5, 0, 16 6371; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v7, 0, 16 6372; GCN-NOHSA-VI-NEXT: v_bfe_i32 v10, v10, 0, 16 6373; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v13, 31, v12 6374; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v15, 31, 
v14 6375; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v5, 31, v4 6376; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 6377; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 31, v8 6378; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 31, v6 6379; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 31, v2 6380; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v11, 31, v10 6381; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:48 6382; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32 6383; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 6384; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 6385; GCN-NOHSA-VI-NEXT: s_endpgm 6386; 6387; EG-LABEL: global_sextload_v8i16_to_v8i64: 6388; EG: ; %bb.0: 6389; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 6390; EG-NEXT: TEX 0 @8 6391; EG-NEXT: ALU 33, @11, KC0[CB0:0-32], KC1[] 6392; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T7.X, 0 6393; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T11.X, 0 6394; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T9.X, 0 6395; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T8.X, 1 6396; EG-NEXT: CF_END 6397; EG-NEXT: Fetch clause starting at 8: 6398; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 6399; EG-NEXT: ALU clause starting at 10: 6400; EG-NEXT: MOV * T7.X, KC0[2].Z, 6401; EG-NEXT: ALU clause starting at 11: 6402; EG-NEXT: LSHR T8.X, KC0[2].Y, literal.x, 6403; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6404; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6405; EG-NEXT: LSHR T9.X, PV.W, literal.x, 6406; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y, 6407; EG-NEXT: ASHR * T10.W, T7.X, literal.z, 6408; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 6409; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 6410; EG-NEXT: LSHR T11.X, PV.W, literal.x, 6411; EG-NEXT: ASHR T10.Z, T7.X, literal.y, 6412; EG-NEXT: ASHR * T12.W, T7.Y, literal.z, 6413; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6414; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 6415; EG-NEXT: BFE_INT T10.X, T7.X, 
0.0, literal.x, 6416; EG-NEXT: ASHR T12.Z, T7.Y, literal.x, 6417; EG-NEXT: ASHR * T13.W, T7.Z, literal.y, 6418; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6419; EG-NEXT: BFE_INT T12.X, T7.Y, 0.0, literal.x, 6420; EG-NEXT: ASHR T10.Y, PV.X, literal.y, 6421; EG-NEXT: ASHR T13.Z, T7.Z, literal.x, 6422; EG-NEXT: ASHR * T14.W, T7.W, literal.y, 6423; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6424; EG-NEXT: BFE_INT T13.X, T7.Z, 0.0, literal.x, 6425; EG-NEXT: ASHR T12.Y, PV.X, literal.y, 6426; EG-NEXT: ASHR * T14.Z, T7.W, literal.x, 6427; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6428; EG-NEXT: BFE_INT T14.X, T7.W, 0.0, literal.x, 6429; EG-NEXT: ASHR T13.Y, PV.X, literal.y, 6430; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 6431; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6432; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 6433; EG-NEXT: LSHR T7.X, PV.W, literal.x, 6434; EG-NEXT: ASHR * T14.Y, PV.X, literal.y, 6435; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 6436; 6437; CM-LABEL: global_sextload_v8i16_to_v8i64: 6438; CM: ; %bb.0: 6439; CM-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 6440; CM-NEXT: TEX 0 @8 6441; CM-NEXT: ALU 33, @11, KC0[CB0:0-32], KC1[] 6442; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T7, T14.X 6443; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T13, T11.X 6444; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T12, T9.X 6445; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T10, T8.X 6446; CM-NEXT: CF_END 6447; CM-NEXT: Fetch clause starting at 8: 6448; CM-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 6449; CM-NEXT: ALU clause starting at 10: 6450; CM-NEXT: MOV * T7.X, KC0[2].Z, 6451; CM-NEXT: ALU clause starting at 11: 6452; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 6453; CM-NEXT: 48(6.726233e-44), 0(0.000000e+00) 6454; CM-NEXT: LSHR T8.X, PV.W, literal.x, 6455; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6456; CM-NEXT: 2(2.802597e-45), 32(4.484155e-44) 6457; CM-NEXT: LSHR T9.X, PV.W, literal.x, 6458; CM-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.y, 6459; CM-NEXT: ASHR * T10.W, T7.W, 
literal.z, 6460; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6461; CM-NEXT: 31(4.344025e-44), 0(0.000000e+00) 6462; CM-NEXT: LSHR T11.X, PV.Z, literal.x, 6463; CM-NEXT: ASHR T10.Z, T7.W, literal.y, 6464; CM-NEXT: ASHR * T12.W, T7.Z, literal.z, 6465; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6466; CM-NEXT: 31(4.344025e-44), 0(0.000000e+00) 6467; CM-NEXT: BFE_INT T10.X, T7.W, 0.0, literal.x, 6468; CM-NEXT: ASHR T12.Z, T7.Z, literal.x, 6469; CM-NEXT: ASHR * T13.W, T7.Y, literal.y, 6470; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6471; CM-NEXT: BFE_INT T12.X, T7.Z, 0.0, literal.x, 6472; CM-NEXT: ASHR T10.Y, PV.X, literal.y, 6473; CM-NEXT: ASHR T13.Z, T7.Y, literal.x, 6474; CM-NEXT: ASHR * T7.W, T7.X, literal.y, 6475; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6476; CM-NEXT: BFE_INT T13.X, T7.Y, 0.0, literal.x, 6477; CM-NEXT: ASHR T12.Y, PV.X, literal.y, 6478; CM-NEXT: ASHR * T7.Z, T7.X, literal.x, 6479; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6480; CM-NEXT: BFE_INT T7.X, T7.X, 0.0, literal.x, 6481; CM-NEXT: ASHR * T13.Y, PV.X, literal.y, 6482; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6483; CM-NEXT: LSHR T14.X, KC0[2].Y, literal.x, 6484; CM-NEXT: ASHR * T7.Y, PV.X, literal.y, 6485; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44) 6486 %load = load <8 x i16>, <8 x i16> addrspace(1)* %in 6487 %ext = sext <8 x i16> %load to <8 x i64> 6488 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out 6489 ret void 6490} 6491 6492define amdgpu_kernel void @global_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 { 6493; GCN-NOHSA-SI-LABEL: global_zextload_v16i16_to_v16i64: 6494; GCN-NOHSA-SI: ; %bb.0: 6495; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 6496; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 6497; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 6498; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 6499; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 6500; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 6501; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 6502; GCN-NOHSA-SI-NEXT: 
s_mov_b32 s9, s7 6503; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 6504; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, 0xffff 6505; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 6506; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1) 6507; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v10, 16, v1 6508; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v14, 16, v2 6509; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v18, 16, v0 6510; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v16, s0, v0 6511; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v12, s0, v2 6512; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, s0, v1 6513; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v3 6514; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, s0, v3 6515; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v20, 0 6516; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 6517; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v21, 16, v5 6518; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v25, 16, v6 6519; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v23, s0, v6 6520; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v6, 16, v4 6521; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, s0, v4 6522; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v28, 16, v7 6523; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v26, s0, v7 6524; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v19, s0, v5 6525; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v22, v20 6526; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v27, v20 6527; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v29, v20 6528; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, v20 6529; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v20 6530; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, v20 6531; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, v20 6532; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, v20 6533; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v24, v20 6534; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, v20 6535; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, v20 6536; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 6537; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 6538; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:80 6539; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[26:29], off, s[0:3], 0 offset:112 6540; GCN-NOHSA-SI-NEXT: s_waitcnt 
expcnt(1) 6541; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, 0 6542; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, 0 6543; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6544; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v26, 0 6545; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, 0 6546; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 6547; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16 6548; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64 6549; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:96 6550; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32 6551; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 6552; GCN-NOHSA-SI-NEXT: s_endpgm 6553; 6554; GCN-HSA-LABEL: global_zextload_v16i16_to_v16i64: 6555; GCN-HSA: ; %bb.0: 6556; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 6557; GCN-HSA-NEXT: v_mov_b32_e32 v8, 0 6558; GCN-HSA-NEXT: s_mov_b32 s6, 0xffff 6559; GCN-HSA-NEXT: v_mov_b32_e32 v10, v8 6560; GCN-HSA-NEXT: v_mov_b32_e32 v12, v8 6561; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 6562; GCN-HSA-NEXT: s_add_u32 s4, s2, 16 6563; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 6564; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 6565; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 6566; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 6567; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 6568; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 6569; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] 6570; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 6571; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6572; GCN-HSA-NEXT: s_add_u32 s4, s0, 16 6573; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 6574; GCN-HSA-NEXT: v_mov_b32_e32 v14, s5 6575; GCN-HSA-NEXT: v_mov_b32_e32 v13, s4 6576; GCN-HSA-NEXT: s_add_u32 s4, s0, 0x50 6577; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 6578; GCN-HSA-NEXT: v_mov_b32_e32 v16, s3 6579; GCN-HSA-NEXT: v_mov_b32_e32 v15, s2 6580; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 6581; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v5 6582; GCN-HSA-NEXT: v_and_b32_e32 v9, s6, v5 6583; 
GCN-HSA-NEXT: flat_store_dwordx4 v[13:14], v[9:12] 6584; GCN-HSA-NEXT: v_mov_b32_e32 v14, s5 6585; GCN-HSA-NEXT: v_mov_b32_e32 v13, s4 6586; GCN-HSA-NEXT: s_add_u32 s4, s0, 0x70 6587; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 6588; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 6589; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v1 6590; GCN-HSA-NEXT: v_and_b32_e32 v9, s6, v1 6591; GCN-HSA-NEXT: flat_store_dwordx4 v[13:14], v[9:12] 6592; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6593; GCN-HSA-NEXT: v_mov_b32_e32 v14, s5 6594; GCN-HSA-NEXT: v_mov_b32_e32 v18, s3 6595; GCN-HSA-NEXT: v_mov_b32_e32 v13, s4 6596; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v3 6597; GCN-HSA-NEXT: v_and_b32_e32 v9, s6, v3 6598; GCN-HSA-NEXT: flat_store_dwordx4 v[13:14], v[9:12] 6599; GCN-HSA-NEXT: v_mov_b32_e32 v17, s2 6600; GCN-HSA-NEXT: s_add_u32 s2, s0, 64 6601; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v7 6602; GCN-HSA-NEXT: v_and_b32_e32 v7, s6, v7 6603; GCN-HSA-NEXT: v_mov_b32_e32 v14, 0 6604; GCN-HSA-NEXT: v_lshrrev_b32_e32 v13, 16, v6 6605; GCN-HSA-NEXT: v_and_b32_e32 v11, s6, v6 6606; GCN-HSA-NEXT: flat_store_dwordx4 v[15:16], v[7:10] 6607; GCN-HSA-NEXT: flat_store_dwordx4 v[17:18], v[11:14] 6608; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6609; GCN-HSA-NEXT: v_mov_b32_e32 v16, s1 6610; GCN-HSA-NEXT: v_mov_b32_e32 v15, s0 6611; GCN-HSA-NEXT: s_add_u32 s0, s0, 0x60 6612; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 6613; GCN-HSA-NEXT: v_mov_b32_e32 v18, s3 6614; GCN-HSA-NEXT: v_mov_b32_e32 v1, v8 6615; GCN-HSA-NEXT: v_mov_b32_e32 v7, v8 6616; GCN-HSA-NEXT: v_mov_b32_e32 v3, v14 6617; GCN-HSA-NEXT: v_lshrrev_b32_e32 v8, 16, v0 6618; GCN-HSA-NEXT: v_and_b32_e32 v6, s6, v0 6619; GCN-HSA-NEXT: v_lshrrev_b32_e32 v13, 16, v2 6620; GCN-HSA-NEXT: v_and_b32_e32 v11, s6, v2 6621; GCN-HSA-NEXT: v_lshrrev_b32_e32 v2, 16, v4 6622; GCN-HSA-NEXT: v_and_b32_e32 v0, s6, v4 6623; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 6624; GCN-HSA-NEXT: v_mov_b32_e32 v9, v14 6625; GCN-HSA-NEXT: v_mov_b32_e32 v17, s2 6626; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 6627; 
GCN-HSA-NEXT: flat_store_dwordx4 v[15:16], v[0:3] 6628; GCN-HSA-NEXT: flat_store_dwordx4 v[17:18], v[6:9] 6629; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[11:14] 6630; GCN-HSA-NEXT: s_endpgm 6631; 6632; GCN-NOHSA-VI-LABEL: global_zextload_v16i16_to_v16i64: 6633; GCN-NOHSA-VI: ; %bb.0: 6634; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 6635; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 6636; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 6637; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 6638; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 6639; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 6640; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 6641; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 6642; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 6643; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 6644; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, 0xffff 6645; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v28, 0 6646; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v30, 0 6647; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 6648; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v25, v28 6649; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v21, v28 6650; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v17, v28 6651; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v13, v28 6652; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v9, v28 6653; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v11, 0 6654; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v15, 0 6655; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v19, 0 6656; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v23, 0 6657; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1) 6658; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v8, s0, v0 6659; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v10, 16, v0 6660; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, s0, v2 6661; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v12, s0, v1 6662; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v16, s0, v3 6663; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v18, 16, v3 6664; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 6665; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v20, s0, v4 6666; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v24, s0, v5 6667; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v3, s0, v6 6668; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v27, 
s0, v7 6669; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v29, 16, v7 6670; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 6671; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v22, 16, v4 6672; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v26, 16, v5 6673; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v6 6674; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[27:30], off, s[0:3], 0 offset:112 6675; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, v28 6676; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v6, 0 6677; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v27, 0 6678; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v14, 16, v1 6679; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:96 6680; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 6681; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, v28 6682; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, 0 6683; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:80 6684; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:64 6685; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48 6686; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 6687; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16 6688; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 6689; GCN-NOHSA-VI-NEXT: s_endpgm 6690; 6691; EG-LABEL: global_zextload_v16i16_to_v16i64: 6692; EG: ; %bb.0: 6693; EG-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[] 6694; EG-NEXT: TEX 1 @12 6695; EG-NEXT: ALU 62, @17, KC0[CB0:0-32], KC1[] 6696; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T26.X, 0 6697; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T25.X, 0 6698; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T24.X, 0 6699; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T23.X, 0 6700; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T22.X, 0 6701; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T21.X, 0 6702; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T20.X, 0 6703; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T19.X, 1 6704; EG-NEXT: CF_END 6705; 
EG-NEXT: Fetch clause starting at 12: 6706; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1 6707; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1 6708; EG-NEXT: ALU clause starting at 16: 6709; EG-NEXT: MOV * T11.X, KC0[2].Z, 6710; EG-NEXT: ALU clause starting at 17: 6711; EG-NEXT: LSHR * T13.Z, T12.W, literal.x, 6712; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6713; EG-NEXT: AND_INT T13.X, T12.W, literal.x, 6714; EG-NEXT: MOV T13.Y, 0.0, 6715; EG-NEXT: LSHR T14.Z, T12.Z, literal.y, 6716; EG-NEXT: AND_INT * T14.X, T12.Z, literal.x, 6717; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6718; EG-NEXT: MOV T14.Y, 0.0, 6719; EG-NEXT: LSHR * T15.Z, T12.Y, literal.x, 6720; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6721; EG-NEXT: AND_INT T15.X, T12.Y, literal.x, 6722; EG-NEXT: MOV T15.Y, 0.0, 6723; EG-NEXT: LSHR T12.Z, T12.X, literal.y, 6724; EG-NEXT: AND_INT * T12.X, T12.X, literal.x, 6725; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6726; EG-NEXT: MOV T12.Y, 0.0, 6727; EG-NEXT: LSHR * T16.Z, T11.W, literal.x, 6728; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6729; EG-NEXT: AND_INT T16.X, T11.W, literal.x, 6730; EG-NEXT: MOV T16.Y, 0.0, 6731; EG-NEXT: LSHR T17.Z, T11.Z, literal.y, 6732; EG-NEXT: AND_INT * T17.X, T11.Z, literal.x, 6733; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6734; EG-NEXT: MOV T17.Y, 0.0, 6735; EG-NEXT: LSHR * T18.Z, T11.Y, literal.x, 6736; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6737; EG-NEXT: AND_INT T18.X, T11.Y, literal.x, 6738; EG-NEXT: MOV T18.Y, 0.0, 6739; EG-NEXT: LSHR T11.Z, T11.X, literal.y, 6740; EG-NEXT: AND_INT * T11.X, T11.X, literal.x, 6741; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6742; EG-NEXT: MOV T11.Y, 0.0, 6743; EG-NEXT: MOV T13.W, 0.0, 6744; EG-NEXT: MOV * T14.W, 0.0, 6745; EG-NEXT: MOV T15.W, 0.0, 6746; EG-NEXT: MOV * T12.W, 0.0, 6747; EG-NEXT: MOV T16.W, 0.0, 6748; EG-NEXT: MOV * T17.W, 0.0, 6749; EG-NEXT: MOV T18.W, 0.0, 6750; EG-NEXT: MOV * T11.W, 0.0, 6751; EG-NEXT: LSHR T19.X, KC0[2].Y, literal.x, 6752; EG-NEXT: 
ADD_INT * T0.W, KC0[2].Y, literal.y, 6753; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6754; EG-NEXT: LSHR T20.X, PV.W, literal.x, 6755; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6756; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 6757; EG-NEXT: LSHR T21.X, PV.W, literal.x, 6758; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6759; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 6760; EG-NEXT: LSHR T22.X, PV.W, literal.x, 6761; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6762; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 6763; EG-NEXT: LSHR T23.X, PV.W, literal.x, 6764; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6765; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 6766; EG-NEXT: LSHR T24.X, PV.W, literal.x, 6767; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6768; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 6769; EG-NEXT: LSHR T25.X, PV.W, literal.x, 6770; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6771; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 6772; EG-NEXT: LSHR * T26.X, PV.W, literal.x, 6773; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 6774; 6775; CM-LABEL: global_zextload_v16i16_to_v16i64: 6776; CM: ; %bb.0: 6777; CM-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[] 6778; CM-NEXT: TEX 1 @12 6779; CM-NEXT: ALU 64, @17, KC0[CB0:0-32], KC1[] 6780; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T13, T26.X 6781; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T14, T25.X 6782; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T15, T24.X 6783; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T12, T23.X 6784; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T16, T22.X 6785; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T17, T21.X 6786; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T18, T20.X 6787; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T11, T19.X 6788; CM-NEXT: CF_END 6789; CM-NEXT: Fetch clause starting at 12: 6790; CM-NEXT: VTX_READ_128 T12.XYZW, T11.X, 0, #1 6791; CM-NEXT: VTX_READ_128 T11.XYZW, T11.X, 16, #1 6792; CM-NEXT: ALU clause starting at 16: 6793; CM-NEXT: MOV * T11.X, KC0[2].Z, 6794; CM-NEXT: ALU clause starting at 17: 6795; 
CM-NEXT: LSHR * T13.Z, T12.X, literal.x, 6796; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6797; CM-NEXT: AND_INT T13.X, T12.X, literal.x, 6798; CM-NEXT: MOV T13.Y, 0.0, 6799; CM-NEXT: LSHR * T14.Z, T12.Y, literal.y, 6800; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6801; CM-NEXT: AND_INT T14.X, T12.Y, literal.x, 6802; CM-NEXT: MOV T14.Y, 0.0, 6803; CM-NEXT: LSHR * T15.Z, T12.Z, literal.y, 6804; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6805; CM-NEXT: AND_INT T15.X, T12.Z, literal.x, 6806; CM-NEXT: MOV T15.Y, 0.0, 6807; CM-NEXT: LSHR * T12.Z, T12.W, literal.y, 6808; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6809; CM-NEXT: AND_INT T12.X, T12.W, literal.x, 6810; CM-NEXT: MOV T12.Y, 0.0, 6811; CM-NEXT: LSHR * T16.Z, T11.X, literal.y, 6812; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6813; CM-NEXT: AND_INT T16.X, T11.X, literal.x, 6814; CM-NEXT: MOV T16.Y, 0.0, 6815; CM-NEXT: LSHR * T17.Z, T11.Y, literal.y, 6816; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6817; CM-NEXT: AND_INT T17.X, T11.Y, literal.x, 6818; CM-NEXT: MOV T17.Y, 0.0, 6819; CM-NEXT: LSHR * T18.Z, T11.Z, literal.y, 6820; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6821; CM-NEXT: AND_INT T18.X, T11.Z, literal.x, 6822; CM-NEXT: MOV T18.Y, 0.0, 6823; CM-NEXT: LSHR * T11.Z, T11.W, literal.y, 6824; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6825; CM-NEXT: AND_INT T11.X, T11.W, literal.x, 6826; CM-NEXT: MOV T11.Y, 0.0, 6827; CM-NEXT: MOV * T13.W, 0.0, 6828; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 6829; CM-NEXT: MOV * T14.W, 0.0, 6830; CM-NEXT: MOV * T15.W, 0.0, 6831; CM-NEXT: MOV * T12.W, 0.0, 6832; CM-NEXT: MOV * T16.W, 0.0, 6833; CM-NEXT: MOV * T17.W, 0.0, 6834; CM-NEXT: MOV * T18.W, 0.0, 6835; CM-NEXT: MOV * T11.W, 0.0, 6836; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 6837; CM-NEXT: 112(1.569454e-43), 0(0.000000e+00) 6838; CM-NEXT: LSHR T19.X, PV.W, literal.x, 6839; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6840; CM-NEXT: 2(2.802597e-45), 96(1.345247e-43) 6841; CM-NEXT: 
LSHR T20.X, PV.W, literal.x, 6842; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6843; CM-NEXT: 2(2.802597e-45), 80(1.121039e-43) 6844; CM-NEXT: LSHR T21.X, PV.W, literal.x, 6845; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6846; CM-NEXT: 2(2.802597e-45), 64(8.968310e-44) 6847; CM-NEXT: LSHR T22.X, PV.W, literal.x, 6848; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6849; CM-NEXT: 2(2.802597e-45), 48(6.726233e-44) 6850; CM-NEXT: LSHR T23.X, PV.W, literal.x, 6851; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6852; CM-NEXT: 2(2.802597e-45), 32(4.484155e-44) 6853; CM-NEXT: LSHR T24.X, PV.W, literal.x, 6854; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6855; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6856; CM-NEXT: LSHR * T25.X, PV.W, literal.x, 6857; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 6858; CM-NEXT: LSHR * T26.X, KC0[2].Y, literal.x, 6859; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 6860 %load = load <16 x i16>, <16 x i16> addrspace(1)* %in 6861 %ext = zext <16 x i16> %load to <16 x i64> 6862 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out 6863 ret void 6864} 6865 6866define amdgpu_kernel void @global_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 { 6867; GCN-NOHSA-SI-LABEL: global_sextload_v16i16_to_v16i64: 6868; GCN-NOHSA-SI: ; %bb.0: 6869; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 6870; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 6871; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 6872; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 6873; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 6874; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 6875; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 6876; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 6877; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 6878; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 6879; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 6880; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 6881; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 6882; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, v7 
6883; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, v3 6884; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v13, 16, v4 6885; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v14, 16, v2 6886; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v15, 16, v0 6887; GCN-NOHSA-SI-NEXT: v_bfe_i32 v8, v8, 0, 16 6888; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[10:11], v[6:7], 48 6889; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v9, 31, v8 6890; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112 6891; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6892; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[9:10], v[4:5], 48 6893; GCN-NOHSA-SI-NEXT: v_bfe_i32 v7, v5, 0, 16 6894; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v8, 31, v7 6895; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:80 6896; GCN-NOHSA-SI-NEXT: v_bfe_i32 v5, v0, 0, 16 6897; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6898; GCN-NOHSA-SI-NEXT: v_bfe_i32 v7, v15, 0, 16 6899; GCN-NOHSA-SI-NEXT: v_bfe_i32 v8, v12, 0, 16 6900; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[10:11], v[2:3], 48 6901; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v9, 31, v8 6902; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:48 6903; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6904; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[11:12], v[0:1], 48 6905; GCN-NOHSA-SI-NEXT: v_bfe_i32 v9, v1, 0, 16 6906; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v2, 0, 16 6907; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v14, 0, 16 6908; GCN-NOHSA-SI-NEXT: v_bfe_i32 v15, v13, 0, 16 6909; GCN-NOHSA-SI-NEXT: v_bfe_i32 v13, v4, 0, 16 6910; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v6 6911; GCN-NOHSA-SI-NEXT: v_bfe_i32 v17, v6, 0, 16 6912; GCN-NOHSA-SI-NEXT: v_bfe_i32 v19, v1, 0, 16 6913; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v6, 31, v5 6914; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 6915; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v14, 31, v13 6916; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v18, 31, v17 6917; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v10, 31, v9 6918; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v8, 31, v7 6919; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 
v3, 31, v2 6920; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v16, 31, v15 6921; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v20, 31, v19 6922; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:16 6923; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[17:20], off, s[0:3], 0 offset:96 6924; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:64 6925; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 6926; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[5:8], off, s[0:3], 0 6927; GCN-NOHSA-SI-NEXT: s_endpgm 6928; 6929; GCN-HSA-LABEL: global_sextload_v16i16_to_v16i64: 6930; GCN-HSA: ; %bb.0: 6931; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 6932; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 6933; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 6934; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 6935; GCN-HSA-NEXT: s_add_u32 s2, s2, 16 6936; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 6937; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 6938; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 6939; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 6940; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] 6941; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 6942; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6943; GCN-HSA-NEXT: v_mov_b32_e32 v15, s3 6944; GCN-HSA-NEXT: v_mov_b32_e32 v14, s2 6945; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 6946; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6947; GCN-HSA-NEXT: v_mov_b32_e32 v17, s3 6948; GCN-HSA-NEXT: v_mov_b32_e32 v16, s2 6949; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70 6950; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6951; GCN-HSA-NEXT: v_mov_b32_e32 v19, s3 6952; GCN-HSA-NEXT: v_mov_b32_e32 v18, s2 6953; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50 6954; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6955; GCN-HSA-NEXT: v_mov_b32_e32 v13, s1 6956; GCN-HSA-NEXT: v_mov_b32_e32 v12, s0 6957; GCN-HSA-NEXT: s_waitcnt vmcnt(1) 6958; GCN-HSA-NEXT: v_bfe_i32 v8, v1, 0, 16 6959; GCN-HSA-NEXT: v_ashr_i64 v[10:11], v[0:1], 48 6960; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8 6961; GCN-HSA-NEXT: v_mov_b32_e32 v1, v3 6962; GCN-HSA-NEXT: 
flat_store_dwordx4 v[16:17], v[8:11] 6963; GCN-HSA-NEXT: v_mov_b32_e32 v17, s3 6964; GCN-HSA-NEXT: v_bfe_i32 v8, v1, 0, 16 6965; GCN-HSA-NEXT: v_ashr_i64 v[10:11], v[2:3], 48 6966; GCN-HSA-NEXT: v_mov_b32_e32 v16, s2 6967; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 6968; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8 6969; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[8:11] 6970; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6971; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v2 6972; GCN-HSA-NEXT: v_bfe_i32 v8, v2, 0, 16 6973; GCN-HSA-NEXT: v_lshrrev_b32_e32 v2, 16, v0 6974; GCN-HSA-NEXT: v_mov_b32_e32 v15, s3 6975; GCN-HSA-NEXT: v_bfe_i32 v2, v2, 0, 16 6976; GCN-HSA-NEXT: v_bfe_i32 v0, v0, 0, 16 6977; GCN-HSA-NEXT: v_bfe_i32 v10, v1, 0, 16 6978; GCN-HSA-NEXT: v_mov_b32_e32 v14, s2 6979; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 6980; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 6981; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2 6982; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6983; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[0:3] 6984; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8 6985; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v10 6986; GCN-HSA-NEXT: s_waitcnt vmcnt(3) 6987; GCN-HSA-NEXT: v_bfe_i32 v0, v5, 0, 16 6988; GCN-HSA-NEXT: v_ashr_i64 v[2:3], v[4:5], 48 6989; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[8:11] 6990; GCN-HSA-NEXT: s_add_u32 s0, s0, 64 6991; GCN-HSA-NEXT: v_lshrrev_b32_e32 v8, 16, v6 6992; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 6993; GCN-HSA-NEXT: v_mov_b32_e32 v11, v7 6994; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 6995; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[0:3] 6996; GCN-HSA-NEXT: v_bfe_i32 v10, v8, 0, 16 6997; GCN-HSA-NEXT: v_bfe_i32 v8, v6, 0, 16 6998; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v4 6999; GCN-HSA-NEXT: v_bfe_i32 v0, v4, 0, 16 7000; GCN-HSA-NEXT: v_bfe_i32 v4, v11, 0, 16 7001; GCN-HSA-NEXT: v_ashr_i64 v[6:7], v[6:7], 48 7002; GCN-HSA-NEXT: v_bfe_i32 v2, v1, 0, 16 7003; GCN-HSA-NEXT: v_mov_b32_e32 v15, s3 7004; GCN-HSA-NEXT: v_mov_b32_e32 v21, s1 7005; GCN-HSA-NEXT: 
v_ashrrev_i32_e32 v5, 31, v4 7006; GCN-HSA-NEXT: v_mov_b32_e32 v14, s2 7007; GCN-HSA-NEXT: v_mov_b32_e32 v20, s0 7008; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 7009; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8 7010; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2 7011; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v10 7012; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[4:7] 7013; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[8:11] 7014; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[0:3] 7015; GCN-HSA-NEXT: s_endpgm 7016; 7017; GCN-NOHSA-VI-LABEL: global_sextload_v16i16_to_v16i64: 7018; GCN-NOHSA-VI: ; %bb.0: 7019; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 7020; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 7021; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 7022; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 7023; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 7024; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 7025; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 7026; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 7027; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 7028; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 7029; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 7030; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 7031; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1) 7032; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v0, 0, 16 7033; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 7034; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v9, v7 7035; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v7 7036; GCN-NOHSA-VI-NEXT: v_bfe_i32 v9, v9, 0, 16 7037; GCN-NOHSA-VI-NEXT: v_bfe_i32 v11, v7, 0, 16 7038; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v10, 31, v9 7039; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v12, 31, v11 7040; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:112 7041; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 7042; GCN-NOHSA-VI-NEXT: v_bfe_i32 v9, v6, 0, 16 7043; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v6, 16, v6 7044; GCN-NOHSA-VI-NEXT: v_bfe_i32 v11, v6, 0, 16 7045; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v10, 31, v9 7046; 
GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v12, 31, v11 7047; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:96 7048; GCN-NOHSA-VI-NEXT: s_nop 0 7049; GCN-NOHSA-VI-NEXT: v_bfe_i32 v9, v5, 0, 16 7050; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v5 7051; GCN-NOHSA-VI-NEXT: v_bfe_i32 v11, v5, 0, 16 7052; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v10, 31, v9 7053; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v12, 31, v11 7054; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:80 7055; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v2 7056; GCN-NOHSA-VI-NEXT: v_bfe_i32 v12, v1, 0, 16 7057; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7058; GCN-NOHSA-VI-NEXT: v_bfe_i32 v14, v1, 0, 16 7059; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, v3 7060; GCN-NOHSA-VI-NEXT: v_bfe_i32 v16, v1, 0, 16 7061; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v4 7062; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v3 7063; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v1, 0, 16 7064; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v4, 0, 16 7065; GCN-NOHSA-VI-NEXT: v_bfe_i32 v10, v0, 0, 16 7066; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v2, 0, 16 7067; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v5, 0, 16 7068; GCN-NOHSA-VI-NEXT: v_bfe_i32 v18, v3, 0, 16 7069; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v5, 31, v4 7070; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 31, v6 7071; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 31, v8 7072; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v13, 31, v12 7073; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 7074; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v11, 31, v10 7075; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v15, 31, v14 7076; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 31, v2 7077; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v17, 31, v16 7078; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v19, 31, v18 7079; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64 7080; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48 7081; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 
offset:32 7082; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16 7083; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 7084; GCN-NOHSA-VI-NEXT: s_endpgm 7085; 7086; EG-LABEL: global_sextload_v16i16_to_v16i64: 7087; EG: ; %bb.0: 7088; EG-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[] 7089; EG-NEXT: TEX 1 @12 7090; EG-NEXT: ALU 65, @17, KC0[CB0:0-32], KC1[] 7091; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T12.X, 0 7092; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T20.X, 0 7093; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T18.X, 0 7094; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T17.X, 0 7095; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T16.X, 0 7096; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T15.X, 0 7097; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T14.X, 0 7098; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T13.X, 1 7099; EG-NEXT: CF_END 7100; EG-NEXT: Fetch clause starting at 12: 7101; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1 7102; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1 7103; EG-NEXT: ALU clause starting at 16: 7104; EG-NEXT: MOV * T11.X, KC0[2].Z, 7105; EG-NEXT: ALU clause starting at 17: 7106; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x, 7107; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7108; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 7109; EG-NEXT: LSHR T14.X, PV.W, literal.x, 7110; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7111; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 7112; EG-NEXT: LSHR T15.X, PV.W, literal.x, 7113; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7114; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 7115; EG-NEXT: LSHR T16.X, PV.W, literal.x, 7116; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7117; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 7118; EG-NEXT: LSHR T17.X, PV.W, literal.x, 7119; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7120; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 7121; EG-NEXT: LSHR T18.X, PV.W, literal.x, 7122; EG-NEXT: ADD_INT T0.W, KC0[2].Y, 
literal.y, 7123; EG-NEXT: ASHR * T19.W, T11.X, literal.z, 7124; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 7125; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 7126; EG-NEXT: LSHR T20.X, PV.W, literal.x, 7127; EG-NEXT: ASHR T19.Z, T11.X, literal.y, 7128; EG-NEXT: ASHR * T21.W, T11.Y, literal.z, 7129; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 7130; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 7131; EG-NEXT: BFE_INT T19.X, T11.X, 0.0, literal.x, 7132; EG-NEXT: ASHR T21.Z, T11.Y, literal.x, 7133; EG-NEXT: ASHR * T22.W, T11.Z, literal.y, 7134; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7135; EG-NEXT: BFE_INT T21.X, T11.Y, 0.0, literal.x, 7136; EG-NEXT: ASHR T19.Y, PV.X, literal.y, 7137; EG-NEXT: ASHR T22.Z, T11.Z, literal.x, 7138; EG-NEXT: ASHR * T23.W, T11.W, literal.y, 7139; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7140; EG-NEXT: BFE_INT T22.X, T11.Z, 0.0, literal.x, 7141; EG-NEXT: ASHR T21.Y, PV.X, literal.y, 7142; EG-NEXT: ASHR T23.Z, T11.W, literal.x, 7143; EG-NEXT: ASHR * T24.W, T12.X, literal.y, 7144; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7145; EG-NEXT: BFE_INT T23.X, T11.W, 0.0, literal.x, 7146; EG-NEXT: ASHR T22.Y, PV.X, literal.y, 7147; EG-NEXT: ASHR T24.Z, T12.X, literal.x, 7148; EG-NEXT: ASHR * T11.W, T12.Y, literal.y, 7149; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7150; EG-NEXT: BFE_INT T24.X, T12.X, 0.0, literal.x, 7151; EG-NEXT: ASHR T23.Y, PV.X, literal.y, 7152; EG-NEXT: ASHR T11.Z, T12.Y, literal.x, 7153; EG-NEXT: ASHR * T25.W, T12.Z, literal.y, 7154; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7155; EG-NEXT: BFE_INT T11.X, T12.Y, 0.0, literal.x, 7156; EG-NEXT: ASHR T24.Y, PV.X, literal.y, 7157; EG-NEXT: ASHR T25.Z, T12.Z, literal.x, 7158; EG-NEXT: ASHR * T26.W, T12.W, literal.y, 7159; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7160; EG-NEXT: BFE_INT T25.X, T12.Z, 0.0, literal.x, 7161; EG-NEXT: ASHR T11.Y, PV.X, literal.y, 7162; EG-NEXT: ASHR * T26.Z, T12.W, literal.x, 7163; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7164; EG-NEXT: BFE_INT 
T26.X, T12.W, 0.0, literal.x, 7165; EG-NEXT: ASHR T25.Y, PV.X, literal.y, 7166; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 7167; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7168; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00) 7169; EG-NEXT: LSHR T12.X, PV.W, literal.x, 7170; EG-NEXT: ASHR * T26.Y, PV.X, literal.y, 7171; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 7172; 7173; CM-LABEL: global_sextload_v16i16_to_v16i64: 7174; CM: ; %bb.0: 7175; CM-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[] 7176; CM-NEXT: TEX 1 @12 7177; CM-NEXT: ALU 65, @17, KC0[CB0:0-32], KC1[] 7178; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T12, T26.X 7179; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T25, T20.X 7180; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T24, T18.X 7181; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T23, T17.X 7182; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T11, T16.X 7183; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T22, T15.X 7184; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T21, T14.X 7185; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T19, T13.X 7186; CM-NEXT: CF_END 7187; CM-NEXT: Fetch clause starting at 12: 7188; CM-NEXT: VTX_READ_128 T12.XYZW, T11.X, 0, #1 7189; CM-NEXT: VTX_READ_128 T11.XYZW, T11.X, 16, #1 7190; CM-NEXT: ALU clause starting at 16: 7191; CM-NEXT: MOV * T11.X, KC0[2].Z, 7192; CM-NEXT: ALU clause starting at 17: 7193; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 7194; CM-NEXT: 112(1.569454e-43), 0(0.000000e+00) 7195; CM-NEXT: LSHR T13.X, PV.W, literal.x, 7196; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7197; CM-NEXT: 2(2.802597e-45), 96(1.345247e-43) 7198; CM-NEXT: LSHR T14.X, PV.W, literal.x, 7199; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7200; CM-NEXT: 2(2.802597e-45), 80(1.121039e-43) 7201; CM-NEXT: LSHR T15.X, PV.W, literal.x, 7202; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7203; CM-NEXT: 2(2.802597e-45), 64(8.968310e-44) 7204; CM-NEXT: LSHR T16.X, PV.W, literal.x, 7205; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7206; CM-NEXT: 2(2.802597e-45), 48(6.726233e-44) 7207; CM-NEXT: 
LSHR T17.X, PV.W, literal.x, 7208; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7209; CM-NEXT: 2(2.802597e-45), 32(4.484155e-44) 7210; CM-NEXT: LSHR T18.X, PV.W, literal.x, 7211; CM-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.y, 7212; CM-NEXT: ASHR * T19.W, T11.W, literal.z, 7213; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 7214; CM-NEXT: 31(4.344025e-44), 0(0.000000e+00) 7215; CM-NEXT: LSHR T20.X, PV.Z, literal.x, 7216; CM-NEXT: ASHR T19.Z, T11.W, literal.y, 7217; CM-NEXT: ASHR * T21.W, T11.Z, literal.z, 7218; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 7219; CM-NEXT: 31(4.344025e-44), 0(0.000000e+00) 7220; CM-NEXT: BFE_INT T19.X, T11.W, 0.0, literal.x, 7221; CM-NEXT: ASHR T21.Z, T11.Z, literal.x, 7222; CM-NEXT: ASHR * T22.W, T11.Y, literal.y, 7223; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7224; CM-NEXT: BFE_INT T21.X, T11.Z, 0.0, literal.x, 7225; CM-NEXT: ASHR T19.Y, PV.X, literal.y, 7226; CM-NEXT: ASHR T22.Z, T11.Y, literal.x, 7227; CM-NEXT: ASHR * T11.W, T11.X, literal.y, 7228; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7229; CM-NEXT: BFE_INT T22.X, T11.Y, 0.0, literal.x, 7230; CM-NEXT: ASHR T21.Y, PV.X, literal.y, 7231; CM-NEXT: ASHR T11.Z, T11.X, literal.x, 7232; CM-NEXT: ASHR * T23.W, T12.W, literal.y, 7233; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7234; CM-NEXT: BFE_INT T11.X, T11.X, 0.0, literal.x, 7235; CM-NEXT: ASHR T22.Y, PV.X, literal.y, 7236; CM-NEXT: ASHR T23.Z, T12.W, literal.x, 7237; CM-NEXT: ASHR * T24.W, T12.Z, literal.y, 7238; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7239; CM-NEXT: BFE_INT T23.X, T12.W, 0.0, literal.x, 7240; CM-NEXT: ASHR T11.Y, PV.X, literal.y, 7241; CM-NEXT: ASHR T24.Z, T12.Z, literal.x, 7242; CM-NEXT: ASHR * T25.W, T12.Y, literal.y, 7243; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7244; CM-NEXT: BFE_INT T24.X, T12.Z, 0.0, literal.x, 7245; CM-NEXT: ASHR T23.Y, PV.X, literal.y, 7246; CM-NEXT: ASHR T25.Z, T12.Y, literal.x, 7247; CM-NEXT: ASHR * T12.W, T12.X, literal.y, 7248; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7249; 
CM-NEXT: BFE_INT T25.X, T12.Y, 0.0, literal.x, 7250; CM-NEXT: ASHR T24.Y, PV.X, literal.y, 7251; CM-NEXT: ASHR * T12.Z, T12.X, literal.x, 7252; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7253; CM-NEXT: BFE_INT T12.X, T12.X, 0.0, literal.x, 7254; CM-NEXT: ASHR * T25.Y, PV.X, literal.y, 7255; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7256; CM-NEXT: LSHR T26.X, KC0[2].Y, literal.x, 7257; CM-NEXT: ASHR * T12.Y, PV.X, literal.y, 7258; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44) 7259 %load = load <16 x i16>, <16 x i16> addrspace(1)* %in 7260 %ext = sext <16 x i16> %load to <16 x i64> 7261 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out 7262 ret void 7263} 7264 7265define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 { 7266; GCN-NOHSA-SI-LABEL: global_zextload_v32i16_to_v32i64: 7267; GCN-NOHSA-SI: ; %bb.0: 7268; GCN-NOHSA-SI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 7269; GCN-NOHSA-SI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 7270; GCN-NOHSA-SI-NEXT: s_mov_b32 s14, -1 7271; GCN-NOHSA-SI-NEXT: s_mov_b32 s15, 0xe8f000 7272; GCN-NOHSA-SI-NEXT: s_add_u32 s12, s12, s3 7273; GCN-NOHSA-SI-NEXT: s_addc_u32 s13, s13, 0 7274; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 7275; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 7276; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 7277; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, 0xffff 7278; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 7279; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 7280; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 7281; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 7282; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 7283; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 7284; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[2:5], off, s[8:11], 0 7285; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[6:9], off, s[8:11], 0 offset:16 7286; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[10:13], off, s[8:11], 0 offset:32 7287; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[14:17], off, s[8:11], 0 offset:48 7288; GCN-NOHSA-SI-NEXT: s_waitcnt 
vmcnt(3) 7289; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v23, 16, v3 7290; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v21, 16, v4 7291; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v20, 16, v2 7292; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v18, s0, v2 7293; GCN-NOHSA-SI-NEXT: buffer_store_dword v18, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill 7294; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 7295; GCN-NOHSA-SI-NEXT: buffer_store_dword v19, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill 7296; GCN-NOHSA-SI-NEXT: buffer_store_dword v20, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill 7297; GCN-NOHSA-SI-NEXT: buffer_store_dword v21, off, s[12:15], 0 offset:16 ; 4-byte Folded Spill 7298; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(2) 7299; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v19, s0, v4 7300; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 7301; GCN-NOHSA-SI-NEXT: buffer_store_dword v19, off, s[12:15], 0 offset:20 ; 4-byte Folded Spill 7302; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 7303; GCN-NOHSA-SI-NEXT: buffer_store_dword v20, off, s[12:15], 0 offset:24 ; 4-byte Folded Spill 7304; GCN-NOHSA-SI-NEXT: buffer_store_dword v21, off, s[12:15], 0 offset:28 ; 4-byte Folded Spill 7305; GCN-NOHSA-SI-NEXT: buffer_store_dword v22, off, s[12:15], 0 offset:32 ; 4-byte Folded Spill 7306; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(1) 7307; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v21, s0, v3 7308; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v32, 16, v5 7309; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v30, s0, v5 7310; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v36, 16, v6 7311; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v34, s0, v6 7312; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v28, 16, v8 7313; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v26, s0, v8 7314; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v40, 16, v7 7315; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v38, s0, v7 7316; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v44, 16, v9 7317; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v42, s0, v9 7318; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v48, 16, v10 7319; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v46, s0, v10 7320; GCN-NOHSA-SI-NEXT: 
v_lshrrev_b32_e32 v24, 16, v12 7321; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 7322; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v22, s0, v12 7323; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v52, 16, v11 7324; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v50, s0, v11 7325; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v56, 16, v13 7326; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v54, s0, v13 7327; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v17 7328; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v60, 16, v14 7329; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v58, s0, v14 7330; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v20, 16, v16 7331; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v18, s0, v16 7332; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v10, 16, v15 7333; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, s0, v15 7334; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, s0, v17 7335; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1 7336; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, v1 7337; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, v1 7338; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v55, v1 7339; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v57, v1 7340; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v51, v1 7341; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v53, v1 7342; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v43, v1 7343; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v45, v1 7344; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v39, v1 7345; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v41, v1 7346; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v31, v1 7347; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v33, v1 7348; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, v21 7349; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, v23 7350; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, v1 7351; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, v1 7352; GCN-NOHSA-SI-NEXT: buffer_store_dword v4, off, s[12:15], 0 offset:36 ; 4-byte Folded Spill 7353; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 7354; GCN-NOHSA-SI-NEXT: buffer_store_dword v5, off, s[12:15], 0 offset:40 ; 4-byte Folded Spill 7355; GCN-NOHSA-SI-NEXT: buffer_store_dword v6, off, s[12:15], 0 offset:44 ; 4-byte Folded Spill 7356; GCN-NOHSA-SI-NEXT: buffer_store_dword v7, off, s[12:15], 0 offset:48 ; 4-byte Folded Spill 
7357; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, v1 7358; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v59, v1 7359; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, v1 7360; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v47, v1 7361; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v27, v1 7362; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v35, v1 7363; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 7364; GCN-NOHSA-SI-NEXT: buffer_load_dword v4, off, s[12:15], 0 offset:20 ; 4-byte Folded Reload 7365; GCN-NOHSA-SI-NEXT: buffer_load_dword v5, off, s[12:15], 0 offset:24 ; 4-byte Folded Reload 7366; GCN-NOHSA-SI-NEXT: buffer_load_dword v6, off, s[12:15], 0 offset:28 ; 4-byte Folded Reload 7367; GCN-NOHSA-SI-NEXT: buffer_load_dword v7, off, s[12:15], 0 offset:32 ; 4-byte Folded Reload 7368; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 7369; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, v1 7370; GCN-NOHSA-SI-NEXT: buffer_load_dword v12, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload 7371; GCN-NOHSA-SI-NEXT: buffer_load_dword v13, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload 7372; GCN-NOHSA-SI-NEXT: buffer_load_dword v14, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload 7373; GCN-NOHSA-SI-NEXT: buffer_load_dword v15, off, s[12:15], 0 offset:16 ; 4-byte Folded Reload 7374; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 7375; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, v1 7376; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 7377; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 7378; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 7379; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, 0 7380; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v61, 0 7381; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v25, 0 7382; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 7383; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, v12 7384; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, v13 7385; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, v14 7386; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, 0 7387; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, 0 7388; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v37, 0 7389; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v29, 0 7390; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v49, 
0 7391; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208 7392; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[54:57], off, s[0:3], 0 offset:176 7393; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[50:53], off, s[0:3], 0 offset:144 7394; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[42:45], off, s[0:3], 0 offset:112 7395; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[38:41], off, s[0:3], 0 offset:80 7396; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[30:33], off, s[0:3], 0 offset:48 7397; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(5) 7398; GCN-NOHSA-SI-NEXT: buffer_load_dword v8, off, s[12:15], 0 offset:36 ; 4-byte Folded Reload 7399; GCN-NOHSA-SI-NEXT: buffer_load_dword v9, off, s[12:15], 0 offset:40 ; 4-byte Folded Reload 7400; GCN-NOHSA-SI-NEXT: buffer_load_dword v10, off, s[12:15], 0 offset:44 ; 4-byte Folded Reload 7401; GCN-NOHSA-SI-NEXT: buffer_load_dword v11, off, s[12:15], 0 offset:48 ; 4-byte Folded Reload 7402; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 7403; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16 7404; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:224 7405; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[58:61], off, s[0:3], 0 offset:192 7406; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[22:25], off, s[0:3], 0 offset:160 7407; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[46:49], off, s[0:3], 0 offset:128 7408; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[26:29], off, s[0:3], 0 offset:96 7409; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[34:37], off, s[0:3], 0 offset:64 7410; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:32 7411; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 7412; GCN-NOHSA-SI-NEXT: s_endpgm 7413; 7414; GCN-HSA-LABEL: global_zextload_v32i16_to_v32i64: 7415; GCN-HSA: ; %bb.0: 7416; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 7417; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 7418; GCN-HSA-NEXT: s_mov_b32 s16, 0xffff 7419; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 7420; 
GCN-HSA-NEXT: v_mov_b32_e32 v5, v1 7421; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 7422; GCN-HSA-NEXT: s_add_u32 s4, s2, 16 7423; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 7424; GCN-HSA-NEXT: v_mov_b32_e32 v7, s5 7425; GCN-HSA-NEXT: v_mov_b32_e32 v6, s4 7426; GCN-HSA-NEXT: s_add_u32 s4, s2, 32 7427; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 7428; GCN-HSA-NEXT: v_mov_b32_e32 v11, s5 7429; GCN-HSA-NEXT: v_mov_b32_e32 v10, s4 7430; GCN-HSA-NEXT: flat_load_dwordx4 v[6:9], v[6:7] 7431; GCN-HSA-NEXT: flat_load_dwordx4 v[10:13], v[10:11] 7432; GCN-HSA-NEXT: v_mov_b32_e32 v19, s3 7433; GCN-HSA-NEXT: v_mov_b32_e32 v18, s2 7434; GCN-HSA-NEXT: s_add_u32 s2, s2, 48 7435; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 7436; GCN-HSA-NEXT: v_mov_b32_e32 v15, s3 7437; GCN-HSA-NEXT: v_mov_b32_e32 v14, s2 7438; GCN-HSA-NEXT: flat_load_dwordx4 v[14:17], v[14:15] 7439; GCN-HSA-NEXT: flat_load_dwordx4 v[18:21], v[18:19] 7440; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 7441; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 7442; GCN-HSA-NEXT: s_add_u32 s4, s0, 16 7443; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 7444; GCN-HSA-NEXT: s_add_u32 s6, s0, 0xf0 7445; GCN-HSA-NEXT: s_addc_u32 s7, s1, 0 7446; GCN-HSA-NEXT: s_add_u32 s8, s0, 0xd0 7447; GCN-HSA-NEXT: s_addc_u32 s9, s1, 0 7448; GCN-HSA-NEXT: s_add_u32 s10, s0, 0xb0 7449; GCN-HSA-NEXT: s_addc_u32 s11, s1, 0 7450; GCN-HSA-NEXT: s_add_u32 s12, s0, 0x90 7451; GCN-HSA-NEXT: s_addc_u32 s13, s1, 0 7452; GCN-HSA-NEXT: s_add_u32 s14, s0, 0x70 7453; GCN-HSA-NEXT: s_addc_u32 s15, s1, 0 7454; GCN-HSA-NEXT: v_mov_b32_e32 v23, s15 7455; GCN-HSA-NEXT: v_mov_b32_e32 v22, s14 7456; GCN-HSA-NEXT: s_add_u32 s14, s0, 0x50 7457; GCN-HSA-NEXT: s_addc_u32 s15, s1, 0 7458; GCN-HSA-NEXT: s_waitcnt vmcnt(3) 7459; GCN-HSA-NEXT: v_lshrrev_b32_e32 v4, 16, v9 7460; GCN-HSA-NEXT: v_and_b32_e32 v2, s16, v9 7461; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[2:5] 7462; GCN-HSA-NEXT: v_mov_b32_e32 v23, s15 7463; GCN-HSA-NEXT: v_lshrrev_b32_e32 v4, 16, v7 7464; GCN-HSA-NEXT: v_and_b32_e32 v2, s16, v7 7465; GCN-HSA-NEXT: 
v_mov_b32_e32 v22, s14 7466; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[2:5] 7467; GCN-HSA-NEXT: v_mov_b32_e32 v23, s11 7468; GCN-HSA-NEXT: s_waitcnt vmcnt(4) 7469; GCN-HSA-NEXT: v_lshrrev_b32_e32 v4, 16, v13 7470; GCN-HSA-NEXT: v_and_b32_e32 v2, s16, v13 7471; GCN-HSA-NEXT: v_mov_b32_e32 v22, s10 7472; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[2:5] 7473; GCN-HSA-NEXT: v_mov_b32_e32 v23, s13 7474; GCN-HSA-NEXT: v_lshrrev_b32_e32 v4, 16, v11 7475; GCN-HSA-NEXT: v_and_b32_e32 v2, s16, v11 7476; GCN-HSA-NEXT: v_mov_b32_e32 v22, s12 7477; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[2:5] 7478; GCN-HSA-NEXT: v_mov_b32_e32 v23, s5 7479; GCN-HSA-NEXT: v_mov_b32_e32 v22, s4 7480; GCN-HSA-NEXT: s_waitcnt vmcnt(4) 7481; GCN-HSA-NEXT: v_lshrrev_b32_e32 v4, 16, v19 7482; GCN-HSA-NEXT: v_and_b32_e32 v2, s16, v19 7483; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[2:5] 7484; GCN-HSA-NEXT: v_mov_b32_e32 v23, s7 7485; GCN-HSA-NEXT: v_lshrrev_b32_e32 v4, 16, v17 7486; GCN-HSA-NEXT: v_and_b32_e32 v2, s16, v17 7487; GCN-HSA-NEXT: v_mov_b32_e32 v22, s6 7488; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[2:5] 7489; GCN-HSA-NEXT: s_add_u32 s4, s0, 32 7490; GCN-HSA-NEXT: v_mov_b32_e32 v23, s9 7491; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 7492; GCN-HSA-NEXT: v_lshrrev_b32_e32 v4, 16, v15 7493; GCN-HSA-NEXT: v_and_b32_e32 v2, s16, v15 7494; GCN-HSA-NEXT: v_mov_b32_e32 v22, s8 7495; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[2:5] 7496; GCN-HSA-NEXT: s_add_u32 s6, s0, 0xe0 7497; GCN-HSA-NEXT: v_mov_b32_e32 v7, 0 7498; GCN-HSA-NEXT: v_lshrrev_b32_e32 v4, 16, v18 7499; GCN-HSA-NEXT: v_and_b32_e32 v2, s16, v18 7500; GCN-HSA-NEXT: v_mov_b32_e32 v18, s1 7501; GCN-HSA-NEXT: v_mov_b32_e32 v5, v7 7502; GCN-HSA-NEXT: v_mov_b32_e32 v17, s0 7503; GCN-HSA-NEXT: s_addc_u32 s7, s1, 0 7504; GCN-HSA-NEXT: flat_store_dwordx4 v[17:18], v[2:5] 7505; GCN-HSA-NEXT: v_and_b32_e32 v0, s16, v21 7506; GCN-HSA-NEXT: v_lshrrev_b32_e32 v4, 16, v16 7507; GCN-HSA-NEXT: v_and_b32_e32 v2, s16, v16 7508; GCN-HSA-NEXT: 
v_mov_b32_e32 v16, s7 7509; GCN-HSA-NEXT: v_mov_b32_e32 v15, s6 7510; GCN-HSA-NEXT: s_add_u32 s6, s0, 0xc0 7511; GCN-HSA-NEXT: s_addc_u32 s7, s1, 0 7512; GCN-HSA-NEXT: flat_store_dwordx4 v[15:16], v[2:5] 7513; GCN-HSA-NEXT: v_mov_b32_e32 v16, s3 7514; GCN-HSA-NEXT: v_lshrrev_b32_e32 v4, 16, v14 7515; GCN-HSA-NEXT: v_and_b32_e32 v2, s16, v14 7516; GCN-HSA-NEXT: v_mov_b32_e32 v14, s7 7517; GCN-HSA-NEXT: v_mov_b32_e32 v13, s6 7518; GCN-HSA-NEXT: s_add_u32 s6, s0, 0xa0 7519; GCN-HSA-NEXT: s_addc_u32 s7, s1, 0 7520; GCN-HSA-NEXT: flat_store_dwordx4 v[13:14], v[2:5] 7521; GCN-HSA-NEXT: v_mov_b32_e32 v15, s2 7522; GCN-HSA-NEXT: v_lshrrev_b32_e32 v4, 16, v12 7523; GCN-HSA-NEXT: v_and_b32_e32 v2, s16, v12 7524; GCN-HSA-NEXT: v_mov_b32_e32 v12, s7 7525; GCN-HSA-NEXT: v_mov_b32_e32 v11, s6 7526; GCN-HSA-NEXT: s_add_u32 s6, s0, 0x80 7527; GCN-HSA-NEXT: s_addc_u32 s7, s1, 0 7528; GCN-HSA-NEXT: flat_store_dwordx4 v[11:12], v[2:5] 7529; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 7530; GCN-HSA-NEXT: v_lshrrev_b32_e32 v4, 16, v10 7531; GCN-HSA-NEXT: v_and_b32_e32 v2, s16, v10 7532; GCN-HSA-NEXT: v_mov_b32_e32 v10, s7 7533; GCN-HSA-NEXT: v_mov_b32_e32 v9, s6 7534; GCN-HSA-NEXT: flat_store_dwordx4 v[9:10], v[2:5] 7535; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 7536; GCN-HSA-NEXT: v_lshrrev_b32_e32 v2, 16, v21 7537; GCN-HSA-NEXT: flat_store_dwordx4 v[15:16], v[0:3] 7538; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v6 7539; GCN-HSA-NEXT: v_mov_b32_e32 v2, s4 7540; GCN-HSA-NEXT: v_and_b32_e32 v9, s16, v6 7541; GCN-HSA-NEXT: v_lshrrev_b32_e32 v6, 16, v20 7542; GCN-HSA-NEXT: v_and_b32_e32 v4, s16, v20 7543; GCN-HSA-NEXT: v_mov_b32_e32 v5, v1 7544; GCN-HSA-NEXT: v_mov_b32_e32 v3, s5 7545; GCN-HSA-NEXT: s_add_u32 s0, s0, 64 7546; GCN-HSA-NEXT: flat_store_dwordx4 v[2:3], v[4:7] 7547; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 7548; GCN-HSA-NEXT: v_lshrrev_b32_e32 v14, 16, v8 7549; GCN-HSA-NEXT: v_and_b32_e32 v12, s16, v8 7550; GCN-HSA-NEXT: v_mov_b32_e32 v13, v1 7551; GCN-HSA-NEXT: v_mov_b32_e32 v15, v7 7552; 
GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 7553; GCN-HSA-NEXT: v_mov_b32_e32 v10, v1 7554; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 7555; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 7556; GCN-HSA-NEXT: flat_store_dwordx4 v[2:3], v[12:15] 7557; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 7558; GCN-HSA-NEXT: v_mov_b32_e32 v12, v7 7559; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[9:12] 7560; GCN-HSA-NEXT: s_endpgm 7561; 7562; GCN-NOHSA-VI-LABEL: global_zextload_v32i16_to_v32i64: 7563; GCN-NOHSA-VI: ; %bb.0: 7564; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 7565; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 7566; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 7567; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 7568; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 7569; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 7570; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 7571; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 7572; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 7573; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 7574; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[31:34], off, s[8:11], 0 offset:32 7575; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[35:38], off, s[8:11], 0 offset:48 7576; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, 0xffff 7577; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v57, 0 7578; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 7579; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v11, 0 7580; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v15, 0 7581; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v27, 0 7582; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v23, 0 7583; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v19, 0 7584; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(3) 7585; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v8, s0, v1 7586; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v10, 16, v1 7587; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 7588; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v48, s0, v36 7589; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v50, 16, v36 7590; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v54, s0, v38 7591; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v56, 16, v38 7592; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v36, s0, v37 7593; 
GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v38, 16, v37 7594; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v37, 0 7595; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v1, s0, v3 7596; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v12, s0, v0 7597; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v16, s0, v2 7598; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v20, s0, v5 7599; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v24, s0, v4 7600; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v26, 16, v4 7601; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v4, s0, v6 7602; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v28, s0, v7 7603; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v39, s0, v32 7604; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v41, 16, v32 7605; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v32, s0, v34 7606; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v42, s0, v31 7607; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v45, s0, v33 7608; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v51, s0, v35 7609; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v55, v37 7610; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 7611; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v53, 16, v35 7612; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[54:57], off, s[0:3], 0 offset:240 7613; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v52, v37 7614; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v54, 0 7615; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[51:54], off, s[0:3], 0 offset:192 7616; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v49, v37 7617; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v51, 0 7618; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v47, 16, v33 7619; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[48:51], off, s[0:3], 0 offset:208 7620; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v46, v37 7621; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v48, 0 7622; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v22, 16, v5 7623; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v30, 16, v7 7624; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v44, 16, v31 7625; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[45:48], off, s[0:3], 0 offset:160 7626; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v6, 16, v6 7627; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v45, 0 7628; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v43, v37 7629; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v7, 0 7630; 
GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, v37 7631; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v18, 16, v2 7632; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[42:45], off, s[0:3], 0 offset:128 7633; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:96 7634; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v14, 16, v0 7635; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v3 7636; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v34, 16, v34 7637; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v35, 0 7638; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v33, v37 7639; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v42, 0 7640; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v40, v37 7641; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v31, 0 7642; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, 0 7643; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v29, v37 7644; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, v37 7645; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v13, v37 7646; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v9, v37 7647; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[39:42], off, s[0:3], 0 offset:144 7648; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:176 7649; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v39, 0 7650; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v25, v37 7651; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v21, v37 7652; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v17, v37 7653; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:112 7654; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:64 7655; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:80 7656; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[36:39], off, s[0:3], 0 offset:224 7657; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:32 7658; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[1:4], off, s[0:3], 0 offset:48 7659; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 7660; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16 7661; GCN-NOHSA-VI-NEXT: s_endpgm 7662; 7663; EG-LABEL: global_zextload_v32i16_to_v32i64: 7664; EG: ; %bb.0: 7665; 
EG-NEXT: ALU 0, @30, KC0[CB0:0-32], KC1[] 7666; EG-NEXT: TEX 2 @22 7667; EG-NEXT: ALU 33, @31, KC0[], KC1[] 7668; EG-NEXT: TEX 0 @28 7669; EG-NEXT: ALU 93, @65, KC0[CB0:0-32], KC1[] 7670; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T50.X, 0 7671; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T49.X, 0 7672; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T48.X, 0 7673; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T47.X, 0 7674; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T46.X, 0 7675; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T45.X, 0 7676; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T44.X, 0 7677; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T43.X, 0 7678; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T42.X, 0 7679; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T41.X, 0 7680; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T40.X, 0 7681; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T39.X, 0 7682; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T38.X, 0 7683; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T37.X, 0 7684; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T36.X, 0 7685; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T35.X, 1 7686; EG-NEXT: CF_END 7687; EG-NEXT: Fetch clause starting at 22: 7688; EG-NEXT: VTX_READ_128 T20.XYZW, T19.X, 48, #1 7689; EG-NEXT: VTX_READ_128 T21.XYZW, T19.X, 16, #1 7690; EG-NEXT: VTX_READ_128 T22.XYZW, T19.X, 32, #1 7691; EG-NEXT: Fetch clause starting at 28: 7692; EG-NEXT: VTX_READ_128 T29.XYZW, T19.X, 0, #1 7693; EG-NEXT: ALU clause starting at 30: 7694; EG-NEXT: MOV * T19.X, KC0[2].Z, 7695; EG-NEXT: ALU clause starting at 31: 7696; EG-NEXT: LSHR * T23.Z, T20.Z, literal.x, 7697; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7698; EG-NEXT: AND_INT T23.X, T20.Z, literal.x, 7699; EG-NEXT: MOV T23.Y, 0.0, 7700; EG-NEXT: LSHR T24.Z, T20.W, literal.y, 7701; EG-NEXT: AND_INT * T24.X, T20.W, literal.x, 7702; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7703; EG-NEXT: MOV T24.Y, 0.0, 7704; EG-NEXT: LSHR * T25.Z, 
T20.X, literal.x, 7705; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7706; EG-NEXT: AND_INT T25.X, T20.X, literal.x, 7707; EG-NEXT: MOV T25.Y, 0.0, 7708; EG-NEXT: LSHR T20.Z, T20.Y, literal.y, 7709; EG-NEXT: AND_INT * T20.X, T20.Y, literal.x, 7710; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7711; EG-NEXT: MOV T20.Y, 0.0, 7712; EG-NEXT: LSHR * T26.Z, T22.Z, literal.x, 7713; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7714; EG-NEXT: AND_INT T26.X, T22.Z, literal.x, 7715; EG-NEXT: MOV T26.Y, 0.0, 7716; EG-NEXT: LSHR T27.Z, T22.W, literal.y, 7717; EG-NEXT: AND_INT * T27.X, T22.W, literal.x, 7718; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7719; EG-NEXT: MOV T27.Y, 0.0, 7720; EG-NEXT: LSHR * T28.Z, T22.X, literal.x, 7721; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7722; EG-NEXT: AND_INT T28.X, T22.X, literal.x, 7723; EG-NEXT: MOV T28.Y, 0.0, 7724; EG-NEXT: LSHR T22.Z, T22.Y, literal.y, 7725; EG-NEXT: AND_INT * T22.X, T22.Y, literal.x, 7726; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7727; EG-NEXT: MOV T22.Y, 0.0, 7728; EG-NEXT: LSHR * T19.Z, T21.Z, literal.x, 7729; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7730; EG-NEXT: ALU clause starting at 65: 7731; EG-NEXT: AND_INT T19.X, T21.Z, literal.x, 7732; EG-NEXT: MOV T19.Y, 0.0, 7733; EG-NEXT: LSHR T30.Z, T21.W, literal.y, 7734; EG-NEXT: AND_INT * T30.X, T21.W, literal.x, 7735; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7736; EG-NEXT: MOV T30.Y, 0.0, 7737; EG-NEXT: LSHR * T31.Z, T21.X, literal.x, 7738; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7739; EG-NEXT: AND_INT T31.X, T21.X, literal.x, 7740; EG-NEXT: MOV T31.Y, 0.0, 7741; EG-NEXT: LSHR T21.Z, T21.Y, literal.y, 7742; EG-NEXT: AND_INT * T21.X, T21.Y, literal.x, 7743; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7744; EG-NEXT: MOV T21.Y, 0.0, 7745; EG-NEXT: LSHR * T32.Z, T29.Z, literal.x, 7746; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7747; EG-NEXT: AND_INT T32.X, T29.Z, literal.x, 7748; EG-NEXT: MOV T32.Y, 0.0, 7749; EG-NEXT: LSHR T33.Z, T29.W, 
literal.y, 7750; EG-NEXT: AND_INT * T33.X, T29.W, literal.x, 7751; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7752; EG-NEXT: MOV T33.Y, 0.0, 7753; EG-NEXT: LSHR * T34.Z, T29.X, literal.x, 7754; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7755; EG-NEXT: AND_INT T34.X, T29.X, literal.x, 7756; EG-NEXT: MOV T34.Y, 0.0, 7757; EG-NEXT: LSHR T29.Z, T29.Y, literal.y, 7758; EG-NEXT: AND_INT * T29.X, T29.Y, literal.x, 7759; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7760; EG-NEXT: MOV T29.Y, 0.0, 7761; EG-NEXT: MOV T23.W, 0.0, 7762; EG-NEXT: MOV * T24.W, 0.0, 7763; EG-NEXT: MOV T25.W, 0.0, 7764; EG-NEXT: MOV * T20.W, 0.0, 7765; EG-NEXT: MOV T26.W, 0.0, 7766; EG-NEXT: MOV * T27.W, 0.0, 7767; EG-NEXT: MOV T28.W, 0.0, 7768; EG-NEXT: MOV * T22.W, 0.0, 7769; EG-NEXT: MOV T19.W, 0.0, 7770; EG-NEXT: MOV * T30.W, 0.0, 7771; EG-NEXT: MOV T31.W, 0.0, 7772; EG-NEXT: MOV * T21.W, 0.0, 7773; EG-NEXT: MOV T32.W, 0.0, 7774; EG-NEXT: MOV * T33.W, 0.0, 7775; EG-NEXT: MOV T34.W, 0.0, 7776; EG-NEXT: MOV * T29.W, 0.0, 7777; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 7778; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7779; EG-NEXT: LSHR T35.X, PV.W, literal.x, 7780; EG-NEXT: LSHR * T36.X, KC0[2].Y, literal.x, 7781; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 7782; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 7783; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 7784; EG-NEXT: LSHR T37.X, PV.W, literal.x, 7785; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7786; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 7787; EG-NEXT: LSHR T38.X, PV.W, literal.x, 7788; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7789; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 7790; EG-NEXT: LSHR T39.X, PV.W, literal.x, 7791; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7792; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 7793; EG-NEXT: LSHR T40.X, PV.W, literal.x, 7794; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7795; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 7796; EG-NEXT: LSHR T41.X, PV.W, literal.x, 7797; EG-NEXT: 
ADD_INT * T0.W, KC0[2].Y, literal.y, 7798; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 7799; EG-NEXT: LSHR T42.X, PV.W, literal.x, 7800; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7801; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43) 7802; EG-NEXT: LSHR T43.X, PV.W, literal.x, 7803; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7804; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43) 7805; EG-NEXT: LSHR T44.X, PV.W, literal.x, 7806; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7807; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43) 7808; EG-NEXT: LSHR T45.X, PV.W, literal.x, 7809; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7810; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43) 7811; EG-NEXT: LSHR T46.X, PV.W, literal.x, 7812; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7813; EG-NEXT: 2(2.802597e-45), 208(2.914701e-43) 7814; EG-NEXT: LSHR T47.X, PV.W, literal.x, 7815; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7816; EG-NEXT: 2(2.802597e-45), 192(2.690493e-43) 7817; EG-NEXT: LSHR T48.X, PV.W, literal.x, 7818; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7819; EG-NEXT: 2(2.802597e-45), 240(3.363116e-43) 7820; EG-NEXT: LSHR T49.X, PV.W, literal.x, 7821; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7822; EG-NEXT: 2(2.802597e-45), 224(3.138909e-43) 7823; EG-NEXT: LSHR * T50.X, PV.W, literal.x, 7824; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 7825; 7826; CM-LABEL: global_zextload_v32i16_to_v32i64: 7827; CM: ; %bb.0: 7828; CM-NEXT: ALU 0, @30, KC0[CB0:0-32], KC1[] 7829; CM-NEXT: TEX 2 @22 7830; CM-NEXT: ALU 33, @31, KC0[], KC1[] 7831; CM-NEXT: TEX 0 @28 7832; CM-NEXT: ALU 94, @65, KC0[CB0:0-32], KC1[] 7833; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T23, T50.X 7834; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T24, T49.X 7835; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T25, T48.X 7836; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T26, T47.X 7837; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T20, T46.X 7838; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T27, T45.X 7839; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T28, 
T44.X 7840; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T29, T43.X 7841; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T19, T42.X 7842; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T30, T41.X 7843; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T31, T40.X 7844; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T32, T39.X 7845; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T21, T38.X 7846; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T33, T37.X 7847; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T34, T36.X 7848; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T35, T22.X 7849; CM-NEXT: CF_END 7850; CM-NEXT: Fetch clause starting at 22: 7851; CM-NEXT: VTX_READ_128 T20.XYZW, T19.X, 0, #1 7852; CM-NEXT: VTX_READ_128 T21.XYZW, T19.X, 32, #1 7853; CM-NEXT: VTX_READ_128 T22.XYZW, T19.X, 16, #1 7854; CM-NEXT: Fetch clause starting at 28: 7855; CM-NEXT: VTX_READ_128 T22.XYZW, T19.X, 48, #1 7856; CM-NEXT: ALU clause starting at 30: 7857; CM-NEXT: MOV * T19.X, KC0[2].Z, 7858; CM-NEXT: ALU clause starting at 31: 7859; CM-NEXT: LSHR * T23.Z, T20.Y, literal.x, 7860; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7861; CM-NEXT: AND_INT T23.X, T20.Y, literal.x, 7862; CM-NEXT: MOV T23.Y, 0.0, 7863; CM-NEXT: LSHR * T24.Z, T20.X, literal.y, 7864; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7865; CM-NEXT: AND_INT T24.X, T20.X, literal.x, 7866; CM-NEXT: MOV T24.Y, 0.0, 7867; CM-NEXT: LSHR * T25.Z, T20.W, literal.y, 7868; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7869; CM-NEXT: AND_INT T25.X, T20.W, literal.x, 7870; CM-NEXT: MOV T25.Y, 0.0, 7871; CM-NEXT: LSHR * T26.Z, T20.Z, literal.y, 7872; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7873; CM-NEXT: AND_INT T26.X, T20.Z, literal.x, 7874; CM-NEXT: MOV T26.Y, 0.0, 7875; CM-NEXT: LSHR * T20.Z, T22.Y, literal.y, 7876; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7877; CM-NEXT: AND_INT T20.X, T22.Y, literal.x, 7878; CM-NEXT: MOV T20.Y, 0.0, 7879; CM-NEXT: LSHR * T27.Z, T22.X, literal.y, 7880; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7881; CM-NEXT: AND_INT T27.X, T22.X, literal.x, 7882; 
CM-NEXT: MOV T27.Y, 0.0, 7883; CM-NEXT: LSHR * T28.Z, T22.W, literal.y, 7884; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7885; CM-NEXT: AND_INT T28.X, T22.W, literal.x, 7886; CM-NEXT: MOV T28.Y, 0.0, 7887; CM-NEXT: LSHR * T29.Z, T22.Z, literal.y, 7888; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7889; CM-NEXT: AND_INT T29.X, T22.Z, literal.x, 7890; CM-NEXT: MOV T29.Y, 0.0, 7891; CM-NEXT: LSHR * T19.Z, T21.Y, literal.y, 7892; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7893; CM-NEXT: ALU clause starting at 65: 7894; CM-NEXT: AND_INT T19.X, T21.Y, literal.x, 7895; CM-NEXT: MOV T19.Y, 0.0, 7896; CM-NEXT: LSHR * T30.Z, T21.X, literal.y, 7897; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7898; CM-NEXT: AND_INT T30.X, T21.X, literal.x, 7899; CM-NEXT: MOV T30.Y, 0.0, 7900; CM-NEXT: LSHR * T31.Z, T21.W, literal.y, 7901; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7902; CM-NEXT: AND_INT T31.X, T21.W, literal.x, 7903; CM-NEXT: MOV T31.Y, 0.0, 7904; CM-NEXT: LSHR * T32.Z, T21.Z, literal.y, 7905; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7906; CM-NEXT: AND_INT T32.X, T21.Z, literal.x, 7907; CM-NEXT: MOV T32.Y, 0.0, 7908; CM-NEXT: LSHR * T21.Z, T22.Y, literal.y, 7909; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7910; CM-NEXT: AND_INT T21.X, T22.Y, literal.x, 7911; CM-NEXT: MOV T21.Y, 0.0, 7912; CM-NEXT: LSHR * T33.Z, T22.X, literal.y, 7913; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7914; CM-NEXT: AND_INT T33.X, T22.X, literal.x, 7915; CM-NEXT: MOV T33.Y, 0.0, 7916; CM-NEXT: LSHR * T34.Z, T22.W, literal.y, 7917; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7918; CM-NEXT: AND_INT T34.X, T22.W, literal.x, 7919; CM-NEXT: MOV T34.Y, 0.0, 7920; CM-NEXT: LSHR * T35.Z, T22.Z, literal.y, 7921; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7922; CM-NEXT: AND_INT T35.X, T22.Z, literal.x, 7923; CM-NEXT: MOV T35.Y, 0.0, 7924; CM-NEXT: MOV * T23.W, 0.0, 7925; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 7926; CM-NEXT: MOV * T24.W, 0.0, 7927; CM-NEXT: MOV * 
T25.W, 0.0, 7928; CM-NEXT: MOV * T26.W, 0.0, 7929; CM-NEXT: MOV * T20.W, 0.0, 7930; CM-NEXT: MOV * T27.W, 0.0, 7931; CM-NEXT: MOV * T28.W, 0.0, 7932; CM-NEXT: MOV * T29.W, 0.0, 7933; CM-NEXT: MOV * T19.W, 0.0, 7934; CM-NEXT: MOV * T30.W, 0.0, 7935; CM-NEXT: MOV * T31.W, 0.0, 7936; CM-NEXT: MOV * T32.W, 0.0, 7937; CM-NEXT: MOV * T21.W, 0.0, 7938; CM-NEXT: MOV * T33.W, 0.0, 7939; CM-NEXT: MOV * T34.W, 0.0, 7940; CM-NEXT: MOV * T35.W, 0.0, 7941; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 7942; CM-NEXT: 224(3.138909e-43), 0(0.000000e+00) 7943; CM-NEXT: LSHR T22.X, PV.W, literal.x, 7944; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7945; CM-NEXT: 2(2.802597e-45), 240(3.363116e-43) 7946; CM-NEXT: LSHR T36.X, PV.W, literal.x, 7947; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7948; CM-NEXT: 2(2.802597e-45), 192(2.690493e-43) 7949; CM-NEXT: LSHR T37.X, PV.W, literal.x, 7950; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7951; CM-NEXT: 2(2.802597e-45), 208(2.914701e-43) 7952; CM-NEXT: LSHR T38.X, PV.W, literal.x, 7953; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7954; CM-NEXT: 2(2.802597e-45), 160(2.242078e-43) 7955; CM-NEXT: LSHR T39.X, PV.W, literal.x, 7956; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7957; CM-NEXT: 2(2.802597e-45), 176(2.466285e-43) 7958; CM-NEXT: LSHR T40.X, PV.W, literal.x, 7959; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7960; CM-NEXT: 2(2.802597e-45), 128(1.793662e-43) 7961; CM-NEXT: LSHR T41.X, PV.W, literal.x, 7962; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7963; CM-NEXT: 2(2.802597e-45), 144(2.017870e-43) 7964; CM-NEXT: LSHR T42.X, PV.W, literal.x, 7965; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7966; CM-NEXT: 2(2.802597e-45), 96(1.345247e-43) 7967; CM-NEXT: LSHR T43.X, PV.W, literal.x, 7968; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7969; CM-NEXT: 2(2.802597e-45), 112(1.569454e-43) 7970; CM-NEXT: LSHR T44.X, PV.W, literal.x, 7971; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7972; CM-NEXT: 2(2.802597e-45), 
64(8.968310e-44) 7973; CM-NEXT: LSHR T45.X, PV.W, literal.x, 7974; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7975; CM-NEXT: 2(2.802597e-45), 80(1.121039e-43) 7976; CM-NEXT: LSHR T46.X, PV.W, literal.x, 7977; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7978; CM-NEXT: 2(2.802597e-45), 32(4.484155e-44) 7979; CM-NEXT: LSHR T47.X, PV.W, literal.x, 7980; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7981; CM-NEXT: 2(2.802597e-45), 48(6.726233e-44) 7982; CM-NEXT: LSHR * T48.X, PV.W, literal.x, 7983; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 7984; CM-NEXT: LSHR T49.X, KC0[2].Y, literal.x, 7985; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7986; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 7987; CM-NEXT: LSHR * T50.X, PV.W, literal.x, 7988; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 7989 %load = load <32 x i16>, <32 x i16> addrspace(1)* %in 7990 %ext = zext <32 x i16> %load to <32 x i64> 7991 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out 7992 ret void 7993} 7994 7995define amdgpu_kernel void @global_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 { 7996; GCN-NOHSA-SI-LABEL: global_sextload_v32i16_to_v32i64: 7997; GCN-NOHSA-SI: ; %bb.0: 7998; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 7999; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 8000; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 8001; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 8002; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 8003; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 8004; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 8005; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 8006; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:48 8007; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:32 8008; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 8009; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 8010; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 8011; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:16 8012; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(3) 
8013; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, v7 8014; GCN-NOHSA-SI-NEXT: v_bfe_i32 v16, v16, 0, 16 8015; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[18:19], v[6:7], 48 8016; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8017; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:240 8018; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8019; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[18:19], v[4:5], 48 8020; GCN-NOHSA-SI-NEXT: v_bfe_i32 v16, v5, 0, 16 8021; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8022; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:208 8023; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(4) 8024; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, v3 8025; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8026; GCN-NOHSA-SI-NEXT: v_bfe_i32 v16, v5, 0, 16 8027; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[18:19], v[2:3], 48 8028; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8029; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176 8030; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8031; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[18:19], v[0:1], 48 8032; GCN-NOHSA-SI-NEXT: v_bfe_i32 v16, v1, 0, 16 8033; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8034; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:144 8035; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(4) 8036; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, v15 8037; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8038; GCN-NOHSA-SI-NEXT: v_bfe_i32 v16, v1, 0, 16 8039; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[18:19], v[14:15], 48 8040; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8041; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:112 8042; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8043; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[17:18], v[12:13], 48 8044; GCN-NOHSA-SI-NEXT: v_bfe_i32 v15, v13, 0, 16 8045; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v16, 31, v15 8046; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:80 8047; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, v11 8048; GCN-NOHSA-SI-NEXT: 
s_waitcnt expcnt(0) 8049; GCN-NOHSA-SI-NEXT: v_bfe_i32 v15, v1, 0, 16 8050; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[17:18], v[10:11], 48 8051; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v16, 31, v15 8052; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:48 8053; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8054; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[17:18], v[8:9], 48 8055; GCN-NOHSA-SI-NEXT: v_bfe_i32 v15, v9, 0, 16 8056; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v16, 31, v15 8057; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:16 8058; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v6 8059; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8060; GCN-NOHSA-SI-NEXT: v_bfe_i32 v17, v1, 0, 16 8061; GCN-NOHSA-SI-NEXT: v_bfe_i32 v15, v6, 0, 16 8062; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v16, 31, v15 8063; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v18, 31, v17 8064; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:224 8065; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v4 8066; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v7, 16, v2 8067; GCN-NOHSA-SI-NEXT: v_bfe_i32 v3, v4, 0, 16 8068; GCN-NOHSA-SI-NEXT: v_bfe_i32 v5, v1, 0, 16 8069; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v9, 16, v12 8070; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v4, 31, v3 8071; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v6, 31, v5 8072; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:192 8073; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8074; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v6, 16, v10 8075; GCN-NOHSA-SI-NEXT: v_bfe_i32 v3, v7, 0, 16 8076; GCN-NOHSA-SI-NEXT: v_bfe_i32 v1, v2, 0, 16 8077; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v2, 31, v1 8078; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v4, 31, v3 8079; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[1:4], off, s[0:3], 0 offset:160 8080; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8081; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v8 8082; GCN-NOHSA-SI-NEXT: v_bfe_i32 v3, v1, 0, 16 8083; GCN-NOHSA-SI-NEXT: v_bfe_i32 v1, v8, 0, 16 8084; 
GCN-NOHSA-SI-NEXT: v_bfe_i32 v5, v10, 0, 16 8085; GCN-NOHSA-SI-NEXT: v_bfe_i32 v7, v6, 0, 16 8086; GCN-NOHSA-SI-NEXT: v_bfe_i32 v11, v9, 0, 16 8087; GCN-NOHSA-SI-NEXT: v_bfe_i32 v9, v12, 0, 16 8088; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v14 8089; GCN-NOHSA-SI-NEXT: v_bfe_i32 v13, v14, 0, 16 8090; GCN-NOHSA-SI-NEXT: v_bfe_i32 v15, v2, 0, 16 8091; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v0 8092; GCN-NOHSA-SI-NEXT: v_bfe_i32 v17, v0, 0, 16 8093; GCN-NOHSA-SI-NEXT: v_bfe_i32 v19, v2, 0, 16 8094; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v2, 31, v1 8095; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v6, 31, v5 8096; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v10, 31, v9 8097; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v14, 31, v13 8098; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v18, 31, v17 8099; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v4, 31, v3 8100; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v8, 31, v7 8101; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v12, 31, v11 8102; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v16, 31, v15 8103; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v20, 31, v19 8104; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[17:20], off, s[0:3], 0 offset:128 8105; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:96 8106; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:64 8107; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[5:8], off, s[0:3], 0 offset:32 8108; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[1:4], off, s[0:3], 0 8109; GCN-NOHSA-SI-NEXT: s_endpgm 8110; 8111; GCN-HSA-LABEL: global_sextload_v32i16_to_v32i64: 8112; GCN-HSA: ; %bb.0: 8113; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 8114; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 8115; GCN-HSA-NEXT: s_add_u32 s4, s2, 48 8116; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 8117; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 8118; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 8119; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 8120; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 8121; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 8122; GCN-HSA-NEXT: flat_load_dwordx4 
v[4:7], v[4:5] 8123; GCN-HSA-NEXT: s_add_u32 s4, s2, 32 8124; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 8125; GCN-HSA-NEXT: s_add_u32 s2, s2, 16 8126; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 8127; GCN-HSA-NEXT: v_mov_b32_e32 v9, s5 8128; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 8129; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 8130; GCN-HSA-NEXT: v_mov_b32_e32 v8, s4 8131; GCN-HSA-NEXT: flat_load_dwordx4 v[8:11], v[8:9] 8132; GCN-HSA-NEXT: flat_load_dwordx4 v[12:15], v[12:13] 8133; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 8134; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 8135; GCN-HSA-NEXT: s_add_u32 s4, s0, 16 8136; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 8137; GCN-HSA-NEXT: v_mov_b32_e32 v21, s5 8138; GCN-HSA-NEXT: v_mov_b32_e32 v20, s4 8139; GCN-HSA-NEXT: s_waitcnt vmcnt(3) 8140; GCN-HSA-NEXT: v_bfe_i32 v16, v1, 0, 16 8141; GCN-HSA-NEXT: v_ashr_i64 v[18:19], v[0:1], 48 8142; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8143; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[16:19] 8144; GCN-HSA-NEXT: v_mov_b32_e32 v21, s3 8145; GCN-HSA-NEXT: v_mov_b32_e32 v20, s2 8146; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xf0 8147; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 8148; GCN-HSA-NEXT: s_add_u32 s4, s0, 0xd0 8149; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 8150; GCN-HSA-NEXT: s_add_u32 s6, s0, 0xb0 8151; GCN-HSA-NEXT: s_addc_u32 s7, s1, 0 8152; GCN-HSA-NEXT: s_add_u32 s8, s0, 0x90 8153; GCN-HSA-NEXT: s_addc_u32 s9, s1, 0 8154; GCN-HSA-NEXT: s_add_u32 s10, s0, 0x70 8155; GCN-HSA-NEXT: s_addc_u32 s11, s1, 0 8156; GCN-HSA-NEXT: s_add_u32 s12, s0, 0x50 8157; GCN-HSA-NEXT: v_mov_b32_e32 v1, v3 8158; GCN-HSA-NEXT: v_bfe_i32 v16, v1, 0, 16 8159; GCN-HSA-NEXT: s_addc_u32 s13, s1, 0 8160; GCN-HSA-NEXT: v_ashr_i64 v[18:19], v[2:3], 48 8161; GCN-HSA-NEXT: s_add_u32 s14, s0, 32 8162; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8163; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v2 8164; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[16:19] 8165; GCN-HSA-NEXT: s_addc_u32 s15, s1, 0 8166; GCN-HSA-NEXT: v_bfe_i32 v18, v1, 0, 16 8167; GCN-HSA-NEXT: v_bfe_i32 
v16, v2, 0, 16 8168; GCN-HSA-NEXT: v_mov_b32_e32 v1, s14 8169; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8170; GCN-HSA-NEXT: v_ashrrev_i32_e32 v19, 31, v18 8171; GCN-HSA-NEXT: v_mov_b32_e32 v2, s15 8172; GCN-HSA-NEXT: flat_store_dwordx4 v[1:2], v[16:19] 8173; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v0 8174; GCN-HSA-NEXT: v_mov_b32_e32 v17, s1 8175; GCN-HSA-NEXT: v_bfe_i32 v2, v1, 0, 16 8176; GCN-HSA-NEXT: v_bfe_i32 v0, v0, 0, 16 8177; GCN-HSA-NEXT: v_mov_b32_e32 v16, s0 8178; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 8179; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2 8180; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[0:3] 8181; GCN-HSA-NEXT: v_mov_b32_e32 v17, s5 8182; GCN-HSA-NEXT: s_waitcnt vmcnt(6) 8183; GCN-HSA-NEXT: v_bfe_i32 v0, v5, 0, 16 8184; GCN-HSA-NEXT: v_ashr_i64 v[2:3], v[4:5], 48 8185; GCN-HSA-NEXT: v_mov_b32_e32 v16, s4 8186; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 8187; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[0:3] 8188; GCN-HSA-NEXT: v_mov_b32_e32 v19, s3 8189; GCN-HSA-NEXT: v_mov_b32_e32 v0, v7 8190; GCN-HSA-NEXT: v_bfe_i32 v0, v0, 0, 16 8191; GCN-HSA-NEXT: v_ashr_i64 v[2:3], v[6:7], 48 8192; GCN-HSA-NEXT: v_mov_b32_e32 v18, s2 8193; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 8194; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[0:3] 8195; GCN-HSA-NEXT: v_mov_b32_e32 v19, s9 8196; GCN-HSA-NEXT: s_waitcnt vmcnt(7) 8197; GCN-HSA-NEXT: v_bfe_i32 v0, v9, 0, 16 8198; GCN-HSA-NEXT: v_ashr_i64 v[2:3], v[8:9], 48 8199; GCN-HSA-NEXT: v_mov_b32_e32 v18, s8 8200; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 8201; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[0:3] 8202; GCN-HSA-NEXT: v_mov_b32_e32 v17, s7 8203; GCN-HSA-NEXT: v_mov_b32_e32 v0, v11 8204; GCN-HSA-NEXT: v_bfe_i32 v0, v0, 0, 16 8205; GCN-HSA-NEXT: v_ashr_i64 v[2:3], v[10:11], 48 8206; GCN-HSA-NEXT: v_mov_b32_e32 v16, s6 8207; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 8208; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[0:3] 8209; GCN-HSA-NEXT: v_mov_b32_e32 v17, s13 8210; GCN-HSA-NEXT: s_waitcnt 
vmcnt(8) 8211; GCN-HSA-NEXT: v_bfe_i32 v0, v13, 0, 16 8212; GCN-HSA-NEXT: v_ashr_i64 v[2:3], v[12:13], 48 8213; GCN-HSA-NEXT: v_mov_b32_e32 v16, s12 8214; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 8215; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[0:3] 8216; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xe0 8217; GCN-HSA-NEXT: v_mov_b32_e32 v0, v15 8218; GCN-HSA-NEXT: v_mov_b32_e32 v19, s11 8219; GCN-HSA-NEXT: v_bfe_i32 v0, v0, 0, 16 8220; GCN-HSA-NEXT: v_ashr_i64 v[2:3], v[14:15], 48 8221; GCN-HSA-NEXT: v_mov_b32_e32 v18, s10 8222; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 8223; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v6 8224; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 8225; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[0:3] 8226; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v4 8227; GCN-HSA-NEXT: v_bfe_i32 v0, v6, 0, 16 8228; GCN-HSA-NEXT: v_bfe_i32 v2, v5, 0, 16 8229; GCN-HSA-NEXT: v_mov_b32_e32 v6, s3 8230; GCN-HSA-NEXT: v_mov_b32_e32 v5, s2 8231; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xc0 8232; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 8233; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2 8234; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 8235; GCN-HSA-NEXT: flat_store_dwordx4 v[5:6], v[0:3] 8236; GCN-HSA-NEXT: v_lshrrev_b32_e32 v15, 16, v10 8237; GCN-HSA-NEXT: v_bfe_i32 v0, v4, 0, 16 8238; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 8239; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 8240; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xa0 8241; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 8242; GCN-HSA-NEXT: v_bfe_i32 v2, v7, 0, 16 8243; GCN-HSA-NEXT: v_mov_b32_e32 v17, s3 8244; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 8245; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2 8246; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8247; GCN-HSA-NEXT: v_lshrrev_b32_e32 v6, 16, v14 8248; GCN-HSA-NEXT: v_lshrrev_b32_e32 v2, 16, v12 8249; GCN-HSA-NEXT: v_bfe_i32 v0, v12, 0, 16 8250; GCN-HSA-NEXT: v_bfe_i32 v4, v14, 0, 16 8251; GCN-HSA-NEXT: v_bfe_i32 v14, v15, 0, 16 8252; GCN-HSA-NEXT: v_bfe_i32 v12, v10, 0, 16 8253; GCN-HSA-NEXT: v_mov_b32_e32 v16, s2 8254; 
GCN-HSA-NEXT: s_add_u32 s2, s0, 0x80 8255; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 31, v12 8256; GCN-HSA-NEXT: v_ashrrev_i32_e32 v15, 31, v14 8257; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 8258; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v8 8259; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[12:15] 8260; GCN-HSA-NEXT: v_bfe_i32 v10, v11, 0, 16 8261; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 8262; GCN-HSA-NEXT: v_bfe_i32 v8, v8, 0, 16 8263; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 8264; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 8265; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 8266; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8 8267; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v10 8268; GCN-HSA-NEXT: s_add_u32 s0, s0, 64 8269; GCN-HSA-NEXT: v_bfe_i32 v6, v6, 0, 16 8270; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[8:11] 8271; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 31, v4 8272; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 8273; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 31, v6 8274; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 8275; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 8276; GCN-HSA-NEXT: v_bfe_i32 v2, v2, 0, 16 8277; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 8278; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 8279; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 8280; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2 8281; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 8282; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8283; GCN-HSA-NEXT: s_endpgm 8284; 8285; GCN-NOHSA-VI-LABEL: global_sextload_v32i16_to_v32i64: 8286; GCN-NOHSA-VI: ; %bb.0: 8287; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 8288; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 8289; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 8290; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 8291; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 8292; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 8293; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s6 8294; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s7 8295; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s2 8296; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, s3 8297; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 8298; 
GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 8299; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32 8300; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48 8301; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 8302; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v16, 16, v14 8303; GCN-NOHSA-VI-NEXT: v_bfe_i32 v18, v16, 0, 16 8304; GCN-NOHSA-VI-NEXT: v_bfe_i32 v16, v14, 0, 16 8305; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v14, v15 8306; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8307; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v19, 31, v18 8308; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v15, 16, v15 8309; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:224 8310; GCN-NOHSA-VI-NEXT: v_bfe_i32 v14, v14, 0, 16 8311; GCN-NOHSA-VI-NEXT: v_bfe_i32 v16, v15, 0, 16 8312; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v15, 31, v14 8313; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8314; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:240 8315; GCN-NOHSA-VI-NEXT: v_bfe_i32 v18, v5, 0, 16 8316; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v14, 16, v12 8317; GCN-NOHSA-VI-NEXT: v_bfe_i32 v16, v14, 0, 16 8318; GCN-NOHSA-VI-NEXT: v_bfe_i32 v14, v12, 0, 16 8319; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v15, 31, v14 8320; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8321; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:192 8322; GCN-NOHSA-VI-NEXT: v_bfe_i32 v12, v13, 0, 16 8323; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v14, 16, v13 8324; GCN-NOHSA-VI-NEXT: v_bfe_i32 v14, v14, 0, 16 8325; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v13, 31, v12 8326; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v15, 31, v14 8327; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:208 8328; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v16, v11 8329; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v12, 16, v10 8330; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v17, 16, v11 8331; GCN-NOHSA-VI-NEXT: v_bfe_i32 v14, v10, 0, 16 8332; 
GCN-NOHSA-VI-NEXT: v_bfe_i32 v11, v16, 0, 16 8333; GCN-NOHSA-VI-NEXT: v_bfe_i32 v16, v12, 0, 16 8334; GCN-NOHSA-VI-NEXT: v_bfe_i32 v13, v17, 0, 16 8335; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v15, 31, v14 8336; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8337; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:160 8338; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v12, 31, v11 8339; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v14, 31, v13 8340; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:176 8341; GCN-NOHSA-VI-NEXT: v_bfe_i32 v10, v1, 0, 16 8342; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v11, 16, v8 8343; GCN-NOHSA-VI-NEXT: v_bfe_i32 v13, v11, 0, 16 8344; GCN-NOHSA-VI-NEXT: v_bfe_i32 v11, v8, 0, 16 8345; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v12, 31, v11 8346; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v14, 31, v13 8347; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v8, 16, v9 8348; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:128 8349; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 8350; GCN-NOHSA-VI-NEXT: v_bfe_i32 v11, v9, 0, 16 8351; GCN-NOHSA-VI-NEXT: v_bfe_i32 v13, v8, 0, 16 8352; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v8, v7 8353; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v12, 31, v11 8354; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v14, 31, v13 8355; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v7 8356; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:144 8357; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v6 8358; GCN-NOHSA-VI-NEXT: v_bfe_i32 v11, v6, 0, 16 8359; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v8, 0, 16 8360; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v7, 0, 16 8361; GCN-NOHSA-VI-NEXT: v_bfe_i32 v13, v9, 0, 16 8362; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 31, v6 8363; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 31, v8 8364; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:112 8365; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v12, 31, v11 8366; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v6, 16, v4 8367; 
GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v6, 0, 16 8368; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v4, 0, 16 8369; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v14, 31, v13 8370; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 31, v6 8371; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 31, v8 8372; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:96 8373; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:64 8374; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v11, 16, v0 8375; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v0, 0, 16 8376; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v0, 16, v2 8377; GCN-NOHSA-VI-NEXT: v_bfe_i32 v16, v0, 0, 16 8378; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v0, 16, v5 8379; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, v3 8380; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v3 8381; GCN-NOHSA-VI-NEXT: v_bfe_i32 v20, v0, 0, 16 8382; GCN-NOHSA-VI-NEXT: v_bfe_i32 v12, v1, 0, 16 8383; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v11, 0, 16 8384; GCN-NOHSA-VI-NEXT: v_bfe_i32 v14, v2, 0, 16 8385; GCN-NOHSA-VI-NEXT: v_bfe_i32 v1, v4, 0, 16 8386; GCN-NOHSA-VI-NEXT: v_bfe_i32 v3, v3, 0, 16 8387; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v19, 31, v18 8388; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v21, 31, v20 8389; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v11, 31, v10 8390; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v13, 31, v12 8391; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 31, v6 8392; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 31, v8 8393; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v2, 31, v1 8394; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v4, 31, v3 8395; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v15, 31, v14 8396; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8397; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:80 8398; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:32 8399; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[1:4], off, s[0:3], 0 offset:48 8400; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 8401; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[10:13], off, 
s[0:3], 0 offset:16 8402; GCN-NOHSA-VI-NEXT: s_endpgm 8403; 8404; EG-LABEL: global_sextload_v32i16_to_v32i64: 8405; EG: ; %bb.0: 8406; EG-NEXT: ALU 0, @30, KC0[CB0:0-32], KC1[] 8407; EG-NEXT: TEX 0 @22 8408; EG-NEXT: ALU 56, @31, KC0[CB0:0-32], KC1[] 8409; EG-NEXT: TEX 2 @24 8410; EG-NEXT: ALU 74, @88, KC0[CB0:0-32], KC1[] 8411; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T38.X, 0 8412; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T49.XYZW, T36.X, 0 8413; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T34.X, 0 8414; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T33.X, 0 8415; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T32.X, 0 8416; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T31.X, 0 8417; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T30.X, 0 8418; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T29.X, 0 8419; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T28.X, 0 8420; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T27.X, 0 8421; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T26.X, 0 8422; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T25.X, 0 8423; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T24.X, 0 8424; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T23.X, 0 8425; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T22.X, 0 8426; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T21.X, 1 8427; EG-NEXT: CF_END 8428; EG-NEXT: Fetch clause starting at 22: 8429; EG-NEXT: VTX_READ_128 T20.XYZW, T19.X, 0, #1 8430; EG-NEXT: Fetch clause starting at 24: 8431; EG-NEXT: VTX_READ_128 T38.XYZW, T19.X, 48, #1 8432; EG-NEXT: VTX_READ_128 T39.XYZW, T19.X, 32, #1 8433; EG-NEXT: VTX_READ_128 T40.XYZW, T19.X, 16, #1 8434; EG-NEXT: ALU clause starting at 30: 8435; EG-NEXT: MOV * T19.X, KC0[2].Z, 8436; EG-NEXT: ALU clause starting at 31: 8437; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 8438; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 8439; EG-NEXT: LSHR T21.X, PV.W, literal.x, 8440; EG-NEXT: LSHR * T22.X, KC0[2].Y, literal.x, 8441; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 
8442; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 8443; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 8444; EG-NEXT: LSHR T23.X, PV.W, literal.x, 8445; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8446; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 8447; EG-NEXT: LSHR T24.X, PV.W, literal.x, 8448; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8449; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 8450; EG-NEXT: LSHR T25.X, PV.W, literal.x, 8451; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8452; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 8453; EG-NEXT: LSHR T26.X, PV.W, literal.x, 8454; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8455; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 8456; EG-NEXT: LSHR T27.X, PV.W, literal.x, 8457; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8458; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 8459; EG-NEXT: LSHR T28.X, PV.W, literal.x, 8460; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8461; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43) 8462; EG-NEXT: LSHR T29.X, PV.W, literal.x, 8463; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8464; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43) 8465; EG-NEXT: LSHR T30.X, PV.W, literal.x, 8466; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8467; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43) 8468; EG-NEXT: LSHR T31.X, PV.W, literal.x, 8469; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8470; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43) 8471; EG-NEXT: LSHR T32.X, PV.W, literal.x, 8472; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8473; EG-NEXT: 2(2.802597e-45), 208(2.914701e-43) 8474; EG-NEXT: LSHR T33.X, PV.W, literal.x, 8475; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8476; EG-NEXT: 2(2.802597e-45), 192(2.690493e-43) 8477; EG-NEXT: LSHR T34.X, PV.W, literal.x, 8478; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y, 8479; EG-NEXT: ASHR * T35.W, T20.Y, literal.z, 8480; EG-NEXT: 2(2.802597e-45), 240(3.363116e-43) 8481; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 8482; EG-NEXT: LSHR T36.X, PV.W, literal.x, 8483; 
EG-NEXT: ASHR T35.Z, T20.Y, literal.y, 8484; EG-NEXT: ASHR * T37.W, T20.X, literal.z, 8485; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 8486; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 8487; EG-NEXT: BFE_INT T35.X, T20.Y, 0.0, literal.x, 8488; EG-NEXT: ASHR * T37.Z, T20.X, literal.x, 8489; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 8490; EG-NEXT: BFE_INT T37.X, T20.X, 0.0, literal.x, 8491; EG-NEXT: ASHR T35.Y, PV.X, literal.y, 8492; EG-NEXT: ASHR * T19.W, T20.W, literal.y, 8493; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8494; EG-NEXT: ALU clause starting at 88: 8495; EG-NEXT: ASHR T19.Z, T20.W, literal.x, 8496; EG-NEXT: ASHR * T41.W, T20.Z, literal.y, 8497; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8498; EG-NEXT: BFE_INT T19.X, T20.W, 0.0, literal.x, 8499; EG-NEXT: ASHR T37.Y, T37.X, literal.y, 8500; EG-NEXT: ASHR T41.Z, T20.Z, literal.x, 8501; EG-NEXT: ASHR * T20.W, T40.Y, literal.y, 8502; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8503; EG-NEXT: BFE_INT T41.X, T20.Z, 0.0, literal.x, 8504; EG-NEXT: ASHR T19.Y, PV.X, literal.y, 8505; EG-NEXT: ASHR T20.Z, T40.Y, literal.x, 8506; EG-NEXT: ASHR * T42.W, T40.X, literal.y, 8507; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8508; EG-NEXT: BFE_INT T20.X, T40.Y, 0.0, literal.x, 8509; EG-NEXT: ASHR T41.Y, PV.X, literal.y, 8510; EG-NEXT: ASHR T42.Z, T40.X, literal.x, 8511; EG-NEXT: ASHR * T43.W, T40.W, literal.y, 8512; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8513; EG-NEXT: BFE_INT T42.X, T40.X, 0.0, literal.x, 8514; EG-NEXT: ASHR T20.Y, PV.X, literal.y, 8515; EG-NEXT: ASHR T43.Z, T40.W, literal.x, 8516; EG-NEXT: ASHR * T44.W, T40.Z, literal.y, 8517; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8518; EG-NEXT: BFE_INT T43.X, T40.W, 0.0, literal.x, 8519; EG-NEXT: ASHR T42.Y, PV.X, literal.y, 8520; EG-NEXT: ASHR T44.Z, T40.Z, literal.x, 8521; EG-NEXT: ASHR * T40.W, T39.Y, literal.y, 8522; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8523; EG-NEXT: BFE_INT T44.X, T40.Z, 0.0, literal.x, 8524; EG-NEXT: ASHR T43.Y, PV.X, 
literal.y, 8525; EG-NEXT: ASHR T40.Z, T39.Y, literal.x, 8526; EG-NEXT: ASHR * T45.W, T39.X, literal.y, 8527; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8528; EG-NEXT: BFE_INT T40.X, T39.Y, 0.0, literal.x, 8529; EG-NEXT: ASHR T44.Y, PV.X, literal.y, 8530; EG-NEXT: ASHR T45.Z, T39.X, literal.x, 8531; EG-NEXT: ASHR * T46.W, T39.W, literal.y, 8532; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8533; EG-NEXT: BFE_INT T45.X, T39.X, 0.0, literal.x, 8534; EG-NEXT: ASHR T40.Y, PV.X, literal.y, 8535; EG-NEXT: ASHR T46.Z, T39.W, literal.x, 8536; EG-NEXT: ASHR * T47.W, T39.Z, literal.y, 8537; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8538; EG-NEXT: BFE_INT T46.X, T39.W, 0.0, literal.x, 8539; EG-NEXT: ASHR T45.Y, PV.X, literal.y, 8540; EG-NEXT: ASHR T47.Z, T39.Z, literal.x, 8541; EG-NEXT: ASHR * T39.W, T38.Y, literal.y, 8542; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8543; EG-NEXT: BFE_INT T47.X, T39.Z, 0.0, literal.x, 8544; EG-NEXT: ASHR T46.Y, PV.X, literal.y, 8545; EG-NEXT: ASHR T39.Z, T38.Y, literal.x, 8546; EG-NEXT: ASHR * T48.W, T38.X, literal.y, 8547; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8548; EG-NEXT: BFE_INT T39.X, T38.Y, 0.0, literal.x, 8549; EG-NEXT: ASHR T47.Y, PV.X, literal.y, 8550; EG-NEXT: ASHR T48.Z, T38.X, literal.x, 8551; EG-NEXT: ASHR * T49.W, T38.W, literal.y, 8552; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8553; EG-NEXT: BFE_INT T48.X, T38.X, 0.0, literal.x, 8554; EG-NEXT: ASHR T39.Y, PV.X, literal.y, 8555; EG-NEXT: ASHR T49.Z, T38.W, literal.x, 8556; EG-NEXT: ASHR * T50.W, T38.Z, literal.y, 8557; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8558; EG-NEXT: BFE_INT T49.X, T38.W, 0.0, literal.x, 8559; EG-NEXT: ASHR T48.Y, PV.X, literal.y, 8560; EG-NEXT: ASHR * T50.Z, T38.Z, literal.x, 8561; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8562; EG-NEXT: BFE_INT T50.X, T38.Z, 0.0, literal.x, 8563; EG-NEXT: ASHR T49.Y, PV.X, literal.y, 8564; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 8565; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8566; EG-NEXT: 
224(3.138909e-43), 0(0.000000e+00) 8567; EG-NEXT: LSHR T38.X, PV.W, literal.x, 8568; EG-NEXT: ASHR * T50.Y, PV.X, literal.y, 8569; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 8570; 8571; CM-LABEL: global_sextload_v32i16_to_v32i64: 8572; CM: ; %bb.0: 8573; CM-NEXT: ALU 0, @30, KC0[CB0:0-32], KC1[] 8574; CM-NEXT: TEX 0 @22 8575; CM-NEXT: ALU 55, @31, KC0[CB0:0-32], KC1[] 8576; CM-NEXT: TEX 2 @24 8577; CM-NEXT: ALU 73, @87, KC0[CB0:0-32], KC1[] 8578; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T38, T50.X 8579; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T49, T36.X 8580; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T48, T34.X 8581; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T47, T33.X 8582; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T39, T32.X 8583; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T46, T31.X 8584; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T45, T30.X 8585; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T44, T29.X 8586; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T40, T28.X 8587; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T43, T27.X 8588; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T42, T26.X 8589; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T41, T25.X 8590; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T20, T24.X 8591; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T19, T23.X 8592; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T37, T22.X 8593; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T35, T21.X 8594; CM-NEXT: CF_END 8595; CM-NEXT: Fetch clause starting at 22: 8596; CM-NEXT: VTX_READ_128 T20.XYZW, T19.X, 48, #1 8597; CM-NEXT: Fetch clause starting at 24: 8598; CM-NEXT: VTX_READ_128 T38.XYZW, T19.X, 0, #1 8599; CM-NEXT: VTX_READ_128 T39.XYZW, T19.X, 16, #1 8600; CM-NEXT: VTX_READ_128 T40.XYZW, T19.X, 32, #1 8601; CM-NEXT: ALU clause starting at 30: 8602; CM-NEXT: MOV * T19.X, KC0[2].Z, 8603; CM-NEXT: ALU clause starting at 31: 8604; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 8605; CM-NEXT: 224(3.138909e-43), 0(0.000000e+00) 8606; CM-NEXT: LSHR T21.X, PV.W, literal.x, 8607; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8608; CM-NEXT: 
2(2.802597e-45), 240(3.363116e-43) 8609; CM-NEXT: LSHR T22.X, PV.W, literal.x, 8610; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8611; CM-NEXT: 2(2.802597e-45), 192(2.690493e-43) 8612; CM-NEXT: LSHR T23.X, PV.W, literal.x, 8613; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8614; CM-NEXT: 2(2.802597e-45), 208(2.914701e-43) 8615; CM-NEXT: LSHR T24.X, PV.W, literal.x, 8616; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8617; CM-NEXT: 2(2.802597e-45), 160(2.242078e-43) 8618; CM-NEXT: LSHR T25.X, PV.W, literal.x, 8619; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8620; CM-NEXT: 2(2.802597e-45), 176(2.466285e-43) 8621; CM-NEXT: LSHR T26.X, PV.W, literal.x, 8622; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8623; CM-NEXT: 2(2.802597e-45), 128(1.793662e-43) 8624; CM-NEXT: LSHR T27.X, PV.W, literal.x, 8625; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8626; CM-NEXT: 2(2.802597e-45), 144(2.017870e-43) 8627; CM-NEXT: LSHR T28.X, PV.W, literal.x, 8628; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8629; CM-NEXT: 2(2.802597e-45), 96(1.345247e-43) 8630; CM-NEXT: LSHR T29.X, PV.W, literal.x, 8631; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8632; CM-NEXT: 2(2.802597e-45), 112(1.569454e-43) 8633; CM-NEXT: LSHR T30.X, PV.W, literal.x, 8634; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8635; CM-NEXT: 2(2.802597e-45), 64(8.968310e-44) 8636; CM-NEXT: LSHR T31.X, PV.W, literal.x, 8637; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8638; CM-NEXT: 2(2.802597e-45), 80(1.121039e-43) 8639; CM-NEXT: LSHR T32.X, PV.W, literal.x, 8640; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8641; CM-NEXT: 2(2.802597e-45), 32(4.484155e-44) 8642; CM-NEXT: LSHR T33.X, PV.W, literal.x, 8643; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8644; CM-NEXT: 2(2.802597e-45), 48(6.726233e-44) 8645; CM-NEXT: LSHR T34.X, PV.W, literal.x, 8646; CM-NEXT: ASHR * T35.W, T20.Z, literal.y, 8647; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44) 8648; CM-NEXT: LSHR T36.X, KC0[2].Y, literal.x, 8649; CM-NEXT: ASHR T35.Z, 
T20.Z, literal.y, 8650; CM-NEXT: ASHR * T37.W, T20.W, literal.z, 8651; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 8652; CM-NEXT: 31(4.344025e-44), 0(0.000000e+00) 8653; CM-NEXT: BFE_INT T35.X, T20.Z, 0.0, literal.x, 8654; CM-NEXT: ASHR * T37.Z, T20.W, literal.x, 8655; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 8656; CM-NEXT: BFE_INT T37.X, T20.W, 0.0, literal.x, 8657; CM-NEXT: ASHR T35.Y, PV.X, literal.y, 8658; CM-NEXT: ASHR * T19.W, T20.X, literal.y, 8659; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8660; CM-NEXT: ALU clause starting at 87: 8661; CM-NEXT: ASHR T19.Z, T20.X, literal.x, 8662; CM-NEXT: ASHR * T20.W, T20.Y, literal.y, 8663; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8664; CM-NEXT: BFE_INT T19.X, T20.X, 0.0, literal.x, 8665; CM-NEXT: ASHR T37.Y, T37.X, literal.y, BS:VEC_120/SCL_212 8666; CM-NEXT: ASHR T20.Z, T20.Y, literal.x, 8667; CM-NEXT: ASHR * T41.W, T40.Z, literal.y, 8668; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8669; CM-NEXT: BFE_INT T20.X, T20.Y, 0.0, literal.x, 8670; CM-NEXT: ASHR T19.Y, PV.X, literal.y, 8671; CM-NEXT: ASHR T41.Z, T40.Z, literal.x, 8672; CM-NEXT: ASHR * T42.W, T40.W, literal.y, 8673; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8674; CM-NEXT: BFE_INT T41.X, T40.Z, 0.0, literal.x, 8675; CM-NEXT: ASHR T20.Y, PV.X, literal.y, 8676; CM-NEXT: ASHR T42.Z, T40.W, literal.x, 8677; CM-NEXT: ASHR * T43.W, T40.X, literal.y, 8678; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8679; CM-NEXT: BFE_INT T42.X, T40.W, 0.0, literal.x, 8680; CM-NEXT: ASHR T41.Y, PV.X, literal.y, 8681; CM-NEXT: ASHR T43.Z, T40.X, literal.x, 8682; CM-NEXT: ASHR * T40.W, T40.Y, literal.y, 8683; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8684; CM-NEXT: BFE_INT T43.X, T40.X, 0.0, literal.x, 8685; CM-NEXT: ASHR T42.Y, PV.X, literal.y, 8686; CM-NEXT: ASHR T40.Z, T40.Y, literal.x, 8687; CM-NEXT: ASHR * T44.W, T39.Z, literal.y, 8688; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8689; CM-NEXT: BFE_INT T40.X, T40.Y, 0.0, literal.x, 8690; CM-NEXT: ASHR T43.Y, PV.X, 
literal.y, 8691; CM-NEXT: ASHR T44.Z, T39.Z, literal.x, 8692; CM-NEXT: ASHR * T45.W, T39.W, literal.y, 8693; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8694; CM-NEXT: BFE_INT T44.X, T39.Z, 0.0, literal.x, 8695; CM-NEXT: ASHR T40.Y, PV.X, literal.y, 8696; CM-NEXT: ASHR T45.Z, T39.W, literal.x, 8697; CM-NEXT: ASHR * T46.W, T39.X, literal.y, 8698; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8699; CM-NEXT: BFE_INT T45.X, T39.W, 0.0, literal.x, 8700; CM-NEXT: ASHR T44.Y, PV.X, literal.y, 8701; CM-NEXT: ASHR T46.Z, T39.X, literal.x, 8702; CM-NEXT: ASHR * T39.W, T39.Y, literal.y, 8703; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8704; CM-NEXT: BFE_INT T46.X, T39.X, 0.0, literal.x, 8705; CM-NEXT: ASHR T45.Y, PV.X, literal.y, 8706; CM-NEXT: ASHR T39.Z, T39.Y, literal.x, 8707; CM-NEXT: ASHR * T47.W, T38.Z, literal.y, 8708; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8709; CM-NEXT: BFE_INT T39.X, T39.Y, 0.0, literal.x, 8710; CM-NEXT: ASHR T46.Y, PV.X, literal.y, 8711; CM-NEXT: ASHR T47.Z, T38.Z, literal.x, 8712; CM-NEXT: ASHR * T48.W, T38.W, literal.y, 8713; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8714; CM-NEXT: BFE_INT T47.X, T38.Z, 0.0, literal.x, 8715; CM-NEXT: ASHR T39.Y, PV.X, literal.y, 8716; CM-NEXT: ASHR T48.Z, T38.W, literal.x, 8717; CM-NEXT: ASHR * T49.W, T38.X, literal.y, 8718; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8719; CM-NEXT: BFE_INT T48.X, T38.W, 0.0, literal.x, 8720; CM-NEXT: ASHR T47.Y, PV.X, literal.y, 8721; CM-NEXT: ASHR T49.Z, T38.X, literal.x, 8722; CM-NEXT: ASHR * T38.W, T38.Y, literal.y, 8723; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8724; CM-NEXT: BFE_INT T49.X, T38.X, 0.0, literal.x, 8725; CM-NEXT: ASHR T48.Y, PV.X, literal.y, 8726; CM-NEXT: ASHR * T38.Z, T38.Y, literal.x, 8727; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8728; CM-NEXT: BFE_INT T38.X, T38.Y, 0.0, literal.x, 8729; CM-NEXT: ASHR T49.Y, PV.X, literal.y, 8730; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 8731; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8732; CM-NEXT: LSHR 
T50.X, PV.W, literal.x, 8733; CM-NEXT: ASHR * T38.Y, PV.X, literal.y, 8734; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44) 8735 %load = load <32 x i16>, <32 x i16> addrspace(1)* %in 8736 %ext = sext <32 x i16> %load to <32 x i64> 8737 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out 8738 ret void 8739} 8740 8741; define amdgpu_kernel void @global_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 { 8742; %load = load <64 x i16>, <64 x i16> addrspace(1)* %in 8743; %ext = zext <64 x i16> %load to <64 x i64> 8744; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out 8745; ret void 8746; } 8747 8748; define amdgpu_kernel void @global_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 { 8749; %load = load <64 x i16>, <64 x i16> addrspace(1)* %in 8750; %ext = sext <64 x i16> %load to <64 x i64> 8751; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out 8752; ret void 8753; } 8754 8755attributes #0 = { nounwind } 8756