; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s

; Purpose of this test
; --------------------
; Each function below calls an llvm.amdgcn.image.{sample,gather4}.*l* intrinsic
; with half (f16) coordinates and a *constant* LOD operand of 0.0, -0.0 or
; -2.0. For every call the expected assembly uses the matching "_lz" opcode
; with the a16 modifier and carries no LOD operand in the address registers.
;
; The test is compiled with -global-isel for two subtargets (gfx900 and
; gfx1010). The gfx1010 expectations additionally carry a dim: modifier on the
; image instruction and use the 0xffff literal directly in v_and_or_b32, where
; the gfx900 expectations first move it into a VGPR.
;
; In every function the trailing %lod parameter is declared but never used;
; the LOD actually passed to the intrinsic is the literal constant in the call.
;
; The check lines are autogenerated. Regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; 1D sample.l, LOD = +0.0. Expected opcode is image_sample_lz with a single
; address VGPR (v0).
define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) {
; GFX9-LABEL: sample_l_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    s_mov_b32 s8, s10
; GFX9-NEXT:    s_mov_b32 s9, s11
; GFX9-NEXT:    s_mov_b32 s10, s12
; GFX9-NEXT:    s_mov_b32 s11, s13
; GFX9-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_l_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half %s, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D sample.l, LOD = -0.0 (negative zero). Expected opcode is image_sample_lz;
; %s and %t are packed into one VGPR (low half = v0, high half = v1 << 16).
define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
; GFX9-LABEL: sample_l_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    s_mov_b32 s8, s10
; GFX9-NEXT:    s_mov_b32 s9, s11
; GFX9-NEXT:    s_mov_b32 s10, s12
; GFX9-NEXT:    s_mov_b32 s11, s13
; GFX9-NEXT:    v_and_or_b32 v0, v0, v2, v1
; GFX9-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_l_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    v_and_or_b32 v0, v0, 0xffff, v1
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half %s, half %t, half -0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; 1D sample.c.l, LOD = -2.0 (the only strictly negative, non-zero LOD in this
; file). Expected opcode is image_sample_c_lz with address v[0:1]
; (v0 = %zcompare, v1 = packed coordinate).
; NOTE(review) - the expected code ORs (s0 << 16) into the upper 16 bits of
; v1. Presumably that half is a don't-care for a 1D a16 address; confirm
; before relying on it.
define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) {
; GFX9-LABEL: sample_c_l_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s8, s10
; GFX9-NEXT:    s_mov_b32 s10, s12
; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT:    s_lshl_b32 s12, s0, 16
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    s_mov_b32 s9, s11
; GFX9-NEXT:    s_mov_b32 s11, s13
; GFX9-NEXT:    v_and_or_b32 v1, v1, v2, s12
; GFX9-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_l_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    v_and_or_b32 v1, v1, 0xffff, s12
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float %zcompare, half %s, half -2.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D sample.c.l, LOD = +0.0. Expected opcode is image_sample_c_lz with address
; v[0:1] (v0 = %zcompare, v1 = %s in the low half and %t in the high half).
define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
; GFX9-LABEL: sample_c_l_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    s_mov_b32 s8, s10
; GFX9-NEXT:    s_mov_b32 s9, s11
; GFX9-NEXT:    s_mov_b32 s10, s12
; GFX9-NEXT:    s_mov_b32 s11, s13
; GFX9-NEXT:    v_and_or_b32 v1, v1, v3, v2
; GFX9-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_l_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    v_and_or_b32 v1, v1, 0xffff, v2
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; 1D sample.l.o, LOD = +0.0. Expected opcode is image_sample_lz_o with address
; v[0:1] (v0 = %offset, v1 = packed coordinate).
; NOTE(review) - as in sample_c_l_1d, the expected code ORs (s0 << 16) into the
; upper 16 bits of v1; presumably a don't-care half. Confirm.
define amdgpu_ps <4 x float> @sample_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, half %s, half %lod) {
; GFX9-LABEL: sample_l_o_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s8, s10
; GFX9-NEXT:    s_mov_b32 s10, s12
; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT:    s_lshl_b32 s12, s0, 16
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    s_mov_b32 s9, s11
; GFX9-NEXT:    s_mov_b32 s11, s13
; GFX9-NEXT:    v_and_or_b32 v1, v1, v2, s12
; GFX9-NEXT:    image_sample_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_l_o_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    v_and_or_b32 v1, v1, 0xffff, s12
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    image_sample_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f16(i32 15, i32 %offset, half %s, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D sample.l.o, LOD = +0.0. Expected opcode is image_sample_lz_o with address
; v[0:1] (v0 = %offset, v1 = %s in the low half and %t in the high half).
define amdgpu_ps <4 x float> @sample_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, half %s, half %t, half %lod) {
; GFX9-LABEL: sample_l_o_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    s_mov_b32 s8, s10
; GFX9-NEXT:    s_mov_b32 s9, s11
; GFX9-NEXT:    s_mov_b32 s10, s12
; GFX9-NEXT:    s_mov_b32 s11, s13
; GFX9-NEXT:    v_and_or_b32 v1, v1, v3, v2
; GFX9-NEXT:    image_sample_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_l_o_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    v_and_or_b32 v1, v1, 0xffff, v2
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    image_sample_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f16(i32 15, i32 %offset, half %s, half %t, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; 1D sample.c.l.o, LOD = +0.0. Expected opcode is image_sample_c_lz_o with
; address v[0:2] (v0 = %offset, v1 = %zcompare, v2 = packed coordinate).
; NOTE(review) - as in sample_c_l_1d, the expected code ORs (s0 << 16) into the
; upper 16 bits of the coordinate register (v2 here); presumably a don't-care
; half. Confirm.
define amdgpu_ps <4 x float> @sample_c_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %s, half %lod) {
; GFX9-LABEL: sample_c_l_o_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s8, s10
; GFX9-NEXT:    s_mov_b32 s10, s12
; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
; GFX9-NEXT:    s_lshl_b32 s12, s0, 16
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    s_mov_b32 s9, s11
; GFX9-NEXT:    s_mov_b32 s11, s13
; GFX9-NEXT:    v_and_or_b32 v2, v2, v3, s12
; GFX9-NEXT:    image_sample_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_l_o_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    v_and_or_b32 v2, v2, 0xffff, s12
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    image_sample_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f16(i32 15, i32 %offset, float %zcompare, half %s, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D sample.c.l.o, LOD = +0.0. Expected opcode is image_sample_c_lz_o with
; address v[0:2] (v0 = %offset, v1 = %zcompare, v2 = %s low half / %t high
; half).
define amdgpu_ps <4 x float> @sample_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %s, half %t, half %lod) {
; GFX9-LABEL: sample_c_l_o_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    s_mov_b32 s8, s10
; GFX9-NEXT:    s_mov_b32 s9, s11
; GFX9-NEXT:    s_mov_b32 s10, s12
; GFX9-NEXT:    s_mov_b32 s11, s13
; GFX9-NEXT:    v_and_or_b32 v2, v2, v4, v3
; GFX9-NEXT:    image_sample_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_l_o_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    v_and_or_b32 v2, v2, 0xffff, v3
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    image_sample_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f16(i32 15, i32 %offset, float %zcompare, half %s, half %t, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D gather4.l, LOD = +0.0. Expected opcode is image_gather4_lz with a single
; packed address VGPR (v0 = %s low half / %t high half).
define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
; GFX9-LABEL: gather4_l_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    s_mov_b32 s8, s10
; GFX9-NEXT:    s_mov_b32 s9, s11
; GFX9-NEXT:    s_mov_b32 s10, s12
; GFX9-NEXT:    s_mov_b32 s11, s13
; GFX9-NEXT:    v_and_or_b32 v0, v0, v2, v1
; GFX9-NEXT:    image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_l_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    v_and_or_b32 v0, v0, 0xffff, v1
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 15, half %s, half %t, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D gather4.c.l, LOD = +0.0. Expected opcode is image_gather4_c_lz with
; address v[0:1] (v0 = %zcompare, v1 = %s low half / %t high half).
define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
; GFX9-LABEL: gather4_c_l_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    s_mov_b32 s8, s10
; GFX9-NEXT:    s_mov_b32 s9, s11
; GFX9-NEXT:    s_mov_b32 s10, s12
; GFX9-NEXT:    s_mov_b32 s11, s13
; GFX9-NEXT:    v_and_or_b32 v1, v1, v3, v2
; GFX9-NEXT:    image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_l_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    v_and_or_b32 v1, v1, 0xffff, v2
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D gather4.l.o, LOD = +0.0. Expected opcode is image_gather4_lz_o with
; address v[0:1] (v0 = %offset, v1 = %s low half / %t high half).
define amdgpu_ps <4 x float> @gather4_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, half %s, half %t, half %lod) {
; GFX9-LABEL: gather4_l_o_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    s_mov_b32 s8, s10
; GFX9-NEXT:    s_mov_b32 s9, s11
; GFX9-NEXT:    s_mov_b32 s10, s12
; GFX9-NEXT:    s_mov_b32 s11, s13
; GFX9-NEXT:    v_and_or_b32 v1, v1, v3, v2
; GFX9-NEXT:    image_gather4_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_l_o_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    v_and_or_b32 v1, v1, 0xffff, v2
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    image_gather4_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f16(i32 15, i32 %offset, half %s, half %t, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D gather4.c.l.o, LOD = +0.0. Expected opcode is image_gather4_c_lz_o with
; address v[0:2] (v0 = %offset, v1 = %zcompare, v2 = %s low half / %t high
; half).
define amdgpu_ps <4 x float> @gather4_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %s, half %t, half %lod) {
; GFX9-LABEL: gather4_c_l_o_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    s_mov_b32 s8, s10
; GFX9-NEXT:    s_mov_b32 s9, s11
; GFX9-NEXT:    s_mov_b32 s10, s12
; GFX9-NEXT:    s_mov_b32 s11, s13
; GFX9-NEXT:    v_and_or_b32 v2, v2, v4, v3
; GFX9-NEXT:    image_gather4_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_l_o_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    v_and_or_b32 v2, v2, 0xffff, v3
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    image_gather4_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f16(i32 15, i32 %offset, float %zcompare, half %s, half %t, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; Intrinsic declarations, one per call above. In each signature the half
; operands are the coordinates followed by the LOD; the leading i32 immarg is
; the dmask (15 in every call, printed as dmask:0xf in the expected assembly).
declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 immarg, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 immarg, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 immarg, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f16(i32 immarg, i32, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f16(i32 immarg, i32, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f16(i32 immarg, i32, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f16(i32 immarg, i32, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f16(i32 immarg, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f16(i32 immarg, i32, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f16(i32 immarg, i32, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0

; Attribute group shared by all intrinsic declarations above.
attributes #0 = { nounwind readonly }