; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s

; Every function below is an amdgpu_ps shader that makes a single call to one
; 2D llvm.amdgcn.image.gather4.*.o intrinsic (first immediate argument 1) and
; returns the <4 x float> result unchanged. The resource and sampler
; descriptors arrive as inreg arguments; the remaining arguments are forwarded
; to the intrinsic in the order listed in each function's comment.

; gather4.o: address operands are (offset, s, t).
define amdgpu_ps <4 x float> @gather4_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
; GFX6-LABEL: gather4_o_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_o_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s1, exec_lo
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    image_gather4_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %res = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %res
}

; gather4.c.o: address operands are (offset, zcompare, s, t).
define amdgpu_ps <4 x float> @gather4_c_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
; GFX6-LABEL: gather4_c_o_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_o_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s1, exec_lo
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    image_gather4_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %res = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %res
}

; gather4.cl.o: address operands are (offset, s, t, clamp).
define amdgpu_ps <4 x float> @gather4_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %clamp) {
; GFX6-LABEL: gather4_cl_o_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_cl_o_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s1, exec_lo
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    image_gather4_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %res = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %res
}

; gather4.c.cl.o: address operands are (offset, zcompare, s, t, clamp).
define amdgpu_ps <4 x float> @gather4_c_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %clamp) {
; GFX6-LABEL: gather4_c_cl_o_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4_c_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_cl_o_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s1, exec_lo
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    image_gather4_c_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %res = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %res
}

; gather4.b.o: address operands are (offset, bias, s, t).
define amdgpu_ps <4 x float> @gather4_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t) {
; GFX6-LABEL: gather4_b_o_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_b_o_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s1, exec_lo
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    image_gather4_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %res = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %res
}

; gather4.c.b.o: address operands are (offset, bias, zcompare, s, t).
define amdgpu_ps <4 x float> @gather4_c_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t) {
; GFX6-LABEL: gather4_c_b_o_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4_c_b_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_b_o_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s1, exec_lo
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    image_gather4_c_b_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %res = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %res
}

; gather4.b.cl.o: address operands are (offset, bias, s, t, clamp).
; NOTE(review): the recorded output for this variant contains no s_wqm / exec
; save-and-restore sequence, unlike the other bias variants in this file.
; Confirm that this is the intended selection before relying on it.
define amdgpu_ps <4 x float> @gather4_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t, float %clamp) {
; GFX6-LABEL: gather4_b_cl_o_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    image_gather4_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_b_cl_o_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    image_gather4_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %res = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %res
}

; gather4.c.b.cl.o: address operands are (offset, bias, zcompare, s, t, clamp).
define amdgpu_ps <4 x float> @gather4_c_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp) {
; GFX6-LABEL: gather4_c_b_cl_o_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4_c_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_b_cl_o_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s1, exec_lo
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    image_gather4_c_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %res = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %res
}

; gather4.l.o: address operands are (offset, s, t, lod).
define amdgpu_ps <4 x float> @gather4_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
; GFX6-LABEL: gather4_l_o_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    image_gather4_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_l_o_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    image_gather4_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %res = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %res
}

; gather4.c.l.o: address operands are (offset, zcompare, s, t, lod).
define amdgpu_ps <4 x float> @gather4_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
; GFX6-LABEL: gather4_c_l_o_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    image_gather4_c_l_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_l_o_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    image_gather4_c_l_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %res = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %res
}

; gather4.lz.o: address operands are (offset, s, t).
define amdgpu_ps <4 x float> @gather4_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
; GFX6-LABEL: gather4_lz_o_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    image_gather4_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_lz_o_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    image_gather4_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %res = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %res
}

; gather4.c.lz.o: address operands are (offset, zcompare, s, t).
define amdgpu_ps <4 x float> @gather4_c_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
; GFX6-LABEL: gather4_c_lz_o_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    image_gather4_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_lz_o_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    image_gather4_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %res = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %res
}

; Declarations of intrinsics called by the test functions above.
declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 immarg, i32, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
; Declarations of the lz.o and c.lz.o gather4 intrinsics (2D, v4f32 result).
declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 immarg, i32, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0

; Attribute group referenced as #0 by the intrinsic declarations.
attributes #0 = { nounwind readonly }