; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-nsa-encoding -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,NONSA %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,NSA %s

; This file is compiled twice for gfx1010: once with the nsa-encoding feature
; disabled (checks under the NONSA prefix) and once with default features
; (checks under the NSA prefix). Checks under the GCN prefix apply to both.
; In the expectations below, a bracketed register list such as [v1, v2, v0]
; only ever appears under the NSA prefix, while a register range such as
; v[1:3] appears under all three prefixes.

; sample_2d: the arguments are declared as (%t, %s) but handed to the
; intrinsic as (%s, %t). Both configurations are expected to emit a v_mov
; followed by an image_sample that reads the range v[1:2].
; GCN-LABEL: {{^}}sample_2d:
;
; TODO: use NSA here
; GCN: v_mov_b32_e32 v2, v0
;
; GCN: image_sample v[0:3], v[1:2],
define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %t, float %s) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample_3d: declared as (%r, %s, %t), passed as (%s, %t, %r). Without the
; nsa-encoding feature a v_mov plus the range v[1:3] is expected; with it, the
; register list [v1, v2, v0] is expected.
; GCN-LABEL: {{^}}sample_3d:
; NONSA: v_mov_b32_e32 v3, v0
; NONSA: image_sample v[0:3], v[1:3],
; NSA: image_sample v[0:3], [v1, v2, v0],
define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %r, float %s, float %t) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample_d_3d: nine float operands are passed to the intrinsic in an order that
; differs from the argument declaration order; a nine-entry register list is
; expected in the NSA configuration.
; NOTE(review): there is no expectation for the NONSA configuration here.
; GCN-LABEL: {{^}}sample_d_3d:
; NSA: image_sample_d v[0:3], [v3, v8, v7, v5, v4, v6, v0, v2, v1],
define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %r, float %t, float %dsdh, float %dtdv, float %dsdv, float %drdv, float %drdh, float %dtdh) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32(i32 15, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample_contig_nsa: two samples in one function. The first call takes its
; float operands in declaration order; the second takes (%s2, %t2, %r2) while
; they are declared as (%r2, %s2, %t2). Expected: a register range for the
; first (both configurations) and a register list for the second (NSA
; configuration only).
; GCN-LABEL: {{^}}sample_contig_nsa:
; GCN: image_sample_c_l v0, v[0:7],
; NSA: image_sample v1, [v6, v7, v5],
define amdgpu_ps <2 x float> @sample_contig_nsa(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s1, float %t1, float %r1, float %lod, float %r2, float %s2, float %t2) {
main_body:
  %v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare, float %s1, float %t1, float %r1, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %v2 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2, float %t2, float %r2, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %r.0 = insertelement <2 x float> undef, float %v1, i32 0
  %r = insertelement <2 x float> %r.0, float %v2, i32 1
  ret <2 x float> %r
}

; sample_nsa_nsa: both calls receive their float operands in an order that
; differs from the declaration order (%lod is declared first but passed last;
; %r2 is declared before %s2/%t2 but passed after them). A register list is
; expected for both calls in the NSA configuration.
; NOTE(review): there is no expectation for the NONSA configuration here.
; GCN-LABEL: {{^}}sample_nsa_nsa:
; NSA: image_sample_c_l v0, [v1, v2, v3, v4, v0],
; NSA: image_sample v1, [v6, v7, v5],
define amdgpu_ps <2 x float> @sample_nsa_nsa(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %lod, float %zcompare, float %s1, float %t1, float %r1, float %r2, float %s2, float %t2) {
main_body:
  %v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare, float %s1, float %t1, float %r1, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %v2 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2, float %t2, float %r2, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %r.0 = insertelement <2 x float> undef, float %v1, i32 0
  %r = insertelement <2 x float> %r.0, float %v2, i32 1
  ret <2 x float> %r
}

; sample_nsa_contig: the first call passes %lod last although it is declared
; first; the second call passes (%s2, %t2, %r2) in declaration order. In the
; NSA configuration a register list is expected for the first call and the
; range v[5:7] for the second.
; NOTE(review): there is no expectation for the NONSA configuration here.
; GCN-LABEL: {{^}}sample_nsa_contig:
; NSA: image_sample_c_l v0, [v1, v2, v3, v4, v0],
; NSA: image_sample v1, v[5:7],
define amdgpu_ps <2 x float> @sample_nsa_contig(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %lod, float %zcompare, float %s1, float %t1, float %r1, float %s2, float %t2, float %r2) {
main_body:
  %v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare, float %s1, float %t1, float %r1, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %v2 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2, float %t2, float %r2, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %r.0 = insertelement <2 x float> undef, float %v1, i32 0
  %r = insertelement <2 x float> %r.0, float %v2, i32 1
  ret <2 x float> %r
}

; sample_contig_contig: both calls take their float operands in declaration
; order, and register ranges are expected for both calls in both
; configurations (the second call is checked once per configuration with the
; same pattern).
; GCN-LABEL: {{^}}sample_contig_contig:
; GCN: image_sample_c_l v0, v[0:7],
; NSA: image_sample v1, v[5:7],
; NONSA: image_sample v1, v[5:7],
define amdgpu_ps <2 x float> @sample_contig_contig(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s1, float %t1, float %r1, float %lod, float %s2, float %t2, float %r2) {
main_body:
  %v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare, float %s1, float %t1, float %r1, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %v2 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2, float %t2, float %r2, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  %r.0 = insertelement <2 x float> undef, float %v1, i32 0
  %r = insertelement <2 x float> %r.0, float %v2, i32 1
  ret <2 x float> %r
}

; Test that undef inputs with NSA are handled safely; these tests used to crash.
; The four functions below call image.sample.c.b.1darray with some or all of
; its four float operands set to undef.

; All four float operands are undef; the range v[0:3] is expected in both
; configurations.
; GCN-LABEL: {{^}}sample_undef_undef_undef_undef:
; GCN: image_sample_c_b v0, v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY
define amdgpu_ps float @sample_undef_undef_undef_undef(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp) {
  %r = call float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 1, float undef, float undef, float undef, float undef, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret float %r
}

; Only the last float operand (%layer, taken straight from the function
; argument) is defined. Without the nsa-encoding feature a v_mov into v3 plus
; the range v[0:3] is expected; with it, the list [v0, v0, v0, v0] is expected.
; GCN-LABEL: {{^}}sample_undef_undef_undef_def:
; NONSA: v_mov_b32_e32 v3, v0
; NONSA: image_sample_c_b v0, v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY
; NSA: image_sample_c_b v0, [v0, v0, v0, v0], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY
define amdgpu_ps float @sample_undef_undef_undef_def(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %layer) {
  %r = call float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 1, float undef, float undef, float undef, float %layer, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret float %r
}

; Same shape as the previous test, but the last operand is first passed
; through llvm.rint. Both configurations are expected to emit v_rndne into v3
; and then use the range v[0:3].
; GCN-LABEL: {{^}}sample_undef_undef_undef_def_rnd:
; GCN: v_rndne_f32_e32 v3, v0
; GCN: image_sample_c_b v0, v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY
define amdgpu_ps float @sample_undef_undef_undef_def_rnd(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %layer) {
  %layer_rnd = call float @llvm.rint.f32(float %layer)
  %r = call float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 1, float undef, float undef, float undef, float %layer_rnd, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret float %r
}

; Only the first float operand is defined, and it is produced by an fadd
; rather than taken directly from the argument. Both configurations are
; expected to emit v_add_f32 into v0 and then use the range v[0:3].
; GCN-LABEL: {{^}}sample_def_undef_undef_undef:
; GCN: v_add_f32_e32 v0, 1.0, v0
; GCN: image_sample_c_b v0, v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY
define amdgpu_ps float @sample_def_undef_undef_undef(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %z0) {
  ; The NSA reassign pass is conservative (quite reasonably!) when one of the operands
  ; comes directly from a function argument (via COPY). To test that NSA can be
  ; eliminated in the presence of undef, just add an arbitrary intermediate
  ; computation.
  %c0 = fadd float %z0, 1.0
  %r = call float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 1, float %c0, float undef, float undef, float undef, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret float %r
}

; Declarations of the image sample intrinsics returning <4 x float>.
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32(i32, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; Declarations of the 3d image sample intrinsics returning a single float.
declare float @llvm.amdgcn.image.sample.3d.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; Declarations used by the undef-operand tests.
declare float @llvm.rint.f32(float) #2
declare float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1

; #1 is attached to the image sample intrinsics, #2 to llvm.rint.
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone speculatable willreturn }