; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX10 %s

; Codegen tests for the llvm.amdgcn.exp.f32 / llvm.amdgcn.exp.i32 export
; intrinsics, run for tonga and gfx1010.
;
; Operand layout of the intrinsic, as exercised by the checks in this file:
;   i32 target   export target; the values used here print as
;                  0 -> mrt0, 7 -> mrt7, 8 -> mrtz, 9 -> null,
;                  10/11 -> invalid_target_10/11,
;                  12..15 -> pos0..pos3, 16 -> pos4 (checked on gfx1010 only),
;                  32/33/34/63 -> param0/param1/param2/param31
;   i32 en       enable mask; bit N enables source N, a disabled source
;                prints as "off"
;   4 x value    source operands src0..src3 (float or i32)
;   i1 done      when true the "done" modifier is printed
;   i1 vm        when true the "vm" modifier is printed

declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32) #2

;------------------------------------------------------------------------------
; f32 exports
;------------------------------------------------------------------------------

; Enable mask 0: every source prints as "off".
; GCN-LABEL: {{^}}test_export_zeroes_f32:
; GCN: exp mrt0 off, off, off, off{{$}}
; GCN: exp mrt0 off, off, off, off done{{$}}
define amdgpu_kernel void @test_export_zeroes_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 true, i1 false)
  ret void
}

; FIXME: Should not set up registers for the unused source registers.

; Single-source and partial enable masks: only the enabled sources appear in
; the exp instruction, the rest print as "off".

; GCN-LABEL: {{^}}test_export_en_src0_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 [[SRC0]], off, off, off done{{$}}
define amdgpu_kernel void @test_export_en_src0_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_en_src1_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 off, [[SRC1]], off, off done{{$}}
define amdgpu_kernel void @test_export_en_src1_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_en_src2_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 off, off, [[SRC2]], off done{{$}}
define amdgpu_kernel void @test_export_en_src2_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_en_src3_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 off, off, off, [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_en_src3_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_en_src0_src1_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 [[SRC0]], [[SRC1]], off, off done{{$}}
define amdgpu_kernel void @test_export_en_src0_src1_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 3, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_en_src0_src2_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 [[SRC0]], off, [[SRC2]], off done{{$}}
define amdgpu_kernel void @test_export_en_src0_src2_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 5, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_en_src0_src3_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]]{{$}}
; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_en_src0_src3_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_en_src0_src1_src2_src3_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_en_src0_src1_src2_src3_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; Export target coverage: each test below uses a different target operand and
; checks the target name printed in the exp instruction, once without and once
; with "done".

; All four sources are the same constant, so a single register is expected.
; GCN-LABEL: {{^}}test_export_mrt7_f32:
; GCN-DAG: v_mov_b32_e32 [[VHALF:v[0-9]+]], 0.5
; GCN: exp mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]]{{$}}
; GCN: exp mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]] done{{$}}
define amdgpu_kernel void @test_export_mrt7_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 7, i32 15, float 0.5, float 0.5, float 0.5, float 0.5, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 7, i32 15, float 0.5, float 0.5, float 0.5, float 0.5, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_z_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_z_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 8, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 8, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_null_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_null_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 9, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 9, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; Targets 10 and 11 are expected to print as invalid_target_<N>.
; GCN-LABEL: {{^}}test_export_reserved10_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_reserved10_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 10, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 10, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_reserved11_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_reserved11_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 11, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 11, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_pos0_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_pos0_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_pos3_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_pos3_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 15, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 15, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_param0_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_param0_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_param31_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_param31_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 63, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 63, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; Last operand true: the "vm" modifier is printed, after "done" when both are
; set.
; GCN-LABEL: {{^}}test_export_vm_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] vm{{$}}
; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}}
define amdgpu_kernel void @test_export_vm_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 true)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 true)
  ret void
}

;------------------------------------------------------------------------------
; i32 exports (same coverage as the f32 tests above, with integer sources)
;------------------------------------------------------------------------------

; Enable mask 0: every source prints as "off".
; GCN-LABEL: {{^}}test_export_zeroes_i32:
; GCN: exp mrt0 off, off, off, off{{$}}
; GCN: exp mrt0 off, off, off, off done{{$}}
define amdgpu_kernel void @test_export_zeroes_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 true, i1 false)
  ret void
}

; FIXME: Should not set up registers for the unused source registers.

; GCN-LABEL: {{^}}test_export_en_src0_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 [[SRC0]], off, off, off done{{$}}
define amdgpu_kernel void @test_export_en_src0_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 1, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_en_src1_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 off, [[SRC1]], off, off done{{$}}
define amdgpu_kernel void @test_export_en_src1_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 2, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_en_src2_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 off, off, [[SRC2]], off done{{$}}
define amdgpu_kernel void @test_export_en_src2_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 4, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_en_src3_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 off, off, off, [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_en_src3_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 8, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_en_src0_src1_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 [[SRC0]], [[SRC1]], off, off done{{$}}
define amdgpu_kernel void @test_export_en_src0_src1_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 3, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_en_src0_src2_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 [[SRC0]], off, [[SRC2]], off done{{$}}
define amdgpu_kernel void @test_export_en_src0_src2_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 5, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_en_src0_src3_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]]{{$}}
; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_en_src0_src3_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 9, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 0, i32 9, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_en_src0_src1_src2_src3_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_en_src0_src1_src2_src3_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; All four sources are the integer constant 5, so a single register is
; expected.
; GCN-LABEL: {{^}}test_export_mrt7_i32:
; GCN-DAG: v_mov_b32_e32 [[VFIVE:v[0-9]+]], 5
; GCN: exp mrt7 [[VFIVE]], [[VFIVE]], [[VFIVE]], [[VFIVE]]{{$}}
; GCN: exp mrt7 [[VFIVE]], [[VFIVE]], [[VFIVE]], [[VFIVE]] done{{$}}
define amdgpu_kernel void @test_export_mrt7_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 7, i32 15, i32 5, i32 5, i32 5, i32 5, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 7, i32 15, i32 5, i32 5, i32 5, i32 5, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_z_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_z_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 8, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 8, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_null_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_null_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 9, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 9, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_reserved10_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_reserved10_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 10, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 10, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_reserved11_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_reserved11_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 11, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 11, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_pos0_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_pos0_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 12, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 12, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_pos3_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_pos3_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 15, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 15, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_param0_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_param0_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 32, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 32, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_param31_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_param31_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 63, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 63, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; GCN-LABEL: {{^}}test_export_vm_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] vm{{$}}
; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}}
define amdgpu_kernel void @test_export_vm_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 true)
  call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 true)
  ret void
}

;------------------------------------------------------------------------------
; Exports under control flow
;------------------------------------------------------------------------------

; Each test below places the export in a conditionally executed block and
; checks that an s_cbranch_execz is emitted before the exp, for every
; combination of the done / vm operands.

; GCN-LABEL: {{^}}test_if_export_f32:
; GCN: s_cbranch_execz
; GCN: exp
define amdgpu_ps void @test_if_export_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
  %cc = icmp eq i32 %flag, 0
  br i1 %cc, label %end, label %exp

exp:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 false)
  br label %end

end:
  ret void
}

; GCN-LABEL: {{^}}test_if_export_vm_f32:
; GCN: s_cbranch_execz
; GCN: exp
define amdgpu_ps void @test_if_export_vm_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
  %cc = icmp eq i32 %flag, 0
  br i1 %cc, label %end, label %exp

exp:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 true)
  br label %end

end:
  ret void
}

; GCN-LABEL: {{^}}test_if_export_done_f32:
; GCN: s_cbranch_execz
; GCN: exp
define amdgpu_ps void @test_if_export_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
  %cc = icmp eq i32 %flag, 0
  br i1 %cc, label %end, label %exp

exp:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 false)
  br label %end

end:
  ret void
}

; GCN-LABEL: {{^}}test_if_export_vm_done_f32:
; GCN: s_cbranch_execz
; GCN: exp
define amdgpu_ps void @test_if_export_vm_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
  %cc = icmp eq i32 %flag, 0
  br i1 %cc, label %end, label %exp

exp:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
  br label %end

end:
  ret void
}

;------------------------------------------------------------------------------
; Export scheduling / ordering
;------------------------------------------------------------------------------

; The fsub feeding the second export sits between the two exports in the IR;
; the checks require the two exp instructions to be emitted back to back.
; GCN-LABEL: {{^}}test_export_clustering:
; GCN-DAG: v_mov_b32_e32 [[W0:v[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[W1:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[X:v[0-9]+]], s0
; GCN-DAG: v_mov_b32_e32 [[Y:v[0-9]+]], s1
; GCN-DAG: v_add_f32_e{{32|64}} [[Z0:v[0-9]+]]
; GCN-DAG: v_sub_f32_e{{32|64}} [[Z1:v[0-9]+]]
; GCN: exp param0 [[X]], [[Y]], [[Z0]], [[W0]]{{$}}
; GCN-NEXT: exp param1 [[X]], [[Y]], [[Z1]], [[W1]] done{{$}}
define amdgpu_kernel void @test_export_clustering(float %x, float %y) #0 {
  %z0 = fadd float %x, %y
  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %x, float %y, float %z0, float 0.0, i1 false, i1 false)
  %z1 = fsub float %y, %x
  call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %x, float %y, float %z1, float 1.0, i1 true, i1 false)
  ret void
}

; The IR issues the param export (target 32) before the pos export (target 12);
; the checks require the pos export to be emitted first, with no s_waitcnt
; between it and the param export.
; GCN-LABEL: {{^}}test_export_pos_before_param:
; GCN: exp pos0
; GCN-NOT: s_waitcnt
; GCN: exp param0
define amdgpu_kernel void @test_export_pos_before_param(float %x, float %y) #0 {
  %z0 = fadd float %x, %y
  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
  %z1 = fsub float %y, %x
  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %z1, i1 true, i1 false)
  ret void
}

; Same as above with target 16 (pos4); the ordering is only checked for the
; gfx1010 run.
; GCN-LABEL: {{^}}test_export_pos4_before_param:
; GFX10: exp pos4
; GFX10-NOT: s_waitcnt
; GFX10: exp param0
define amdgpu_kernel void @test_export_pos4_before_param(float %x, float %y) #0 {
  %z0 = fadd float %x, %y
  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
  %z1 = fsub float %y, %x
  call void @llvm.amdgcn.exp.f32(i32 16, i32 15, float 0.0, float 0.0, float 0.0, float %z1, i1 true, i1 false)
  ret void
}

; Three param exports followed by three pos exports in the IR; the checks
; require all pos exports first, each group keeping its relative order.
; GCN-LABEL: {{^}}test_export_pos_before_param_ordered:
; GCN: exp pos0
; GCN: exp pos1
; GCN: exp pos2
; GCN-NOT: s_waitcnt
; GCN: exp param0
; GCN: exp param1
; GCN: exp param2
define amdgpu_kernel void @test_export_pos_before_param_ordered(float %x, float %y) #0 {
  %z0 = fadd float %x, %y
  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 34, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
  %z1 = fsub float %y, %x
  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %z1, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 13, i32 15, float 0.0, float 0.0, float 0.0, float %z1, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 14, i32 15, float 0.0, float 0.0, float 0.0, float %z1, i1 true, i1 false)
  ret void
}

; The pos export consumes the result of a buffer load that follows the param
; exports in the IR; the checks still require pos0 to be emitted first,
; immediately followed by param0 and param1.
; GCN-LABEL: {{^}}test_export_pos_before_param_across_load:
; GCN: exp pos0
; GCN-NEXT: exp param0
; GCN-NEXT: exp param1
define amdgpu_kernel void @test_export_pos_before_param_across_load(i32 %idx) #0 {
  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false)
  %load = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> undef, i32 %idx, i32 0, i32 0)
  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %load, i1 true, i1 false)
  ret void
}

; A store and a load to addrspace(5) allocas surround the pos export in the
; IR; the checks require buffer_store, then buffer_load, then the exports in
; the order pos0, param0, param1.
; GCN-LABEL: {{^}}test_export_across_store_load:
; GCN: buffer_store
; GCN: buffer_load
; GCN: exp pos0
; GCN: exp param0
; GCN: exp param1
define amdgpu_kernel void @test_export_across_store_load(i32 %idx, float %v) #0 {
  %data0 = alloca <4 x float>, align 8, addrspace(5)
  %data1 = alloca <4 x float>, align 8, addrspace(5)
  %cmp = icmp eq i32 %idx, 1
  %data = select i1 %cmp, <4 x float> addrspace(5)* %data0, <4 x float> addrspace(5)* %data1
  %sptr = getelementptr inbounds <4 x float>, <4 x float> addrspace(5)* %data, i32 0, i32 0
  store float %v, float addrspace(5)* %sptr, align 8
  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float 1.0, i1 true, i1 false)
  %ptr0 = getelementptr inbounds <4 x float>, <4 x float> addrspace(5)* %data0, i32 0, i32 0
  %load0 = load float, float addrspace(5)* %ptr0, align 8
  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind inaccessiblememonly }
attributes #2 = { nounwind readnone }