1; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s 2; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s 3 4; GCN-LABEL: {{^}}vgpr: 5; GCN-DAG: v_mov_b32_e32 v1, v0 6; GCN-DAG: exp mrt0 v0, v0, v0, v0 done vm 7; GCN: s_waitcnt expcnt(0) 8; GCN: v_add_f32_e32 v0, 1.0, v1 9; GCN-NOT: s_endpgm 10define amdgpu_vs { float, float } @vgpr([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 { 11bb: 12 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0 13 %x = fadd float %arg3, 1.000000e+00 14 %a = insertvalue { float, float } undef, float %x, 0 15 %b = insertvalue { float, float } %a, float %arg3, 1 16 ret { float, float } %b 17} 18 19; GCN-LABEL: {{^}}vgpr_literal: 20; GCN: v_mov_b32_e32 v4, v0 21; GCN: exp mrt0 v4, v4, v4, v4 done vm 22 23; GCN-DAG: v_mov_b32_e32 v0, 1.0 24; GCN-DAG: v_mov_b32_e32 v1, 2.0 25; GCN-DAG: v_mov_b32_e32 v2, 4.0 26; GCN-DAG: v_mov_b32_e32 v3, -1.0 27; GCN: s_waitcnt expcnt(0) 28; GCN-NOT: s_endpgm 29define amdgpu_vs { float, float, float, float } @vgpr_literal([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 { 30bb: 31 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0 32 ret { float, float, float, float } { float 1.000000e+00, float 2.000000e+00, float 4.000000e+00, float -1.000000e+00 } 33} 34 35; GCN: .long 165580 36; GCN-NEXT: .long 562 37; GCN-NEXT: .long 165584 38; GCN-NEXT: .long 562 39; GCN-LABEL: {{^}}vgpr_ps_addr0: 40; GCN-NOT: v_mov_b32_e32 v0 41; GCN-NOT: v_mov_b32_e32 v1 42; GCN-NOT: v_mov_b32_e32 v2 43; GCN: v_mov_b32_e32 v3, v4 44; GCN: v_mov_b32_e32 v4, v6 45; GCN-NOT: s_endpgm 46define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr0([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 { 47bb: 48 %i0 = extractelement <2 x i32> %arg4, i32 0 49 %i1 = extractelement <2 x i32> %arg4, i32 1 50 %i2 = extractelement <2 x i32> %arg7, i32 0 51 %i3 = extractelement <2 x i32> %arg8, i32 0 52 %f0 = bitcast i32 %i0 to float 53 %f1 = bitcast i32 %i1 to float 54 %f2 = bitcast i32 %i2 to float 55 %f3 = bitcast i32 %i3 to float 56 %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0 57 %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1 58 %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2 59 %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3 60 %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4 61 ret { float, float, float, float, float } %r4 62} 63 64; GCN: .long 165580 65; GCN-NEXT: .long 1 66; GCN-NEXT: .long 165584 67; GCN-NEXT: .long 1 68; GCN-LABEL: {{^}}ps_input_ena_no_inputs: 69; GCN: v_mov_b32_e32 v0, 1.0 70; GCN-NOT: s_endpgm 71define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 { 72bb: 73 ret float 1.000000e+00 74} 75 76; GCN: .long 165580 77; GCN-NEXT: .long 2081 78; GCN-NEXT: .long 165584 79; GCN-NEXT: .long 2081 80; GCN-LABEL: {{^}}ps_input_ena_pos_w: 81; GCN-DAG: v_mov_b32_e32 v0, v4 82; GCN-DAG: v_mov_b32_e32 v1, v2 83; GCN: v_mov_b32_e32 v2, v3 84; GCN-NOT: s_endpgm 85define amdgpu_ps { float, <2 x float> } @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 { 86bb: 87 %f = bitcast <2 x i32> %arg8 to <2 x float> 88 %s = insertvalue { float, <2 x float> } undef, float %arg14, 0 89 %s1 = insertvalue { float, <2 x float> } %s, <2 x float> %f, 1 90 ret { float, <2 x float> } %s1 91} 92 93; GCN: .long 165580 94; GCN-NEXT: .long 562 95; GCN-NEXT: .long 165584 96; GCN-NEXT: .long 563 97; GCN-LABEL: {{^}}vgpr_ps_addr1: 98; GCN-DAG: v_mov_b32_e32 v0, v2 99; GCN-DAG: v_mov_b32_e32 v1, v3 100; GCN: v_mov_b32_e32 v2, v4 101; GCN-DAG: v_mov_b32_e32 v3, v6 102; GCN-DAG: v_mov_b32_e32 v4, v8 103; GCN-NOT: s_endpgm 104define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr1([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #2 { 105bb: 106 %i0 = extractelement <2 x i32> %arg4, i32 0 107 %i1 = extractelement <2 x i32> %arg4, i32 1 108 %i2 = extractelement <2 x i32> %arg7, i32 0 109 %i3 = extractelement <2 x i32> %arg8, i32 0 110 %f0 = bitcast i32 %i0 to float 111 %f1 = bitcast i32 %i1 to float 112 %f2 = bitcast i32 %i2 to float 113 %f3 = bitcast i32 %i3 to float 114 %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0 115 %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1 116 %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2 117 %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3 118 %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4 119 ret { float, float, float, float, float } %r4 120} 121 122; GCN: .long 165580 123; GCN-NEXT: .long 562 124; GCN-NEXT: .long 165584 125; GCN-NEXT: .long 631 126; GCN-LABEL: {{^}}vgpr_ps_addr119: 127; GCN-DAG: v_mov_b32_e32 v0, v2 128; GCN-DAG: v_mov_b32_e32 v1, v3 129; GCN-DAG: v_mov_b32_e32 v2, v6 130; GCN-DAG: v_mov_b32_e32 v3, v8 131; GCN-DAG: v_mov_b32_e32 v4, v12 132; GCN-NOT: s_endpgm 133define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr119([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #3 { 134bb: 135 %i0 = extractelement <2 x i32> %arg4, i32 0 136 %i1 = extractelement <2 x i32> %arg4, i32 1 137 %i2 = extractelement <2 x i32> %arg7, i32 0 138 %i3 = extractelement <2 x i32> %arg8, i32 0 139 %f0 = bitcast i32 %i0 to float 140 %f1 = bitcast i32 %i1 to float 141 %f2 = bitcast i32 %i2 to float 142 %f3 = bitcast i32 %i3 to float 143 %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0 144 %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1 145 %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2 146 %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3 147 %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4 148 ret { float, float, float, float, float } %r4 149} 150 151; GCN: .long 165580 152; GCN-NEXT: .long 562 153; GCN-NEXT: .long 165584 154; GCN-NEXT: .long 946 155; GCN-LABEL: {{^}}vgpr_ps_addr418: 156; GCN-NOT: v_mov_b32_e32 v0 157; GCN-NOT: v_mov_b32_e32 v1 158; GCN-NOT: v_mov_b32_e32 v2 159; GCN: v_mov_b32_e32 v3, v4 160; GCN: v_mov_b32_e32 v4, v8 161; GCN-NOT: s_endpgm 162define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr418([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #4 { 163bb: 164 %i0 = extractelement <2 x i32> %arg4, i32 0 165 %i1 = extractelement <2 x i32> %arg4, i32 1 166 %i2 = extractelement <2 x i32> %arg7, i32 0 167 %i3 = extractelement <2 x i32> %arg8, i32 0 168 %f0 = bitcast i32 %i0 to float 169 %f1 = bitcast i32 %i1 to float 170 %f2 = bitcast i32 %i2 to float 171 %f3 = bitcast i32 %i3 to float 172 %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0 173 %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1 174 %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2 175 %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3 176 %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4 177 ret { float, float, float, float, float } %r4 178} 179 180; GCN-LABEL: {{^}}sgpr: 181; GCN: s_mov_b32 s2, s3 182; GCN: s_add_i32 s0, s3, 2 183; GCN-NOT: s_endpgm 184define amdgpu_vs { i32, i32, i32 } @sgpr([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 { 185bb: 186 %x = add i32 %arg2, 2 187 %a = insertvalue { i32, i32, i32 } undef, i32 %x, 0 188 %b = insertvalue { i32, i32, i32 } %a, i32 %arg1, 1 189 %c = insertvalue { i32, i32, i32 } %a, i32 %arg2, 2 190 ret { i32, i32, i32 } %c 191} 192 193; GCN-LABEL: {{^}}sgpr_literal: 194; GCN: s_mov_b32 s0, 5 195; GCN-NOT: s_mov_b32 s0, s0 196; GCN-DAG: s_mov_b32 s1, 6 197; GCN-DAG: s_mov_b32 s2, 7 198; GCN-DAG: s_mov_b32 s3, 8 199; GCN-NOT: s_endpgm 200define amdgpu_vs { i32, i32, i32, i32 } @sgpr_literal([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 { 201bb: 202 %x = add i32 %arg2, 2 203 ret { i32, i32, i32, i32 } { i32 5, i32 6, i32 7, i32 8 } 204} 205 206; GCN-LABEL: {{^}}both: 207; GCN-DAG: exp mrt0 v0, v0, v0, v0 done vm 208; GCN-DAG: v_mov_b32_e32 v1, v0 209; GCN-DAG: s_mov_b32 s1, s2 210; GCN: s_waitcnt expcnt(0) 211; GCN: v_add_f32_e32 v0, 1.0, v1 212; GCN-DAG: s_add_i32 s0, s3, 2 213; GCN-DAG: s_mov_b32 s2, s3 214; GCN-NOT: s_endpgm 215define amdgpu_vs { float, i32, float, i32, i32 } @both([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 { 216bb: 217 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0 218 %v = fadd float %arg3, 1.000000e+00 219 %s = add i32 %arg2, 2 220 %a0 = insertvalue { float, i32, float, i32, i32 } undef, float %v, 0 221 %a1 = insertvalue { float, i32, float, i32, i32 } %a0, i32 %s, 1 222 %a2 = insertvalue { float, i32, float, i32, i32 } %a1, float %arg3, 2 223 %a3 = insertvalue { float, i32, float, i32, i32 } %a2, i32 %arg1, 3 224 %a4 = insertvalue { float, i32, float, i32, i32 } %a3, i32 %arg2, 4 225 ret { float, i32, float, i32, i32 } %a4 226} 227 228; GCN-LABEL: {{^}}structure_literal: 229; GCN: v_mov_b32_e32 v3, v0 230; GCN: exp mrt0 v3, v3, v3, v3 done vm 231 232; GCN-DAG: v_mov_b32_e32 v0, 1.0 233; GCN-DAG: s_mov_b32 s0, 2 234; GCN-DAG: s_mov_b32 s1, 3 235; GCN-DAG: v_mov_b32_e32 v1, 2.0 236; GCN-DAG: v_mov_b32_e32 v2, 4.0 237; GCN: s_waitcnt expcnt(0) 238define amdgpu_vs { { float, i32 }, { i32, <2 x float> } } @structure_literal([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 { 239bb: 240 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0 241 ret { { float, i32 }, { i32, <2 x float> } } { { float, i32 } { float 1.000000e+00, i32 2 }, { i32, <2 x float> } { i32 3, <2 x float> <float 2.000000e+00, float 4.000000e+00> } } 242} 243 244; GCN-LABEL: {{^}}ret_return_to_epilog_pseudo_size: 245; GCN: codeLenInByte = 0{{$}} 246define amdgpu_ps float @ret_return_to_epilog_pseudo_size() #0 { 247 ret float undef 248} 249 250declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 251 252attributes #0 = { nounwind } 253attributes #1 = { nounwind "InitialPSInputAddr"="0" } 254attributes #2 = { nounwind "InitialPSInputAddr"="1" } 255attributes #3 = { nounwind "InitialPSInputAddr"="119" } 256attributes #4 = { nounwind "InitialPSInputAddr"="418" } 257