; RUN: llc -mtriple=i686-pc-win32 -mattr=+sse2 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=X86
; RUN: llc -mtriple=x86_64-pc-win32 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=X64

; vectorcall decorates symbol names as 'name@@N', where N is the total number
; of bytes of arguments, so the labels below differ between X86 and X64.

; Test integer arguments.

define x86_vectorcallcc i32 @test_int_1() {
; CHECK-LABEL: {{^}}test_int_1@@0:
; CHECK: xorl %eax, %eax
  ret i32 0
}

define x86_vectorcallcc i32 @test_int_2(i32 inreg %a) {
; X86-LABEL: {{^}}test_int_2@@4:
; X64-LABEL: {{^}}test_int_2@@8:
; CHECK: movl %ecx, %eax
  ret i32 %a
}

define x86_vectorcallcc i32 @test_int_3(i64 inreg %a) {
; X86-LABEL: {{^}}test_int_3@@8:
; X64-LABEL: {{^}}test_int_3@@8:
; X86: movl %ecx, %eax
; X64: movq %rcx, %rax
  %at = trunc i64 %a to i32
  ret i32 %at
}

define x86_vectorcallcc i32 @test_int_4(i32 inreg %a, i32 inreg %b) {
; X86-LABEL: {{^}}test_int_4@@8:
; X86: leal (%ecx,%edx), %eax
; X64-LABEL: {{^}}test_int_4@@16:
; X64: leal (%rcx,%rdx), %eax
  %s = add i32 %a, %b
  ret i32 %s
}

; The \01 prefix suppresses mangling, so the label gets no @@ decoration.
define x86_vectorcallcc i32 @"\01test_int_5"(i32, i32) {
; CHECK-LABEL: {{^}}test_int_5:
  ret i32 0
}

; Test floating-point arguments and return values.

define x86_vectorcallcc double @test_fp_1(double %a, double %b) {
; CHECK-LABEL: {{^}}test_fp_1@@16:
; CHECK: movaps %xmm1, %xmm0
  ret double %b
}

define x86_vectorcallcc double @test_fp_2(double, double, double, double, double, double, double %r) {
; CHECK-LABEL: {{^}}test_fp_2@@56:
; CHECK: movsd {{[0-9]+\(%[re]sp\)}}, %xmm0
  ret double %r
}

define x86_vectorcallcc {double, double, double, double} @test_fp_3() {
; CHECK-LABEL: {{^}}test_fp_3@@0:
; CHECK: xorps %xmm0
; CHECK: xorps %xmm1
; CHECK: xorps %xmm2
; CHECK: xorps %xmm3
  ret {double, double, double, double}
      { double 0.0, double 0.0, double 0.0, double 0.0 }
}

; FIXME: Returning via x87 isn't compatible, but it's hard to structure the
; tablegen any other way.
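; A five-element HFA does not fit in the four XMM return registers; the extra
; element is returned on the x87 stack (the fldz below), which is the
; incompatibility the FIXME above refers to.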
define x86_vectorcallcc {double, double, double, double, double} @test_fp_4() {
; CHECK-LABEL: {{^}}test_fp_4@@0:
; CHECK: fldz
; CHECK: xorps %xmm0
; CHECK: xorps %xmm1
; CHECK: xorps %xmm2
; CHECK: xorps %xmm3
  ret {double, double, double, double, double}
      { double 0.0, double 0.0, double 0.0, double 0.0, double 0.0 }
}

; Test vector arguments.

define x86_vectorcallcc <16 x i8> @test_vec_1(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: {{^}}test_vec_1@@32:
; CHECK: movaps %xmm1, %xmm0
  ret <16 x i8> %b
}

define x86_vectorcallcc <16 x i8> @test_vec_2(double, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> %r) {
; CHECK-LABEL: {{^}}test_vec_2@@104:
; X64: movq {{[0-9]*}}(%rsp), %rax
; CHECK: movaps (%{{rax|ecx}}), %xmm0
  ret <16 x i8> %r
}

; Test mixed scalar and homogeneous vector aggregate (HVA) arguments.

%struct.HVA5 = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float> }
%struct.HVA4 = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
%struct.HVA3 = type { <4 x float>, <4 x float>, <4 x float> }
%struct.HVA2 = type { <4 x float>, <4 x float> }

define x86_vectorcallcc <4 x float> @test_mixed_1(i32 %a, %struct.HVA4 inreg %bb, i32 %c) {
; CHECK-LABEL: test_mixed_1
; CHECK: movaps %xmm1, 16(%{{(e|r)}}sp)
; CHECK: movaps %xmm1, %xmm0
; CHECK: ret{{q|l}}
entry:
  %b = alloca %struct.HVA4, align 16
  store %struct.HVA4 %bb, %struct.HVA4* %b, align 16
  %w1 = getelementptr inbounds %struct.HVA4, %struct.HVA4* %b, i32 0, i32 1
  %0 = load <4 x float>, <4 x float>* %w1, align 16
  ret <4 x float> %0
}

define x86_vectorcallcc <4 x float> @test_mixed_2(%struct.HVA4 inreg %a, %struct.HVA4* %b, <4 x float> %c) {
; CHECK-LABEL: test_mixed_2
; X86: movaps %xmm0, (%esp)
; X64: movaps %xmm2, %xmm0
; CHECK: ret{{[ql]}}
entry:
  %c.addr = alloca <4 x float>, align 16
  store <4 x float> %c, <4 x float>* %c.addr, align 16
  %0 = load <4 x float>, <4 x float>* %c.addr, align 16
  ret <4 x float> %0
}

define x86_vectorcallcc <4 x float> @test_mixed_3(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* %f) {
; CHECK-LABEL: test_mixed_3
; CHECK: movaps (%{{[re][ac]}}x), %xmm0
; CHECK: ret{{[ql]}}
entry:
  %x = getelementptr inbounds %struct.HVA2, %struct.HVA2* %f, i32 0, i32 0
  %0 = load <4 x float>, <4 x float>* %x, align 16
  ret <4 x float> %0
}

define x86_vectorcallcc <4 x float> @test_mixed_4(%struct.HVA4 inreg %a, %struct.HVA2* %bb, <4 x float> %c) {
; CHECK-LABEL: test_mixed_4
; X86: movaps 16(%eax), %xmm0
; X64: movaps 16(%rdx), %xmm0
; CHECK: ret{{[ql]}}
entry:
  %y4 = getelementptr inbounds %struct.HVA2, %struct.HVA2* %bb, i32 0, i32 1
  %0 = load <4 x float>, <4 x float>* %y4, align 16
  ret <4 x float> %0
}

define x86_vectorcallcc <4 x float> @test_mixed_5(%struct.HVA3 inreg %a, %struct.HVA3* %b, <4 x float> %c, %struct.HVA2 inreg %dd) {
; CHECK-LABEL: test_mixed_5
; CHECK-DAG: movaps %xmm{{[05]}}, 16(%{{(e|r)}}sp)
; CHECK-DAG: movaps %xmm5, %xmm0
; CHECK: ret{{[ql]}}
entry:
  %d = alloca %struct.HVA2, align 16
  store %struct.HVA2 %dd, %struct.HVA2* %d, align 16
  %y5 = getelementptr inbounds %struct.HVA2, %struct.HVA2* %d, i32 0, i32 1
  %0 = load <4 x float>, <4 x float>* %y5, align 16
  ret <4 x float> %0
}

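; An HVA return value is passed back directly in XMM0-XMM3.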
define x86_vectorcallcc %struct.HVA4 @test_mixed_6(%struct.HVA4 inreg %a, %struct.HVA4* %b) {
; CHECK-LABEL: test_mixed_6
; CHECK: movaps (%{{[re]}}sp), %xmm0
; CHECK: movaps 16(%{{[re]}}sp), %xmm1
; CHECK: movaps 32(%{{[re]}}sp), %xmm2
; CHECK: movaps 48(%{{[re]}}sp), %xmm3
; CHECK: ret{{[ql]}}
entry:
  %retval = alloca %struct.HVA4, align 16
  %0 = bitcast %struct.HVA4* %retval to i8*
  %1 = bitcast %struct.HVA4* %b to i8*
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %0, i8* align 16 %1, i32 64, i1 false)
  %2 = load %struct.HVA4, %struct.HVA4* %retval, align 16
  ret %struct.HVA4 %2
}

declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1)
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1)

; HVA5 is too large to return in registers, so it is returned indirectly
; through the sret pointer.
define x86_vectorcallcc void @test_mixed_7(%struct.HVA5* noalias sret(%struct.HVA5) %agg.result) {
; X86-LABEL: test_mixed_7@@4
; X64-LABEL: test_mixed_7@@8
; X64: mov{{[ql]}} %rcx, %rax
; CHECK: movaps %xmm{{[0-9]}}, 64(%{{rcx|eax}})
; CHECK: movaps %xmm{{[0-9]}}, 48(%{{rcx|eax}})
; CHECK: movaps %xmm{{[0-9]}}, 32(%{{rcx|eax}})
; CHECK: movaps %xmm{{[0-9]}}, 16(%{{rcx|eax}})
; CHECK: movaps %xmm{{[0-9]}}, (%{{rcx|eax}})
; CHECK: ret{{[ql]}}
entry:
  %a = alloca %struct.HVA5, align 16
  %0 = bitcast %struct.HVA5* %a to i8*
  call void @llvm.memset.p0i8.i64(i8* align 16 %0, i8 0, i64 80, i1 false)
  %1 = bitcast %struct.HVA5* %agg.result to i8*
  %2 = bitcast %struct.HVA5* %a to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 %2, i64 80, i1 false)
  ret void
}

define x86_vectorcallcc <4 x float> @test_mixed_8(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, i32 %e, <4 x float> %f) {
; CHECK-LABEL: test_mixed_8
; X86: movaps %xmm4, %xmm0
; X64: movaps %xmm5, %xmm0
; CHECK: ret{{[ql]}}
entry:
  %f.addr = alloca <4 x float>, align 16
  store <4 x float> %f, <4 x float>* %f.addr, align 16
  %0 = load <4 x float>, <4 x float>* %f.addr, align 16
  ret <4 x float> %0
}

%struct.HFA4 = type { double, double, double, double }
declare x86_vectorcallcc double @test_mixed_9_callee(%struct.HFA4 %x, double %y)

define x86_vectorcallcc double @test_mixed_9_caller(%struct.HFA4 inreg %b) {
; CHECK-LABEL: test_mixed_9_caller
; CHECK: movaps %xmm3, %xmm4
; CHECK: movaps %xmm2, %xmm3
; CHECK: movaps %xmm1, %xmm2
; X86: movap{{d|s}} %xmm0, %xmm1
; X64: movap{{d|s}} %xmm5, %xmm1
; CHECK: call{{l|q}} test_mixed_9_callee@@40
; CHECK: addsd {{.*}}, %xmm0
; CHECK: ret{{l|q}}
entry:
  %call = call x86_vectorcallcc double @test_mixed_9_callee(%struct.HFA4 inreg %b, double 3.000000e+00)
  %add = fadd double 1.000000e+00, %call
  ret double %add
}