1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s
3
; foo: signed remainder (srem) of two <8 x i32> vectors.
; The expected x86-64 output below scalarizes the operation: each of the
; eight lanes is moved into %eax (dividend) and %ecx (divisor) via
; pshufd/movd, sign-extended with cltd, divided with idivl, and the remainder
; is taken from %edx. The lanes are then repacked with punpckldq/punpcklqdq.
; In the expected code %t occupies xmm0/xmm1, %u occupies xmm2/xmm3, and the
; result is left in xmm0/xmm1.
; The assertion lines are autogenerated (see the note at the top of the
; file); regenerate them with the script instead of editing by hand.
4define <8 x i32> @foo(<8 x i32> %t, <8 x i32> %u) {
5; CHECK-LABEL: foo:
6; CHECK:       # %bb.0:
7; CHECK-NEXT:    movdqa %xmm0, %xmm4
8; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
9; CHECK-NEXT:    movd %xmm0, %eax
10; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[3,3,3,3]
11; CHECK-NEXT:    movd %xmm0, %ecx
12; CHECK-NEXT:    cltd
13; CHECK-NEXT:    idivl %ecx
14; CHECK-NEXT:    movd %edx, %xmm0
15; CHECK-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[2,3,2,3]
16; CHECK-NEXT:    movd %xmm5, %eax
17; CHECK-NEXT:    pshufd {{.*#+}} xmm5 = xmm2[2,3,2,3]
18; CHECK-NEXT:    movd %xmm5, %ecx
19; CHECK-NEXT:    cltd
20; CHECK-NEXT:    idivl %ecx
21; CHECK-NEXT:    movd %edx, %xmm5
22; CHECK-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1]
23; CHECK-NEXT:    movd %xmm4, %eax
24; CHECK-NEXT:    movd %xmm2, %ecx
25; CHECK-NEXT:    cltd
26; CHECK-NEXT:    idivl %ecx
27; CHECK-NEXT:    movd %edx, %xmm0
28; CHECK-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,1,1]
29; CHECK-NEXT:    movd %xmm4, %eax
30; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,1,1]
31; CHECK-NEXT:    movd %xmm2, %ecx
32; CHECK-NEXT:    cltd
33; CHECK-NEXT:    idivl %ecx
34; CHECK-NEXT:    movd %edx, %xmm2
35; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
36; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0]
37; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3]
38; CHECK-NEXT:    movd %xmm2, %eax
39; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[3,3,3,3]
40; CHECK-NEXT:    movd %xmm2, %ecx
41; CHECK-NEXT:    cltd
42; CHECK-NEXT:    idivl %ecx
43; CHECK-NEXT:    movd %edx, %xmm2
44; CHECK-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[2,3,2,3]
45; CHECK-NEXT:    movd %xmm4, %eax
46; CHECK-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
47; CHECK-NEXT:    movd %xmm4, %ecx
48; CHECK-NEXT:    cltd
49; CHECK-NEXT:    idivl %ecx
50; CHECK-NEXT:    movd %edx, %xmm4
51; CHECK-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
52; CHECK-NEXT:    movd %xmm1, %eax
53; CHECK-NEXT:    movd %xmm3, %ecx
54; CHECK-NEXT:    cltd
55; CHECK-NEXT:    idivl %ecx
56; CHECK-NEXT:    movd %edx, %xmm2
57; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
58; CHECK-NEXT:    movd %xmm1, %eax
59; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
60; CHECK-NEXT:    movd %xmm1, %ecx
61; CHECK-NEXT:    cltd
62; CHECK-NEXT:    idivl %ecx
63; CHECK-NEXT:    movd %edx, %xmm1
64; CHECK-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
65; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
66; CHECK-NEXT:    movdqa %xmm2, %xmm1
67; CHECK-NEXT:    retq
68	%m = srem <8 x i32> %t, %u
69	ret <8 x i32> %m
70}
; bar: unsigned remainder (urem) of two <8 x i32> vectors.
; The expected x86-64 output below scalarizes the operation: each of the
; eight lanes is moved into %eax (dividend) and %ecx (divisor) via
; pshufd/movd, %edx is cleared with xorl before the unsigned divl, and the
; remainder is taken from %edx. The lanes are then repacked with
; punpckldq/punpcklqdq.
; In the expected code %t occupies xmm0/xmm1, %u occupies xmm2/xmm3, and the
; result is left in xmm0/xmm1.
; The assertion lines are autogenerated (see the note at the top of the
; file); regenerate them with the script instead of editing by hand.
71define <8 x i32> @bar(<8 x i32> %t, <8 x i32> %u) {
72; CHECK-LABEL: bar:
73; CHECK:       # %bb.0:
74; CHECK-NEXT:    movdqa %xmm0, %xmm4
75; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
76; CHECK-NEXT:    movd %xmm0, %eax
77; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[3,3,3,3]
78; CHECK-NEXT:    movd %xmm0, %ecx
79; CHECK-NEXT:    xorl %edx, %edx
80; CHECK-NEXT:    divl %ecx
81; CHECK-NEXT:    movd %edx, %xmm0
82; CHECK-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[2,3,2,3]
83; CHECK-NEXT:    movd %xmm5, %eax
84; CHECK-NEXT:    pshufd {{.*#+}} xmm5 = xmm2[2,3,2,3]
85; CHECK-NEXT:    movd %xmm5, %ecx
86; CHECK-NEXT:    xorl %edx, %edx
87; CHECK-NEXT:    divl %ecx
88; CHECK-NEXT:    movd %edx, %xmm5
89; CHECK-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1]
90; CHECK-NEXT:    movd %xmm4, %eax
91; CHECK-NEXT:    movd %xmm2, %ecx
92; CHECK-NEXT:    xorl %edx, %edx
93; CHECK-NEXT:    divl %ecx
94; CHECK-NEXT:    movd %edx, %xmm0
95; CHECK-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,1,1]
96; CHECK-NEXT:    movd %xmm4, %eax
97; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,1,1]
98; CHECK-NEXT:    movd %xmm2, %ecx
99; CHECK-NEXT:    xorl %edx, %edx
100; CHECK-NEXT:    divl %ecx
101; CHECK-NEXT:    movd %edx, %xmm2
102; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
103; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0]
104; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3]
105; CHECK-NEXT:    movd %xmm2, %eax
106; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[3,3,3,3]
107; CHECK-NEXT:    movd %xmm2, %ecx
108; CHECK-NEXT:    xorl %edx, %edx
109; CHECK-NEXT:    divl %ecx
110; CHECK-NEXT:    movd %edx, %xmm2
111; CHECK-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[2,3,2,3]
112; CHECK-NEXT:    movd %xmm4, %eax
113; CHECK-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
114; CHECK-NEXT:    movd %xmm4, %ecx
115; CHECK-NEXT:    xorl %edx, %edx
116; CHECK-NEXT:    divl %ecx
117; CHECK-NEXT:    movd %edx, %xmm4
118; CHECK-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
119; CHECK-NEXT:    movd %xmm1, %eax
120; CHECK-NEXT:    movd %xmm3, %ecx
121; CHECK-NEXT:    xorl %edx, %edx
122; CHECK-NEXT:    divl %ecx
123; CHECK-NEXT:    movd %edx, %xmm2
124; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
125; CHECK-NEXT:    movd %xmm1, %eax
126; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
127; CHECK-NEXT:    movd %xmm1, %ecx
128; CHECK-NEXT:    xorl %edx, %edx
129; CHECK-NEXT:    divl %ecx
130; CHECK-NEXT:    movd %edx, %xmm1
131; CHECK-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
132; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
133; CHECK-NEXT:    movdqa %xmm2, %xmm1
134; CHECK-NEXT:    retq
135	%m = urem <8 x i32> %t, %u
136	ret <8 x i32> %m
137}
; qux: floating-point remainder (frem) of two <8 x float> vectors.
; The expected x86-64 output below scalarizes the operation into eight calls
; to fmodf, one per lane. Because the lanes must survive those calls, the
; function reserves a 104-byte stack frame, spills all four incoming xmm
; registers, and reloads the relevant halves before each call. Each lane is
; placed in element 0 of xmm0/xmm1 with shufps/movhlps, and the scalar
; results are repacked with unpcklps/unpcklpd into xmm0 (low half) and xmm1
; (high half).
; The assertion lines are autogenerated (see the note at the top of the
; file); regenerate them with the script instead of editing by hand.
138define <8 x float> @qux(<8 x float> %t, <8 x float> %u) {
139; CHECK-LABEL: qux:
140; CHECK:       # %bb.0:
141; CHECK-NEXT:    subq $104, %rsp
142; CHECK-NEXT:    .cfi_def_cfa_offset 112
143; CHECK-NEXT:    movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
144; CHECK-NEXT:    movaps %xmm2, (%rsp) # 16-byte Spill
145; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
146; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
147; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
148; CHECK-NEXT:    movaps %xmm2, %xmm1
149; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,3],xmm2[3,3]
150; CHECK-NEXT:    callq fmodf
151; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
152; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
153; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
154; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
155; CHECK-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
156; CHECK-NEXT:    callq fmodf
157; CHECK-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
158; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
159; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
160; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
161; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
162; CHECK-NEXT:    callq fmodf
163; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
164; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
165; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
166; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
167; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
168; CHECK-NEXT:    callq fmodf
169; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
170; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
171; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
172; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
173; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
174; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
175; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
176; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
177; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
178; CHECK-NEXT:    callq fmodf
179; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
180; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
181; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
182; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
183; CHECK-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
184; CHECK-NEXT:    callq fmodf
185; CHECK-NEXT:    unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
186; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
187; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
188; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
189; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
190; CHECK-NEXT:    callq fmodf
191; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
192; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
193; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
194; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
195; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
196; CHECK-NEXT:    callq fmodf
197; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
198; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
199; CHECK-NEXT:    unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload
200; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
201; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
202; CHECK-NEXT:    addq $104, %rsp
203; CHECK-NEXT:    .cfi_def_cfa_offset 8
204; CHECK-NEXT:    retq
205	%m = frem <8 x float> %t, %u
206	ret <8 x float> %m
207}
208