; Check x86 lowering of the bswap intrinsics and of shift/or idioms that are
; (or only look like) a byte swap.

; RUN: llc < %s -march=x86 -mcpu=i686 | FileCheck %s
; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK64

; Byte-swap intrinsics called by @W, @X and @Y.
declare i16 @llvm.bswap.i16(i16)

declare i32 @llvm.bswap.i32(i32)

declare i64 @llvm.bswap.i64(i64)

; 16-bit byte swap through the intrinsic. Both targets are expected to emit a
; 16-bit rotate by 8 (rolw $8); on i686 the operand must be %ax.
define i16 @W(i16 %A) {
; CHECK-LABEL: W:
; CHECK: rolw $8, %ax

; CHECK64-LABEL: W:
; CHECK64: rolw $8, %
  %Z = call i16 @llvm.bswap.i16(i16 %A)
  ret i16 %Z
}

; 32-bit byte swap through the intrinsic. Expected to become a single bswapl;
; on i686 the operand must be %eax.
define i32 @X(i32 %A) {
; CHECK-LABEL: X:
; CHECK: bswapl %eax

; CHECK64-LABEL: X:
; CHECK64: bswapl %
  %Z = call i32 @llvm.bswap.i32(i32 %A)
  ret i32 %Z
}

; 64-bit byte swap through the intrinsic. On i686 the expected output has one
; bswapl for %eax and one for %edx; on x86-64 a single bswapq is expected.
define i64 @Y(i64 %A) {
; CHECK-LABEL: Y:
; CHECK: bswapl %eax
; CHECK: bswapl %edx

; CHECK64-LABEL: Y:
; CHECK64: bswapq %
  %Z = call i64 @llvm.bswap.i64(i64 %A)
  ret i64 %Z
}

; rdar://9164521
; Hand-written swap of the two low bytes of %a with a zero upper half:
;   ((a >> 8) & 255) | ((a << 8) & 65280)
; Expected lowering on both targets: bswapl, then a logical right shift by 16
; of the same register.
define i32 @test1(i32 %a) nounwind readnone {
entry:
; CHECK-LABEL: test1:
; CHECK: bswapl [[REG:%.*]]
; CHECK: shrl $16, [[REG]]

; CHECK64-LABEL: test1:
; CHECK64: bswapl [[REG:%.*]]
; CHECK64: shrl $16, [[REG]]
  %and = lshr i32 %a, 8
  %shr3 = and i32 %and, 255
  %and2 = shl i32 %a, 8
  %shl = and i32 %and2, 65280
  %or = or i32 %shr3, %shl
  ret i32 %or
}

; Same two-byte swap as a shift/or idiom, but the result is sign-extended from
; 16 bits (shl 16 followed by ashr 16). Expected lowering on both targets:
; bswapl, then an arithmetic right shift by 16 of the same register.
define i32 @test2(i32 %a) nounwind readnone {
entry:
; CHECK-LABEL: test2:
; CHECK: bswapl [[REG:%.*]]
; CHECK: sarl $16, [[REG]]

; CHECK64-LABEL: test2:
; CHECK64: bswapl [[REG:%.*]]
; CHECK64: sarl $16, [[REG]]
  %and = lshr i32 %a, 8
  %shr4 = and i32 %and, 255
  %and2 = shl i32 %a, 8
  %or = or i32 %shr4, %and2
  %sext = shl i32 %or, 16
  %conv3 = ashr exact i32 %sext, 16
  ret i32 %conv3
}

; Zero-initialised globals loaded by the zero-extension tests in this file.
@var8 = global i8 0
@var16 = global i16 0

; The "shl" below can move bits into the high parts of the value, so the
; operation is not a "bswap, shr" pair.

; rdar://problem/14814049
; The x86-64 negative pattern rejects both the 32-bit and the 64-bit bswap
; mnemonic. It is deliberately not a bare "bswap", which could match this
; function's own name.
define i64 @not_bswap() {
; CHECK-LABEL: not_bswap:
; CHECK-NOT: bswapl
; CHECK: ret

; CHECK64-LABEL: not_bswap:
; CHECK64-NOT: bswap{{[lq]}}
; CHECK64: ret
  %init = load i16, i16* @var16
  %big = zext i16 %init to i64

  %hishifted = lshr i64 %big, 8
  %loshifted = shl i64 %big, 8

  %notswapped = or i64 %hishifted, %loshifted

  ret i64 %notswapped
}

; This time, the lshr (and subsequent or) is completely useless. While it's
; technically correct to convert this into a "bswap, shr", it's suboptimal. A
; simple shl works better.
; The x86-64 negative pattern rejects both the 32-bit and the 64-bit bswap
; mnemonic. It is deliberately not a bare "bswap", which could match this
; function's own name.

define i64 @not_useful_bswap() {
; CHECK-LABEL: not_useful_bswap:
; CHECK-NOT: bswapl
; CHECK: ret

; CHECK64-LABEL: not_useful_bswap:
; CHECK64-NOT: bswap{{[lq]}}
; CHECK64: ret

  %init = load i8, i8* @var8
  %big = zext i8 %init to i64

  %hishifted = lshr i64 %big, 8
  %loshifted = shl i64 %big, 8

  %notswapped = or i64 %hishifted, %loshifted

  ret i64 %notswapped
}

; Finally, it *is* OK to just mask off the shl if we know that the value is zero
; beyond 16 bits anyway. This is a legitimate bswap.
; Here the low byte is masked with 255 before the shl, so nothing is shifted
; past bit 15. Expected lowering: a bswap followed by a logical right shift of
; the same register (bswapl/shrl $16 on i686, bswapq/shrq $48 on x86-64).

define i64 @finally_useful_bswap() {
; CHECK-LABEL: finally_useful_bswap:
; CHECK: bswapl [[REG:%.*]]
; CHECK: shrl $16, [[REG]]
; CHECK: ret

; CHECK64-LABEL: finally_useful_bswap:
; CHECK64: bswapq [[REG:%.*]]
; CHECK64: shrq $48, [[REG]]
; CHECK64: ret

  %init = load i16, i16* @var16
  %big = zext i16 %init to i64

  %hishifted = lshr i64 %big, 8
  %lomasked = and i64 %big, 255
  %loshifted = shl i64 %lomasked, 8

  %swapped = or i64 %hishifted, %loshifted

  ret i64 %swapped
}
