1; RUN: llc -fixup-byte-word-insts=1 -march=x86-64 < %s | \
2; RUN: FileCheck -check-prefix CHECK -check-prefix BWON %s
3; RUN: llc -fixup-byte-word-insts=0 -march=x86-64 < %s | \
4; RUN: FileCheck -check-prefix CHECK -check-prefix BWOFF %s
5
6target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
7target triple = "x86_64-apple-macosx10.8.0"
8
; %struct.A is eight i8 fields.  @foo1 below reads fields 0 and 1 of its %q
; argument and writes fields 0 and 1 of successive elements starting at %p.
9%struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 }
10
11; This has byte loads interspersed with byte stores, in a single
12; basic-block loop.  The upper portion should be dead, so with the fixup
13; enabled (BWON) the movb loads should become movzbl; BWOFF keeps movb.
14; CHECK-LABEL: foo1
15; load:
16; BWON:  movzbl
17; BWOFF: movb
18; store:
19; CHECK: movb
20; load:
21; BWON: movzbl
22; BWOFF: movb
23; store:
24; CHECK: movb
25; CHECK: ret
26define void @foo1(i32 %count,
27                  %struct.A* noalias nocapture %q,
28                  %struct.A* noalias nocapture %p)
29                    nounwind uwtable noinline ssp {
  ; Byte-copy loop: for each of %count iterations, load fields 0 and 1 of
  ; *%q and store them into fields 0 and 1 of the current %struct.A element,
  ; starting at %p and advancing one element per iteration.
  ; Guard: skip the loop entirely unless %count > 0 (signed compare).
30  %1 = icmp sgt i32 %count, 0
31  br i1 %1, label %.lr.ph, label %._crit_edge
32
33.lr.ph:                                           ; preds = %0
  ; Addresses of the two source bytes (fields 0 and 1 of *%q), computed once
  ; before the loop.
34  %2 = getelementptr inbounds %struct.A, %struct.A* %q, i64 0, i32 0
35  %3 = getelementptr inbounds %struct.A, %struct.A* %q, i64 0, i32 1
36  br label %a4
37
38a4:                                       ; preds = %a4, %.lr.ph
  ; %i.02 is the iteration counter; %.01 is the current destination element.
39  %i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ]
40  %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %a10, %a4 ]
  ; First byte load, immediately followed by its store into field 0.
41  %a5 = load i8, i8* %2, align 1
42  %a7 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 0
43  store i8 %a5, i8* %a7, align 1
  ; Second byte load, issued after the first store, then stored into field 1.
44  %a8 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 1
45  %a6 = load i8, i8* %3, align 1
46  store i8 %a6, i8* %a8, align 1
  ; Bump the counter, step the destination by one %struct.A, and leave the
  ; loop once the counter equals %count.
47  %a9 = add nsw i32 %i.02, 1
48  %a10 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 1
49  %exitcond = icmp eq i32 %a9, %count
50  br i1 %exitcond, label %._crit_edge, label %a4
51
52._crit_edge:                                      ; preds = %a4, %0
53  ret void
54}
55
56%struct.B = type { i16, i16, i16, i16, i16, i16, i16, i16 } ; eight i16 fields; @foo2 uses fields 0 and 1
57
58; This has word loads interspersed with word stores.
59; The upper portion should be dead, so with the fixup enabled (BWON) the
60; movw loads should become movzwl; with it disabled (BWOFF) they stay movw.
61; CHECK-LABEL: foo2
62; load:
63; BWON:  movzwl
64; BWOFF: movw
65; store:
66; CHECK: movw
67; load:
68; BWON:  movzwl
69; BWOFF: movw
70; store:
71; CHECK: movw
72; CHECK: ret
73define void @foo2(i32 %count,
74                  %struct.B* noalias nocapture %q,
75                  %struct.B* noalias nocapture %p)
76                    nounwind uwtable noinline ssp {
  ; Word-copy loop: for each of %count iterations, load fields 0 and 1 of
  ; *%q (i16 each) and store them into fields 0 and 1 of the current
  ; %struct.B element, starting at %p and advancing one element per iteration.
  ; Guard: skip the loop entirely unless %count > 0 (signed compare).
77  %1 = icmp sgt i32 %count, 0
78  br i1 %1, label %.lr.ph, label %._crit_edge
79
80.lr.ph:                                           ; preds = %0
  ; Addresses of the two source words (fields 0 and 1 of *%q), computed once
  ; before the loop.
81  %2 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 0
82  %3 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 1
83  br label %a4
84
85a4:                                       ; preds = %a4, %.lr.ph
  ; %i.02 is the iteration counter; %.01 is the current destination element.
86  %i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ]
87  %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %a10, %a4 ]
  ; First word load, immediately followed by its store into field 0.
88  %a5 = load i16, i16* %2, align 2
89  %a7 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 0
90  store i16 %a5, i16* %a7, align 2
  ; Second word load, issued after the first store, then stored into field 1.
91  %a8 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 1
92  %a6 = load i16, i16* %3, align 2
93  store i16 %a6, i16* %a8, align 2
  ; Bump the counter, step the destination by one %struct.B, and leave the
  ; loop once the counter equals %count.
94  %a9 = add nsw i32 %i.02, 1
95  %a10 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 1
96  %exitcond = icmp eq i32 %a9, %count
97  br i1 %exitcond, label %._crit_edge, label %a4
98
99._crit_edge:                                      ; preds = %a4, %0
100  ret void
101}
102
103; This test contains nothing but a simple byte load and store.  Since
104; movb encodes smaller, we do not want to use movzbl unless in a tight loop.
105; So this test checks that movb is used.
106; CHECK-LABEL: foo3:
107; CHECK: movb
108; CHECK: movb
109define void @foo3(i8 *%dst, i8 *%src) {
  ; Straight-line copy of one byte: a single i8 load from %src followed by a
  ; single i8 store to %dst, with no loop around it.
110  %t0 = load i8, i8 *%src, align 1
111  store i8 %t0, i8 *%dst, align 1
112  ret void
113}
114
115; This test contains nothing but a simple word load and store.  Since
116; movw and movzwl are the same size, the fixup (BWON) should always choose
117; movzwl for the load; with the fixup disabled (BWOFF) the load stays movw.
118; CHECK-LABEL: foo4:
119; BWON:  movzwl
120; BWOFF: movw
121; CHECK: movw
122define void @foo4(i16 *%dst, i16 *%src) {
  ; Straight-line copy of one 16-bit word: a single i16 load from %src
  ; followed by a single i16 store to %dst, with no loop around it.
123  %t0 = load i16, i16 *%src, align 2
124  store i16 %t0, i16 *%dst, align 2
125  ret void
126}
127