; Test loop tuning.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s

; Test that strength reduction is applied to addresses with a scale factor,
; but that indexed addressing can still be used.
;
; The loop stores %a to dest[0] through dest[99].  The patterns below require
; that no sllg is emitted between the function label and the store (so the
; index is not re-scaled with a shift on every iteration) and that the store
; itself still uses a base+index address with a zero displacement.
define void @f1(i32 *%dest, i32 %a) {
; CHECK-LABEL: f1:
; CHECK-NOT: sllg
; CHECK: st %r3, 0({{%r[1-5],%r[1-5]}})
; CHECK: br %r14
entry:
  br label %loop

loop:
  ; %index counts i32 elements, so the address computation carries an
  ; implicit scale factor of 4.
  %index = phi i64 [ 0, %entry ], [ %next, %loop ]
  %ptr = getelementptr i32, i32 *%dest, i64 %index
  store i32 %a, i32 *%ptr
  %next = add i64 %index, 1
  ; Leave the loop once 100 elements have been written.
  %cmp = icmp ne i64 %next, 100
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}

; Test a loop that should be converted into dbr form and then use BRCT.
;
; The IR counts upwards from 0 and exits when the incremented counter reaches
; 100; the counter has no use other than the exit test.  The expected output
; instead loads 100 into a register before the loop and closes the loop with
; a single brct on that register.
define void @f2(i32 *%src, i32 *%dest) {
; CHECK-LABEL: f2:
; CHECK: lhi [[REG:%r[0-5]]], 100
; CHECK: [[LABEL:\.[^:]*]]:{{.*}} %loop
; CHECK: brct [[REG]], [[LABEL]]
; CHECK: br %r14
entry:
  br label %loop

loop:
  %count = phi i32 [ 0, %entry ], [ %next, %loop.next ]
  %next = add i32 %count, 1
  ; The volatile accesses give the loop body real work that must be kept.
  %val = load volatile i32 , i32 *%src
  %cmp = icmp eq i32 %val, 0
  br i1 %cmp, label %loop.next, label %loop.store

loop.store:
  ; Only reached when the loaded value is nonzero.
  %add = add i32 %val, 1
  store volatile i32 %add, i32 *%dest
  br label %loop.next

loop.next:
  ; Loop latch: 100 iterations in total.
  %cont = icmp ne i32 %next, 100
  br i1 %cont, label %loop, label %exit

exit:
  ret void
}

; Like f2, but for BRCTG.
;
; The counter and the data are i64 here, so the expected output loads the
; trip count with lghi and closes the loop with brctg.
define void @f3(i64 *%src, i64 *%dest) {
; CHECK-LABEL: f3:
; CHECK: lghi [[REG:%r[0-5]]], 100
; CHECK: [[LABEL:\.[^:]*]]:{{.*}} %loop
; CHECK: brctg [[REG]], [[LABEL]]
; CHECK: br %r14
entry:
  br label %loop

loop:
  ; Up-counting 64-bit induction variable, used only by the exit test.
  %count = phi i64 [ 0, %entry ], [ %next, %loop.next ]
  %next = add i64 %count, 1
  %val = load volatile i64 , i64 *%src
  %cmp = icmp eq i64 %val, 0
  br i1 %cmp, label %loop.next, label %loop.store

loop.store:
  ; Only reached when the loaded value is nonzero.
  %add = add i64 %val, 1
  store volatile i64 %add, i64 *%dest
  br label %loop.next

loop.next:
  ; Loop latch: 100 iterations in total.
  %cont = icmp ne i64 %next, 100
  br i1 %cont, label %loop, label %exit

exit:
  ret void
}

; Test a loop with a 64-bit decremented counter in which the 32-bit
; low part of the counter is used after the decrement.  This is an example
; of a subregister use being the only thing that blocks a conversion to BRCTG.
;
; The expected output therefore keeps an explicit aghi -1 on the counter,
; copies its low 32 bits into another register with lr for the value that is
; stored, and ends the loop with a separate conditional branch (jne).
define void @f4(i32 *%src, i32 *%dest, i64 *%dest2, i64 %count) {
; CHECK-LABEL: f4:
; CHECK: aghi [[REG:%r[0-5]]], -1
; CHECK: lr [[REG2:%r[0-5]]], [[REG]]
; CHECK: stg [[REG2]],
; CHECK: jne {{\..*}}
; CHECK: br %r14
entry:
  br label %loop

loop:
  ; %left is the number of iterations remaining; it starts at %count.
  %left = phi i64 [ %count, %entry ], [ %next, %loop.next ]
  store volatile i64 %left, i64 *%dest2
  %val = load volatile i32 , i32 *%src
  %cmp = icmp eq i32 %val, 0
  br i1 %cmp, label %loop.next, label %loop.store

loop.store:
  ; Only reached when the loaded value is nonzero.
  %add = add i32 %val, 1
  store volatile i32 %add, i32 *%dest
  br label %loop.next

loop.next:
  %next = add i64 %left, -1
  ; Build a 64-bit value with %val in the high half and the low 32 bits of
  ; the decremented counter in the low half.  This is the use of the low
  ; part of the counter after the decrement.
  %ext = zext i32 %val to i64
  %shl = shl i64 %ext, 32
  %and = and i64 %next, 4294967295
  %or = or i64 %shl, %and
  store volatile i64 %or, i64 *%dest2
  ; Continue until the counter reaches zero.
  %cont = icmp ne i64 %next, 0
  br i1 %cont, label %loop, label %exit

exit:
  ret void
}
