1; RUN: llc -march=hexagon < %s | FileCheck %s
2; Testing for these 6 variants of circular load:
3;   Q6_circ_load_update_B(inputLR, pDelay, -1, nConvLength, 4);
4;   Q6_circ_load_update_D(inputLR, pDelay, -1, nConvLength, 4);
5;   Q6_circ_load_update_H(inputLR, pDelay, -1, nConvLength, 4);
6;   Q6_circ_load_update_UB(inputLR, pDelay, -1, nConvLength, 4);
7;   Q6_circ_load_update_UH(inputLR, pDelay, -1, nConvLength, 4);
8;   Q6_circ_load_update_W(inputLR, pDelay, -1, nConvLength, 4);
9; producing these:
10;   r0 = memb(r1++#-1:circ(m0))
11;   r3:2 = memd(r1++#-8:circ(m0))
12;   r0 = memh(r1++#-2:circ(m0))
13;   r0 = memub(r1++#-1:circ(m0))
14;   r0 = memuh(r1++#-2:circ(m0))
15;   r0 = memw(r1++#-4:circ(m0))
16
; Module-level target settings shared by every function in this file.
; The datalayout string starts with "e" (little-endian) and "p:32:32:32"
; (32-bit pointers with 32-bit ABI and preferred alignment); the triple
; selects the Hexagon target.
17target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
18target triple = "hexagon"
19
; foo1: exercises @llvm.hexagon.circ.ldb (byte circular load, expected to
; select a "memb" instruction) with an increment of -1.
;   %filtMemLen   - i16, zero-extended; halved and folded into the i32 word
;                   passed as the intrinsic's third operand.
;   %filtMemLR    - base of an i16 array; the address of the element at
;                   %filtMemIndex is the intrinsic's first operand.
;   %filtMemIndex - i16, sign-extended element index into %filtMemLR.
; Returns the i8 that is read back from the local slot %inputLR after the call.
20define signext i8 @foo1(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
21entry:
; One-byte stack slot: passed as the intrinsic's second operand and reloaded
; below to form the return value.
22  %inputLR = alloca i8, align 1
; Widen the length to i32 and halve it (logical shift right by one).
23  %conv = zext i16 %filtMemLen to i32
24  %shr1 = lshr i32 %conv, 1
; Address of filtMemLR[filtMemIndex], reinterpreted as a byte pointer.
25  %idxprom = sext i16 %filtMemIndex to i32
26  %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
27  %0 = bitcast i16* %arrayidx to i8*
; 33554432 == 0x02000000 (2 << 24), OR-ed on top of the halved length.
; NOTE(review): presumably a control field of the Hexagon circular-addressing
; modifier word - confirm against the Hexagon programmer's reference.
28  %or = or i32 %shr1, 33554432
; The FileCheck directive below expects a memb with post-increment #-1 and a
; circular modifier register m0 or m1; the -1 matches the call's last operand.
29; CHECK: = memb(r{{[0-9]*.}}++{{.}}#-1:circ(m{{[0-1]}}))
; The pointer returned by the intrinsic (%1) is not used.
30  %1 = call i8* @llvm.hexagon.circ.ldb(i8* %0, i8* %inputLR, i32 %or, i32 -1)
; Reload the slot handed to the intrinsic; tagged with TBAA node !0.
31  %2 = load i8, i8* %inputLR, align 1, !tbaa !0
32  ret i8 %2
33}
34
; Intrinsic under test in foo1: takes two i8* operands and two i32 operands,
; returns an i8*.
35declare i8* @llvm.hexagon.circ.ldb(i8*, i8*, i32, i32) nounwind
36
; foo2: exercises @llvm.hexagon.circ.ldd (doubleword circular load, expected
; to select a "memd" instruction) with an increment of -8.
;   %filtMemLen   - i16, zero-extended; halved, then scaled by 8 and folded
;                   into the i32 word passed as the intrinsic's third operand.
;   %filtMemLR    - base of an i16 array; the address of the element at
;                   %filtMemIndex is the intrinsic's first operand.
;   %filtMemIndex - i16, sign-extended element index into %filtMemLR.
; Returns the i64 that is read back from the local slot %inputLR after the call.
37define i64 @foo2(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
38entry:
; Eight-byte, 8-aligned stack slot that is reloaded after the call.
39  %inputLR = alloca i64, align 8
; Widen the length to i32 and halve it (logical shift right by one).
40  %conv = zext i16 %filtMemLen to i32
41  %shr1 = lshr i32 %conv, 1
; Address of filtMemLR[filtMemIndex], reinterpreted as a byte pointer.
42  %idxprom = sext i16 %filtMemIndex to i32
43  %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
44  %0 = bitcast i16* %arrayidx to i8*
; Byte-pointer view of the result slot, as required by the intrinsic signature.
45  %1 = bitcast i64* %inputLR to i8*
; Multiply the halved length by 8 (shift left by three).
46  %shl = shl nuw nsw i32 %shr1, 3
; 83886080 == 0x05000000 (5 << 24), OR-ed on top of the scaled length.
; NOTE(review): presumably a control field of the Hexagon circular-addressing
; modifier word - confirm against the Hexagon programmer's reference.
47  %or = or i32 %shl, 83886080
; The FileCheck directive below expects a memd with post-increment #-8 and a
; circular modifier register m0 or m1; the -8 matches the call's last operand.
48; CHECK: = memd(r{{[0-9]*.}}++{{.}}#-8:circ(m{{[0-1]}}))
; The pointer returned by the intrinsic (%2) is not used.
49  %2 = call i8* @llvm.hexagon.circ.ldd(i8* %0, i8* %1, i32 %or, i32 -8)
; Reload the slot as i64; note the load is tagged with TBAA node !0.
50  %3 = bitcast i8* %1 to i64*
51  %4 = load i64, i64* %3, align 8, !tbaa !0
52  ret i64 %4
53}
54
; Intrinsic under test in foo2: takes two i8* operands and two i32 operands,
; returns an i8*.
55declare i8* @llvm.hexagon.circ.ldd(i8*, i8*, i32, i32) nounwind
56
; foo3: exercises @llvm.hexagon.circ.ldh (halfword circular load, expected to
; select a "memh" instruction) with an increment of -2.
;   %filtMemLen   - i16, zero-extended; its low bit is cleared and the result
;                   is folded into the i32 word passed as the third operand.
;   %filtMemLR    - base of an i16 array; the address of the element at
;                   %filtMemIndex is the intrinsic's first operand.
;   %filtMemIndex - i16, sign-extended element index into %filtMemLR.
; Returns the i16 that is read back from the local slot %inputLR after the call.
57define signext i16 @foo3(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
58entry:
; Two-byte, 2-aligned stack slot that is reloaded after the call.
59  %inputLR = alloca i16, align 2
60  %conv = zext i16 %filtMemLen to i32
; 65534 == 0xFFFE: clears bit 0 of the widened length. Despite the name
; %shr1, this is a mask rather than a shift.
61  %shr1 = and i32 %conv, 65534
; Address of filtMemLR[filtMemIndex], reinterpreted as a byte pointer.
62  %idxprom = sext i16 %filtMemIndex to i32
63  %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
64  %0 = bitcast i16* %arrayidx to i8*
; Byte-pointer view of the result slot, as required by the intrinsic signature.
65  %1 = bitcast i16* %inputLR to i8*
; 50331648 == 0x03000000 (3 << 24), OR-ed on top of the masked length.
; NOTE(review): presumably a control field of the Hexagon circular-addressing
; modifier word - confirm against the Hexagon programmer's reference.
66  %or = or i32 %shr1, 50331648
; The FileCheck directive below expects a memh with post-increment #-2 and a
; circular modifier register m0 or m1; the -2 matches the call's last operand.
67; CHECK: = memh(r{{[0-9]*.}}++{{.}}#-2:circ(m{{[0-1]}}))
; The pointer returned by the intrinsic (%2) is not used.
68  %2 = call i8* @llvm.hexagon.circ.ldh(i8* %0, i8* %1, i32 %or, i32 -2)
; Reload the slot as i16; tagged with TBAA node !2.
69  %3 = bitcast i8* %1 to i16*
70  %4 = load i16, i16* %3, align 2, !tbaa !2
71  ret i16 %4
72}
73
; Intrinsic under test in foo3: takes two i8* operands and two i32 operands,
; returns an i8*.
74declare i8* @llvm.hexagon.circ.ldh(i8*, i8*, i32, i32) nounwind
75
; foo4: exercises @llvm.hexagon.circ.ldub (byte circular load, expected to
; select a "memub" instruction) with an increment of -1. Apart from the
; intrinsic called and the zeroext return attribute, the body mirrors foo1.
;   %filtMemLen   - i16, zero-extended; halved and folded into the i32 word
;                   passed as the intrinsic's third operand.
;   %filtMemLR    - base of an i16 array; the address of the element at
;                   %filtMemIndex is the intrinsic's first operand.
;   %filtMemIndex - i16, sign-extended element index into %filtMemLR.
; Returns the i8 that is read back from the local slot %inputLR after the call.
76define zeroext i8 @foo4(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
77entry:
; One-byte stack slot: passed as the intrinsic's second operand and reloaded
; below to form the return value.
78  %inputLR = alloca i8, align 1
; Widen the length to i32 and halve it (logical shift right by one).
79  %conv = zext i16 %filtMemLen to i32
80  %shr1 = lshr i32 %conv, 1
; Address of filtMemLR[filtMemIndex], reinterpreted as a byte pointer.
81  %idxprom = sext i16 %filtMemIndex to i32
82  %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
83  %0 = bitcast i16* %arrayidx to i8*
; 33554432 == 0x02000000 (2 << 24), OR-ed on top of the halved length.
; NOTE(review): presumably a control field of the Hexagon circular-addressing
; modifier word - confirm against the Hexagon programmer's reference.
84  %or = or i32 %shr1, 33554432
; The FileCheck directive below expects a memub with post-increment #-1 and a
; circular modifier register m0 or m1; the -1 matches the call's last operand.
85; CHECK: = memub(r{{[0-9]*.}}++{{.}}#-1:circ(m{{[0-1]}}))
; The pointer returned by the intrinsic (%1) is not used.
86  %1 = call i8* @llvm.hexagon.circ.ldub(i8* %0, i8* %inputLR, i32 %or, i32 -1)
; Reload the slot handed to the intrinsic; tagged with TBAA node !0.
87  %2 = load i8, i8* %inputLR, align 1, !tbaa !0
88  ret i8 %2
89}
90
; Intrinsic under test in foo4: takes two i8* operands and two i32 operands,
; returns an i8*.
91declare i8* @llvm.hexagon.circ.ldub(i8*, i8*, i32, i32) nounwind
92
; foo5: exercises @llvm.hexagon.circ.lduh (halfword circular load, expected to
; select a "memuh" instruction) with an increment of -2. Apart from the
; intrinsic called and the zeroext return attribute, the body mirrors foo3.
;   %filtMemLen   - i16, zero-extended; its low bit is cleared and the result
;                   is folded into the i32 word passed as the third operand.
;   %filtMemLR    - base of an i16 array; the address of the element at
;                   %filtMemIndex is the intrinsic's first operand.
;   %filtMemIndex - i16, sign-extended element index into %filtMemLR.
; Returns the i16 that is read back from the local slot %inputLR after the call.
93define zeroext i16 @foo5(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
94entry:
; Two-byte, 2-aligned stack slot that is reloaded after the call.
95  %inputLR = alloca i16, align 2
96  %conv = zext i16 %filtMemLen to i32
; 65534 == 0xFFFE: clears bit 0 of the widened length. Despite the name
; %shr1, this is a mask rather than a shift.
97  %shr1 = and i32 %conv, 65534
; Address of filtMemLR[filtMemIndex], reinterpreted as a byte pointer.
98  %idxprom = sext i16 %filtMemIndex to i32
99  %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
100  %0 = bitcast i16* %arrayidx to i8*
; Byte-pointer view of the result slot, as required by the intrinsic signature.
101  %1 = bitcast i16* %inputLR to i8*
; 50331648 == 0x03000000 (3 << 24), OR-ed on top of the masked length.
; NOTE(review): presumably a control field of the Hexagon circular-addressing
; modifier word - confirm against the Hexagon programmer's reference.
102  %or = or i32 %shr1, 50331648
; The FileCheck directive below expects a memuh with post-increment #-2 and a
; circular modifier register m0 or m1; the -2 matches the call's last operand.
103; CHECK: = memuh(r{{[0-9]*.}}++{{.}}#-2:circ(m{{[0-1]}}))
; The pointer returned by the intrinsic (%2) is not used.
104  %2 = call i8* @llvm.hexagon.circ.lduh(i8* %0, i8* %1, i32 %or, i32 -2)
; Reload the slot as i16; tagged with TBAA node !2.
105  %3 = bitcast i8* %1 to i16*
106  %4 = load i16, i16* %3, align 2, !tbaa !2
107  ret i16 %4
108}
109
; Intrinsic under test in foo5: takes two i8* operands and two i32 operands,
; returns an i8*.
110declare i8* @llvm.hexagon.circ.lduh(i8*, i8*, i32, i32) nounwind
111
; foo6: exercises @llvm.hexagon.circ.ldw (word circular load, expected to
; select a "memw" instruction) with an increment of -4.
;   %filtMemLen   - i16, zero-extended; halved, then scaled by 4 and folded
;                   into the i32 word passed as the intrinsic's third operand.
;   %filtMemLR    - base of an i16 array; the address of the element at
;                   %filtMemIndex is the intrinsic's first operand.
;   %filtMemIndex - i16, sign-extended element index into %filtMemLR.
; Returns the i32 that is read back from the local slot %inputLR after the call.
112define i32 @foo6(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
113entry:
; Four-byte, 4-aligned stack slot that is reloaded after the call.
114  %inputLR = alloca i32, align 4
; Widen the length to i32 and halve it (logical shift right by one).
115  %conv = zext i16 %filtMemLen to i32
116  %shr1 = lshr i32 %conv, 1
; Address of filtMemLR[filtMemIndex], reinterpreted as a byte pointer.
117  %idxprom = sext i16 %filtMemIndex to i32
118  %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
119  %0 = bitcast i16* %arrayidx to i8*
; Byte-pointer view of the result slot, as required by the intrinsic signature.
120  %1 = bitcast i32* %inputLR to i8*
; Multiply the halved length by 4 (shift left by two).
121  %shl = shl nuw nsw i32 %shr1, 2
; 67108864 == 0x04000000 (4 << 24), OR-ed on top of the scaled length.
; NOTE(review): presumably a control field of the Hexagon circular-addressing
; modifier word - confirm against the Hexagon programmer's reference.
122  %or = or i32 %shl, 67108864
; The FileCheck directive below expects a memw with post-increment #-4 and a
; circular modifier register m0 or m1; the -4 matches the call's last operand.
123; CHECK: = memw(r{{[0-9]*.}}++{{.}}#-4:circ(m{{[0-1]}}))
; The pointer returned by the intrinsic (%2) is not used.
124  %2 = call i8* @llvm.hexagon.circ.ldw(i8* %0, i8* %1, i32 %or, i32 -4)
; Reload the slot as i32; tagged with TBAA node !3.
125  %3 = bitcast i8* %1 to i32*
126  %4 = load i32, i32* %3, align 4, !tbaa !3
127  ret i32 %4
128}
129
; Intrinsic under test in foo6: takes two i8* operands and two i32 operands,
; returns an i8*.
130declare i8* @llvm.hexagon.circ.ldw(i8*, i8*, i32, i32) nounwind
131
; TBAA metadata nodes referenced by the loads in this file.
;   !1 - root node ("Simple C/C++ TBAA").
;   !0 - "omnipotent char", child of !1; used by the loads in foo1, foo2 and foo4.
;   !2 - "short", child of !0; used by the loads in foo3 and foo5.
;   !3 - "int", child of !0; used by the load in foo6.
132!0 = !{!"omnipotent char", !1}
133!1 = !{!"Simple C/C++ TBAA"}
134!2 = !{!"short", !0}
135!3 = !{!"int", !0}
136