; RUN: llc -march=hexagon -hexagon-hvx-widen=32 < %s | FileCheck %s

; If the "rx = #N, vsetq(rx)" instructions get reordered with the rest, update the test.
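;
; The expected lowering widens the short source vector to a full HVX
; register, compacts the truncated elements in-register (vdeal/vpack),
; and stores the narrow result through a vsetq-predicated vmem so that
; only the live bytes are written.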

; v32i16 -> v32i8
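; The 64-byte source is read with a full-vector load; vdeal(.b)
; deinterleaves the bytes so the low byte of each halfword lands in the
; low half of the register, and the store is predicated on vsetq(#32) to
; write only the 32 result bytes.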
; CHECK-LABEL: f0:
; CHECK: r[[R0:[0-9]+]] = #32
; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0)
; CHECK: v[[V1:[0-9]+]].b = vdeal(v[[V0]].b)
; CHECK: q[[Q0:[0-3]]] = vsetq(r[[R0]])
; CHECK: if (q[[Q0]]) vmem(r1+#0) = v[[V1]]
define void @f0(<32 x i16>* %a0, <32 x i8>* %a1) #0 {
  %v0 = load <32 x i16>, <32 x i16>* %a0, align 128
  %v1 = trunc <32 x i16> %v0 to <32 x i8>
  store <32 x i8> %v1, <32 x i8>* %a1, align 128
  ret void
}

; v32i32 -> v32i8
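; A single vdeale suffices here: it collects the low byte of each 32-bit
; word, and vsetq(#32) again masks the store down to the 32 result bytes.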
; CHECK-LABEL: f1:
; CHECK: r[[R0:[0-9]+]] = #32
; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0)
; CHECK: v[[V1:[0-9]+]].b = vdeale({{.*}},v[[V0]].b)
; CHECK: q[[Q0:[0-3]]] = vsetq(r[[R0]])
; CHECK: if (q[[Q0]]) vmem(r1+#0) = v[[V1]]
define void @f1(<32 x i32>* %a0, <32 x i8>* %a1) #0 {
  %v0 = load <32 x i32>, <32 x i32>* %a0, align 128
  %v1 = trunc <32 x i32> %v0 to <32 x i8>
  store <32 x i8> %v1, <32 x i8>* %a1, align 128
  ret void
}

; v64i16 -> v64i8
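; Same shuffle as in f0, but the i16 source already fills a whole vector,
; so the mask covers the 64 truncated bytes: vsetq(#64).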
; CHECK-LABEL: f2:
; CHECK: r[[R0:[0-9]+]] = #64
; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0)
; CHECK: v[[V1:[0-9]+]].b = vdeal(v[[V0]].b)
; CHECK: q[[Q0:[0-3]]] = vsetq(r[[R0]])
; CHECK: if (q[[Q0]]) vmem(r1+#0) = v[[V1]]
define void @f2(<64 x i16>* %a0, <64 x i8>* %a1) #0 {
  %v0 = load <64 x i16>, <64 x i16>* %a0, align 128
  %v1 = trunc <64 x i16> %v0 to <64 x i8>
  store <64 x i8> %v1, <64 x i8>* %a1, align 128
  ret void
}

; v64i32 -> v64i8
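; The i32 source spans two vectors. vpacke(.w) keeps the low halfword of
; each word, vpacke(.h) then keeps the low byte of each of those, and the
; 64 result bytes are stored under the vsetq predicate.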
; CHECK-LABEL: f3:
; CHECK-DAG: v[[V0:[0-9]+]] = vmem(r0+#0)
; CHECK-DAG: v[[V1:[0-9]+]] = vmem(r0+#1)
; CHECK-DAG: q[[Q0:[0-3]]] = vsetq
; CHECK: v[[V2:[0-9]+]].h = vpacke(v[[V1]].w,v[[V0]].w)
; CHECK: v[[V3:[0-9]+]].b = vpacke({{.*}},v[[V2]].h)
; CHECK: if (q[[Q0]]) vmem(r1+#0) = v[[V3]]
define void @f3(<64 x i32>* %a0, <64 x i8>* %a1) #0 {
  %v0 = load <64 x i32>, <64 x i32>* %a0, align 128
  %v1 = trunc <64 x i32> %v0 to <64 x i8>
  store <64 x i8> %v1, <64 x i8>* %a1, align 128
  ret void
}

; v16i32 -> v16i16
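; Truncation to i16 uses the halfword variant: vdeal(.h) moves the low
; halfword of each word into the low half of the register, and vsetq(#32)
; masks the store to the 32 result bytes.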
; CHECK-LABEL: f4:
; CHECK: r[[R0:[0-9]+]] = #32
; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0)
; CHECK: v[[V1:[0-9]+]].h = vdeal(v[[V0]].h)
; CHECK: q[[Q0:[0-3]]] = vsetq(r[[R0]])
; CHECK: if (q[[Q0]]) vmem(r1+#0) = v[[V1]]
define void @f4(<16 x i32>* %a0, <16 x i16>* %a1) #0 {
  %v0 = load <16 x i32>, <16 x i32>* %a0, align 128
  %v1 = trunc <16 x i32> %v0 to <16 x i16>
  store <16 x i16> %v1, <16 x i16>* %a1, align 128
  ret void
}

; v32i32 -> v32i16
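; As in f4, but with a full-vector source the result is 64 bytes, so the
; store mask is vsetq(#64).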
; CHECK-LABEL: f5:
; CHECK: r[[R0:[0-9]+]] = #64
; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0)
; CHECK: v[[V1:[0-9]+]].h = vdeal(v[[V0]].h)
; CHECK: q[[Q0:[0-3]]] = vsetq(r[[R0]])
; CHECK: if (q[[Q0]]) vmem(r1+#0) = v[[V1]]
define void @f5(<32 x i32>* %a0, <32 x i16>* %a1) #0 {
  %v0 = load <32 x i32>, <32 x i32>* %a0, align 128
  %v1 = trunc <32 x i32> %v0 to <32 x i16>
  store <32 x i16> %v1, <32 x i16>* %a1, align 128
  ret void
}

; v8i32 -> v8i8
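; The 8-byte result is too narrow for a masked vector store to the
; destination: the truncated vector is staged through a stack slot with a
; plain vmem, and the first 8 bytes are copied out with two scalar word
; loads and a memd store.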
; CHECK-LABEL: f6:
; CHECK:     v[[V0:[0-9]+]] = vmem(r0+#0)
; CHECK:     v[[V1:[0-9]+]].b = vdeale({{.*}},v[[V0]].b)
; CHECK:     vmem(r[[R0:[0-9]+]]+#0) = v[[V1]]
; CHECK-DAG: r[[R1:[0-9]+]] = memw(r[[R0]]+#0)
; CHECK-DAG: r[[R2:[0-9]+]] = memw(r[[R0]]+#4)
; CHECK:     memd(r1+#0) = r[[R2]]:[[R1]]
define void @f6(<8 x i32>* %a0, <8 x i8>* %a1) #0 {
  %v0 = load <8 x i32>, <8 x i32>* %a0, align 128
  %v1 = trunc <8 x i32> %v0 to <8 x i8>
  store <8 x i8> %v1, <8 x i8>* %a1, align 128
  ret void
}

attributes #0 = { "target-cpu"="hexagonv65" "target-features"="+hvx,+hvx-length128b,-packets" }