; RUN: llc -march=hexagon -hexagon-hvx-widen=32 < %s | FileCheck %s

; v32i8 -> v32i16
; CHECK-LABEL: f0:
; CHECK: r[[R0:[0-9]+]] = #64
; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0)
; CHECK: v[[V1:[0-9]+]]:[[V2:[0-9]+]].h = vunpack(v[[V0]].b)
; CHECK: q[[Q0:[0-3]]] = vsetq(r[[R0]])
; CHECK: if (q[[Q0]]) vmem(r1+#0) = v[[V2]]
; Sign-extend a 32 x i8 load to 32 x i16 (result is a half-HVX-vector store,
; hence the vsetq-predicated store checked above).
define void @f0(<32 x i8>* %a0, <32 x i16>* %a1) #0 {
  %v0 = load <32 x i8>, <32 x i8>* %a0, align 128
  %v1 = sext <32 x i8> %v0 to <32 x i16>
  store <32 x i16> %v1, <32 x i16>* %a1, align 128
  ret void
}

; v32i8 -> v32i32
; CHECK-LABEL: f1:
; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0)
; CHECK: v[[V1:[0-9]+]]:[[V2:[0-9]+]].h = vunpack(v[[V0]].b)
; CHECK: v[[V3:[0-9]+]]:[[V4:[0-9]+]].w = vunpack(v[[V2]].h)
; CHECK: vmem(r1+#0) = v[[V4]]
; Sign-extend a 32 x i8 load to 32 x i32 (two vunpack stages: b->h, then h->w).
define void @f1(<32 x i8>* %a0, <32 x i32>* %a1) #0 {
  %v0 = load <32 x i8>, <32 x i8>* %a0, align 128
  %v1 = sext <32 x i8> %v0 to <32 x i32>
  store <32 x i32> %v1, <32 x i32>* %a1, align 128
  ret void
}

; v64i8 -> v64i16
; CHECK-LABEL: f2:
; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0)
; CHECK: v[[V1:[0-9]+]]:[[V2:[0-9]+]].h = vunpack(v[[V0]].b)
; CHECK: vmem(r1+#0) = v[[V2]]
; Sign-extend a 64 x i8 load to 64 x i16 (result fills one full HVX vector).
define void @f2(<64 x i8>* %a0, <64 x i16>* %a1) #0 {
  %v0 = load <64 x i8>, <64 x i8>* %a0, align 128
  %v1 = sext <64 x i8> %v0 to <64 x i16>
  store <64 x i16> %v1, <64 x i16>* %a1, align 128
  ret void
}

; v64i8 -> v64i32
; CHECK-LABEL: f3:
; CHECK:     v[[V0:[0-9]+]] = vmem(r0+#0)
; CHECK:     v[[V1:[0-9]+]]:[[V2:[0-9]+]].h = vunpack(v[[V0]].b)
; CHECK:     v[[V3:[0-9]+]]:[[V4:[0-9]+]].w = vunpack(v[[V2]].h)
; CHECK-DAG: vmem(r1+#0) = v[[V4]]
; CHECK-DAG: vmem(r1+#1) = v[[V3]]
; Sign-extend a 64 x i8 load to 64 x i32 (result spans two HVX vector stores).
define void @f3(<64 x i8>* %a0, <64 x i32>* %a1) #0 {
  %v0 = load <64 x i8>, <64 x i8>* %a0, align 128
  %v1 = sext <64 x i8> %v0 to <64 x i32>
  store <64 x i32> %v1, <64 x i32>* %a1, align 128
  ret void
}

; v16i16 -> v16i32
; CHECK-LABEL: f4:
; CHECK: r[[R0:[0-9]+]] = #64
; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0)
; CHECK: v[[V1:[0-9]+]]:[[V2:[0-9]+]].w = vunpack(v[[V0]].h)
; CHECK: q[[Q0:[0-3]]] = vsetq(r[[R0]])
; CHECK: if (q[[Q0]]) vmem(r1+#0) = v[[V2]]
; Sign-extend a 16 x i16 load to 16 x i32 (half-vector result, predicated store).
define void @f4(<16 x i16>* %a0, <16 x i32>* %a1) #0 {
  %v0 = load <16 x i16>, <16 x i16>* %a0, align 128
  %v1 = sext <16 x i16> %v0 to <16 x i32>
  store <16 x i32> %v1, <16 x i32>* %a1, align 128
  ret void
}

; v32i16 -> v32i32
; CHECK-LABEL: f5:
; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0)
; CHECK: v[[V1:[0-9]+]]:[[V2:[0-9]+]].w = vunpack(v[[V0]].h)
; CHECK: vmem(r1+#0) = v[[V2]]
; Sign-extend a 32 x i16 load to 32 x i32 (result fills one full HVX vector).
define void @f5(<32 x i16>* %a0, <32 x i32>* %a1) #0 {
  %v0 = load <32 x i16>, <32 x i16>* %a0, align 128
  %v1 = sext <32 x i16> %v0 to <32 x i32>
  store <32 x i32> %v1, <32 x i32>* %a1, align 128
  ret void
}

; v8i8 -> v8i32
; CHECK-LABEL: f6:
; CHECK:     r[[R0:[0-9]+]]:[[R1:[0-9]+]] = memd(r0+#0)
; CHECK-DAG: v[[V0:[0-9]+]].w = vinsert(r[[R0]])
; CHECK-DAG: v[[V0]].w = vinsert(r[[R1]])
; CHECK-DAG: q[[Q0:[0-3]]] = vsetq
; CHECK:     v[[V1:[0-9]+]]:[[V2:[0-9]+]].h = vunpack(v[[V0]].b)
; CHECK:     v[[V3:[0-9]+]]:[[V4:[0-9]+]].w = vunpack(v[[V2]].h)
; CHECK:     if (q[[Q0]]) vmem(r1+#0) = v[[V4]]
; Sign-extend an 8 x i8 load (scalar 64-bit load, inserted into a vector)
; to 8 x i32, stored with a vsetq-predicated partial store.
define void @f6(<8 x i8>* %a0, <8 x i32>* %a1) #0 {
  %v0 = load <8 x i8>, <8 x i8>* %a0, align 128
  %v1 = sext <8 x i8> %v0 to <8 x i32>
  store <8 x i32> %v1, <8 x i32>* %a1, align 128
  ret void
}
98attributes #0 = { "target-cpu"="hexagonv65" "target-features"="+hvx,+hvx-length128b,-packets" }

