1; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=+avx2 | FileCheck %s
2
; Load-folding test for the byte -> word sign-extension intrinsic
; llvm.x86.avx2.pmovsxbw. The <16 x i8> source is read from %a with align 1,
; and the expected output uses that address directly as the memory operand
; of vpmovsxbw. The function name follows the test_llvm_x86_avx2_* scheme
; used by the other tests in this file.
define <16 x i16> @test_llvm_x86_avx2_pmovsxbw(<16 x i8>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovsxbw
; CHECK: vpmovsxbw (%rdi), %ymm0
  %src = load <16 x i8>, <16 x i8>* %a, align 1
  %widened = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %src)
  ret <16 x i16> %widened
}
10
; Load-folding test for llvm.x86.avx2.pmovsxbd (<16 x i8> in, <8 x i32> out).
; The align-1 vector load from %a is expected to appear as the memory
; operand of vpmovsxbd.
define <8 x i32> @test_llvm_x86_avx2_pmovsxbd(<16 x i8>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovsxbd
; CHECK: vpmovsxbd (%rdi), %ymm0
  %src = load <16 x i8>, <16 x i8>* %a, align 1
  %widened = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %src)
  ret <8 x i32> %widened
}
18
; Load-folding test for llvm.x86.avx2.pmovsxbq (<16 x i8> in, <4 x i64> out).
; The align-1 vector load from %a is expected to appear as the memory
; operand of vpmovsxbq.
define <4 x i64> @test_llvm_x86_avx2_pmovsxbq(<16 x i8>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovsxbq
; CHECK: vpmovsxbq (%rdi), %ymm0
  %src = load <16 x i8>, <16 x i8>* %a, align 1
  %widened = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %src)
  ret <4 x i64> %widened
}
26
; Load-folding test for llvm.x86.avx2.pmovsxwd (<8 x i16> in, <8 x i32> out).
; The align-1 vector load from %a is expected to appear as the memory
; operand of vpmovsxwd.
define <8 x i32> @test_llvm_x86_avx2_pmovsxwd(<8 x i16>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovsxwd
; CHECK: vpmovsxwd (%rdi), %ymm0
  %src = load <8 x i16>, <8 x i16>* %a, align 1
  %widened = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %src)
  ret <8 x i32> %widened
}
34
; Load-folding test for llvm.x86.avx2.pmovsxwq (<8 x i16> in, <4 x i64> out).
; The align-1 vector load from %a is expected to appear as the memory
; operand of vpmovsxwq.
define <4 x i64> @test_llvm_x86_avx2_pmovsxwq(<8 x i16>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovsxwq
; CHECK: vpmovsxwq (%rdi), %ymm0
  %src = load <8 x i16>, <8 x i16>* %a, align 1
  %widened = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %src)
  ret <4 x i64> %widened
}
42
; Load-folding test for llvm.x86.avx2.pmovsxdq (<4 x i32> in, <4 x i64> out).
; The align-1 vector load from %a is expected to appear as the memory
; operand of vpmovsxdq.
define <4 x i64> @test_llvm_x86_avx2_pmovsxdq(<4 x i32>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovsxdq
; CHECK: vpmovsxdq (%rdi), %ymm0
  %src = load <4 x i32>, <4 x i32>* %a, align 1
  %widened = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %src)
  ret <4 x i64> %widened
}
50
; Load-folding test for the byte -> word zero-extension intrinsic
; llvm.x86.avx2.pmovzxbw. The <16 x i8> source is read from %a with align 1,
; and the expected output uses that address directly as the memory operand
; of vpmovzxbw. The function name follows the test_llvm_x86_avx2_* scheme
; used by the other tests in this file.
define <16 x i16> @test_llvm_x86_avx2_pmovzxbw(<16 x i8>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovzxbw
; CHECK: vpmovzxbw (%rdi), %ymm0
  %src = load <16 x i8>, <16 x i8>* %a, align 1
  %widened = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %src)
  ret <16 x i16> %widened
}
58
; Load-folding test for llvm.x86.avx2.pmovzxbd (<16 x i8> in, <8 x i32> out).
; The align-1 vector load from %a is expected to appear as the memory
; operand of vpmovzxbd.
define <8 x i32> @test_llvm_x86_avx2_pmovzxbd(<16 x i8>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovzxbd
; CHECK: vpmovzxbd (%rdi), %ymm0
  %src = load <16 x i8>, <16 x i8>* %a, align 1
  %widened = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %src)
  ret <8 x i32> %widened
}
66
; Load-folding test for llvm.x86.avx2.pmovzxbq (<16 x i8> in, <4 x i64> out).
; The align-1 vector load from %a is expected to appear as the memory
; operand of vpmovzxbq.
define <4 x i64> @test_llvm_x86_avx2_pmovzxbq(<16 x i8>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovzxbq
; CHECK: vpmovzxbq (%rdi), %ymm0
  %src = load <16 x i8>, <16 x i8>* %a, align 1
  %widened = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %src)
  ret <4 x i64> %widened
}
74
; Load-folding test for llvm.x86.avx2.pmovzxwd (<8 x i16> in, <8 x i32> out).
; The align-1 vector load from %a is expected to appear as the memory
; operand of vpmovzxwd.
define <8 x i32> @test_llvm_x86_avx2_pmovzxwd(<8 x i16>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovzxwd
; CHECK: vpmovzxwd (%rdi), %ymm0
  %src = load <8 x i16>, <8 x i16>* %a, align 1
  %widened = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %src)
  ret <8 x i32> %widened
}
82
; Load-folding test for llvm.x86.avx2.pmovzxwq (<8 x i16> in, <4 x i64> out).
; The align-1 vector load from %a is expected to appear as the memory
; operand of vpmovzxwq.
define <4 x i64> @test_llvm_x86_avx2_pmovzxwq(<8 x i16>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovzxwq
; CHECK: vpmovzxwq (%rdi), %ymm0
  %src = load <8 x i16>, <8 x i16>* %a, align 1
  %widened = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %src)
  ret <4 x i64> %widened
}
90
; Load-folding test for llvm.x86.avx2.pmovzxdq (<4 x i32> in, <4 x i64> out).
; The align-1 vector load from %a is expected to appear as the memory
; operand of vpmovzxdq.
define <4 x i64> @test_llvm_x86_avx2_pmovzxdq(<4 x i32>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovzxdq
; CHECK: vpmovzxdq (%rdi), %ymm0
  %src = load <4 x i32>, <4 x i32>* %a, align 1
  %widened = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %src)
  ret <4 x i64> %widened
}
98
; Declarations of the AVX2 extension intrinsics called by the tests above,
; listed in the same order as the tests that use them.

; pmovsx* family.
declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>)
declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>)
declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>)
declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>)
declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>)
declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>)

; pmovzx* family.
declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>)
declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>)
declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>)
declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>)
declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>)
declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>)
111