; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.1 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX

define <8 x i16> @test_llvm_x86_sse41_pmovsxbw(<16 x i8>* %a) {
; SSE41-LABEL: test_llvm_x86_sse41_pmovsxbw:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pmovsxbw (%rdi), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_llvm_x86_sse41_pmovsxbw:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmovsxbw (%rdi), %xmm0
; AVX-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %3 = sext <8 x i8> %2 to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_llvm_x86_sse41_pmovsxbd(<16 x i8>* %a) {
; SSE41-LABEL: test_llvm_x86_sse41_pmovsxbd:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pmovsxbd (%rdi), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_llvm_x86_sse41_pmovsxbd:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmovsxbd (%rdi), %xmm0
; AVX-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = sext <4 x i8> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <2 x i64> @test_llvm_x86_sse41_pmovsxbq(<16 x i8>* %a) {
; SSE41-LABEL: test_llvm_x86_sse41_pmovsxbq:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pmovsxbq (%rdi), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_llvm_x86_sse41_pmovsxbq:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmovsxbq (%rdi), %xmm0
; AVX-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
  %3 = sext <2 x i8> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <4 x i32> @test_llvm_x86_sse41_pmovsxwd(<8 x i16>* %a) {
; SSE41-LABEL: test_llvm_x86_sse41_pmovsxwd:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pmovsxwd (%rdi), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_llvm_x86_sse41_pmovsxwd:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmovsxwd (%rdi), %xmm0
; AVX-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a, align 1
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = sext <4 x i16> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <2 x i64> @test_llvm_x86_sse41_pmovsxwq(<8 x i16>* %a) {
; SSE41-LABEL: test_llvm_x86_sse41_pmovsxwq:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pmovsxwq (%rdi), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_llvm_x86_sse41_pmovsxwq:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmovsxwq (%rdi), %xmm0
; AVX-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a, align 1
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
  %3 = sext <2 x i16> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @test_llvm_x86_sse41_pmovsxdq(<4 x i32>* %a) {
; SSE41-LABEL: test_llvm_x86_sse41_pmovsxdq:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pmovsxdq (%rdi), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_llvm_x86_sse41_pmovsxdq:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmovsxdq (%rdi), %xmm0
; AVX-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a, align 1
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %3 = sext <2 x i32> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <8 x i16> @test_llvm_x86_sse41_pmovzxbw(<16 x i8>* %a) {
; SSE41-LABEL: test_llvm_x86_sse41_pmovzxbw:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_llvm_x86_sse41_pmovzxbw:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %1)
  ret <8 x i16> %2
}

define <4 x i32> @test_llvm_x86_sse41_pmovzxbd(<16 x i8>* %a) {
; SSE41-LABEL: test_llvm_x86_sse41_pmovzxbd:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_llvm_x86_sse41_pmovzxbd:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %1)
  ret <4 x i32> %2
}

define <2 x i64> @test_llvm_x86_sse41_pmovzxbq(<16 x i8>* %a) {
; SSE41-LABEL: test_llvm_x86_sse41_pmovzxbq:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_llvm_x86_sse41_pmovzxbq:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %1)
  ret <2 x i64> %2
}

define <4 x i32> @test_llvm_x86_sse41_pmovzxwd(<8 x i16>* %a) {
; SSE41-LABEL: test_llvm_x86_sse41_pmovzxwd:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_llvm_x86_sse41_pmovzxwd:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a, align 1
  %2 = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %1)
  ret <4 x i32> %2
}

define <2 x i64> @test_llvm_x86_sse41_pmovzxwq(<8 x i16>* %a) {
; SSE41-LABEL: test_llvm_x86_sse41_pmovzxwq:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_llvm_x86_sse41_pmovzxwq:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; AVX-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a, align 1
  %2 = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %1)
  ret <2 x i64> %2
}

define <2 x i64> @test_llvm_x86_sse41_pmovzxdq(<4 x i32>* %a) {
; SSE41-LABEL: test_llvm_x86_sse41_pmovzxdq:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_llvm_x86_sse41_pmovzxdq:
; AVX:       ## BB#0:
; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; AVX-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a, align 1
  %2 = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %1)
  ret <2 x i64> %2
}

declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>)
declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>)
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>)
declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>)
declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>)
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>)
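
; For reference, each llvm.x86.sse41.pmovzx* intrinsic above zero-extends the
; low elements of its source vector, mirroring the shufflevector+sext pattern
; used by the sign-extend tests. A generic-IR sketch of the pmovzxbw case
; (the value names %v, %lo, and %ext are illustrative, not part of the tests):
;   %lo  = shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
;   %ext = zext <8 x i8> %lo to <8 x i16>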