; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=avx2 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 --check-prefix=X86 --check-prefix=X86-AVX
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VL --check-prefix=X86 --check-prefix=X86-AVX512VL
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx2 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 --check-prefix=X64 --check-prefix=X64-AVX
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VL --check-prefix=X64 --check-prefix=X64-AVX512VL

define <16 x i16> @test_x86_avx2_packssdw(<8 x i32> %a0, <8 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_packssdw:
; AVX2: # %bb.0:
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x6b,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_packssdw:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_packssdw_fold() {
; X86-AVX-LABEL: test_x86_avx2_packssdw_fold:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,32767,65535,0,0,0,0,32769,32768,0,65280]
; X86-AVX-NEXT: # encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_packssdw_fold:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vmovaps {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [0,0,0,0,255,32767,32767,65535,0,0,0,0,32769,32768,0,65280]
; X86-AVX512VL-NEXT: # encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_packssdw_fold:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,32767,65535,0,0,0,0,32769,32768,0,65280]
; X64-AVX-NEXT: # encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_packssdw_fold:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vmovaps {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [0,0,0,0,255,32767,32767,65535,0,0,0,0,32769,32768,0,65280]
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> zeroinitializer, <8 x i32> <i32 255, i32 32767, i32 65535, i32 -1, i32 -32767, i32 -65535, i32 0, i32 -256>)
  ret <16 x i16> %res
}

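; The *_fold variants call the pack intrinsics with constant operands, so the
; saturated result is expected to fold to a single constant-pool load. For the
; signed saturation in test_x86_avx2_packssdw_fold above, e.g., 65535 clamps
; to 32767 and -65535 to -32768 (printed as 32768 in the CHECK vector).
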
define <32 x i8> @test_x86_avx2_packsswb(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_packsswb:
; AVX2: # %bb.0:
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x63,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_packsswb:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_packsswb_fold() {
; X86-AVX-LABEL: test_x86_avx2_packsswb_fold:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0,0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
; X86-AVX-NEXT: # encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_packsswb_fold:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vmovaps {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0,0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
; X86-AVX512VL-NEXT: # encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_packsswb_fold:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0,0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
; X64-AVX-NEXT: # encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_packsswb_fold:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vmovaps {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0,0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678, i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <16 x i16> zeroinitializer)
  ret <32 x i8> %res
}


define <32 x i8> @test_x86_avx2_packuswb(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_packuswb:
; AVX2: # %bb.0:
; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x67,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_packuswb:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone

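; packuswb saturates each signed i16 element to the unsigned range [0,255]:
; 0 -> 0, 255 -> 255, 256 -> 255, and every negative input (65535 is -1 as
; i16) clamps to 0, matching the folded vector in the CHECK lines below.
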
define <32 x i8> @test_x86_avx2_packuswb_fold() {
; X86-AVX-LABEL: test_x86_avx2_packuswb_fold:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
; X86-AVX-NEXT: # encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_packuswb_fold:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vmovaps {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
; X86-AVX512VL-NEXT: # encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_packuswb_fold:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
; X64-AVX-NEXT: # encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_packuswb_fold:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vmovaps {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678, i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <16 x i16> zeroinitializer)
  ret <32 x i8> %res
}


define <32 x i8> @test_x86_avx2_pavg_b(<32 x i8> %a0, <32 x i8> %a1) {
; AVX2-LABEL: test_x86_avx2_pavg_b:
; AVX2: # %bb.0:
; AVX2-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xe0,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pavg_b:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe0,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pavg_w(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_pavg_w:
; AVX2: # %bb.0:
; AVX2-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xe3,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pavg_w:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe3,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>) nounwind readnone

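; vpmaddwd multiplies vertically paired i16 elements and sums each pair of
; adjacent 32-bit products, producing one i32 result per word pair.
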
define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_pmadd_wd:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xf5,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pmadd_wd:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf5,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmaxs_w(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_pmaxs_w:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xee,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pmaxs_w:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xee,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pmaxu_b(<32 x i8> %a0, <32 x i8> %a1) {
; AVX2-LABEL: test_x86_avx2_pmaxu_b:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xde,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pmaxu_b:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xde,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmins_w(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_pmins_w:
; AVX2: # %bb.0:
; AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xea,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pmins_w:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xea,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pminu_b(<32 x i8> %a0, <32 x i8> %a1) {
; AVX2-LABEL: test_x86_avx2_pminu_b:
; AVX2: # %bb.0:
; AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xda,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pminu_b:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpminub %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xda,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone

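; pmovmskb returns its result in a GPR, so no ymm value is live across the
; return and the compiler inserts vzeroupper to clear the upper ymm state.
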
define i32 @test_x86_avx2_pmovmskb(<32 x i8> %a0) {
; CHECK-LABEL: test_x86_avx2_pmovmskb:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovmskb %ymm0, %eax # encoding: [0xc5,0xfd,0xd7,0xc0]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmulh_w(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_pmulh_w:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xe5,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pmulh_w:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe5,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmulhu_w(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_pmulhu_w:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xe4,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pmulhu_w:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe4,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone


define <4 x i64> @test_x86_avx2_psad_bw(<32 x i8> %a0, <32 x i8> %a1) {
; AVX2-LABEL: test_x86_avx2_psad_bw:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xf6,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psad_bw:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf6,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone


define <8 x i32> @test_x86_avx2_psll_d(<8 x i32> %a0, <4 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_psll_d:
; AVX2: # %bb.0:
; AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xf2,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psll_d:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpslld %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf2,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone

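; The psll/psrl/psra tests with an xmm count operand use only the low 64 bits
; of that register as the shift amount; a logical shift count greater than or
; equal to the element width zeroes the result.
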
define <4 x i64> @test_x86_avx2_psll_q(<4 x i64> %a0, <2 x i64> %a1) {
; AVX2-LABEL: test_x86_avx2_psll_q:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xf3,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psll_q:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf3,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone


define <16 x i16> @test_x86_avx2_psll_w(<16 x i16> %a0, <8 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_psll_w:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xf1,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psll_w:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf1,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_pslli_d(<8 x i32> %a0) {
; AVX2-LABEL: test_x86_avx2_pslli_d:
; AVX2: # %bb.0:
; AVX2-NEXT: vpslld $7, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x72,0xf0,0x07]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pslli_d:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpslld $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xf0,0x07]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_pslli_q(<4 x i64> %a0) {
; AVX2-LABEL: test_x86_avx2_pslli_q:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllq $7, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x73,0xf0,0x07]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pslli_q:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllq $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x73,0xf0,0x07]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) nounwind readnone


define <16 x i16> @test_x86_avx2_pslli_w(<16 x i16> %a0) {
; AVX2-LABEL: test_x86_avx2_pslli_w:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x71,0xf0,0x07]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pslli_w:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xf0,0x07]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) nounwind readnone

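; The arithmetic right shifts below differ for oversized counts: vpsrad and
; vpsraw clamp the count and replicate the sign bit across the whole element.
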
define <8 x i32> @test_x86_avx2_psra_d(<8 x i32> %a0, <4 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_psra_d:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xe2,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psra_d:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe2,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_psra_w(<16 x i16> %a0, <8 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_psra_w:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xe1,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psra_w:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe1,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_psrai_d(<8 x i32> %a0) {
; AVX2-LABEL: test_x86_avx2_psrai_d:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrad $7, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x72,0xe0,0x07]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrai_d:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrad $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xe0,0x07]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) nounwind readnone


define <16 x i16> @test_x86_avx2_psrai_w(<16 x i16> %a0) {
; AVX2-LABEL: test_x86_avx2_psrai_w:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsraw $7, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x71,0xe0,0x07]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrai_w:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsraw $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xe0,0x07]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) nounwind readnone


define <8 x i32> @test_x86_avx2_psrl_d(<8 x i32> %a0, <4 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_psrl_d:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xd2,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrl_d:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd2,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone

define <4 x i64> @test_x86_avx2_psrl_q(<4 x i64> %a0, <2 x i64> %a1) {
; AVX2-LABEL: test_x86_avx2_psrl_q:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xd3,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrl_q:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd3,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone


define <16 x i16> @test_x86_avx2_psrl_w(<16 x i16> %a0, <8 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_psrl_w:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xd1,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrl_w:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone

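; Same intrinsic as above, but with the shift count loaded from memory; the
; load is expected to fold into the vpsrlw memory operand instead of going
; through a separate register load.
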
define <16 x i16> @test_x86_avx2_psrl_w_load(<16 x i16> %a0, <8 x i16>* %p) {
; X86-AVX-LABEL: test_x86_avx2_psrl_w_load:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT: vpsrlw (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xd1,0x00]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrl_w_load:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT: vpsrlw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0x00]
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrl_w_load:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xd1,0x07]
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrl_w_load:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0x07]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %a1 = load <8 x i16>, <8 x i16>* %p
  %res = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}


define <8 x i32> @test_x86_avx2_psrli_d(<8 x i32> %a0) {
; AVX2-LABEL: test_x86_avx2_psrli_d:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrld $7, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x72,0xd0,0x07]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrli_d:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrld $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xd0,0x07]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psrli_q(<4 x i64> %a0) {
; AVX2-LABEL: test_x86_avx2_psrli_q:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrlq $7, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x73,0xd0,0x07]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrli_q:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlq $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x73,0xd0,0x07]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) nounwind readnone


define <16 x i16> @test_x86_avx2_psrli_w(<16 x i16> %a0) {
; AVX2-LABEL: test_x86_avx2_psrli_w:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrlw $7, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x71,0xd0,0x07]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrli_w:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xd0,0x07]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) nounwind readnone


define <8 x i32> @test_x86_avx2_phadd_d(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_x86_avx2_phadd_d:
; CHECK: # %bb.0:
; CHECK-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x02,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_phadd_sw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test_x86_avx2_phadd_sw:
; CHECK: # %bb.0:
; CHECK-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x03,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_phadd_w(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test_x86_avx2_phadd_w:
; CHECK: # %bb.0:
; CHECK-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x01,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_phsub_d(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_x86_avx2_phsub_d:
; CHECK: # %bb.0:
; CHECK-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x06,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_phsub_sw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test_x86_avx2_phsub_sw:
; CHECK: # %bb.0:
; CHECK-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x07,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_phsub_w(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test_x86_avx2_phsub_w:
; CHECK: # %bb.0:
; CHECK-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x05,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) {
; AVX2-LABEL: test_x86_avx2_pmadd_ub_sw:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x04,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pmadd_ub_sw:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x04,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone

; Make sure we don't commute this operation.
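; (vpmaddubsw treats its first source as unsigned bytes and its second as
; signed bytes, so the operands cannot be swapped to fold the op0 load into
; the instruction's memory operand; the load stays a separate vmovdqa.)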
define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(<32 x i8>* %ptr, <32 x i8> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pmadd_ub_sw_load_op0:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT: vmovdqa (%eax), %ymm1 # encoding: [0xc5,0xfd,0x6f,0x08]
; X86-AVX-NEXT: vpmaddubsw %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x04,0xc0]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pmadd_ub_sw_load_op0:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT: vmovdqa (%eax), %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x08]
; X86-AVX512VL-NEXT: vpmaddubsw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x04,0xc0]
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pmadd_ub_sw_load_op0:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqa (%rdi), %ymm1 # encoding: [0xc5,0xfd,0x6f,0x0f]
; X64-AVX-NEXT: vpmaddubsw %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x04,0xc0]
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pmadd_ub_sw_load_op0:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vmovdqa (%rdi), %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x0f]
; X64-AVX512VL-NEXT: vpmaddubsw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x04,0xc0]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %a0 = load <32 x i8>, <32 x i8>* %ptr
  %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}

define <16 x i16> @test_x86_avx2_pmul_hr_sw(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_pmul_hr_sw:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0b,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pmul_hr_sw:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0b,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone

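; vpshufb shuffles bytes independently within each 128-bit lane; a selector
; byte with its top bit set zeroes the corresponding destination byte.
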
define <32 x i8> @test_x86_avx2_pshuf_b(<32 x i8> %a0, <32 x i8> %a1) {
; AVX2-LABEL: test_x86_avx2_pshuf_b:
; AVX2: # %bb.0:
; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x00,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pshuf_b:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x00,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone


define <32 x i8> @test_x86_avx2_psign_b(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: test_x86_avx2_psign_b:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x08,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone


define <8 x i32> @test_x86_avx2_psign_d(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_x86_avx2_psign_d:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0a,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_psign_w(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test_x86_avx2_psign_w:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x09,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: test_x86_avx2_mpsadbw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x42,0xc1,0x07]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone


define <16 x i16> @test_x86_avx2_packusdw(<8 x i32> %a0, <8 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_packusdw:
; AVX2: # %bb.0:
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2b,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_packusdw:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone

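; packusdw saturates each i32 to the unsigned i16 range [0,65535], so in the
; fold test below 255/32767/65535 pass through and every negative input
; clamps to 0.
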
define <16 x i16> @test_x86_avx2_packusdw_fold() {
; X86-AVX-LABEL: test_x86_avx2_packusdw_fold:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
; X86-AVX-NEXT: # encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_packusdw_fold:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vmovaps {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
; X86-AVX512VL-NEXT: # encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_packusdw_fold:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
; X64-AVX-NEXT: # encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_packusdw_fold:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vmovaps {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> zeroinitializer, <8 x i32> <i32 255, i32 32767, i32 65535, i32 -1, i32 -32767, i32 -65535, i32 0, i32 -256>)
  ret <16 x i16> %res
}


define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) {
; CHECK-LABEL: test_x86_avx2_pblendvb:
; CHECK: # %bb.0:
; CHECK-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x4c,0xc1,0x20]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test_x86_avx2_pblendw:
; CHECK: # %bb.0:
; CHECK-NEXT: vpblendw $7, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0e,0xc1,0x07]
; CHECK-NEXT: # ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i8 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i8) nounwind readnone


define <32 x i8> @test_x86_avx2_pmaxsb(<32 x i8> %a0, <32 x i8> %a1) {
; AVX2-LABEL: test_x86_avx2_pmaxsb:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x3c,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pmaxsb:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3c,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone

define <8 x i32> @test_x86_avx2_pmaxsd(<8 x i32> %a0, <8 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_pmaxsd:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x3d,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pmaxsd:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3d,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmaxud(<8 x i32> %a0, <8 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_pmaxud:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x3f,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pmaxud:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3f,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmaxuw(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_pmaxuw:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x3e,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pmaxuw:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3e,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pminsb(<32 x i8> %a0, <32 x i8> %a1) {
; AVX2-LABEL: test_x86_avx2_pminsb:
; AVX2: # %bb.0:
; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x38,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pminsb:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x38,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone

define <8 x i32> @test_x86_avx2_pminsd(<8 x i32> %a0, <8 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_pminsd:
; AVX2: # %bb.0:
; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x39,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pminsd:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x39,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_pminud(<8 x i32> %a0, <8 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_pminud:
; AVX2: # %bb.0:
; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x3b,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pminud:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpminud %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3b,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_pminuw(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_pminuw:
; AVX2: # %bb.0:
; AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x3a,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pminuw:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3a,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone


define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx2_pblendd_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vblendps $8, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x08]
; CHECK-NEXT: # xmm0 = xmm1[0,1,2],xmm0[3]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i8 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i8) nounwind readnone


define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_x86_avx2_pblendd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vblendps $7, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0x07]
; CHECK-NEXT: # ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i8) nounwind readnone

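; Note on the pblendd tests above: the blend is commutable, so in the 128-bit
; case the $7 immediate is lowered as vblendps $8 (the complemented mask)
; with the source operands swapped, as the CHECK shuffle comments show.
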
; Check that the arguments are swapped between the intrinsic definition
; and its lowering. Indeed, the offsets are the first source in
; the instruction.
define <8 x i32> @test_x86_avx2_permd(<8 x i32> %a0, <8 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_permd:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x16,0xc0]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_permd:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xc0]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly


; Check that the arguments are swapped between the intrinsic definition
; and its lowering. Indeed, the offsets are the first source in
; the instruction.
define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_permps:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x16,0xc0]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_permps:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xc0]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly

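; The maskload/maskstore tests: vpmaskmov transfers only the elements whose
; mask sign bit is set; masked-off elements of a load read as zero and the
; corresponding memory is never touched, so masked lanes cannot fault.
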
define <2 x i64> @test_x86_avx2_maskload_q(i8* %a0, <2 x i64> %a1) {
; X86-LABEL: test_x86_avx2_maskload_q:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpmaskmovq (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x8c,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskload_q:
; X64: # %bb.0:
; X64-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x8c,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.maskload.q(i8*, <2 x i64>) nounwind readonly


define <4 x i64> @test_x86_avx2_maskload_q_256(i8* %a0, <4 x i64> %a1) {
; X86-LABEL: test_x86_avx2_maskload_q_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpmaskmovq (%eax), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x8c,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskload_q_256:
; X64: # %bb.0:
; X64-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x8c,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.maskload.q.256(i8*, <4 x i64>) nounwind readonly


define <4 x i32> @test_x86_avx2_maskload_d(i8* %a0, <4 x i32> %a1) {
; X86-LABEL: test_x86_avx2_maskload_d:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpmaskmovd (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x8c,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskload_d:
; X64: # %bb.0:
; X64-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x8c,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.maskload.d(i8*, <4 x i32>) nounwind readonly


define <8 x i32> @test_x86_avx2_maskload_d_256(i8* %a0, <8 x i32> %a1) {
; X86-LABEL: test_x86_avx2_maskload_d_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpmaskmovd (%eax), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x8c,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskload_d_256:
; X64: # %bb.0:
; X64-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x8c,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>) nounwind readonly


define void @test_x86_avx2_maskstore_q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) {
; X86-LABEL: test_x86_avx2_maskstore_q:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpmaskmovq %xmm1, %xmm0, (%eax) # encoding: [0xc4,0xe2,0xf9,0x8e,0x08]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskstore_q:
; X64: # %bb.0:
; X64-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # encoding: [0xc4,0xe2,0xf9,0x8e,0x0f]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx2.maskstore.q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>) nounwind


define void @test_x86_avx2_maskstore_q_256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) {
; X86-LABEL: test_x86_avx2_maskstore_q_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpmaskmovq %ymm1, %ymm0, (%eax) # encoding: [0xc4,0xe2,0xfd,0x8e,0x08]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskstore_q_256:
; X64: # %bb.0:
; X64-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # encoding: [0xc4,0xe2,0xfd,0x8e,0x0f]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx2.maskstore.q.256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>) nounwind


define void @test_x86_avx2_maskstore_d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) {
; X86-LABEL: test_x86_avx2_maskstore_d:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpmaskmovd %xmm1, %xmm0, (%eax) # encoding: [0xc4,0xe2,0x79,0x8e,0x08]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskstore_d:
; X64: # %bb.0:
; X64-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # encoding: [0xc4,0xe2,0x79,0x8e,0x0f]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx2.maskstore.d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>) nounwind

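; The psllv family shifts each element by its own count taken from the second
; operand; any per-element count >= the element width yields zero.
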
define void @test_x86_avx2_maskstore_d_256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) {
; X86-LABEL: test_x86_avx2_maskstore_d_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpmaskmovd %ymm1, %ymm0, (%eax) # encoding: [0xc4,0xe2,0x7d,0x8e,0x08]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskstore_d_256:
; X64: # %bb.0:
; X64-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # encoding: [0xc4,0xe2,0x7d,0x8e,0x0f]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx2.maskstore.d.256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind


define <4 x i32> @test_x86_avx2_psllv_d(<4 x i32> %a0, <4 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_psllv_d:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x47,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psllv_d:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}

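; Constant-operand variants: the inputs are materialized from the constant
; pool and the variable shift itself is still emitted here rather than being
; constant folded.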
define <4 x i32> @test_x86_avx2_psllv_d_const() {
; X86-AVX-LABEL: test_x86_avx2_psllv_d_const:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,0,4294967295]
; X86-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsllvd {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x47,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,4294967295]
; X86-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsllvd %xmm1, %xmm1, %xmm1 # encoding: [0xc4,0xe2,0x71,0x47,0xc9]
; X86-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psllv_d_const:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [2,9,0,4294967295]
; X86-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsllvd {{\.LCPI.*}}, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %xmm1 # EVEX TO VEX Compression xmm1 = [1,1,1,4294967295]
; X86-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsllvd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x47,0xc9]
; X86-AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psllv_d_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,0,4294967295]
; X64-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x47,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,4294967295]
; X64-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsllvd %xmm1, %xmm1, %xmm1 # encoding: [0xc4,0xe2,0x71,0x47,0xc9]
; X64-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psllv_d_const:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [2,9,0,4294967295]
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm1 # EVEX TO VEX Compression xmm1 = [1,1,1,4294967295]
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsllvd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x47,0xc9]
; X64-AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %res0 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> <i32 2, i32 9, i32 0, i32 -1>, <4 x i32> <i32 1, i32 0, i32 33, i32 -1>)
  %res1 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> <i32 1, i32 1, i32 1, i32 -1>, <4 x i32> <i32 1, i32 1, i32 1, i32 -1>)
  %res2 = add <4 x i32> %res0, %res1
  ret <4 x i32> %res2
}
declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_psllv_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_psllv_d_256:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x47,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psllv_d_256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}

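;; In the constant tests, the shift amounts stay in the constant pool: 32-bit
;; code references the .LCPI symbols with absolute FK_Data_4 fixups, while
;; 64-bit code reaches them RIP-relatively via reloc_riprel_4byte fixups.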
define <8 x i32> @test_x86_avx2_psllv_d_256_const() {
; X86-AVX-LABEL: test_x86_avx2_psllv_d_256_const:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X86-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsllvd {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x47,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
; X86-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsllvd {{\.LCPI.*}}, %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x75,0x47,0x0d,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0xc1]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psllv_d_256_const:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X86-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsllvd {{\.LCPI.*}}, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %ymm1 # EVEX TO VEX Compression ymm1 = [4,4,4,4,4,4,4,4294967295]
; X86-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsllvd {{\.LCPI.*}}, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x47,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psllv_d_256_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X64-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x47,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
; X64-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsllvd {{.*}}(%rip), %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x75,0x47,0x0d,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0xc1]
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psllv_d_256_const:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm1 # EVEX TO VEX Compression ymm1 = [4,4,4,4,4,4,4,4294967295]
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsllvd {{.*}}(%rip), %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x47,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %res0 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0>, <8 x i32> <i32 1, i32 0, i32 33, i32 -1,i32 2, i32 0, i32 34, i32 -2>)
  %res1 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
  %res2 = add <8 x i32> %res0, %res1
  ret <8 x i32> %res2
}
declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone


define <2 x i64> @test_x86_avx2_psllv_q(<2 x i64> %a0, <2 x i64> %a1) {
; AVX2-LABEL: test_x86_avx2_psllv_q:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x47,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psllv_q:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
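;; Note how the same <2 x i64> constant is rendered per target in the tests
;; below: i686 prints four 32-bit lanes ([4,0,4294967295,4294967295]) because
;; 64-bit integers are split during legalization, while x86-64 prints two
;; 64-bit lanes ([4,18446744073709551615]).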
define <2 x i64> @test_x86_avx2_psllv_q_const() {
; X86-AVX-LABEL: test_x86_avx2_psllv_q_const:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4,0,4294967295,4294967295]
; X86-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsllvq {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x47,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psllv_q_const:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [4,0,4294967295,4294967295]
; X86-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsllvq {{\.LCPI.*}}, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psllv_q_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4,18446744073709551615]
; X64-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x47,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psllv_q_const:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,18446744073709551615]
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> <i64 4, i64 -1>, <2 x i64> <i64 1, i64 -1>)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone


define <4 x i64> @test_x86_avx2_psllv_q_256(<4 x i64> %a0, <4 x i64> %a1) {
; AVX2-LABEL: test_x86_avx2_psllv_q_256:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x47,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psllv_q_256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}

define <4 x i64> @test_x86_avx2_psllv_q_256_const() {
; X86-AVX-LABEL: test_x86_avx2_psllv_q_256_const:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [4,0,4,0,4,0,4294967295,4294967295]
; X86-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsllvq {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x47,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psllv_q_256_const:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [4,0,4,0,4,0,4294967295,4294967295]
; X86-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsllvq {{\.LCPI.*}}, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psllv_q_256_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [4,4,4,18446744073709551615]
; X64-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x47,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psllv_q_256_const:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,18446744073709551615]
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> <i64 4, i64 4, i64 4, i64 -1>, <4 x i64> <i64 1, i64 1, i64 1, i64 -1>)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone


define <4 x i32> @test_x86_avx2_psrlv_d(<4 x i32> %a0, <4 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_psrlv_d:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x45,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrlv_d:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}

define <4 x i32> @test_x86_avx2_psrlv_d_const() {
; X86-AVX-LABEL: test_x86_avx2_psrlv_d_const:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,0,4294967295]
; X86-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsrlvd {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x45,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4294967295]
; X86-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsrlvd {{\.LCPI.*}}, %xmm1, %xmm1 # encoding: [0xc4,0xe2,0x71,0x45,0x0d,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_d_const:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [2,9,0,4294967295]
; X86-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsrlvd {{\.LCPI.*}}, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %xmm1 # EVEX TO VEX Compression xmm1 = [4,4,4,4294967295]
; X86-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsrlvd {{\.LCPI.*}}, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x45,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrlv_d_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,0,4294967295]
; X64-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x45,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4294967295]
; X64-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 # encoding: [0xc4,0xe2,0x71,0x45,0x0d,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_d_const:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [2,9,0,4294967295]
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm1 # EVEX TO VEX Compression xmm1 = [4,4,4,4294967295]
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x45,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %res0 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> <i32 2, i32 9, i32 0, i32 -1>, <4 x i32> <i32 1, i32 0, i32 33, i32 -1>)
  %res1 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> <i32 4, i32 4, i32 4, i32 -1>, <4 x i32> <i32 1, i32 1, i32 1, i32 -1>)
  %res2 = add <4 x i32> %res0, %res1
  ret <4 x i32> %res2
}
declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_psrlv_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_psrlv_d_256:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x45,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrlv_d_256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}

define <8 x i32> @test_x86_avx2_psrlv_d_256_const() {
; X86-AVX-LABEL: test_x86_avx2_psrlv_d_256_const:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X86-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsrlvd {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x45,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
; X86-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsrlvd {{\.LCPI.*}}, %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x75,0x45,0x0d,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0xc1]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_d_256_const:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X86-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsrlvd {{\.LCPI.*}}, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %ymm1 # EVEX TO VEX Compression ymm1 = [4,4,4,4,4,4,4,4294967295]
; X86-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsrlvd {{\.LCPI.*}}, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x45,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrlv_d_256_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X64-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x45,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
; X64-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsrlvd {{.*}}(%rip), %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x75,0x45,0x0d,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0xc1]
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_d_256_const:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm1 # EVEX TO VEX Compression ymm1 = [4,4,4,4,4,4,4,4294967295]
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x45,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %res0 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0>, <8 x i32> <i32 1, i32 0, i32 33, i32 -1,i32 2, i32 0, i32 34, i32 -2>)
  %res1 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
  %res2 = add <8 x i32> %res0, %res1
  ret <8 x i32> %res2
}
declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone


define <2 x i64> @test_x86_avx2_psrlv_q(<2 x i64> %a0, <2 x i64> %a1) {
; AVX2-LABEL: test_x86_avx2_psrlv_q:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x45,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrlv_q:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}

define <2 x i64> @test_x86_avx2_psrlv_q_const() {
; X86-AVX-LABEL: test_x86_avx2_psrlv_q_const:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4,0,4,0]
; X86-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsrlvq {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x45,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_q_const:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [4,0,4,0]
; X86-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsrlvq {{\.LCPI.*}}, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrlv_q_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4,4]
; X64-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x45,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_q_const:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,4]
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> <i64 4, i64 4>, <2 x i64> <i64 1, i64 -1>)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone


define <4 x i64> @test_x86_avx2_psrlv_q_256(<4 x i64> %a0, <4 x i64> %a1) {
; AVX2-LABEL: test_x86_avx2_psrlv_q_256:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x45,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrlv_q_256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}


define <4 x i64> @test_x86_avx2_psrlv_q_256_const() {
; X86-AVX-LABEL: test_x86_avx2_psrlv_q_256_const:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [4,0,4,0,4,0,4,0]
; X86-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsrlvq {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x45,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_q_256_const:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [4,0,4,0,4,0,4,0]
; X86-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsrlvq {{\.LCPI.*}}, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrlv_q_256_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpbroadcastq {{.*#+}} ymm0 = [4,4,4,4]
; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x59,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x45,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_q_256_const:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpbroadcastq {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4]
; X64-AVX512VL-NEXT: # encoding: [0xc4,0xe2,0x7d,0x59,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> <i64 4, i64 4, i64 4, i64 4>, <4 x i64> <i64 1, i64 1, i64 1, i64 -1>)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone

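;; vpsravd is the arithmetic variant: vacated bits are filled with the sign
;; bit, and a per-lane count of 32 or more yields a lane of all sign bits
;; (as if shifted by 31) rather than zero.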
define <4 x i32> @test_x86_avx2_psrav_d(<4 x i32> %a0, <4 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_psrav_d:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x46,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrav_d:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}

define <4 x i32> @test_x86_avx2_psrav_d_const() {
; X86-AVX-LABEL: test_x86_avx2_psrav_d_const:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,4294967284,23]
; X86-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsravd {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrav_d_const:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [2,9,4294967284,23]
; X86-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsravd {{\.LCPI.*}}, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrav_d_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,4294967284,23]
; X64-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrav_d_const:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [2,9,4294967284,23]
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> <i32 2, i32 9, i32 -12, i32 23>, <4 x i32> <i32 1, i32 18, i32 35, i32 52>)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i32> @test_x86_avx2_psrav_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_psrav_d_256:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x46,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrav_d_256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}

define <8 x i32> @test_x86_avx2_psrav_d_256_const() {
; X86-AVX-LABEL: test_x86_avx2_psrav_d_256_const:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
; X86-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsravd {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrav_d_256_const:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
; X86-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsravd {{\.LCPI.*}}, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrav_d_256_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
; X64-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrav_d_256_const:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone

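;; Gather tests: the first operand is the pass-through value, the trailing i8
;; immediate (2 in these tests) becomes the scale in the memory operand, and
;; only the sign bit of each mask element enables a lane's load. The hardware
;; zeroes the mask register as the gather completes, so the mask operand is
;; clobbered by the instruction.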
define <2 x double> @test_x86_avx2_gather_d_pd(<2 x double> %a0, i8* %a1, <4 x i32> %idx, <2 x double> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_pd:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vgatherdpd %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x92,0x04,0x48]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_pd:
; X64: # %bb.0:
; X64-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x92,0x04,0x4f]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0,
                            i8* %a1, <4 x i32> %idx, <2 x double> %mask, i8 2) ;
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*,
                      <4 x i32>, <2 x double>, i8) nounwind readonly

define <4 x double> @test_x86_avx2_gather_d_pd_256(<4 x double> %a0, i8* %a1, <4 x i32> %idx, <4 x double> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_pd_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vgatherdpd %ymm2, (%eax,%xmm1,2), %ymm0 # encoding: [0xc4,0xe2,0xed,0x92,0x04,0x48]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_pd_256:
; X64: # %bb.0:
; X64-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,2), %ymm0 # encoding: [0xc4,0xe2,0xed,0x92,0x04,0x4f]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0,
                            i8* %a1, <4 x i32> %idx, <4 x double> %mask, i8 2) ;
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*,
                      <4 x i32>, <4 x double>, i8) nounwind readonly

define <2 x double> @test_x86_avx2_gather_q_pd(<2 x double> %a0, i8* %a1, <2 x i64> %idx, <2 x double> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_pd:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vgatherqpd %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x93,0x04,0x48]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_pd:
; X64: # %bb.0:
; X64-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x93,0x04,0x4f]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0,
                            i8* %a1, <2 x i64> %idx, <2 x double> %mask, i8 2) ;
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*,
                      <2 x i64>, <2 x double>, i8) nounwind readonly

define <4 x double> @test_x86_avx2_gather_q_pd_256(<4 x double> %a0, i8* %a1, <4 x i64> %idx, <4 x double> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_pd_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vgatherqpd %ymm2, (%eax,%ymm1,2), %ymm0 # encoding: [0xc4,0xe2,0xed,0x93,0x04,0x48]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_pd_256:
; X64: # %bb.0:
; X64-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,2), %ymm0 # encoding: [0xc4,0xe2,0xed,0x93,0x04,0x4f]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0,
                            i8* %a1, <4 x i64> %idx, <4 x double> %mask, i8 2) ;
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*,
                      <4 x i64>, <4 x double>, i8) nounwind readonly

define <4 x float> @test_x86_avx2_gather_d_ps(<4 x float> %a0, i8* %a1, <4 x i32> %idx, <4 x float> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_ps:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vgatherdps %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0x69,0x92,0x04,0x48]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_ps:
; X64: # %bb.0:
; X64-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0x69,0x92,0x04,0x4f]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0,
                            i8* %a1, <4 x i32> %idx, <4 x float> %mask, i8 2) ;
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*,
                      <4 x i32>, <4 x float>, i8) nounwind readonly

define <8 x float> @test_x86_avx2_gather_d_ps_256(<8 x float> %a0, i8* %a1, <8 x i32> %idx, <8 x float> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_ps_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vgatherdps %ymm2, (%eax,%ymm1,2), %ymm0 # encoding: [0xc4,0xe2,0x6d,0x92,0x04,0x48]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_ps_256:
; X64: # %bb.0:
; X64-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,2), %ymm0 # encoding: [0xc4,0xe2,0x6d,0x92,0x04,0x4f]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0,
                            i8* %a1, <8 x i32> %idx, <8 x float> %mask, i8 2) ;
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*,
                      <8 x i32>, <8 x float>, i8) nounwind readonly

define <4 x float> @test_x86_avx2_gather_q_ps(<4 x float> %a0, i8* %a1, <2 x i64> %idx, <4 x float> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_ps:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vgatherqps %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0x69,0x93,0x04,0x48]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_ps:
; X64: # %bb.0:
; X64-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0x69,0x93,0x04,0x4f]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0,
                            i8* %a1, <2 x i64> %idx, <4 x float> %mask, i8 2) ;
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*,
                      <2 x i64>, <4 x float>, i8) nounwind readonly

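;; vgatherqps with 256-bit (qword) indices still produces only an xmm of four
;; floats; the ymm index register is the last ymm use in the function, so a
;; vzeroupper is emitted before the return.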
define <4 x float> @test_x86_avx2_gather_q_ps_256(<4 x float> %a0, i8* %a1, <4 x i64> %idx, <4 x float> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_ps_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vgatherqps %xmm2, (%eax,%ymm1,2), %xmm0 # encoding: [0xc4,0xe2,0x6d,0x93,0x04,0x48]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_ps_256:
; X64: # %bb.0:
; X64-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,2), %xmm0 # encoding: [0xc4,0xe2,0x6d,0x93,0x04,0x4f]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0,
                            i8* %a1, <4 x i64> %idx, <4 x float> %mask, i8 2) ;
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*,
                      <4 x i64>, <4 x float>, i8) nounwind readonly

define <2 x i64> @test_x86_avx2_gather_d_q(<2 x i64> %a0, i8* %a1, <4 x i32> %idx, <2 x i64> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_q:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpgatherdq %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x90,0x04,0x48]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_q:
; X64: # %bb.0:
; X64-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x90,0x04,0x4f]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0,
                            i8* %a1, <4 x i32> %idx, <2 x i64> %mask, i8 2) ;
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*,
                      <4 x i32>, <2 x i64>, i8) nounwind readonly

define <4 x i64> @test_x86_avx2_gather_d_q_256(<4 x i64> %a0, i8* %a1, <4 x i32> %idx, <4 x i64> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_q_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpgatherdq %ymm2, (%eax,%xmm1,2), %ymm0 # encoding: [0xc4,0xe2,0xed,0x90,0x04,0x48]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_q_256:
; X64: # %bb.0:
; X64-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # encoding: [0xc4,0xe2,0xed,0x90,0x04,0x4f]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0,
                            i8* %a1, <4 x i32> %idx, <4 x i64> %mask, i8 2) ;
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*,
                      <4 x i32>, <4 x i64>, i8) nounwind readonly

define <2 x i64> @test_x86_avx2_gather_q_q(<2 x i64> %a0, i8* %a1, <2 x i64> %idx, <2 x i64> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_q:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpgatherqq %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x91,0x04,0x48]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_q:
; X64: # %bb.0:
; X64-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x91,0x04,0x4f]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0,
                            i8* %a1, <2 x i64> %idx, <2 x i64> %mask, i8 2) ;
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*,
                      <2 x i64>, <2 x i64>, i8) nounwind readonly

define <4 x i64> @test_x86_avx2_gather_q_q_256(<4 x i64> %a0, i8* %a1, <4 x i64> %idx, <4 x i64> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_q_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpgatherqq %ymm2, (%eax,%ymm1,2), %ymm0 # encoding: [0xc4,0xe2,0xed,0x91,0x04,0x48]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_q_256:
; X64: # %bb.0:
; X64-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # encoding: [0xc4,0xe2,0xed,0x91,0x04,0x4f]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0,
                            i8* %a1, <4 x i64> %idx, <4 x i64> %mask, i8 2) ;
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*,
                      <4 x i64>, <4 x i64>, i8) nounwind readonly

define <4 x i32> @test_x86_avx2_gather_d_d(<4 x i32> %a0, i8* %a1, <4 x i32> %idx, <4 x i32> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_d:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpgatherdd %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0x69,0x90,0x04,0x48]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_d:
; X64: # %bb.0:
; X64-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0x69,0x90,0x04,0x4f]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0,
                            i8* %a1, <4 x i32> %idx, <4 x i32> %mask, i8 2) ;
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*,
                      <4 x i32>, <4 x i32>, i8) nounwind readonly

define <8 x i32> @test_x86_avx2_gather_d_d_256(<8 x i32> %a0, i8* %a1, <8 x i32> %idx, <8 x i32> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_d_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpgatherdd %ymm2, (%eax,%ymm1,2), %ymm0 # encoding: [0xc4,0xe2,0x6d,0x90,0x04,0x48]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_d_256:
; X64: # %bb.0:
; X64-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # encoding: [0xc4,0xe2,0x6d,0x90,0x04,0x4f]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0,
                            i8* %a1, <8 x i32> %idx, <8 x i32> %mask, i8 2) ;
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*,
                      <8 x i32>, <8 x i32>, i8) nounwind readonly

define <4 x i32> @test_x86_avx2_gather_q_d(<4 x i32> %a0, i8* %a1, <2 x i64> %idx, <4 x i32> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_d:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpgatherqd %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0x69,0x91,0x04,0x48]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_d:
; X64: # %bb.0:
; X64-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0x69,0x91,0x04,0x4f]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0,
                            i8* %a1, <2 x i64> %idx, <4 x i32> %mask, i8 2) ;
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*,
                      <2 x i64>, <4 x i32>, i8) nounwind readonly

define <4 x i32> @test_x86_avx2_gather_q_d_256(<4 x i32> %a0, i8* %a1, <4 x i64> %idx, <4 x i32> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_d_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpgatherqd %xmm2, (%eax,%ymm1,2), %xmm0 # encoding: [0xc4,0xe2,0x6d,0x91,0x04,0x48]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_d_256:
; X64: # %bb.0:
; X64-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # encoding: [0xc4,0xe2,0x6d,0x91,0x04,0x4f]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0,
                            i8* %a1, <4 x i64> %idx, <4 x i32> %mask, i8 2) ;
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*,
                      <4 x i64>, <4 x i32>, i8) nounwind readonly

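;; The PR13298 test keeps the mask alive after the gather. Since the gather
;; zeroes its mask register, codegen must copy the mask (vmovaps %ymm2, %ymm3)
;; and feed the copy to vgatherdps so the original %mask can still be stored
;; to %out.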
; PR13298
define <8 x float> @test_gather_mask(<8 x float> %a0, float* %a, <8 x i32> %idx, <8 x float> %mask, float* nocapture %out) {
;; gather with mask
; X86-AVX-LABEL: test_gather_mask:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-AVX-NEXT: vmovaps %ymm2, %ymm3 # encoding: [0xc5,0xfc,0x28,0xda]
; X86-AVX-NEXT: vgatherdps %ymm3, (%ecx,%ymm1,4), %ymm0 # encoding: [0xc4,0xe2,0x65,0x92,0x04,0x89]
; X86-AVX-NEXT: vmovups %ymm2, (%eax) # encoding: [0xc5,0xfc,0x11,0x10]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_gather_mask:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-AVX512VL-NEXT: vmovaps %ymm2, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xda]
; X86-AVX512VL-NEXT: vgatherdps %ymm3, (%ecx,%ymm1,4), %ymm0 # encoding: [0xc4,0xe2,0x65,0x92,0x04,0x89]
; X86-AVX512VL-NEXT: vmovups %ymm2, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x10]
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_gather_mask:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovaps %ymm2, %ymm3 # encoding: [0xc5,0xfc,0x28,0xda]
; X64-AVX-NEXT: vgatherdps %ymm3, (%rdi,%ymm1,4), %ymm0 # encoding: [0xc4,0xe2,0x65,0x92,0x04,0x8f]
; X64-AVX-NEXT: vmovups %ymm2, (%rsi) # encoding: [0xc5,0xfc,0x11,0x16]
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_gather_mask:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vmovaps %ymm2, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xda]
; X64-AVX512VL-NEXT: vgatherdps %ymm3, (%rdi,%ymm1,4), %ymm0 # encoding: [0xc4,0xe2,0x65,0x92,0x04,0x8f]
; X64-AVX512VL-NEXT: vmovups %ymm2, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x16]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %a_i8 = bitcast float* %a to i8*
  %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0,
                            i8* %a_i8, <8 x i32> %idx, <8 x float> %mask, i8 4) ;

;; for debugging, we'll just dump out the mask
  %out_ptr = bitcast float * %out to <8 x float> *
  store <8 x float> %mask, <8 x float> * %out_ptr, align 4

  ret <8 x float> %res
}

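;; Only the sign bit of each mask element is demanded by the gather, so the
;; sext of the <2 x i1> mask below lowers to a single vpsllq $63 that places
;; each bit in its lane's sign position; no arithmetic shift back is required.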
define <2 x i64> @test_mask_demanded_bits(<2 x i64> %a0, i8* %a1, <2 x i64> %idx, <2 x i1> %mask) {
; X86-AVX-LABEL: test_mask_demanded_bits:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT: vpsllq $63, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x73,0xf2,0x3f]
; X86-AVX-NEXT: vpgatherqq %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x91,0x04,0x48]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_mask_demanded_bits:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT: vpsllq $63, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x73,0xf2,0x3f]
; X86-AVX512VL-NEXT: vpgatherqq %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x91,0x04,0x48]
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_mask_demanded_bits:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpsllq $63, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x73,0xf2,0x3f]
; X64-AVX-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x91,0x04,0x4f]
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_mask_demanded_bits:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpsllq $63, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x73,0xf2,0x3f]
; X64-AVX512VL-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x91,0x04,0x4f]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %mask1 = sext <2 x i1> %mask to <2 x i64>
  %res = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0,
                            i8* %a1, <2 x i64> %idx, <2 x i64> %mask1, i8 2) ;
  ret <2 x i64> %res
}