; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE,X86-SSE1
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE,X86-SSE2
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse,-sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE,X64-SSE1
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse,+sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE,X64-SSE2
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse-builtins.c
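; For example, @test_mm_add_ps below matches what clang emits for C source along
; the lines of (illustrative only, not part of this test):
;   __m128 test_mm_add_ps(__m128 a0, __m128 a1) { return _mm_add_ps(a0, a1); }
; i.e. the intrinsic lowers to a plain 'fadd <4 x float>' rather than a target
; intrinsic call.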

define <4 x float> @test_mm_add_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_add_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    addps %xmm1, %xmm0 # encoding: [0x0f,0x58,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x58,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = fadd <4 x float> %a0, %a1
  ret <4 x float> %res
}

define <4 x float> @test_mm_add_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_add_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x58,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_ss:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x58,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x58,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %ext0 = extractelement <4 x float> %a0, i32 0
  %ext1 = extractelement <4 x float> %a1, i32 0
  %fadd = fadd float %ext0, %ext1
  %res = insertelement <4 x float> %a0, float %fadd, i32 0
  ret <4 x float> %res
}

define <4 x float> @test_mm_and_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_and_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    andps %xmm1, %xmm0 # encoding: [0x0f,0x54,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_and_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_and_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <4 x float> %a0 to <4 x i32>
  %arg1 = bitcast <4 x float> %a1 to <4 x i32>
  %res = and <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <4 x float>
  ret <4 x float> %bc
}

define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; X86-SSE1-LABEL: test_mm_andnot_ps:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    andnps %xmm1, %xmm0 # encoding: [0x0f,0x55,0xc1]
; X86-SSE1-NEXT:    retl # encoding: [0xc3]
;
; X86-SSE2-LABEL: test_mm_andnot_ps:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pcmpeqd %xmm2, %xmm2 # encoding: [0x66,0x0f,0x76,0xd2]
; X86-SSE2-NEXT:    pxor %xmm2, %xmm0 # encoding: [0x66,0x0f,0xef,0xc2]
; X86-SSE2-NEXT:    pand %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc1]
; X86-SSE2-NEXT:    retl # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_andnot_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xef,0xc2]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdb,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_andnot_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x25,0xc0,0x0f]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; X64-SSE1-LABEL: test_mm_andnot_ps:
; X64-SSE1:       # %bb.0:
; X64-SSE1-NEXT:    andnps %xmm1, %xmm0 # encoding: [0x0f,0x55,0xc1]
; X64-SSE1-NEXT:    retq # encoding: [0xc3]
;
; X64-SSE2-LABEL: test_mm_andnot_ps:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    pcmpeqd %xmm2, %xmm2 # encoding: [0x66,0x0f,0x76,0xd2]
; X64-SSE2-NEXT:    pxor %xmm2, %xmm0 # encoding: [0x66,0x0f,0xef,0xc2]
; X64-SSE2-NEXT:    pand %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc1]
; X64-SSE2-NEXT:    retq # encoding: [0xc3]
  %arg0 = bitcast <4 x float> %a0 to <4 x i32>
  %arg1 = bitcast <4 x float> %a1 to <4 x i32>
  %not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1>
  %res = and <4 x i32> %not, %arg1
  %bc = bitcast <4 x i32> %res to <4 x float>
  ret <4 x float> %bc
}
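
; The compare tests below exercise the SSE predicate encodings: the i8 immediate
; passed to @llvm.x86.sse.cmp.ss is 0=EQ, 1=LT, 2=LE, 3=UNORD, 4=NEQ, 5=NLT,
; 6=NLE, 7=ORD. CMPPS/CMPSS have no GE/GT encodings, so the 'ge'/'gt'/'nge'/'ngt'
; tests swap the operands and use the LT/LE/NLT/NLE forms, as the swapped-operand
; assembly below shows.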

define <4 x float> @test_mm_cmpeq_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x00]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpeq_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x00]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpeq_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeqps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x00]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp oeq <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpeq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x00]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpeq_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x00]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone

define <4 x float> @test_mm_cmpge_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpge_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpleps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x02]
; SSE-NEXT:    movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpge_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpleps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x02]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpge_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpleps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x02]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ole <4 x float> %a1, %a0
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpge_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpge_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpless %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x02]
; SSE-NEXT:    movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1]
; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpge_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpless %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x02]
; AVX-NEXT:    vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
; AVX-NEXT:    # xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 2)
  %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpgt_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpltps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x01]
; SSE-NEXT:    movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpgt_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x01]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpgt_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpltps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x01]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp olt <4 x float> %a1, %a0
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpgt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpltss %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x01]
; SSE-NEXT:    movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1]
; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpgt_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpltss %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x01]
; AVX-NEXT:    vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
; AVX-NEXT:    # xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 1)
  %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmple_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmple_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpleps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x02]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmple_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpleps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x02]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmple_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpleps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x02]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ole <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmple_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmple_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpless %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x02]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmple_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpless %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x02]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 2)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmplt_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpltps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x01]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpltps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x01]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpltps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x01]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp olt <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmplt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpltss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x01]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmplt_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpltss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x01]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 1)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpneq_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpneq_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpneqps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x04]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpneq_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpneqps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x04]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpneq_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneqps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x04]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp une <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpneq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpneq_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpneqss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x04]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpneq_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneqss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x04]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 4)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnge_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnge_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpnleps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x06]
; SSE-NEXT:    movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnge_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpnleps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x06]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnge_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpnleps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x06]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ugt <4 x float> %a1, %a0
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnge_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnge_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpnless %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x06]
; SSE-NEXT:    movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1]
; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnge_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpnless %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x06]
; AVX-NEXT:    vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
; AVX-NEXT:    # xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 6)
  %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpngt_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpngt_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpnltps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x05]
; SSE-NEXT:    movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpngt_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpnltps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x05]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpngt_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpnltps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x05]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp uge <4 x float> %a1, %a0
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpngt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpngt_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpnltss %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x05]
; SSE-NEXT:    movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1]
; SSE-NEXT:    # xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpngt_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpnltss %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x05]
; AVX-NEXT:    vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
; AVX-NEXT:    # xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 5)
  %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnle_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnle_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpnleps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x06]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnle_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpnleps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x06]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnle_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpnleps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x06]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ugt <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnle_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnle_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpnless %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x06]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnle_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpnless %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x06]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 6)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnlt_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnlt_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpnltps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x05]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnlt_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpnltps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x05]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnlt_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpnltps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x05]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp uge <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnlt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnlt_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpnltss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x05]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnlt_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpnltss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x05]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 5)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpord_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpord_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpordps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x07]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpord_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpordps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x07]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpord_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpordps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x07]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ord <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpord_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpord_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpordss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x07]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpord_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpordss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x07]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpunord_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpunord_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpunordps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x03]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpunord_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpunordps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x03]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpunord_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpunordps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x03]
; AVX512-NEXT:    vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp uno <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpunord_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpunord_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpunordss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x03]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpunord_ss:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpunordss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x03]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 3)
  ret <4 x float> %res
}

define i32 @test_mm_comieq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comieq_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1]
; SSE-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0]
; SSE-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1]
; SSE-NEXT:    andb %al, %cl # encoding: [0x20,0xc1]
; SSE-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comieq_ss:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX1-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX1-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX1-NEXT:    andb %al, %cl # encoding: [0x20,0xc1]
; AVX1-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comieq_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX512-NEXT:    setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX512-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX512-NEXT:    andb %al, %cl # encoding: [0x20,0xc1]
; AVX512-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comige_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comige_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT:    comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1]
; SSE-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comige_ss:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT:    vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX1-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comige_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT:    vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX512-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comigt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comigt_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT:    comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1]
; SSE-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comigt_ss:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT:    vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX1-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comigt_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT:    vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX512-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comile_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comile_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT:    comiss %xmm0, %xmm1 # encoding: [0x0f,0x2f,0xc8]
; SSE-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comile_ss:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT:    vcomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2f,0xc8]
; AVX1-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comile_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT:    vcomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc8]
; AVX512-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comilt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comilt_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT:    comiss %xmm0, %xmm1 # encoding: [0x0f,0x2f,0xc8]
; SSE-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comilt_ss:
; AVX1:       # %bb.0:
; AVX1-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT:    vcomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2f,0xc8]
; AVX1-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comilt_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT:    vcomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc8]
; AVX512-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comineq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comineq_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1]
; SSE-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0]
; SSE-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1]
; SSE-NEXT:    orb %al, %cl # encoding: [0x08,0xc1]
; SSE-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comineq_ss:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX1-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX1-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX1-NEXT:    orb %al, %cl # encoding: [0x08,0xc1]
; AVX1-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comineq_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX512-NEXT:    setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX512-NEXT:    setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX512-NEXT:    orb %al, %cl # encoding: [0x08,0xc1]
; AVX512-NEXT:    movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_cvt_ss2si(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvt_ss2si:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtss2si %xmm0, %eax # encoding: [0xf3,0x0f,0x2d,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvt_ss2si:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvtss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2d,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvt_ss2si:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2d,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone

define <4 x float> @test_mm_cvtsi32_ss(<4 x float> %a0, i32 %a1) nounwind {
; X86-SSE-LABEL: test_mm_cvtsi32_ss:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    cvtsi2ssl {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x2a,0x44,0x24,0x04]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtsi32_ss:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x2a,0x44,0x24,0x04]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtsi32_ss:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2a,0x44,0x24,0x04]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_cvtsi32_ss:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    cvtsi2ss %edi, %xmm0 # encoding: [0xf3,0x0f,0x2a,0xc7]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_cvtsi32_ss:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vcvtsi2ss %edi, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x2a,0xc7]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_cvtsi32_ss:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vcvtsi2ss %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2a,0xc7]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone

define float @test_mm_cvtss_f32(<4 x float> %a0) nounwind {
; X86-SSE-LABEL: test_mm_cvtss_f32:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %eax # encoding: [0x50]
; X86-SSE-NEXT:    movss %xmm0, (%esp) # encoding: [0xf3,0x0f,0x11,0x04,0x24]
; X86-SSE-NEXT:    flds (%esp) # encoding: [0xd9,0x04,0x24]
; X86-SSE-NEXT:    popl %eax # encoding: [0x58]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtss_f32:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    pushl %eax # encoding: [0x50]
; X86-AVX1-NEXT:    vmovss %xmm0, (%esp) # encoding: [0xc5,0xfa,0x11,0x04,0x24]
; X86-AVX1-NEXT:    flds (%esp) # encoding: [0xd9,0x04,0x24]
; X86-AVX1-NEXT:    popl %eax # encoding: [0x58]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtss_f32:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    pushl %eax # encoding: [0x50]
; X86-AVX512-NEXT:    vmovss %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x04,0x24]
; X86-AVX512-NEXT:    flds (%esp) # encoding: [0xd9,0x04,0x24]
; X86-AVX512-NEXT:    popl %eax # encoding: [0x58]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm_cvtss_f32:
; X64:       # %bb.0:
; X64-NEXT:    retq # encoding: [0xc3]
  %res = extractelement <4 x float> %a0, i32 0
  ret float %res
}

define i32 @test_mm_cvtss_si32(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvtss_si32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtss2si %xmm0, %eax # encoding: [0xf3,0x0f,0x2d,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtss_si32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvtss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2d,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtss_si32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2d,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
  ret i32 %res
}

define i32 @test_mm_cvttss_si(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvttss_si:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttss2si %xmm0, %eax # encoding: [0xf3,0x0f,0x2c,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvttss_si:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvttss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2c,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvttss_si:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvttss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2c,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone

define i32 @test_mm_cvttss_si32(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvttss_si32:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttss2si %xmm0, %eax # encoding: [0xf3,0x0f,0x2c,0xc0]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvttss_si32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcvttss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2c,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvttss_si32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvttss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2c,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
  ret i32 %res
}

define <4 x float> @test_mm_div_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_div_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    divps %xmm1, %xmm0 # encoding: [0x0f,0x5e,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_div_ps:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vdivps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5e,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_div_ps:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vdivps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5e,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = fdiv <4 x float> %a0, %a1
  ret <4 x float> %res
}

define <4 x float> @test_mm_div_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_div_ss:
; SSE:       # %bb.0:
; SSE-NEXT:    divss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5e,0xc1]
; SSE-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_div_ss:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vdivss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5e,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_div_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vdivss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5e,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %ext0 = extractelement <4 x float> %a0, i32 0
  %ext1 = extractelement <4 x float> %a1, i32 0
  %fdiv = fdiv float %ext0, %ext1
  %res = insertelement <4 x float> %a0, float %fdiv, i32 0
  ret <4 x float> %res
}
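
; The _MM_GET_* tests below all store MXCSR to the stack with (v)stmxcsr and
; then mask out a single field; the constants follow the standard MXCSR layout:
; 0x3F = exception state flags, 0x1F80 = exception mask bits, 0x6000 = rounding
; control, 0x8000 = flush-to-zero.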
define i32 @test_MM_GET_EXCEPTION_MASK() nounwind {
; X86-SSE-LABEL: test_MM_GET_EXCEPTION_MASK:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %eax # encoding: [0x50]
; X86-SSE-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0]
; X86-SSE-NEXT:    stmxcsr (%eax) # encoding: [0x0f,0xae,0x18]
; X86-SSE-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-SSE-NEXT:    andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00]
; X86-SSE-NEXT:    # imm = 0x1F80
; X86-SSE-NEXT:    popl %ecx # encoding: [0x59]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_MM_GET_EXCEPTION_MASK:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %eax # encoding: [0x50]
; X86-AVX-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0]
; X86-AVX-NEXT:    vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18]
; X86-AVX-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-AVX-NEXT:    andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00]
; X86-AVX-NEXT:    # imm = 0x1F80
; X86-AVX-NEXT:    popl %ecx # encoding: [0x59]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_GET_EXCEPTION_MASK:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT:    stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
; X64-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-SSE-NEXT:    andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00]
; X64-SSE-NEXT:    # imm = 0x1F80
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_MM_GET_EXCEPTION_MASK:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT:    vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
; X64-AVX-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-AVX-NEXT:    andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00]
; X64-AVX-NEXT:    # imm = 0x1F80
; X64-AVX-NEXT:    retq # encoding: [0xc3]
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1, align 4
  %4 = and i32 %3, 8064
  ret i32 %4
}
declare void @llvm.x86.sse.stmxcsr(i8*) nounwind readnone

define i32 @test_MM_GET_EXCEPTION_STATE() nounwind {
; X86-SSE-LABEL: test_MM_GET_EXCEPTION_STATE:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %eax # encoding: [0x50]
; X86-SSE-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0]
; X86-SSE-NEXT:    stmxcsr (%eax) # encoding: [0x0f,0xae,0x18]
; X86-SSE-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-SSE-NEXT:    andl $63, %eax # encoding: [0x83,0xe0,0x3f]
; X86-SSE-NEXT:    popl %ecx # encoding: [0x59]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_MM_GET_EXCEPTION_STATE:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %eax # encoding: [0x50]
; X86-AVX-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0]
; X86-AVX-NEXT:    vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18]
; X86-AVX-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-AVX-NEXT:    andl $63, %eax # encoding: [0x83,0xe0,0x3f]
; X86-AVX-NEXT:    popl %ecx # encoding: [0x59]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_GET_EXCEPTION_STATE:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT:    stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
; X64-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-SSE-NEXT:    andl $63, %eax # encoding: [0x83,0xe0,0x3f]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_MM_GET_EXCEPTION_STATE:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT:    vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
; X64-AVX-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-AVX-NEXT:    andl $63, %eax # encoding: [0x83,0xe0,0x3f]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1, align 4
  %4 = and i32 %3, 63
  ret i32 %4
}

define i32 @test_MM_GET_FLUSH_ZERO_MODE() nounwind {
; X86-SSE-LABEL: test_MM_GET_FLUSH_ZERO_MODE:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %eax # encoding: [0x50]
; X86-SSE-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0]
; X86-SSE-NEXT:    stmxcsr (%eax) # encoding: [0x0f,0xae,0x18]
; X86-SSE-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-SSE-NEXT:    andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
; X86-SSE-NEXT:    # imm = 0x8000
; X86-SSE-NEXT:    popl %ecx # encoding: [0x59]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_MM_GET_FLUSH_ZERO_MODE:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %eax # encoding: [0x50]
; X86-AVX-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0]
; X86-AVX-NEXT:    vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18]
; X86-AVX-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-AVX-NEXT:    andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
; X86-AVX-NEXT:    # imm = 0x8000
; X86-AVX-NEXT:    popl %ecx # encoding: [0x59]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_GET_FLUSH_ZERO_MODE:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT:    stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
; X64-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-SSE-NEXT:    andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
; X64-SSE-NEXT:    # imm = 0x8000
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_MM_GET_FLUSH_ZERO_MODE:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT:    vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
; X64-AVX-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-AVX-NEXT:    andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
; X64-AVX-NEXT:    # imm = 0x8000
; X64-AVX-NEXT:    retq # encoding: [0xc3]
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1, align 4
  %4 = and i32 %3, 32768
  ret i32 %4
}

define i32 @test_MM_GET_ROUNDING_MODE() nounwind {
; X86-SSE-LABEL: test_MM_GET_ROUNDING_MODE:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %eax # encoding: [0x50]
; X86-SSE-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0]
; X86-SSE-NEXT:    stmxcsr (%eax) # encoding: [0x0f,0xae,0x18]
; X86-SSE-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-SSE-NEXT:    andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00]
; X86-SSE-NEXT:    # imm = 0x6000
; X86-SSE-NEXT:    popl %ecx # encoding: [0x59]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_MM_GET_ROUNDING_MODE:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %eax # encoding: [0x50]
; X86-AVX-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0]
; X86-AVX-NEXT:    vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18]
; X86-AVX-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-AVX-NEXT:    andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00]
; X86-AVX-NEXT:    # imm = 0x6000
; X86-AVX-NEXT:    popl %ecx # encoding: [0x59]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_GET_ROUNDING_MODE:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT:    stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
; X64-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-SSE-NEXT:    andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00]
; X64-SSE-NEXT:    # imm = 0x6000
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_MM_GET_ROUNDING_MODE:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT:    vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
; X64-AVX-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-AVX-NEXT:    andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00]
; X64-AVX-NEXT:    # imm = 0x6000
; X64-AVX-NEXT:    retq # encoding: [0xc3]
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1, align 4
  %4 = and i32 %3, 24576
  ret i32 %4
}

define i32 @test_mm_getcsr() nounwind {
; X86-SSE-LABEL: test_mm_getcsr:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %eax # encoding: [0x50]
; X86-SSE-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0]
; X86-SSE-NEXT:    stmxcsr (%eax) # encoding: [0x0f,0xae,0x18]
; X86-SSE-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-SSE-NEXT:    popl %ecx # encoding: [0x59]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_mm_getcsr:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %eax # encoding: [0x50]
; X86-AVX-NEXT:    movl %esp, %eax # encoding: [0x89,0xe0]
; X86-AVX-NEXT:    vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18]
; X86-AVX-NEXT:    movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-AVX-NEXT:    popl %ecx # encoding: [0x59]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_getcsr:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT:    stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
; X64-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_mm_getcsr:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT:    vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
; X64-AVX-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1, align 4
  ret i32 %3
}

define <4 x float> @test_mm_load_ps(float* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load_ps:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load_ps:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovaps (%eax), %xmm0 # encoding: [0xc5,0xf8,0x28,0x00]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load_ps:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load_ps:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load_ps:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load_ps:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %arg0 = bitcast float* %a0 to <4 x float>*
  %res = load <4 x float>, <4 x float>* %arg0, align 16
  ret <4 x float> %res
}

define <4 x float> @test_mm_load_ps1(float* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load_ps1:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movss (%eax), %xmm0 # encoding: [0xf3,0x0f,0x10,0x00]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X86-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load_ps1:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vbroadcastss (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x00]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load_ps1:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vbroadcastss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x00]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load_ps1:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss (%rdi), %xmm0 # encoding: [0xf3,0x0f,0x10,0x07]
; X64-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X64-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load_ps1:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vbroadcastss (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load_ps1:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vbroadcastss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %ld = load float, float* %a0, align 4
  %res0 = insertelement <4 x float> undef, float %ld, i32 0
  %res1 = insertelement <4 x float> %res0, float %ld, i32 1
  %res2 = insertelement <4 x float> %res1, float %ld, i32 2
  %res3 = insertelement <4 x float> %res2, float %ld, i32 3
  ret <4 x float> %res3
}

define <4 x float> @test_mm_load_ss(float* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load_ss:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movss (%eax), %xmm0 # encoding: [0xf3,0x0f,0x10,0x00]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load_ss:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovss (%eax), %xmm0 # encoding: [0xc5,0xfa,0x10,0x00]
; X86-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load_ss:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x00]
; X86-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load_ss:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss (%rdi), %xmm0 # encoding: [0xf3,0x0f,0x10,0x07]
; X64-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load_ss:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07]
; X64-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load_ss:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; X64-AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %ld = load float, float* %a0, align 1
  %res0 = insertelement <4 x float> undef, float %ld, i32 0
  %res1 = insertelement <4 x float> %res0, float 0.0, i32 1
  %res2 = insertelement <4 x float> %res1, float 0.0, i32 2
  %res3 = insertelement <4 x float> %res2, float 0.0, i32 3
  ret <4 x float> %res3
}

define <4 x float> @test_mm_load1_ps(float* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load1_ps:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movss (%eax), %xmm0 # encoding: [0xf3,0x0f,0x10,0x00]
; X86-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X86-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load1_ps:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vbroadcastss (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x00]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load1_ps:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vbroadcastss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x00]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load1_ps:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss (%rdi), %xmm0 # encoding: [0xf3,0x0f,0x10,0x07]
; X64-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X64-SSE-NEXT:    # xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load1_ps:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vbroadcastss (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x07]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load1_ps:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vbroadcastss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x07]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %ld = load float, float* %a0, align 4
  %res0 = insertelement <4 x float> undef, float %ld, i32 0
  %res1 = insertelement <4 x float> %res0, float %ld, i32 1
  %res2 = insertelement <4 x float> %res1, float %ld, i32 2
  %res3 = insertelement <4 x float> %res2, float %ld, i32 3
  ret <4 x float> %res3
}

define <4 x float> @test_mm_loadh_pi(<4 x float> %a0, x86_mmx* %a1) {
; X86-SSE-LABEL: test_mm_loadh_pi:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movhps (%eax), %xmm0 # encoding: [0x0f,0x16,0x00]
; X86-SSE-NEXT:    # xmm0 = xmm0[0,1],mem[0,1]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadh_pi:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovhps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0x00]
; X86-AVX1-NEXT:    # xmm0 = xmm0[0,1],mem[0,1]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadh_pi:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovhps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0x00]
; X86-AVX512-NEXT:    # xmm0 = xmm0[0,1],mem[0,1]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadh_pi:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movhps (%rdi), %xmm0 # encoding: [0x0f,0x16,0x07]
; X64-SSE-NEXT:    # xmm0 = xmm0[0,1],mem[0,1]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadh_pi:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovhps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0x07]
; X64-AVX1-NEXT:    # xmm0 = xmm0[0,1],mem[0,1]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadh_pi:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovhps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0x07]
; X64-AVX512-NEXT:    # xmm0 = xmm0[0,1],mem[0,1]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %ptr = bitcast x86_mmx* %a1 to <2 x float>*
  %ld = load <2 x float>, <2 x float>* %ptr
  %ext = shufflevector <2 x float> %ld, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %res = shufflevector <4 x float> %a0, <4 x float> %ext, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x float> %res
}

define <4 x float> @test_mm_loadl_pi(<4 x float> %a0, x86_mmx* %a1) {
; X86-SSE-LABEL: test_mm_loadl_pi:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movlps (%eax), %xmm0 # encoding: [0x0f,0x12,0x00]
; X86-SSE-NEXT:    # xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadl_pi:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovlps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x12,0x00]
; X86-AVX1-NEXT:    # xmm0 = mem[0,1],xmm0[2,3]
; X86-AVX1-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadl_pi:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovlps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x12,0x00]
; X86-AVX512-NEXT:    # xmm0 = mem[0,1],xmm0[2,3]
; X86-AVX512-NEXT:    retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadl_pi:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movlps (%rdi), %xmm0 # encoding: [0x0f,0x12,0x07]
; X64-SSE-NEXT:    # xmm0 = mem[0,1],xmm0[2,3]
; X64-SSE-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadl_pi:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovlps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x12,0x07]
; X64-AVX1-NEXT:    # xmm0 = mem[0,1],xmm0[2,3]
; X64-AVX1-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadl_pi:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovlps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x12,0x07]
; X64-AVX512-NEXT:    # xmm0 = mem[0,1],xmm0[2,3]
; X64-AVX512-NEXT:    retq # encoding: [0xc3]
  %ptr = bitcast x86_mmx* %a1 to <2 x float>*
  %ld = load <2 x float>, <2 x float>* %ptr
  %ext = shufflevector <2 x float> %ld, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %res = shufflevector <4 x float>
%a0, <4 x float> %ext, <4 x i32> <i32 4, i32 5, i32 2, i32 3> 1428 ret <4 x float> %res 1429} 1430 1431define <4 x float> @test_mm_loadr_ps(float* %a0) nounwind { 1432; X86-SSE-LABEL: test_mm_loadr_ps: 1433; X86-SSE: # %bb.0: 1434; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1435; X86-SSE-NEXT: movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00] 1436; X86-SSE-NEXT: shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b] 1437; X86-SSE-NEXT: # xmm0 = xmm0[3,2,1,0] 1438; X86-SSE-NEXT: retl # encoding: [0xc3] 1439; 1440; X86-AVX1-LABEL: test_mm_loadr_ps: 1441; X86-AVX1: # %bb.0: 1442; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1443; X86-AVX1-NEXT: vpermilps $27, (%eax), %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0x00,0x1b] 1444; X86-AVX1-NEXT: # xmm0 = mem[3,2,1,0] 1445; X86-AVX1-NEXT: retl # encoding: [0xc3] 1446; 1447; X86-AVX512-LABEL: test_mm_loadr_ps: 1448; X86-AVX512: # %bb.0: 1449; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1450; X86-AVX512-NEXT: vpermilps $27, (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0x00,0x1b] 1451; X86-AVX512-NEXT: # xmm0 = mem[3,2,1,0] 1452; X86-AVX512-NEXT: retl # encoding: [0xc3] 1453; 1454; X64-SSE-LABEL: test_mm_loadr_ps: 1455; X64-SSE: # %bb.0: 1456; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07] 1457; X64-SSE-NEXT: shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b] 1458; X64-SSE-NEXT: # xmm0 = xmm0[3,2,1,0] 1459; X64-SSE-NEXT: retq # encoding: [0xc3] 1460; 1461; X64-AVX1-LABEL: test_mm_loadr_ps: 1462; X64-AVX1: # %bb.0: 1463; X64-AVX1-NEXT: vpermilps $27, (%rdi), %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0x07,0x1b] 1464; X64-AVX1-NEXT: # xmm0 = mem[3,2,1,0] 1465; X64-AVX1-NEXT: retq # encoding: [0xc3] 1466; 1467; X64-AVX512-LABEL: test_mm_loadr_ps: 1468; X64-AVX512: # %bb.0: 1469; X64-AVX512-NEXT: vpermilps $27, (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0x07,0x1b] 1470; X64-AVX512-NEXT: # xmm0 = mem[3,2,1,0] 1471; X64-AVX512-NEXT: retq # encoding: [0xc3] 1472 %arg0 = bitcast float* %a0 to <4 x float>* 1473 %ld = load <4 x float>, <4 x float>* %arg0, align 16 1474 %res = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1475 ret <4 x float> %res 1476} 1477 1478define <4 x float> @test_mm_loadu_ps(float* %a0) nounwind { 1479; X86-SSE-LABEL: test_mm_loadu_ps: 1480; X86-SSE: # %bb.0: 1481; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1482; X86-SSE-NEXT: movups (%eax), %xmm0 # encoding: [0x0f,0x10,0x00] 1483; X86-SSE-NEXT: retl # encoding: [0xc3] 1484; 1485; X86-AVX1-LABEL: test_mm_loadu_ps: 1486; X86-AVX1: # %bb.0: 1487; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1488; X86-AVX1-NEXT: vmovups (%eax), %xmm0 # encoding: [0xc5,0xf8,0x10,0x00] 1489; X86-AVX1-NEXT: retl # encoding: [0xc3] 1490; 1491; X86-AVX512-LABEL: test_mm_loadu_ps: 1492; X86-AVX512: # %bb.0: 1493; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1494; X86-AVX512-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00] 1495; X86-AVX512-NEXT: retl # encoding: [0xc3] 1496; 1497; X64-SSE-LABEL: test_mm_loadu_ps: 1498; X64-SSE: # %bb.0: 1499; X64-SSE-NEXT: movups (%rdi), %xmm0 # encoding: [0x0f,0x10,0x07] 1500; X64-SSE-NEXT: retq # encoding: [0xc3] 1501; 1502; X64-AVX1-LABEL: test_mm_loadu_ps: 1503; X64-AVX1: # %bb.0: 1504; X64-AVX1-NEXT: vmovups (%rdi), %xmm0 # 
encoding: [0xc5,0xf8,0x10,0x07] 1505; X64-AVX1-NEXT: retq # encoding: [0xc3] 1506; 1507; X64-AVX512-LABEL: test_mm_loadu_ps: 1508; X64-AVX512: # %bb.0: 1509; X64-AVX512-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07] 1510; X64-AVX512-NEXT: retq # encoding: [0xc3] 1511 %arg0 = bitcast float* %a0 to <4 x float>* 1512 %res = load <4 x float>, <4 x float>* %arg0, align 1 1513 ret <4 x float> %res 1514} 1515 1516define <4 x float> @test_mm_max_ps(<4 x float> %a0, <4 x float> %a1) { 1517; SSE-LABEL: test_mm_max_ps: 1518; SSE: # %bb.0: 1519; SSE-NEXT: maxps %xmm1, %xmm0 # encoding: [0x0f,0x5f,0xc1] 1520; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1521; 1522; AVX1-LABEL: test_mm_max_ps: 1523; AVX1: # %bb.0: 1524; AVX1-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5f,0xc1] 1525; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1526; 1527; AVX512-LABEL: test_mm_max_ps: 1528; AVX512: # %bb.0: 1529; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5f,0xc1] 1530; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1531 %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) 1532 ret <4 x float> %res 1533} 1534declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone 1535 1536define <4 x float> @test_mm_max_ss(<4 x float> %a0, <4 x float> %a1) { 1537; SSE-LABEL: test_mm_max_ss: 1538; SSE: # %bb.0: 1539; SSE-NEXT: maxss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5f,0xc1] 1540; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1541; 1542; AVX1-LABEL: test_mm_max_ss: 1543; AVX1: # %bb.0: 1544; AVX1-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5f,0xc1] 1545; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1546; 1547; AVX512-LABEL: test_mm_max_ss: 1548; AVX512: # %bb.0: 1549; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5f,0xc1] 1550; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1551 %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) 1552 ret <4 x float> %res 1553} 1554declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone 1555 1556define <4 x float> @test_mm_min_ps(<4 x float> %a0, <4 x float> %a1) { 1557; SSE-LABEL: test_mm_min_ps: 1558; SSE: # %bb.0: 1559; SSE-NEXT: minps %xmm1, %xmm0 # encoding: [0x0f,0x5d,0xc1] 1560; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1561; 1562; AVX1-LABEL: test_mm_min_ps: 1563; AVX1: # %bb.0: 1564; AVX1-NEXT: vminps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5d,0xc1] 1565; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1566; 1567; AVX512-LABEL: test_mm_min_ps: 1568; AVX512: # %bb.0: 1569; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5d,0xc1] 1570; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1571 %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) 1572 ret <4 x float> %res 1573} 1574declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone 1575 1576define <4 x float> @test_mm_min_ss(<4 x float> %a0, <4 x float> %a1) { 1577; SSE-LABEL: test_mm_min_ss: 1578; SSE: # %bb.0: 1579; SSE-NEXT: minss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5d,0xc1] 1580; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1581; 1582; AVX1-LABEL: test_mm_min_ss: 1583; AVX1: # %bb.0: 1584; AVX1-NEXT: vminss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5d,0xc1] 1585; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1586; 1587; AVX512-LABEL: test_mm_min_ss: 1588; AVX512: # %bb.0: 1589; AVX512-NEXT: vminss %xmm1, 
%xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5d,0xc1] 1590; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1591 %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) 1592 ret <4 x float> %res 1593} 1594declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone 1595 1596define <4 x float> @test_mm_move_ss(<4 x float> %a0, <4 x float> %a1) { 1597; SSE-LABEL: test_mm_move_ss: 1598; SSE: # %bb.0: 1599; SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1] 1600; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3] 1601; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1602; 1603; AVX-LABEL: test_mm_move_ss: 1604; AVX: # %bb.0: 1605; AVX-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01] 1606; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3] 1607; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1608 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 4, i32 1, i32 2, i32 3> 1609 ret <4 x float> %res 1610} 1611 1612define <4 x float> @test_mm_movehl_ps(<4 x float> %a0, <4 x float> %a1) { 1613; SSE-LABEL: test_mm_movehl_ps: 1614; SSE: # %bb.0: 1615; SSE-NEXT: movhlps %xmm1, %xmm0 # encoding: [0x0f,0x12,0xc1] 1616; SSE-NEXT: # xmm0 = xmm1[1],xmm0[1] 1617; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1618; 1619; AVX1-LABEL: test_mm_movehl_ps: 1620; AVX1: # %bb.0: 1621; AVX1-NEXT: vunpckhpd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x15,0xc0] 1622; AVX1-NEXT: # xmm0 = xmm1[1],xmm0[1] 1623; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1624; 1625; AVX512-LABEL: test_mm_movehl_ps: 1626; AVX512: # %bb.0: 1627; AVX512-NEXT: vunpckhpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x15,0xc0] 1628; AVX512-NEXT: # xmm0 = xmm1[1],xmm0[1] 1629; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1630 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 6, i32 7, i32 2, i32 3> 1631 ret <4 x float> %res 1632} 1633 1634define <4 x float> @test_mm_movelh_ps(<4 x float> %a0, <4 x float> %a1) { 1635; SSE-LABEL: test_mm_movelh_ps: 1636; SSE: # %bb.0: 1637; SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 1638; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 1639; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1640; 1641; AVX1-LABEL: test_mm_movelh_ps: 1642; AVX1: # %bb.0: 1643; AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1] 1644; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0] 1645; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1646; 1647; AVX512-LABEL: test_mm_movelh_ps: 1648; AVX512: # %bb.0: 1649; AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1] 1650; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0] 1651; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1652 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1653 ret <4 x float> %res 1654} 1655 1656define i32 @test_mm_movemask_ps(<4 x float> %a0) nounwind { 1657; SSE-LABEL: test_mm_movemask_ps: 1658; SSE: # %bb.0: 1659; SSE-NEXT: movmskps %xmm0, %eax # encoding: [0x0f,0x50,0xc0] 1660; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1661; 1662; AVX-LABEL: test_mm_movemask_ps: 1663; AVX: # %bb.0: 1664; AVX-NEXT: vmovmskps %xmm0, %eax # encoding: [0xc5,0xf8,0x50,0xc0] 1665; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1666 %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) 1667 ret i32 %res 1668} 1669declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone 1670 1671define <4 x float> @test_mm_mul_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 1672; SSE-LABEL: 
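; NOTE: llvm.prefetch is called below with rw=0 (read) and locality=0, which selects the non-temporal hint (prefetchnta).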
define void @test_mm_prefetch(i8* %a0) {
; X86-LABEL: test_mm_prefetch:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: prefetchnta (%eax) # encoding: [0x0f,0x18,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mm_prefetch:
; X64: # %bb.0:
; X64-NEXT: prefetchnta (%rdi) # encoding: [0x0f,0x18,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.prefetch(i8* %a0, i32 0, i32 0, i32 1)
  ret void
}
declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) nounwind readnone

define <4 x float> @test_mm_rcp_ps(<4 x float> %a0) {
; SSE-LABEL: test_mm_rcp_ps:
; SSE: # %bb.0:
; SSE-NEXT: rcpps %xmm0, %xmm0 # encoding: [0x0f,0x53,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_rcp_ps:
; AVX: # %bb.0:
; AVX-NEXT: vrcpps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x53,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone

define <4 x float> @test_mm_rcp_ss(<4 x float> %a0) {
; SSE-LABEL: test_mm_rcp_ss:
; SSE: # %bb.0:
; SSE-NEXT: rcpss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x53,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_rcp_ss:
; AVX: # %bb.0:
; AVX-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x53,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %rcp = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0)
  ret <4 x float> %rcp
}
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone

define <4 x float> @test_mm_rsqrt_ps(<4 x float> %a0) {
; SSE-LABEL: test_mm_rsqrt_ps:
; SSE: # %bb.0:
; SSE-NEXT: rsqrtps %xmm0, %xmm0 # encoding: [0x0f,0x52,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_rsqrt_ps:
; AVX: # %bb.0:
; AVX-NEXT: vrsqrtps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x52,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone

define <4 x float> @test_mm_rsqrt_ss(<4 x float> %a0) {
; SSE-LABEL: test_mm_rsqrt_ss:
; SSE: # %bb.0:
; SSE-NEXT: rsqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x52,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_rsqrt_ss:
; AVX: # %bb.0:
; AVX-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x52,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %rsqrt = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0)
  ret <4 x float> %rsqrt
}
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone

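; NOTE: _MM_SET_EXCEPTION_MASK targets the MXCSR exception mask bits 7..12 (0x1F80); the andl $-8065 below is ~0x1F80, clearing them before the new mask is or'd in.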
define void @test_MM_SET_EXCEPTION_MASK(i32 %a0) nounwind {
; X86-SSE-LABEL: test_MM_SET_EXCEPTION_MASK:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19]
; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
; X86-SSE-NEXT: andl $-8065, %edx # encoding: [0x81,0xe2,0x7f,0xe0,0xff,0xff]
; X86-SSE-NEXT: # imm = 0xFFFFE07F
; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11]
; X86-SSE-NEXT: popl %eax # encoding: [0x58]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_MM_SET_EXCEPTION_MASK:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19]
; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
; X86-AVX-NEXT: andl $-8065, %edx # encoding: [0x81,0xe2,0x7f,0xe0,0xff,0xff]
; X86-AVX-NEXT: # imm = 0xFFFFE07F
; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11]
; X86-AVX-NEXT: popl %eax # encoding: [0x58]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_SET_EXCEPTION_MASK:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
; X64-SSE-NEXT: andl $-8065, %ecx # encoding: [0x81,0xe1,0x7f,0xe0,0xff,0xff]
; X64-SSE-NEXT: # imm = 0xFFFFE07F
; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_MM_SET_EXCEPTION_MASK:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
; X64-AVX-NEXT: andl $-8065, %ecx # encoding: [0x81,0xe1,0x7f,0xe0,0xff,0xff]
; X64-AVX-NEXT: # imm = 0xFFFFE07F
; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10]
; X64-AVX-NEXT: retq # encoding: [0xc3]
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1
  %4 = and i32 %3, -8065
  %5 = or i32 %4, %a0
  store i32 %5, i32* %1
  call void @llvm.x86.sse.ldmxcsr(i8* %2)
  ret void
}
declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind readnone

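; NOTE: _MM_SET_EXCEPTION_STATE targets the MXCSR exception status flags, bits 0..5 (0x3F); the andl $-64 below is ~0x3F, clearing them before the new state is or'd in.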
define void @test_MM_SET_EXCEPTION_STATE(i32 %a0) nounwind {
; X86-SSE-LABEL: test_MM_SET_EXCEPTION_STATE:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19]
; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
; X86-SSE-NEXT: andl $-64, %edx # encoding: [0x83,0xe2,0xc0]
; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11]
; X86-SSE-NEXT: popl %eax # encoding: [0x58]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_MM_SET_EXCEPTION_STATE:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19]
; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
; X86-AVX-NEXT: andl $-64, %edx # encoding: [0x83,0xe2,0xc0]
; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11]
; X86-AVX-NEXT: popl %eax # encoding: [0x58]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_SET_EXCEPTION_STATE:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
; X64-SSE-NEXT: andl $-64, %ecx # encoding: [0x83,0xe1,0xc0]
; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_MM_SET_EXCEPTION_STATE:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
; X64-AVX-NEXT: andl $-64, %ecx # encoding: [0x83,0xe1,0xc0]
; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10]
; X64-AVX-NEXT: retq # encoding: [0xc3]
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1
  %4 = and i32 %3, -64
  %5 = or i32 %4, %a0
  store i32 %5, i32* %1
  call void @llvm.x86.sse.ldmxcsr(i8* %2)
  ret void
}

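; NOTE: _MM_SET_FLUSH_ZERO_MODE targets the MXCSR flush-to-zero control, bit 15 (0x8000); the andl $-32769 below is ~0x8000, clearing it before the new mode is or'd in.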
define void @test_MM_SET_FLUSH_ZERO_MODE(i32 %a0) nounwind {
; X86-SSE-LABEL: test_MM_SET_FLUSH_ZERO_MODE:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19]
; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
; X86-SSE-NEXT: andl $-32769, %edx # encoding: [0x81,0xe2,0xff,0x7f,0xff,0xff]
; X86-SSE-NEXT: # imm = 0xFFFF7FFF
; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11]
; X86-SSE-NEXT: popl %eax # encoding: [0x58]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_MM_SET_FLUSH_ZERO_MODE:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19]
; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
; X86-AVX-NEXT: andl $-32769, %edx # encoding: [0x81,0xe2,0xff,0x7f,0xff,0xff]
; X86-AVX-NEXT: # imm = 0xFFFF7FFF
; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11]
; X86-AVX-NEXT: popl %eax # encoding: [0x58]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_SET_FLUSH_ZERO_MODE:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
; X64-SSE-NEXT: andl $-32769, %ecx # encoding: [0x81,0xe1,0xff,0x7f,0xff,0xff]
; X64-SSE-NEXT: # imm = 0xFFFF7FFF
; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_MM_SET_FLUSH_ZERO_MODE:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
; X64-AVX-NEXT: andl $-32769, %ecx # encoding: [0x81,0xe1,0xff,0x7f,0xff,0xff]
; X64-AVX-NEXT: # imm = 0xFFFF7FFF
; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10]
; X64-AVX-NEXT: retq # encoding: [0xc3]
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1
  %4 = and i32 %3, -32769
  %5 = or i32 %4, %a0
  store i32 %5, i32* %1
  call void @llvm.x86.sse.ldmxcsr(i8* %2)
  ret void
}

define <4 x float> @test_mm_set_ps(float %a0, float %a1, float %a2, float %a3) nounwind {
; X86-SSE-LABEL: test_mm_set_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c]
; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x08]
; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x04]
; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero
; X86-SSE-NEXT: unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca]
; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c]
; X86-AVX1-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10]
; X86-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x08]
; X86-AVX1-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vinsertps $32, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x20]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x04]
; X86-AVX1-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vinsertps $48, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x30]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,1,2],xmm1[0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c]
; X86-AVX512-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10]
; X86-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x08]
; X86-AVX512-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vinsertps $32, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x20]
; X86-AVX512-NEXT: # xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x04]
; X86-AVX512-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vinsertps $48, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x30]
; X86-AVX512-NEXT: # xmm0 = xmm0[0,1,2],xmm1[0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8]
; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-SSE-NEXT: unpcklps %xmm2, %xmm3 # encoding: [0x0f,0x14,0xda]
; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X64-SSE-NEXT: movlhps %xmm1, %xmm3 # encoding: [0x0f,0x16,0xd9]
; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm1[0]
; X64-SSE-NEXT: movaps %xmm3, %xmm0 # encoding: [0x0f,0x28,0xc3]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vinsertps $16, %xmm2, %xmm3, %xmm2 # encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10]
; X64-AVX1-NEXT: # xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; X64-AVX1-NEXT: vinsertps $32, %xmm1, %xmm2, %xmm1 # encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20]
; X64-AVX1-NEXT: # xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
; X64-AVX1-NEXT: vinsertps $48, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30]
; X64-AVX1-NEXT: # xmm0 = xmm1[0,1,2],xmm0[0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set_ps:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vinsertps $16, %xmm2, %xmm3, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10]
; X64-AVX512-NEXT: # xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; X64-AVX512-NEXT: vinsertps $32, %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20]
; X64-AVX512-NEXT: # xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
; X64-AVX512-NEXT: vinsertps $48, %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30]
; X64-AVX512-NEXT: # xmm0 = xmm1[0,1,2],xmm0[0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <4 x float> undef, float %a3, i32 0
  %res1 = insertelement <4 x float> %res0, float %a2, i32 1
  %res2 = insertelement <4 x float> %res1, float %a1, i32 2
  %res3 = insertelement <4 x float> %res2, float %a0, i32 3
  ret <4 x float> %res3
}

define <4 x float> @test_mm_set_ps1(float %a0) nounwind {
; X86-SSE-LABEL: test_mm_set_ps1:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set_ps1:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set_ps1:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set_ps1:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set_ps1:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set_ps1:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <4 x float> undef, float %a0, i32 0
  %res1 = insertelement <4 x float> %res0, float %a0, i32 1
  %res2 = insertelement <4 x float> %res1, float %a0, i32 2
  %res3 = insertelement <4 x float> %res2, float %a0, i32 3
  ret <4 x float> %res3
}

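; NOTE: _MM_SET_ROUNDING_MODE targets the MXCSR rounding-control field, bits 13..14 (0x6000); the andl $-24577 below is ~0x6000, clearing it before the new mode is or'd in.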
define void @test_MM_SET_ROUNDING_MODE(i32 %a0) nounwind {
; X86-SSE-LABEL: test_MM_SET_ROUNDING_MODE:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19]
; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
; X86-SSE-NEXT: andl $-24577, %edx # encoding: [0x81,0xe2,0xff,0x9f,0xff,0xff]
; X86-SSE-NEXT: # imm = 0xFFFF9FFF
; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11]
; X86-SSE-NEXT: popl %eax # encoding: [0x58]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_MM_SET_ROUNDING_MODE:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19]
; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
; X86-AVX-NEXT: andl $-24577, %edx # encoding: [0x81,0xe2,0xff,0x9f,0xff,0xff]
; X86-AVX-NEXT: # imm = 0xFFFF9FFF
; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11]
; X86-AVX-NEXT: popl %eax # encoding: [0x58]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_SET_ROUNDING_MODE:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
; X64-SSE-NEXT: andl $-24577, %ecx # encoding: [0x81,0xe1,0xff,0x9f,0xff,0xff]
; X64-SSE-NEXT: # imm = 0xFFFF9FFF
; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_MM_SET_ROUNDING_MODE:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
; X64-AVX-NEXT: andl $-24577, %ecx # encoding: [0x81,0xe1,0xff,0x9f,0xff,0xff]
; X64-AVX-NEXT: # imm = 0xFFFF9FFF
; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10]
; X64-AVX-NEXT: retq # encoding: [0xc3]
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1
  %4 = and i32 %3, -24577
  %5 = or i32 %4, %a0
  store i32 %5, i32* %1
  call void @llvm.x86.sse.ldmxcsr(i8* %2)
  ret void
}

define <4 x float> @test_mm_set_ss(float %a0) nounwind {
; X86-SSE-LABEL: test_mm_set_ss:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x04]
; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
; X86-SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1]
; X86-SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set_ss:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9]
; X86-AVX1-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01]
; X86-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set_ss:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9]
; X86-AVX512-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01]
; X86-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set_ss:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: xorps %xmm1, %xmm1 # encoding: [0x0f,0x57,0xc9]
; X64-SSE-NEXT: movss %xmm0, %xmm1 # encoding: [0xf3,0x0f,0x10,0xc8]
; X64-SSE-NEXT: # xmm1 = xmm0[0],xmm1[1,2,3]
; X64-SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_mm_set_ss:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9]
; X64-AVX-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01]
; X64-AVX-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3]
; X64-AVX-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <4 x float> undef, float %a0, i32 0
  %res1 = insertelement <4 x float> %res0, float 0.0, i32 1
  %res2 = insertelement <4 x float> %res1, float 0.0, i32 2
  %res3 = insertelement <4 x float> %res2, float 0.0, i32 3
  ret <4 x float> %res3
}

define <4 x float> @test_mm_set1_ps(float %a0) nounwind {
; X86-SSE-LABEL: test_mm_set1_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set1_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set1_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set1_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set1_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set1_ps:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <4 x float> undef, float %a0, i32 0
  %res1 = insertelement <4 x float> %res0, float %a0, i32 1
  %res2 = insertelement <4 x float> %res1, float %a0, i32 2
  %res3 = insertelement <4 x float> %res2, float %a0, i32 3
  ret <4 x float> %res3
}

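; NOTE: _mm_setcsr stores its argument to a stack slot so that ldmxcsr has a memory operand to load the new MXCSR value from.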
define void @test_mm_setcsr(i32 %a0) nounwind {
; X86-SSE-LABEL: test_mm_setcsr:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax # encoding: [0x8d,0x44,0x24,0x04]
; X86-SSE-NEXT: ldmxcsr (%eax) # encoding: [0x0f,0xae,0x10]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_mm_setcsr:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: leal {{[0-9]+}}(%esp), %eax # encoding: [0x8d,0x44,0x24,0x04]
; X86-AVX-NEXT: vldmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x10]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_setcsr:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x7c,0x24,0xfc]
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_mm_setcsr:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x7c,0x24,0xfc]
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10]
; X64-AVX-NEXT: retq # encoding: [0xc3]
  %st = alloca i32, align 4
  store i32 %a0, i32* %st, align 4
  %bc = bitcast i32* %st to i8*
  call void @llvm.x86.sse.ldmxcsr(i8* %bc)
  ret void
}

define <4 x float> @test_mm_setr_ps(float %a0, float %a1, float %a2, float %a3) nounwind {
; X86-SSE-LABEL: test_mm_setr_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c]
; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8]
; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x08]
; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_setr_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c]
; X86-AVX1-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xc5,0xfa,0x10,0x54,0x24,0x08]
; X86-AVX1-NEXT: # xmm2 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm3 # encoding: [0xc5,0xfa,0x10,0x5c,0x24,0x04]
; X86-AVX1-NEXT: # xmm3 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vinsertps $16, %xmm2, %xmm3, %xmm2 # encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10]
; X86-AVX1-NEXT: # xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; X86-AVX1-NEXT: vinsertps $32, %xmm1, %xmm2, %xmm1 # encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20]
; X86-AVX1-NEXT: # xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
; X86-AVX1-NEXT: vinsertps $48, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30]
; X86-AVX1-NEXT: # xmm0 = xmm1[0,1,2],xmm0[0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_setr_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c]
; X86-AVX512-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x54,0x24,0x08]
; X86-AVX512-NEXT: # xmm2 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x5c,0x24,0x04]
; X86-AVX512-NEXT: # xmm3 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vinsertps $16, %xmm2, %xmm3, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10]
; X86-AVX512-NEXT: # xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; X86-AVX512-NEXT: vinsertps $32, %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20]
; X86-AVX512-NEXT: # xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
; X86-AVX512-NEXT: vinsertps $48, %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30]
; X86-AVX512-NEXT: # xmm0 = xmm1[0,1,2],xmm0[0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_setr_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: unpcklps %xmm3, %xmm2 # encoding: [0x0f,0x14,0xd3]
; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X64-SSE-NEXT: unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1]
; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-SSE-NEXT: movlhps %xmm2, %xmm0 # encoding: [0x0f,0x16,0xc2]
; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_setr_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10]
; X64-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; X64-AVX1-NEXT: vinsertps $32, %xmm2, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc2,0x20]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; X64-AVX1-NEXT: vinsertps $48, %xmm3, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc3,0x30]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,1,2],xmm3[0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_setr_ps:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10]
; X64-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; X64-AVX512-NEXT: vinsertps $32, %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc2,0x20]
; X64-AVX512-NEXT: # xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; X64-AVX512-NEXT: vinsertps $48, %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc3,0x30]
; X64-AVX512-NEXT: # xmm0 = xmm0[0,1,2],xmm3[0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <4 x float> undef, float %a0, i32 0
  %res1 = insertelement <4 x float> %res0, float %a1, i32 1
  %res2 = insertelement <4 x float> %res1, float %a2, i32 2
  %res3 = insertelement <4 x float> %res2, float %a3, i32 3
  ret <4 x float> %res3
}

define <4 x float> @test_mm_setzero_ps() {
; SSE-LABEL: test_mm_setzero_ps:
; SSE: # %bb.0:
; SSE-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_setzero_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_setzero_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  ret <4 x float> zeroinitializer
}

define void @test_mm_sfence() nounwind {
; CHECK-LABEL: test_mm_sfence:
; CHECK: # %bb.0:
; CHECK-NEXT: sfence # encoding: [0x0f,0xae,0xf8]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  call void @llvm.x86.sse.sfence()
  ret void
}
declare void @llvm.x86.sse.sfence() nounwind readnone

define <4 x float> @test_mm_shuffle_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_shuffle_ps:
; SSE: # %bb.0:
; SSE-NEXT: shufps $0, %xmm1, %xmm0 # encoding: [0x0f,0xc6,0xc1,0x00]
; SSE-NEXT: # xmm0 = xmm0[0,0],xmm1[0,0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_shuffle_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vshufps $0, %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc1,0x00]
; AVX1-NEXT: # xmm0 = xmm0[0,0],xmm1[0,0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_shuffle_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vshufps $0, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xc1,0x00]
; AVX512-NEXT: # xmm0 = xmm0[0,0],xmm1[0,0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
  ret <4 x float> %res
}

define <4 x float> @test_mm_sqrt_ps(<4 x float> %a0) {
; SSE-LABEL: test_mm_sqrt_ps:
; SSE: # %bb.0:
; SSE-NEXT: sqrtps %xmm0, %xmm0 # encoding: [0x0f,0x51,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sqrt_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vsqrtps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x51,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sqrt_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vsqrtps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x51,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) nounwind readnone

define <4 x float> @test_mm_sqrt_ss(<4 x float> %a0) {
; SSE-LABEL: test_mm_sqrt_ss:
; SSE: # %bb.0:
; SSE-NEXT: sqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x51,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sqrt_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sqrt_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %ext = extractelement <4 x float> %a0, i32 0
  %sqrt = call float @llvm.sqrt.f32(float %ext)
  %ins = insertelement <4 x float> %a0, float %sqrt, i32 0
  ret <4 x float> %ins
}
declare float @llvm.sqrt.f32(float) nounwind readnone

define float @test_mm_sqrt_ss_scalar(float %a0) {
; X86-SSE-LABEL: test_mm_sqrt_ss_scalar:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
; X86-SSE-NEXT: .cfi_def_cfa_offset 8
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x08]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: sqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x51,0xc0]
; X86-SSE-NEXT: movss %xmm0, (%esp) # encoding: [0xf3,0x0f,0x11,0x04,0x24]
; X86-SSE-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24]
; X86-SSE-NEXT: popl %eax # encoding: [0x58]
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_sqrt_ss_scalar:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX1-NEXT: .cfi_def_cfa_offset 8
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0]
; X86-AVX1-NEXT: vmovss %xmm0, (%esp) # encoding: [0xc5,0xfa,0x11,0x04,0x24]
; X86-AVX1-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24]
; X86-AVX1-NEXT: popl %eax # encoding: [0x58]
; X86-AVX1-NEXT: .cfi_def_cfa_offset 4
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_sqrt_ss_scalar:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX512-NEXT: .cfi_def_cfa_offset 8
; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0]
; X86-AVX512-NEXT: vmovss %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x04,0x24]
; X86-AVX512-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24]
; X86-AVX512-NEXT: popl %eax # encoding: [0x58]
; X86-AVX512-NEXT: .cfi_def_cfa_offset 4
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_sqrt_ss_scalar:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: sqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x51,0xc0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_sqrt_ss_scalar:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_sqrt_ss_scalar:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %sqrt = call float @llvm.sqrt.f32(float %a0)
  ret float %sqrt
}

define void @test_mm_store_ps(float *%a0, <4 x float> %a1) {
; X86-SSE-LABEL: test_mm_store_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_store_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_store_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_store_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_store_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_store_ps:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %arg0 = bitcast float* %a0 to <4 x float>*
  store <4 x float> %a1, <4 x float>* %arg0, align 16
  ret void
}

define void @test_mm_store_ps1(float *%a0, <4 x float> %a1) {
; X86-SSE-LABEL: test_mm_store_ps1:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_store_ps1:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_store_ps1:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_store_ps1:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_store_ps1:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_store_ps1:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %arg0 = bitcast float* %a0 to <4 x float>*
  %shuf = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> zeroinitializer
  store <4 x float> %shuf, <4 x float>* %arg0, align 16
  ret void
}

define void @test_mm_store_ss(float *%a0, <4 x float> %a1) {
; X86-SSE-LABEL: test_mm_store_ss:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movss %xmm0, (%eax) # encoding: [0xf3,0x0f,0x11,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_store_ss:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovss %xmm0, (%eax) # encoding: [0xc5,0xfa,0x11,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_store_ss:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovss %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_store_ss:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movss %xmm0, (%rdi) # encoding: [0xf3,0x0f,0x11,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_store_ss:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_store_ss:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %ext = extractelement <4 x float> %a1, i32 0
  store float %ext, float* %a0, align 1
  ret void
}

define void @test_mm_store1_ps(float *%a0, <4 x float> %a1) {
; X86-SSE-LABEL: test_mm_store1_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_store1_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_store1_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_store1_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00]
; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_store1_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_store1_ps:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %arg0 = bitcast float* %a0 to <4 x float>*
  %shuf = shufflevector <4 x
float> %a1, <4 x float> undef, <4 x i32> zeroinitializer 2746 store <4 x float> %shuf, <4 x float>* %arg0, align 16 2747 ret void 2748} 2749 2750define void @test_mm_storeh_pi(x86_mmx *%a0, <4 x float> %a1) nounwind { 2751; X86-SSE1-LABEL: test_mm_storeh_pi: 2752; X86-SSE1: # %bb.0: 2753; X86-SSE1-NEXT: pushl %ebp # encoding: [0x55] 2754; X86-SSE1-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] 2755; X86-SSE1-NEXT: andl $-16, %esp # encoding: [0x83,0xe4,0xf0] 2756; X86-SSE1-NEXT: subl $32, %esp # encoding: [0x83,0xec,0x20] 2757; X86-SSE1-NEXT: movl 8(%ebp), %eax # encoding: [0x8b,0x45,0x08] 2758; X86-SSE1-NEXT: movaps %xmm0, (%esp) # encoding: [0x0f,0x29,0x04,0x24] 2759; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08] 2760; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] 2761; X86-SSE1-NEXT: movl %edx, 4(%eax) # encoding: [0x89,0x50,0x04] 2762; X86-SSE1-NEXT: movl %ecx, (%eax) # encoding: [0x89,0x08] 2763; X86-SSE1-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] 2764; X86-SSE1-NEXT: popl %ebp # encoding: [0x5d] 2765; X86-SSE1-NEXT: retl # encoding: [0xc3] 2766; 2767; X86-SSE2-LABEL: test_mm_storeh_pi: 2768; X86-SSE2: # %bb.0: 2769; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2770; X86-SSE2-NEXT: movhps %xmm0, (%eax) # encoding: [0x0f,0x17,0x00] 2771; X86-SSE2-NEXT: retl # encoding: [0xc3] 2772; 2773; X86-AVX1-LABEL: test_mm_storeh_pi: 2774; X86-AVX1: # %bb.0: 2775; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2776; X86-AVX1-NEXT: vmovhps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x17,0x00] 2777; X86-AVX1-NEXT: retl # encoding: [0xc3] 2778; 2779; X86-AVX512-LABEL: test_mm_storeh_pi: 2780; X86-AVX512: # %bb.0: 2781; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2782; X86-AVX512-NEXT: vmovhps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x17,0x00] 2783; X86-AVX512-NEXT: retl # encoding: [0xc3] 2784; 2785; X64-SSE1-LABEL: test_mm_storeh_pi: 2786; X64-SSE1: # %bb.0: 2787; X64-SSE1-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x29,0x44,0x24,0xe8] 2788; X64-SSE1-NEXT: movq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8b,0x44,0x24,0xf0] 2789; X64-SSE1-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2790; X64-SSE1-NEXT: retq # encoding: [0xc3] 2791; 2792; X64-SSE2-LABEL: test_mm_storeh_pi: 2793; X64-SSE2: # %bb.0: 2794; X64-SSE2-NEXT: punpckhqdq %xmm0, %xmm0 # encoding: [0x66,0x0f,0x6d,0xc0] 2795; X64-SSE2-NEXT: # xmm0 = xmm0[1,1] 2796; X64-SSE2-NEXT: movq %xmm0, %rax # encoding: [0x66,0x48,0x0f,0x7e,0xc0] 2797; X64-SSE2-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2798; X64-SSE2-NEXT: retq # encoding: [0xc3] 2799; 2800; X64-AVX1-LABEL: test_mm_storeh_pi: 2801; X64-AVX1: # %bb.0: 2802; X64-AVX1-NEXT: vpextrq $1, %xmm0, %rax # encoding: [0xc4,0xe3,0xf9,0x16,0xc0,0x01] 2803; X64-AVX1-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2804; X64-AVX1-NEXT: retq # encoding: [0xc3] 2805; 2806; X64-AVX512-LABEL: test_mm_storeh_pi: 2807; X64-AVX512: # %bb.0: 2808; X64-AVX512-NEXT: vpextrq $1, %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0x16,0xc0,0x01] 2809; X64-AVX512-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2810; X64-AVX512-NEXT: retq # encoding: [0xc3] 2811 %ptr = bitcast x86_mmx* %a0 to i64* 2812 %bc = bitcast <4 x float> %a1 to <2 x i64> 2813 %ext = extractelement <2 x i64> %bc, i32 1 2814 store i64 %ext, i64* %ptr 2815 ret void 2816} 2817 2818define void @test_mm_storeh_pi2(x86_mmx 
*%a0, <4 x float> %a1) nounwind { 2819; X86-SSE-LABEL: test_mm_storeh_pi2: 2820; X86-SSE: # %bb.0: 2821; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2822; X86-SSE-NEXT: movhps %xmm0, (%eax) # encoding: [0x0f,0x17,0x00] 2823; X86-SSE-NEXT: retl # encoding: [0xc3] 2824; 2825; X86-AVX1-LABEL: test_mm_storeh_pi2: 2826; X86-AVX1: # %bb.0: 2827; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2828; X86-AVX1-NEXT: vmovhps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x17,0x00] 2829; X86-AVX1-NEXT: retl # encoding: [0xc3] 2830; 2831; X86-AVX512-LABEL: test_mm_storeh_pi2: 2832; X86-AVX512: # %bb.0: 2833; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2834; X86-AVX512-NEXT: vmovhps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x17,0x00] 2835; X86-AVX512-NEXT: retl # encoding: [0xc3] 2836; 2837; X64-SSE-LABEL: test_mm_storeh_pi2: 2838; X64-SSE: # %bb.0: 2839; X64-SSE-NEXT: movhps %xmm0, (%rdi) # encoding: [0x0f,0x17,0x07] 2840; X64-SSE-NEXT: retq # encoding: [0xc3] 2841; 2842; X64-AVX1-LABEL: test_mm_storeh_pi2: 2843; X64-AVX1: # %bb.0: 2844; X64-AVX1-NEXT: vmovhps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x17,0x07] 2845; X64-AVX1-NEXT: retq # encoding: [0xc3] 2846; 2847; X64-AVX512-LABEL: test_mm_storeh_pi2: 2848; X64-AVX512: # %bb.0: 2849; X64-AVX512-NEXT: vmovhps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x17,0x07] 2850; X64-AVX512-NEXT: retq # encoding: [0xc3] 2851 %ptr = bitcast x86_mmx* %a0 to <2 x float>* 2852 %ext = shufflevector <4 x float> %a1, <4 x float> undef, <2 x i32> <i32 2, i32 3> 2853 store <2 x float> %ext, <2 x float>* %ptr 2854 ret void 2855} 2856 2857define void @test_mm_storel_pi(x86_mmx *%a0, <4 x float> %a1) nounwind { 2858; X86-SSE1-LABEL: test_mm_storel_pi: 2859; X86-SSE1: # %bb.0: 2860; X86-SSE1-NEXT: pushl %ebp # encoding: [0x55] 2861; X86-SSE1-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] 2862; X86-SSE1-NEXT: andl $-16, %esp # encoding: [0x83,0xe4,0xf0] 2863; X86-SSE1-NEXT: subl $32, %esp # encoding: [0x83,0xec,0x20] 2864; X86-SSE1-NEXT: movl 8(%ebp), %eax # encoding: [0x8b,0x45,0x08] 2865; X86-SSE1-NEXT: movaps %xmm0, (%esp) # encoding: [0x0f,0x29,0x04,0x24] 2866; X86-SSE1-NEXT: movl (%esp), %ecx # encoding: [0x8b,0x0c,0x24] 2867; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04] 2868; X86-SSE1-NEXT: movl %edx, 4(%eax) # encoding: [0x89,0x50,0x04] 2869; X86-SSE1-NEXT: movl %ecx, (%eax) # encoding: [0x89,0x08] 2870; X86-SSE1-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] 2871; X86-SSE1-NEXT: popl %ebp # encoding: [0x5d] 2872; X86-SSE1-NEXT: retl # encoding: [0xc3] 2873; 2874; X86-SSE2-LABEL: test_mm_storel_pi: 2875; X86-SSE2: # %bb.0: 2876; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2877; X86-SSE2-NEXT: movlps %xmm0, (%eax) # encoding: [0x0f,0x13,0x00] 2878; X86-SSE2-NEXT: retl # encoding: [0xc3] 2879; 2880; X86-AVX1-LABEL: test_mm_storel_pi: 2881; X86-AVX1: # %bb.0: 2882; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2883; X86-AVX1-NEXT: vmovlps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x13,0x00] 2884; X86-AVX1-NEXT: retl # encoding: [0xc3] 2885; 2886; X86-AVX512-LABEL: test_mm_storel_pi: 2887; X86-AVX512: # %bb.0: 2888; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2889; X86-AVX512-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00] 2890; X86-AVX512-NEXT: retl # encoding: [0xc3] 2891; 2892; 
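; NOTE: Without SSE2 there is no direct XMM-to-GPR move, so the X64-SSE1 lowering
; below has to spill the vector to the stack and forward the low 64 bits through
; %rax; the X86-SSE1 path above plays the same trick with two 32-bit loads and stores.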
X64-SSE1-LABEL: test_mm_storel_pi: 2893; X64-SSE1: # %bb.0: 2894; X64-SSE1-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x29,0x44,0x24,0xe8] 2895; X64-SSE1-NEXT: movq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8b,0x44,0x24,0xe8] 2896; X64-SSE1-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2897; X64-SSE1-NEXT: retq # encoding: [0xc3] 2898; 2899; X64-SSE2-LABEL: test_mm_storel_pi: 2900; X64-SSE2: # %bb.0: 2901; X64-SSE2-NEXT: movq %xmm0, %rax # encoding: [0x66,0x48,0x0f,0x7e,0xc0] 2902; X64-SSE2-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2903; X64-SSE2-NEXT: retq # encoding: [0xc3] 2904; 2905; X64-AVX1-LABEL: test_mm_storel_pi: 2906; X64-AVX1: # %bb.0: 2907; X64-AVX1-NEXT: vmovq %xmm0, %rax # encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] 2908; X64-AVX1-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2909; X64-AVX1-NEXT: retq # encoding: [0xc3] 2910; 2911; X64-AVX512-LABEL: test_mm_storel_pi: 2912; X64-AVX512: # %bb.0: 2913; X64-AVX512-NEXT: vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] 2914; X64-AVX512-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2915; X64-AVX512-NEXT: retq # encoding: [0xc3] 2916 %ptr = bitcast x86_mmx* %a0 to i64* 2917 %bc = bitcast <4 x float> %a1 to <2 x i64> 2918 %ext = extractelement <2 x i64> %bc, i32 0 2919 store i64 %ext, i64* %ptr 2920 ret void 2921} 2922 2923; FIXME: Switch the frontend to use this code. 2924define void @test_mm_storel_pi2(x86_mmx *%a0, <4 x float> %a1) nounwind { 2925; X86-SSE-LABEL: test_mm_storel_pi2: 2926; X86-SSE: # %bb.0: 2927; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2928; X86-SSE-NEXT: movlps %xmm0, (%eax) # encoding: [0x0f,0x13,0x00] 2929; X86-SSE-NEXT: retl # encoding: [0xc3] 2930; 2931; X86-AVX1-LABEL: test_mm_storel_pi2: 2932; X86-AVX1: # %bb.0: 2933; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2934; X86-AVX1-NEXT: vmovlps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x13,0x00] 2935; X86-AVX1-NEXT: retl # encoding: [0xc3] 2936; 2937; X86-AVX512-LABEL: test_mm_storel_pi2: 2938; X86-AVX512: # %bb.0: 2939; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2940; X86-AVX512-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00] 2941; X86-AVX512-NEXT: retl # encoding: [0xc3] 2942; 2943; X64-SSE-LABEL: test_mm_storel_pi2: 2944; X64-SSE: # %bb.0: 2945; X64-SSE-NEXT: movlps %xmm0, (%rdi) # encoding: [0x0f,0x13,0x07] 2946; X64-SSE-NEXT: retq # encoding: [0xc3] 2947; 2948; X64-AVX1-LABEL: test_mm_storel_pi2: 2949; X64-AVX1: # %bb.0: 2950; X64-AVX1-NEXT: vmovlps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x13,0x07] 2951; X64-AVX1-NEXT: retq # encoding: [0xc3] 2952; 2953; X64-AVX512-LABEL: test_mm_storel_pi2: 2954; X64-AVX512: # %bb.0: 2955; X64-AVX512-NEXT: vmovlps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x07] 2956; X64-AVX512-NEXT: retq # encoding: [0xc3] 2957 %ptr = bitcast x86_mmx* %a0 to <2 x float>* 2958 %ext = shufflevector <4 x float> %a1, <4 x float> undef, <2 x i32> <i32 0, i32 1> 2959 store <2 x float> %ext, <2 x float>* %ptr 2960 ret void 2961} 2962 2963define void @test_mm_storer_ps(float *%a0, <4 x float> %a1) { 2964; X86-SSE-LABEL: test_mm_storer_ps: 2965; X86-SSE: # %bb.0: 2966; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2967; X86-SSE-NEXT: shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b] 2968; X86-SSE-NEXT: # xmm0 = xmm0[3,2,1,0] 2969; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: 
[0x0f,0x29,0x00] 2970; X86-SSE-NEXT: retl # encoding: [0xc3] 2971; 2972; X86-AVX1-LABEL: test_mm_storer_ps: 2973; X86-AVX1: # %bb.0: 2974; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2975; X86-AVX1-NEXT: vpermilps $27, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b] 2976; X86-AVX1-NEXT: # xmm0 = xmm0[3,2,1,0] 2977; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] 2978; X86-AVX1-NEXT: retl # encoding: [0xc3] 2979; 2980; X86-AVX512-LABEL: test_mm_storer_ps: 2981; X86-AVX512: # %bb.0: 2982; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2983; X86-AVX512-NEXT: vpermilps $27, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b] 2984; X86-AVX512-NEXT: # xmm0 = xmm0[3,2,1,0] 2985; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] 2986; X86-AVX512-NEXT: retl # encoding: [0xc3] 2987; 2988; X64-SSE-LABEL: test_mm_storer_ps: 2989; X64-SSE: # %bb.0: 2990; X64-SSE-NEXT: shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b] 2991; X64-SSE-NEXT: # xmm0 = xmm0[3,2,1,0] 2992; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 2993; X64-SSE-NEXT: retq # encoding: [0xc3] 2994; 2995; X64-AVX1-LABEL: test_mm_storer_ps: 2996; X64-AVX1: # %bb.0: 2997; X64-AVX1-NEXT: vpermilps $27, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b] 2998; X64-AVX1-NEXT: # xmm0 = xmm0[3,2,1,0] 2999; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] 3000; X64-AVX1-NEXT: retq # encoding: [0xc3] 3001; 3002; X64-AVX512-LABEL: test_mm_storer_ps: 3003; X64-AVX512: # %bb.0: 3004; X64-AVX512-NEXT: vpermilps $27, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b] 3005; X64-AVX512-NEXT: # xmm0 = xmm0[3,2,1,0] 3006; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] 3007; X64-AVX512-NEXT: retq # encoding: [0xc3] 3008 %arg0 = bitcast float* %a0 to <4 x float>* 3009 %shuf = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 3010 store <4 x float> %shuf, <4 x float>* %arg0, align 16 3011 ret void 3012} 3013 3014define void @test_mm_storeu_ps(float *%a0, <4 x float> %a1) { 3015; X86-SSE-LABEL: test_mm_storeu_ps: 3016; X86-SSE: # %bb.0: 3017; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3018; X86-SSE-NEXT: movups %xmm0, (%eax) # encoding: [0x0f,0x11,0x00] 3019; X86-SSE-NEXT: retl # encoding: [0xc3] 3020; 3021; X86-AVX1-LABEL: test_mm_storeu_ps: 3022; X86-AVX1: # %bb.0: 3023; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3024; X86-AVX1-NEXT: vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00] 3025; X86-AVX1-NEXT: retl # encoding: [0xc3] 3026; 3027; X86-AVX512-LABEL: test_mm_storeu_ps: 3028; X86-AVX512: # %bb.0: 3029; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3030; X86-AVX512-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00] 3031; X86-AVX512-NEXT: retl # encoding: [0xc3] 3032; 3033; X64-SSE-LABEL: test_mm_storeu_ps: 3034; X64-SSE: # %bb.0: 3035; X64-SSE-NEXT: movups %xmm0, (%rdi) # encoding: [0x0f,0x11,0x07] 3036; X64-SSE-NEXT: retq # encoding: [0xc3] 3037; 3038; X64-AVX1-LABEL: test_mm_storeu_ps: 3039; X64-AVX1: # %bb.0: 3040; X64-AVX1-NEXT: vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07] 3041; X64-AVX1-NEXT: retq # encoding: [0xc3] 3042; 3043; X64-AVX512-LABEL: test_mm_storeu_ps: 
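; NOTE: test_mm_storeu_ps stores with align 1, so every target selects the
; unaligned movups/vmovups form here instead of the aligned movaps/vmovaps
; used by test_mm_store_ps.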
3044; X64-AVX512: # %bb.0: 3045; X64-AVX512-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07] 3046; X64-AVX512-NEXT: retq # encoding: [0xc3] 3047 %arg0 = bitcast float* %a0 to <4 x float>* 3048 store <4 x float> %a1, <4 x float>* %arg0, align 1 3049 ret void 3050} 3051 3052define void @test_mm_stream_ps(float *%a0, <4 x float> %a1) { 3053; X86-SSE-LABEL: test_mm_stream_ps: 3054; X86-SSE: # %bb.0: 3055; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3056; X86-SSE-NEXT: movntps %xmm0, (%eax) # encoding: [0x0f,0x2b,0x00] 3057; X86-SSE-NEXT: retl # encoding: [0xc3] 3058; 3059; X86-AVX1-LABEL: test_mm_stream_ps: 3060; X86-AVX1: # %bb.0: 3061; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3062; X86-AVX1-NEXT: vmovntps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x2b,0x00] 3063; X86-AVX1-NEXT: retl # encoding: [0xc3] 3064; 3065; X86-AVX512-LABEL: test_mm_stream_ps: 3066; X86-AVX512: # %bb.0: 3067; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3068; X86-AVX512-NEXT: vmovntps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x00] 3069; X86-AVX512-NEXT: retl # encoding: [0xc3] 3070; 3071; X64-SSE-LABEL: test_mm_stream_ps: 3072; X64-SSE: # %bb.0: 3073; X64-SSE-NEXT: movntps %xmm0, (%rdi) # encoding: [0x0f,0x2b,0x07] 3074; X64-SSE-NEXT: retq # encoding: [0xc3] 3075; 3076; X64-AVX1-LABEL: test_mm_stream_ps: 3077; X64-AVX1: # %bb.0: 3078; X64-AVX1-NEXT: vmovntps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x2b,0x07] 3079; X64-AVX1-NEXT: retq # encoding: [0xc3] 3080; 3081; X64-AVX512-LABEL: test_mm_stream_ps: 3082; X64-AVX512: # %bb.0: 3083; X64-AVX512-NEXT: vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07] 3084; X64-AVX512-NEXT: retq # encoding: [0xc3] 3085 %arg0 = bitcast float* %a0 to <4 x float>* 3086 store <4 x float> %a1, <4 x float>* %arg0, align 16, !nontemporal !0 3087 ret void 3088} 3089 3090define <4 x float> @test_mm_sub_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 3091; SSE-LABEL: test_mm_sub_ps: 3092; SSE: # %bb.0: 3093; SSE-NEXT: subps %xmm1, %xmm0 # encoding: [0x0f,0x5c,0xc1] 3094; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3095; 3096; AVX1-LABEL: test_mm_sub_ps: 3097; AVX1: # %bb.0: 3098; AVX1-NEXT: vsubps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5c,0xc1] 3099; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3100; 3101; AVX512-LABEL: test_mm_sub_ps: 3102; AVX512: # %bb.0: 3103; AVX512-NEXT: vsubps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5c,0xc1] 3104; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3105 %res = fsub <4 x float> %a0, %a1 3106 ret <4 x float> %res 3107} 3108 3109define <4 x float> @test_mm_sub_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3110; SSE-LABEL: test_mm_sub_ss: 3111; SSE: # %bb.0: 3112; SSE-NEXT: subss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5c,0xc1] 3113; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3114; 3115; AVX1-LABEL: test_mm_sub_ss: 3116; AVX1: # %bb.0: 3117; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5c,0xc1] 3118; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3119; 3120; AVX512-LABEL: test_mm_sub_ss: 3121; AVX512: # %bb.0: 3122; AVX512-NEXT: vsubss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5c,0xc1] 3123; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3124 %ext0 = extractelement <4 x float> %a0, i32 0 3125 %ext1 = extractelement <4 x float> %a1, i32 0 3126 %fsub = fsub float %ext0, %ext1 3127 %res = insertelement <4 x float> %a0, 
float %fsub, i32 0 3128 ret <4 x float> %res 3129} 3130 3131define void @test_MM_TRANSPOSE4_PS(<4 x float>* %a0, <4 x float>* %a1, <4 x float>* %a2, <4 x float>* %a3) nounwind { 3132; X86-SSE-LABEL: test_MM_TRANSPOSE4_PS: 3133; X86-SSE: # %bb.0: 3134; X86-SSE-NEXT: pushl %esi # encoding: [0x56] 3135; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14] 3136; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10] 3137; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] 3138; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08] 3139; X86-SSE-NEXT: movaps (%esi), %xmm0 # encoding: [0x0f,0x28,0x06] 3140; X86-SSE-NEXT: movaps (%edx), %xmm1 # encoding: [0x0f,0x28,0x0a] 3141; X86-SSE-NEXT: movaps (%ecx), %xmm2 # encoding: [0x0f,0x28,0x11] 3142; X86-SSE-NEXT: movaps (%eax), %xmm3 # encoding: [0x0f,0x28,0x18] 3143; X86-SSE-NEXT: movaps %xmm0, %xmm4 # encoding: [0x0f,0x28,0xe0] 3144; X86-SSE-NEXT: unpcklps %xmm1, %xmm4 # encoding: [0x0f,0x14,0xe1] 3145; X86-SSE-NEXT: # xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] 3146; X86-SSE-NEXT: movaps %xmm2, %xmm5 # encoding: [0x0f,0x28,0xea] 3147; X86-SSE-NEXT: unpcklps %xmm3, %xmm5 # encoding: [0x0f,0x14,0xeb] 3148; X86-SSE-NEXT: # xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] 3149; X86-SSE-NEXT: unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1] 3150; X86-SSE-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3151; X86-SSE-NEXT: unpckhps %xmm3, %xmm2 # encoding: [0x0f,0x15,0xd3] 3152; X86-SSE-NEXT: # xmm2 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3153; X86-SSE-NEXT: movaps %xmm4, %xmm1 # encoding: [0x0f,0x28,0xcc] 3154; X86-SSE-NEXT: movlhps %xmm5, %xmm1 # encoding: [0x0f,0x16,0xcd] 3155; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm5[0] 3156; X86-SSE-NEXT: movhlps %xmm4, %xmm5 # encoding: [0x0f,0x12,0xec] 3157; X86-SSE-NEXT: # xmm5 = xmm4[1],xmm5[1] 3158; X86-SSE-NEXT: movaps %xmm0, %xmm3 # encoding: [0x0f,0x28,0xd8] 3159; X86-SSE-NEXT: movlhps %xmm2, %xmm3 # encoding: [0x0f,0x16,0xda] 3160; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0] 3161; X86-SSE-NEXT: movhlps %xmm0, %xmm2 # encoding: [0x0f,0x12,0xd0] 3162; X86-SSE-NEXT: # xmm2 = xmm0[1],xmm2[1] 3163; X86-SSE-NEXT: movaps %xmm1, (%esi) # encoding: [0x0f,0x29,0x0e] 3164; X86-SSE-NEXT: movaps %xmm5, (%edx) # encoding: [0x0f,0x29,0x2a] 3165; X86-SSE-NEXT: movaps %xmm3, (%ecx) # encoding: [0x0f,0x29,0x19] 3166; X86-SSE-NEXT: movaps %xmm2, (%eax) # encoding: [0x0f,0x29,0x10] 3167; X86-SSE-NEXT: popl %esi # encoding: [0x5e] 3168; X86-SSE-NEXT: retl # encoding: [0xc3] 3169; 3170; X86-AVX1-LABEL: test_MM_TRANSPOSE4_PS: 3171; X86-AVX1: # %bb.0: 3172; X86-AVX1-NEXT: pushl %esi # encoding: [0x56] 3173; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14] 3174; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10] 3175; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] 3176; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08] 3177; X86-AVX1-NEXT: vmovaps (%esi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x06] 3178; X86-AVX1-NEXT: vmovaps (%edx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0a] 3179; X86-AVX1-NEXT: vmovaps (%ecx), %xmm2 # encoding: [0xc5,0xf8,0x28,0x11] 3180; X86-AVX1-NEXT: vmovaps (%eax), %xmm3 # encoding: [0xc5,0xf8,0x28,0x18] 3181; X86-AVX1-NEXT: vunpcklps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x14,0xe1] 3182; X86-AVX1-NEXT: # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3183; X86-AVX1-NEXT: vunpcklps %xmm3, %xmm2, %xmm5 # encoding: [0xc5,0xe8,0x14,0xeb] 3184; 
X86-AVX1-NEXT: # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 3185; X86-AVX1-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1] 3186; X86-AVX1-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3187; X86-AVX1-NEXT: vunpckhps %xmm3, %xmm2, %xmm1 # encoding: [0xc5,0xe8,0x15,0xcb] 3188; X86-AVX1-NEXT: # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3189; X86-AVX1-NEXT: vmovlhps %xmm5, %xmm4, %xmm2 # encoding: [0xc5,0xd8,0x16,0xd5] 3190; X86-AVX1-NEXT: # xmm2 = xmm4[0],xmm5[0] 3191; X86-AVX1-NEXT: vunpckhpd %xmm5, %xmm4, %xmm3 # encoding: [0xc5,0xd9,0x15,0xdd] 3192; X86-AVX1-NEXT: # xmm3 = xmm4[1],xmm5[1] 3193; X86-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x16,0xe1] 3194; X86-AVX1-NEXT: # xmm4 = xmm0[0],xmm1[0] 3195; X86-AVX1-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1] 3196; X86-AVX1-NEXT: # xmm0 = xmm0[1],xmm1[1] 3197; X86-AVX1-NEXT: vmovaps %xmm2, (%esi) # encoding: [0xc5,0xf8,0x29,0x16] 3198; X86-AVX1-NEXT: vmovaps %xmm3, (%edx) # encoding: [0xc5,0xf8,0x29,0x1a] 3199; X86-AVX1-NEXT: vmovaps %xmm4, (%ecx) # encoding: [0xc5,0xf8,0x29,0x21] 3200; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] 3201; X86-AVX1-NEXT: popl %esi # encoding: [0x5e] 3202; X86-AVX1-NEXT: retl # encoding: [0xc3] 3203; 3204; X86-AVX512-LABEL: test_MM_TRANSPOSE4_PS: 3205; X86-AVX512: # %bb.0: 3206; X86-AVX512-NEXT: pushl %esi # encoding: [0x56] 3207; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14] 3208; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10] 3209; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] 3210; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08] 3211; X86-AVX512-NEXT: vmovaps (%esi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x06] 3212; X86-AVX512-NEXT: vmovaps (%edx), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x0a] 3213; X86-AVX512-NEXT: vmovaps (%ecx), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x11] 3214; X86-AVX512-NEXT: vmovaps (%eax), %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x18] 3215; X86-AVX512-NEXT: vunpcklps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xe1] 3216; X86-AVX512-NEXT: # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3217; X86-AVX512-NEXT: vunpcklps %xmm3, %xmm2, %xmm5 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x14,0xeb] 3218; X86-AVX512-NEXT: # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 3219; X86-AVX512-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1] 3220; X86-AVX512-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3221; X86-AVX512-NEXT: vunpckhps %xmm3, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x15,0xcb] 3222; X86-AVX512-NEXT: # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3223; X86-AVX512-NEXT: vmovlhps %xmm5, %xmm4, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xd8,0x16,0xd5] 3224; X86-AVX512-NEXT: # xmm2 = xmm4[0],xmm5[0] 3225; X86-AVX512-NEXT: vunpckhpd %xmm5, %xmm4, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0x15,0xdd] 3226; X86-AVX512-NEXT: # xmm3 = xmm4[1],xmm5[1] 3227; X86-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xe1] 3228; X86-AVX512-NEXT: # xmm4 = xmm0[0],xmm1[0] 3229; X86-AVX512-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1] 3230; X86-AVX512-NEXT: # xmm0 = xmm0[1],xmm1[1] 3231; X86-AVX512-NEXT: vmovaps %xmm2, (%esi) # EVEX TO VEX 
Compression encoding: [0xc5,0xf8,0x29,0x16] 3232; X86-AVX512-NEXT: vmovaps %xmm3, (%edx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x1a] 3233; X86-AVX512-NEXT: vmovaps %xmm4, (%ecx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x21] 3234; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] 3235; X86-AVX512-NEXT: popl %esi # encoding: [0x5e] 3236; X86-AVX512-NEXT: retl # encoding: [0xc3] 3237; 3238; X64-SSE-LABEL: test_MM_TRANSPOSE4_PS: 3239; X64-SSE: # %bb.0: 3240; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07] 3241; X64-SSE-NEXT: movaps (%rsi), %xmm1 # encoding: [0x0f,0x28,0x0e] 3242; X64-SSE-NEXT: movaps (%rdx), %xmm2 # encoding: [0x0f,0x28,0x12] 3243; X64-SSE-NEXT: movaps (%rcx), %xmm3 # encoding: [0x0f,0x28,0x19] 3244; X64-SSE-NEXT: movaps %xmm0, %xmm4 # encoding: [0x0f,0x28,0xe0] 3245; X64-SSE-NEXT: unpcklps %xmm1, %xmm4 # encoding: [0x0f,0x14,0xe1] 3246; X64-SSE-NEXT: # xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] 3247; X64-SSE-NEXT: movaps %xmm2, %xmm5 # encoding: [0x0f,0x28,0xea] 3248; X64-SSE-NEXT: unpcklps %xmm3, %xmm5 # encoding: [0x0f,0x14,0xeb] 3249; X64-SSE-NEXT: # xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] 3250; X64-SSE-NEXT: unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1] 3251; X64-SSE-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3252; X64-SSE-NEXT: unpckhps %xmm3, %xmm2 # encoding: [0x0f,0x15,0xd3] 3253; X64-SSE-NEXT: # xmm2 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3254; X64-SSE-NEXT: movaps %xmm4, %xmm1 # encoding: [0x0f,0x28,0xcc] 3255; X64-SSE-NEXT: movlhps %xmm5, %xmm1 # encoding: [0x0f,0x16,0xcd] 3256; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm5[0] 3257; X64-SSE-NEXT: movhlps %xmm4, %xmm5 # encoding: [0x0f,0x12,0xec] 3258; X64-SSE-NEXT: # xmm5 = xmm4[1],xmm5[1] 3259; X64-SSE-NEXT: movaps %xmm0, %xmm3 # encoding: [0x0f,0x28,0xd8] 3260; X64-SSE-NEXT: movlhps %xmm2, %xmm3 # encoding: [0x0f,0x16,0xda] 3261; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0] 3262; X64-SSE-NEXT: movhlps %xmm0, %xmm2 # encoding: [0x0f,0x12,0xd0] 3263; X64-SSE-NEXT: # xmm2 = xmm0[1],xmm2[1] 3264; X64-SSE-NEXT: movaps %xmm1, (%rdi) # encoding: [0x0f,0x29,0x0f] 3265; X64-SSE-NEXT: movaps %xmm5, (%rsi) # encoding: [0x0f,0x29,0x2e] 3266; X64-SSE-NEXT: movaps %xmm3, (%rdx) # encoding: [0x0f,0x29,0x1a] 3267; X64-SSE-NEXT: movaps %xmm2, (%rcx) # encoding: [0x0f,0x29,0x11] 3268; X64-SSE-NEXT: retq # encoding: [0xc3] 3269; 3270; X64-AVX1-LABEL: test_MM_TRANSPOSE4_PS: 3271; X64-AVX1: # %bb.0: 3272; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07] 3273; X64-AVX1-NEXT: vmovaps (%rsi), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0e] 3274; X64-AVX1-NEXT: vmovaps (%rdx), %xmm2 # encoding: [0xc5,0xf8,0x28,0x12] 3275; X64-AVX1-NEXT: vmovaps (%rcx), %xmm3 # encoding: [0xc5,0xf8,0x28,0x19] 3276; X64-AVX1-NEXT: vunpcklps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x14,0xe1] 3277; X64-AVX1-NEXT: # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3278; X64-AVX1-NEXT: vunpcklps %xmm3, %xmm2, %xmm5 # encoding: [0xc5,0xe8,0x14,0xeb] 3279; X64-AVX1-NEXT: # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 3280; X64-AVX1-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1] 3281; X64-AVX1-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3282; X64-AVX1-NEXT: vunpckhps %xmm3, %xmm2, %xmm1 # encoding: [0xc5,0xe8,0x15,0xcb] 3283; X64-AVX1-NEXT: # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3284; X64-AVX1-NEXT: vmovlhps %xmm5, %xmm4, %xmm2 # encoding: [0xc5,0xd8,0x16,0xd5] 3285; X64-AVX1-NEXT: # xmm2 = xmm4[0],xmm5[0] 3286; X64-AVX1-NEXT: vunpckhpd %xmm5, %xmm4, %xmm3 # 
encoding: [0xc5,0xd9,0x15,0xdd] 3287; X64-AVX1-NEXT: # xmm3 = xmm4[1],xmm5[1] 3288; X64-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x16,0xe1] 3289; X64-AVX1-NEXT: # xmm4 = xmm0[0],xmm1[0] 3290; X64-AVX1-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1] 3291; X64-AVX1-NEXT: # xmm0 = xmm0[1],xmm1[1] 3292; X64-AVX1-NEXT: vmovaps %xmm2, (%rdi) # encoding: [0xc5,0xf8,0x29,0x17] 3293; X64-AVX1-NEXT: vmovaps %xmm3, (%rsi) # encoding: [0xc5,0xf8,0x29,0x1e] 3294; X64-AVX1-NEXT: vmovaps %xmm4, (%rdx) # encoding: [0xc5,0xf8,0x29,0x22] 3295; X64-AVX1-NEXT: vmovaps %xmm0, (%rcx) # encoding: [0xc5,0xf8,0x29,0x01] 3296; X64-AVX1-NEXT: retq # encoding: [0xc3] 3297; 3298; X64-AVX512-LABEL: test_MM_TRANSPOSE4_PS: 3299; X64-AVX512: # %bb.0: 3300; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07] 3301; X64-AVX512-NEXT: vmovaps (%rsi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x0e] 3302; X64-AVX512-NEXT: vmovaps (%rdx), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x12] 3303; X64-AVX512-NEXT: vmovaps (%rcx), %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x19] 3304; X64-AVX512-NEXT: vunpcklps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xe1] 3305; X64-AVX512-NEXT: # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3306; X64-AVX512-NEXT: vunpcklps %xmm3, %xmm2, %xmm5 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x14,0xeb] 3307; X64-AVX512-NEXT: # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 3308; X64-AVX512-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1] 3309; X64-AVX512-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3310; X64-AVX512-NEXT: vunpckhps %xmm3, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x15,0xcb] 3311; X64-AVX512-NEXT: # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3312; X64-AVX512-NEXT: vmovlhps %xmm5, %xmm4, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xd8,0x16,0xd5] 3313; X64-AVX512-NEXT: # xmm2 = xmm4[0],xmm5[0] 3314; X64-AVX512-NEXT: vunpckhpd %xmm5, %xmm4, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0x15,0xdd] 3315; X64-AVX512-NEXT: # xmm3 = xmm4[1],xmm5[1] 3316; X64-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xe1] 3317; X64-AVX512-NEXT: # xmm4 = xmm0[0],xmm1[0] 3318; X64-AVX512-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1] 3319; X64-AVX512-NEXT: # xmm0 = xmm0[1],xmm1[1] 3320; X64-AVX512-NEXT: vmovaps %xmm2, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x17] 3321; X64-AVX512-NEXT: vmovaps %xmm3, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x1e] 3322; X64-AVX512-NEXT: vmovaps %xmm4, (%rdx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x22] 3323; X64-AVX512-NEXT: vmovaps %xmm0, (%rcx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x01] 3324; X64-AVX512-NEXT: retq # encoding: [0xc3] 3325 %row0 = load <4 x float>, <4 x float>* %a0, align 16 3326 %row1 = load <4 x float>, <4 x float>* %a1, align 16 3327 %row2 = load <4 x float>, <4 x float>* %a2, align 16 3328 %row3 = load <4 x float>, <4 x float>* %a3, align 16 3329 %tmp0 = shufflevector <4 x float> %row0, <4 x float> %row1, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 3330 %tmp2 = shufflevector <4 x float> %row2, <4 x float> %row3, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 3331 %tmp1 = shufflevector <4 x float> %row0, <4 x float> %row1, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 3332 %tmp3 = shufflevector <4 x 
float> %row2, <4 x float> %row3, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 3333 %res0 = shufflevector <4 x float> %tmp0, <4 x float> %tmp2, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 3334 %res1 = shufflevector <4 x float> %tmp2, <4 x float> %tmp0, <4 x i32> <i32 6, i32 7, i32 2, i32 3> 3335 %res2 = shufflevector <4 x float> %tmp1, <4 x float> %tmp3, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 3336 %res3 = shufflevector <4 x float> %tmp3, <4 x float> %tmp1, <4 x i32> <i32 6, i32 7, i32 2, i32 3> 3337 store <4 x float> %res0, <4 x float>* %a0, align 16 3338 store <4 x float> %res1, <4 x float>* %a1, align 16 3339 store <4 x float> %res2, <4 x float>* %a2, align 16 3340 store <4 x float> %res3, <4 x float>* %a3, align 16 3341 ret void 3342} 3343 3344define i32 @test_mm_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3345; SSE-LABEL: test_mm_ucomieq_ss: 3346; SSE: # %bb.0: 3347; SSE-NEXT: ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1] 3348; SSE-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0] 3349; SSE-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1] 3350; SSE-NEXT: andb %al, %cl # encoding: [0x20,0xc1] 3351; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3352; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3353; 3354; AVX1-LABEL: test_mm_ucomieq_ss: 3355; AVX1: # %bb.0: 3356; AVX1-NEXT: vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1] 3357; AVX1-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0] 3358; AVX1-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1] 3359; AVX1-NEXT: andb %al, %cl # encoding: [0x20,0xc1] 3360; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3361; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3362; 3363; AVX512-LABEL: test_mm_ucomieq_ss: 3364; AVX512: # %bb.0: 3365; AVX512-NEXT: vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] 3366; AVX512-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0] 3367; AVX512-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1] 3368; AVX512-NEXT: andb %al, %cl # encoding: [0x20,0xc1] 3369; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3370; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3371 %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) 3372 ret i32 %res 3373} 3374declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone 3375 3376define i32 @test_mm_ucomige_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3377; SSE-LABEL: test_mm_ucomige_ss: 3378; SSE: # %bb.0: 3379; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3380; SSE-NEXT: ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1] 3381; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3382; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3383; 3384; AVX1-LABEL: test_mm_ucomige_ss: 3385; AVX1: # %bb.0: 3386; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3387; AVX1-NEXT: vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1] 3388; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3389; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3390; 3391; AVX512-LABEL: test_mm_ucomige_ss: 3392; AVX512: # %bb.0: 3393; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3394; AVX512-NEXT: vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] 3395; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3396; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3397 %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) 3398 ret i32 %res 3399} 3400declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone 3401 3402define i32 @test_mm_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 
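; NOTE: ucomiss/vucomiss sets ZF=PF=CF=1 for an unordered result, CF=1 for
; less-than, ZF=1 for equal, and clears all three for greater-than. That is why
; the eq/neq tests combine sete/setne with a parity check (setnp/setp), while
; the ge/gt/le/lt tests need only a single setae/seta - le/lt simply swap the
; ucomiss operands so the same predicates can be reused.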
3403; SSE-LABEL: test_mm_ucomigt_ss: 3404; SSE: # %bb.0: 3405; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3406; SSE-NEXT: ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1] 3407; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3408; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3409; 3410; AVX1-LABEL: test_mm_ucomigt_ss: 3411; AVX1: # %bb.0: 3412; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3413; AVX1-NEXT: vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1] 3414; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3415; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3416; 3417; AVX512-LABEL: test_mm_ucomigt_ss: 3418; AVX512: # %bb.0: 3419; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3420; AVX512-NEXT: vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] 3421; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3422; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3423 %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) 3424 ret i32 %res 3425} 3426declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone 3427 3428define i32 @test_mm_ucomile_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3429; SSE-LABEL: test_mm_ucomile_ss: 3430; SSE: # %bb.0: 3431; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3432; SSE-NEXT: ucomiss %xmm0, %xmm1 # encoding: [0x0f,0x2e,0xc8] 3433; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3434; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3435; 3436; AVX1-LABEL: test_mm_ucomile_ss: 3437; AVX1: # %bb.0: 3438; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3439; AVX1-NEXT: vucomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2e,0xc8] 3440; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3441; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3442; 3443; AVX512-LABEL: test_mm_ucomile_ss: 3444; AVX512: # %bb.0: 3445; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3446; AVX512-NEXT: vucomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc8] 3447; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3448; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3449 %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) 3450 ret i32 %res 3451} 3452declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone 3453 3454define i32 @test_mm_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3455; SSE-LABEL: test_mm_ucomilt_ss: 3456; SSE: # %bb.0: 3457; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3458; SSE-NEXT: ucomiss %xmm0, %xmm1 # encoding: [0x0f,0x2e,0xc8] 3459; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3460; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3461; 3462; AVX1-LABEL: test_mm_ucomilt_ss: 3463; AVX1: # %bb.0: 3464; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3465; AVX1-NEXT: vucomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2e,0xc8] 3466; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3467; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3468; 3469; AVX512-LABEL: test_mm_ucomilt_ss: 3470; AVX512: # %bb.0: 3471; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3472; AVX512-NEXT: vucomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc8] 3473; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3474; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3475 %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) 3476 ret i32 %res 3477} 3478declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone 3479 3480define i32 @test_mm_ucomineq_ss(<4 x float> %a0, <4 x float> 
%a1) nounwind { 3481; SSE-LABEL: test_mm_ucomineq_ss: 3482; SSE: # %bb.0: 3483; SSE-NEXT: ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1] 3484; SSE-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0] 3485; SSE-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1] 3486; SSE-NEXT: orb %al, %cl # encoding: [0x08,0xc1] 3487; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3488; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3489; 3490; AVX1-LABEL: test_mm_ucomineq_ss: 3491; AVX1: # %bb.0: 3492; AVX1-NEXT: vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1] 3493; AVX1-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0] 3494; AVX1-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1] 3495; AVX1-NEXT: orb %al, %cl # encoding: [0x08,0xc1] 3496; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3497; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3498; 3499; AVX512-LABEL: test_mm_ucomineq_ss: 3500; AVX512: # %bb.0: 3501; AVX512-NEXT: vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] 3502; AVX512-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0] 3503; AVX512-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1] 3504; AVX512-NEXT: orb %al, %cl # encoding: [0x08,0xc1] 3505; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3506; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3507 %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) 3508 ret i32 %res 3509} 3510declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone 3511 3512define <4 x float> @test_mm_undefined_ps() { 3513; CHECK-LABEL: test_mm_undefined_ps: 3514; CHECK: # %bb.0: 3515; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3516 ret <4 x float> undef 3517} 3518 3519define <4 x float> @test_mm_unpackhi_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 3520; SSE-LABEL: test_mm_unpackhi_ps: 3521; SSE: # %bb.0: 3522; SSE-NEXT: unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1] 3523; SSE-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3524; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3525; 3526; AVX1-LABEL: test_mm_unpackhi_ps: 3527; AVX1: # %bb.0: 3528; AVX1-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1] 3529; AVX1-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3530; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3531; 3532; AVX512-LABEL: test_mm_unpackhi_ps: 3533; AVX512: # %bb.0: 3534; AVX512-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1] 3535; AVX512-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3536; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3537 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 3538 ret <4 x float> %res 3539} 3540 3541define <4 x float> @test_mm_unpacklo_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 3542; SSE-LABEL: test_mm_unpacklo_ps: 3543; SSE: # %bb.0: 3544; SSE-NEXT: unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1] 3545; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3546; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3547; 3548; AVX1-LABEL: test_mm_unpacklo_ps: 3549; AVX1: # %bb.0: 3550; AVX1-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x14,0xc1] 3551; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3552; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3553; 3554; AVX512-LABEL: test_mm_unpacklo_ps: 3555; AVX512: # %bb.0: 3556; AVX512-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xc1] 3557; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3558; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3559 %res = 
shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 3560 ret <4 x float> %res 3561} 3562 3563define <4 x float> @test_mm_xor_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 3564; SSE-LABEL: test_mm_xor_ps: 3565; SSE: # %bb.0: 3566; SSE-NEXT: xorps %xmm1, %xmm0 # encoding: [0x0f,0x57,0xc1] 3567; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3568; 3569; AVX1-LABEL: test_mm_xor_ps: 3570; AVX1: # %bb.0: 3571; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1] 3572; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3573; 3574; AVX512-LABEL: test_mm_xor_ps: 3575; AVX512: # %bb.0: 3576; AVX512-NEXT: vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1] 3577; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3578 %arg0 = bitcast <4 x float> %a0 to <4 x i32> 3579 %arg1 = bitcast <4 x float> %a1 to <4 x i32> 3580 %res = xor <4 x i32> %arg0, %arg1 3581 %bc = bitcast <4 x i32> %res to <4 x float> 3582 ret <4 x float> %bc 3583} 3584 3585!0 = !{i32 1} 3586
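; NOTE: !0 above is the !nontemporal marker attached to the store in
; test_mm_stream_ps; it is what turns an ordinary 16-byte aligned store into
; movntps/vmovntps. A minimal sketch of the pattern (hypothetical %v/%p names,
; not part of the checked tests):
;   store <4 x float> %v, <4 x float>* %p, align 16, !nontemporal !0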