; RUN: llc < %s -mtriple=i686-- -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefix=ALL --check-prefix=X86
; RUN: llc < %s -mtriple=i686-- -mattr=+mmx,+avx | FileCheck %s --check-prefix=ALL --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-- -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefix=ALL --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-- -mattr=+mmx,+avx | FileCheck %s --check-prefix=ALL --check-prefix=X64

; Each function below wraps one intrinsic call operating on x86_mmx values in
; bitcasts from and to <1 x i64>, and its check lines require the matching
; instruction mnemonic in llc's output. The four llc invocations above cover
; the i686 and x86_64 triples, each once with SSSE3 (AVX disabled) and once
; with AVX enabled.

declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone

; SSSE3 horizontal add of 16-bit elements on MMX operands.
define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test1
; ALL: phaddw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

; Packed compares (greater-than, then equal) for 32-, 16- and 8-bit elements.

declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test88
; ALL: pcmpgtd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test87
; ALL: pcmpgtw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test86
; ALL: pcmpgtb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test85
; ALL: pcmpeqd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test84
; ALL: pcmpeqw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test83
; ALL: pcmpeqb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

; Unpack/interleave tests. These also check the shuffle comment llc prints:
; the i686 runs expect the second operand to be a memory operand, the x86_64
; runs expect it in mm1.

declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone

define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test82
; X86: punpckldq {{.*#+}} mm0 = mm0[0],mem[0]
; X64: punpckldq {{.*#+}} mm0 = mm0[0],mm1[0]
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone

define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test81
; X86: punpcklwd {{.*#+}} mm0 = mm0[0],mem[0],mm0[1],mem[1]
; X64: punpcklwd {{.*#+}} mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test80
; X86: punpcklbw {{.*#+}} mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3]
; X64: punpcklbw {{.*#+}} mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3]
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone

define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test79
; X86: punpckhdq {{.*#+}} mm0 = mm0[1],mem[1]
; X64: punpckhdq {{.*#+}} mm0 = mm0[1],mm1[1]
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone
; Unpack-high of 16-bit elements. The i686 runs expect the second operand to
; be a memory operand, the x86_64 runs expect it in mm1.
define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test78
; X86: punpckhwd {{.*#+}} mm0 = mm0[2],mem[2],mm0[3],mem[3]
; X64: punpckhwd {{.*#+}} mm0 = mm0[2],mm1[2],mm0[3],mm1[3]
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test77
; X86: punpckhbw {{.*#+}} mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7]
; X64: punpckhbw {{.*#+}} mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7]
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

; Pack tests: the result is viewed with a narrower element type than the
; inputs.

declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone

define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test76
; ALL: packuswb
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test75
; ALL: packssdw
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone

define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test74
; ALL: packsswb
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

; Shifts by an immediate count. The tests named *_2 pass a count of 0 and
; require that the shift mnemonic does not appear in the output.

declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone

define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test73
; ALL: psrad
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone

define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test72
; ALL: psraw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

define i64 @test72_2(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test72_2
; ALL-NOT: psraw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 0) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone

define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test71
; ALL: psrlq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to i64
  ret i64 %2
}

declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone

define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test70
; ALL: psrld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

define i64 @test70_2(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test70_2
; ALL-NOT: psrld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 0) nounwind
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone

define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test69
; ALL: psrlw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone

define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test68
; ALL: psllq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to i64
  ret i64 %2
}

declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone

define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test67
; ALL: pslld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone

define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test66
; ALL: psllw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

define i64 @test66_2(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test66_2
; ALL-NOT: psllw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 0) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

; Shifts whose count is supplied as a second x86_mmx operand built from %b.

declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test65
; ALL: psrad
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test64
; ALL: psraw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone

define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test63
; ALL: psrlq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test62
; ALL: psrld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test61
; ALL: psrlw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone

define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test60
; ALL: psllq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test59
; ALL: pslld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test58
; ALL: psllw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

; Bitwise logic tests.

declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone

define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test56
; ALL: pxor
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone

define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test55
; ALL: por
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone

define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test54
; ALL: pandn
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone

define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test53
; ALL: pand
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

; Multiply tests.

declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test52
; ALL: pmullw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

; Same body as test52: calls llvm.x86.mmx.pmull.w and expects pmullw again.
define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test51
; ALL: pmullw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test50
; ALL: pmulhw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone

; Inputs are viewed as <4 x i16>, the result as <2 x i32>.
define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test49
; ALL: pmaddwd
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

; Subtraction tests (the psubus.*, psubs.* and psub.q intrinsics).

declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test48
; ALL: psubusw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test47
; ALL: psubusb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test46
; ALL: psubsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test45
; ALL: psubsb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

; 64-bit subtract; the intrinsic declaration follows the function.
define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test44
; ALL: psubq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone
; Subtraction tests for 32-, 16- and 8-bit elements (the psub.* intrinsics).

declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test43
; ALL: psubd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test42
; ALL: psubw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test41
; ALL: psubb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

; Addition tests (the paddus.*, padds.* and padd.* intrinsics).

declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test40
; ALL: paddusw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test39
; ALL: paddusb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test38
; ALL: paddsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test37
; ALL: paddsb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone

; 64-bit add: operands are taken directly from the i64 element of each input.
define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test36
; ALL: paddq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test35
; ALL: paddd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test34
; ALL: paddw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test33
; ALL: paddb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone

; Inputs are viewed as <8 x i8>; the result is returned as a plain i64.
define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test32
; ALL: psadbw
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test31
; ALL: pminsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx
@llvm.x86.mmx.pmins.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind 999 %3 = bitcast x86_mmx %2 to <4 x i16> 1000 %4 = bitcast <4 x i16> %3 to <1 x i64> 1001 %5 = extractelement <1 x i64> %4, i32 0 1002 ret i64 %5 1003} 1004 1005declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone 1006 1007define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { 1008; ALL-LABEL: @test30 1009; ALL: pminub 1010entry: 1011 %0 = bitcast <1 x i64> %b to <8 x i8> 1012 %1 = bitcast <1 x i64> %a to <8 x i8> 1013 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx 1014 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx 1015 %2 = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind 1016 %3 = bitcast x86_mmx %2 to <8 x i8> 1017 %4 = bitcast <8 x i8> %3 to <1 x i64> 1018 %5 = extractelement <1 x i64> %4, i32 0 1019 ret i64 %5 1020} 1021 1022declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone 1023 1024define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { 1025; ALL-LABEL: @test29 1026; ALL: pmaxsw 1027entry: 1028 %0 = bitcast <1 x i64> %b to <4 x i16> 1029 %1 = bitcast <1 x i64> %a to <4 x i16> 1030 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx 1031 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx 1032 %2 = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind 1033 %3 = bitcast x86_mmx %2 to <4 x i16> 1034 %4 = bitcast <4 x i16> %3 to <1 x i64> 1035 %5 = extractelement <1 x i64> %4, i32 0 1036 ret i64 %5 1037} 1038 1039declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone 1040 1041define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { 1042; ALL-LABEL: @test28 1043; ALL: pmaxub 1044entry: 1045 %0 = bitcast <1 x i64> %b to <8 x i8> 1046 %1 = bitcast <1 x i64> %a to <8 x i8> 1047 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx 1048 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx 1049 %2 = tail call x86_mmx 
@llvm.x86.mmx.pmaxu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind 1050 %3 = bitcast x86_mmx %2 to <8 x i8> 1051 %4 = bitcast <8 x i8> %3 to <1 x i64> 1052 %5 = extractelement <1 x i64> %4, i32 0 1053 ret i64 %5 1054} 1055 1056declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone 1057 1058define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { 1059; ALL-LABEL: @test27 1060; ALL: pavgw 1061entry: 1062 %0 = bitcast <1 x i64> %b to <4 x i16> 1063 %1 = bitcast <1 x i64> %a to <4 x i16> 1064 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx 1065 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx 1066 %2 = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind 1067 %3 = bitcast x86_mmx %2 to <4 x i16> 1068 %4 = bitcast <4 x i16> %3 to <1 x i64> 1069 %5 = extractelement <1 x i64> %4, i32 0 1070 ret i64 %5 1071} 1072 1073declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone 1074 1075define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { 1076; ALL-LABEL: @test26 1077; ALL: pavgb 1078entry: 1079 %0 = bitcast <1 x i64> %b to <8 x i8> 1080 %1 = bitcast <1 x i64> %a to <8 x i8> 1081 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx 1082 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx 1083 %2 = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind 1084 %3 = bitcast x86_mmx %2 to <8 x i8> 1085 %4 = bitcast <8 x i8> %3 to <1 x i64> 1086 %5 = extractelement <1 x i64> %4, i32 0 1087 ret i64 %5 1088} 1089 1090declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind 1091 1092define void @test25(<1 x i64>* %p, <1 x i64> %a) nounwind optsize ssp { 1093; ALL-LABEL: @test25 1094; ALL: movntq 1095entry: 1096 %mmx_ptr_var.i = bitcast <1 x i64>* %p to x86_mmx* 1097 %0 = extractelement <1 x i64> %a, i32 0 1098 %mmx_var.i = bitcast i64 %0 to x86_mmx 1099 tail call void @llvm.x86.mmx.movnt.dq(x86_mmx* %mmx_ptr_var.i, x86_mmx %mmx_var.i) nounwind 1100 
ret void 1101} 1102 1103declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone 1104 1105define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp { 1106; ALL-LABEL: @test24 1107; ALL: pmovmskb 1108entry: 1109 %0 = bitcast <1 x i64> %a to <8 x i8> 1110 %mmx_var.i = bitcast <8 x i8> %0 to x86_mmx 1111 %1 = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %mmx_var.i) nounwind 1112 ret i32 %1 1113} 1114 1115declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind 1116 1117define void @test23(<1 x i64> %d, <1 x i64> %n, i8* %p) nounwind optsize ssp { 1118; ALL-LABEL: @test23 1119; ALL: maskmovq 1120entry: 1121 %0 = bitcast <1 x i64> %n to <8 x i8> 1122 %1 = bitcast <1 x i64> %d to <8 x i8> 1123 %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx 1124 %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx 1125 tail call void @llvm.x86.mmx.maskmovq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i, i8* %p) nounwind 1126 ret void 1127} 1128 1129declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone 1130 1131define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { 1132; ALL-LABEL: @test22 1133; ALL: pmulhuw 1134entry: 1135 %0 = bitcast <1 x i64> %b to <4 x i16> 1136 %1 = bitcast <1 x i64> %a to <4 x i16> 1137 %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx 1138 %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx 1139 %2 = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind 1140 %3 = bitcast x86_mmx %2 to <4 x i16> 1141 %4 = bitcast <4 x i16> %3 to <1 x i64> 1142 %5 = extractelement <1 x i64> %4, i32 0 1143 ret i64 %5 1144} 1145 1146declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone 1147 1148define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp { 1149; ALL-LABEL: @test21 1150; X86: pshufw {{.*#+}} mm0 = mem[3,0,0,0] 1151; X64: pshufw {{.*#+}} mm0 = mm0[3,0,0,0] 1152entry: 1153 %0 = bitcast <1 x i64> %a to <4 x i16> 1154 %1 = bitcast <4 x i16> %0 to x86_mmx 1155 %2 = tail call x86_mmx 
@llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone 1156 %3 = bitcast x86_mmx %2 to <4 x i16> 1157 %4 = bitcast <4 x i16> %3 to <1 x i64> 1158 %5 = extractelement <1 x i64> %4, i32 0 1159 ret i64 %5 1160} 1161 1162define i32 @test21_2(<1 x i64> %a) nounwind readnone optsize ssp { 1163; ALL-LABEL: @test21_2 1164; X86: pshufw {{.*#+}} mm0 = mem[3,0,0,0] 1165; X64: pshufw {{.*#+}} mm0 = mm0[3,0,0,0] 1166; ALL: movd 1167entry: 1168 %0 = bitcast <1 x i64> %a to <4 x i16> 1169 %1 = bitcast <4 x i16> %0 to x86_mmx 1170 %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone 1171 %3 = bitcast x86_mmx %2 to <4 x i16> 1172 %4 = bitcast <4 x i16> %3 to <2 x i32> 1173 %5 = extractelement <2 x i32> %4, i32 0 1174 ret i32 %5 1175} 1176 1177declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone 1178 1179define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { 1180; ALL-LABEL: @test20 1181; ALL: pmuludq 1182entry: 1183 %0 = bitcast <1 x i64> %b to <2 x i32> 1184 %1 = bitcast <1 x i64> %a to <2 x i32> 1185 %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx 1186 %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx 1187 %2 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind 1188 %3 = bitcast x86_mmx %2 to i64 1189 ret i64 %3 1190} 1191 1192declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone 1193 1194define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp { 1195; ALL-LABEL: @test19 1196; ALL: cvtpi2pd 1197entry: 1198 %0 = bitcast <1 x i64> %a to <2 x i32> 1199 %1 = bitcast <2 x i32> %0 to x86_mmx 1200 %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %1) nounwind readnone 1201 ret <2 x double> %2 1202} 1203 1204declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone 1205 1206define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp { 1207; ALL-LABEL: @test18 1208; ALL: cvttpd2pi 1209entry: 1210 %0 = tail call x86_mmx 
@llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone 1211 %1 = bitcast x86_mmx %0 to <2 x i32> 1212 %2 = bitcast <2 x i32> %1 to <1 x i64> 1213 %3 = extractelement <1 x i64> %2, i32 0 1214 ret i64 %3 1215} 1216 1217declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone 1218 1219define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp { 1220; ALL-LABEL: @test17 1221; ALL: cvtpd2pi 1222entry: 1223 %0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone 1224 %1 = bitcast x86_mmx %0 to <2 x i32> 1225 %2 = bitcast <2 x i32> %1 to <1 x i64> 1226 %3 = extractelement <1 x i64> %2, i32 0 1227 ret i64 %3 1228} 1229 1230declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone 1231 1232define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { 1233; ALL-LABEL: @test16 1234; ALL: palignr 1235entry: 1236 %0 = extractelement <1 x i64> %a, i32 0 1237 %mmx_var = bitcast i64 %0 to x86_mmx 1238 %1 = extractelement <1 x i64> %b, i32 0 1239 %mmx_var1 = bitcast i64 %1 to x86_mmx 1240 %2 = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %mmx_var, x86_mmx %mmx_var1, i8 16) 1241 %3 = bitcast x86_mmx %2 to i64 1242 ret i64 %3 1243} 1244 1245declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone 1246 1247define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp { 1248; ALL-LABEL: @test15 1249; ALL: pabsd 1250entry: 1251 %0 = bitcast <1 x i64> %a to <2 x i32> 1252 %1 = bitcast <2 x i32> %0 to x86_mmx 1253 %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) nounwind readnone 1254 %3 = bitcast x86_mmx %2 to <2 x i32> 1255 %4 = bitcast <2 x i32> %3 to <1 x i64> 1256 %5 = extractelement <1 x i64> %4, i32 0 1257 ret i64 %5 1258} 1259 1260declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone 1261 1262define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp { 1263; ALL-LABEL: @test14 1264; ALL: pabsw 1265entry: 1266 %0 = bitcast <1 x i64> %a to <4 x i16> 1267 
%1 = bitcast <4 x i16> %0 to x86_mmx 1268 %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) nounwind readnone 1269 %3 = bitcast x86_mmx %2 to <4 x i16> 1270 %4 = bitcast <4 x i16> %3 to <1 x i64> 1271 %5 = extractelement <1 x i64> %4, i32 0 1272 ret i64 %5 1273} 1274 1275declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone 1276 1277define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp { 1278; ALL-LABEL: @test13 1279; ALL: pabsb 1280entry: 1281 %0 = bitcast <1 x i64> %a to <8 x i8> 1282 %1 = bitcast <8 x i8> %0 to x86_mmx 1283 %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) nounwind readnone 1284 %3 = bitcast x86_mmx %2 to <8 x i8> 1285 %4 = bitcast <8 x i8> %3 to <1 x i64> 1286 %5 = extractelement <1 x i64> %4, i32 0 1287 ret i64 %5 1288} 1289 1290declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone 1291 1292define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { 1293; ALL-LABEL: @test12 1294; ALL: psignd 1295entry: 1296 %0 = bitcast <1 x i64> %b to <2 x i32> 1297 %1 = bitcast <1 x i64> %a to <2 x i32> 1298 %2 = bitcast <2 x i32> %1 to x86_mmx 1299 %3 = bitcast <2 x i32> %0 to x86_mmx 1300 %4 = tail call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %2, x86_mmx %3) nounwind readnone 1301 %5 = bitcast x86_mmx %4 to <2 x i32> 1302 %6 = bitcast <2 x i32> %5 to <1 x i64> 1303 %7 = extractelement <1 x i64> %6, i32 0 1304 ret i64 %7 1305} 1306 1307declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone 1308 1309define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { 1310; ALL-LABEL: @test11 1311; ALL: psignw 1312entry: 1313 %0 = bitcast <1 x i64> %b to <4 x i16> 1314 %1 = bitcast <1 x i64> %a to <4 x i16> 1315 %2 = bitcast <4 x i16> %1 to x86_mmx 1316 %3 = bitcast <4 x i16> %0 to x86_mmx 1317 %4 = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %2, x86_mmx %3) nounwind readnone 1318 %5 = bitcast x86_mmx %4 to <4 x i16> 1319 %6 = bitcast <4 x i16> 
%5 to <1 x i64> 1320 %7 = extractelement <1 x i64> %6, i32 0 1321 ret i64 %7 1322} 1323 1324declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone 1325 1326define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { 1327; ALL-LABEL: @test10 1328; ALL: psignb 1329entry: 1330 %0 = bitcast <1 x i64> %b to <8 x i8> 1331 %1 = bitcast <1 x i64> %a to <8 x i8> 1332 %2 = bitcast <8 x i8> %1 to x86_mmx 1333 %3 = bitcast <8 x i8> %0 to x86_mmx 1334 %4 = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %2, x86_mmx %3) nounwind readnone 1335 %5 = bitcast x86_mmx %4 to <8 x i8> 1336 %6 = bitcast <8 x i8> %5 to <1 x i64> 1337 %7 = extractelement <1 x i64> %6, i32 0 1338 ret i64 %7 1339} 1340 1341declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone 1342 1343define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { 1344; ALL-LABEL: @test9 1345; ALL: pshufb 1346entry: 1347 %0 = bitcast <1 x i64> %b to <8 x i8> 1348 %1 = bitcast <1 x i64> %a to <8 x i8> 1349 %2 = bitcast <8 x i8> %1 to x86_mmx 1350 %3 = bitcast <8 x i8> %0 to x86_mmx 1351 %4 = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %2, x86_mmx %3) nounwind readnone 1352 %5 = bitcast x86_mmx %4 to <8 x i8> 1353 %6 = bitcast <8 x i8> %5 to <1 x i64> 1354 %7 = extractelement <1 x i64> %6, i32 0 1355 ret i64 %7 1356} 1357 1358declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone 1359 1360define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { 1361; ALL-LABEL: @test8 1362; ALL: pmulhrsw 1363entry: 1364 %0 = bitcast <1 x i64> %b to <4 x i16> 1365 %1 = bitcast <1 x i64> %a to <4 x i16> 1366 %2 = bitcast <4 x i16> %1 to x86_mmx 1367 %3 = bitcast <4 x i16> %0 to x86_mmx 1368 %4 = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %2, x86_mmx %3) nounwind readnone 1369 %5 = bitcast x86_mmx %4 to <4 x i16> 1370 %6 = bitcast <4 x i16> %5 to <1 x i64> 1371 %7 = extractelement <1 x i64> %6, i32 0 1372 ret 
i64 %7 1373} 1374 1375declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone 1376 1377define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { 1378; ALL-LABEL: @test7 1379; ALL: pmaddubsw 1380entry: 1381 %0 = bitcast <1 x i64> %b to <8 x i8> 1382 %1 = bitcast <1 x i64> %a to <8 x i8> 1383 %2 = bitcast <8 x i8> %1 to x86_mmx 1384 %3 = bitcast <8 x i8> %0 to x86_mmx 1385 %4 = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone 1386 %5 = bitcast x86_mmx %4 to <8 x i8> 1387 %6 = bitcast <8 x i8> %5 to <1 x i64> 1388 %7 = extractelement <1 x i64> %6, i32 0 1389 ret i64 %7 1390} 1391 1392declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone 1393 1394define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { 1395; ALL-LABEL: @test6 1396; ALL: phsubsw 1397entry: 1398 %0 = bitcast <1 x i64> %b to <4 x i16> 1399 %1 = bitcast <1 x i64> %a to <4 x i16> 1400 %2 = bitcast <4 x i16> %1 to x86_mmx 1401 %3 = bitcast <4 x i16> %0 to x86_mmx 1402 %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone 1403 %5 = bitcast x86_mmx %4 to <4 x i16> 1404 %6 = bitcast <4 x i16> %5 to <1 x i64> 1405 %7 = extractelement <1 x i64> %6, i32 0 1406 ret i64 %7 1407} 1408 1409declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone 1410 1411define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { 1412; ALL-LABEL: @test5 1413; ALL: phsubd 1414entry: 1415 %0 = bitcast <1 x i64> %b to <2 x i32> 1416 %1 = bitcast <1 x i64> %a to <2 x i32> 1417 %2 = bitcast <2 x i32> %1 to x86_mmx 1418 %3 = bitcast <2 x i32> %0 to x86_mmx 1419 %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %2, x86_mmx %3) nounwind readnone 1420 %5 = bitcast x86_mmx %4 to <2 x i32> 1421 %6 = bitcast <2 x i32> %5 to <1 x i64> 1422 %7 = extractelement <1 x i64> %6, i32 0 1423 ret i64 %7 1424} 1425 1426declare x86_mmx 
@llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone 1427 1428define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { 1429; ALL-LABEL: @test4 1430; ALL: phsubw 1431entry: 1432 %0 = bitcast <1 x i64> %b to <4 x i16> 1433 %1 = bitcast <1 x i64> %a to <4 x i16> 1434 %2 = bitcast <4 x i16> %1 to x86_mmx 1435 %3 = bitcast <4 x i16> %0 to x86_mmx 1436 %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %2, x86_mmx %3) nounwind readnone 1437 %5 = bitcast x86_mmx %4 to <4 x i16> 1438 %6 = bitcast <4 x i16> %5 to <1 x i64> 1439 %7 = extractelement <1 x i64> %6, i32 0 1440 ret i64 %7 1441} 1442 1443declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone 1444 1445define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { 1446; ALL-LABEL: @test3 1447; ALL: phaddsw 1448entry: 1449 %0 = bitcast <1 x i64> %b to <4 x i16> 1450 %1 = bitcast <1 x i64> %a to <4 x i16> 1451 %2 = bitcast <4 x i16> %1 to x86_mmx 1452 %3 = bitcast <4 x i16> %0 to x86_mmx 1453 %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %2, x86_mmx %3) nounwind readnone 1454 %5 = bitcast x86_mmx %4 to <4 x i16> 1455 %6 = bitcast <4 x i16> %5 to <1 x i64> 1456 %7 = extractelement <1 x i64> %6, i32 0 1457 ret i64 %7 1458} 1459 1460declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone 1461 1462define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { 1463; ALL-LABEL: @test2 1464; ALL: phaddd 1465entry: 1466 %0 = bitcast <1 x i64> %b to <2 x i32> 1467 %1 = bitcast <1 x i64> %a to <2 x i32> 1468 %2 = bitcast <2 x i32> %1 to x86_mmx 1469 %3 = bitcast <2 x i32> %0 to x86_mmx 1470 %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %2, x86_mmx %3) nounwind readnone 1471 %5 = bitcast x86_mmx %4 to <2 x i32> 1472 %6 = bitcast <2 x i32> %5 to <1 x i64> 1473 %7 = extractelement <1 x i64> %6, i32 0 1474 ret i64 %7 1475} 1476 1477define <4 x float> @test89(<4 x float> %a, x86_mmx %b) nounwind { 1478; 
ALL-LABEL: @test89 1479; ALL: cvtpi2ps 1480 %c = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, x86_mmx %b) 1481 ret <4 x float> %c 1482} 1483 1484declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone 1485 1486; ALL-LABEL: test90 1487define void @test90() { 1488; ALL-LABEL: @test90 1489; ALL: emms 1490 call void @llvm.x86.mmx.emms() 1491 ret void 1492} 1493 1494declare void @llvm.x86.mmx.emms() 1495 1496define <1 x i64> @test_mm_insert_pi16(<1 x i64> %a.coerce, i32 %d) nounwind { 1497; X86-LABEL: test_mm_insert_pi16: 1498; X86: # %bb.0: # %entry 1499; X86-NEXT: pushl %ebp 1500; X86-NEXT: movl %esp, %ebp 1501; X86-NEXT: andl $-8, %esp 1502; X86-NEXT: subl $16, %esp 1503; X86-NEXT: movl 8(%ebp), %eax 1504; X86-NEXT: movl 12(%ebp), %ecx 1505; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) 1506; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) 1507; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 1508; X86-NEXT: pinsrw $2, 16(%ebp), %mm0 1509; X86-NEXT: movq %mm0, (%esp) 1510; X86-NEXT: movl (%esp), %eax 1511; X86-NEXT: movl {{[0-9]+}}(%esp), %edx 1512; X86-NEXT: movl %ebp, %esp 1513; X86-NEXT: popl %ebp 1514; X86-NEXT: retl 1515; 1516; X64-LABEL: test_mm_insert_pi16: 1517; X64: # %bb.0: # %entry 1518; X64-NEXT: movq %rdi, %mm0 1519; X64-NEXT: pinsrw $2, %esi, %mm0 1520; X64-NEXT: movq %mm0, %rax 1521; X64-NEXT: retq 1522entry: 1523 %0 = bitcast <1 x i64> %a.coerce to x86_mmx 1524 %1 = tail call x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx %0, i32 %d, i32 2) 1525 %2 = bitcast x86_mmx %1 to <1 x i64> 1526 ret <1 x i64> %2 1527} 1528 1529declare x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx, i32, i32 immarg) 1530 1531define i32 @test_mm_extract_pi16(<1 x i64> %a.coerce) nounwind { 1532; X86-LABEL: test_mm_extract_pi16: 1533; X86: # %bb.0: # %entry 1534; X86-NEXT: pushl %ebp 1535; X86-NEXT: movl %esp, %ebp 1536; X86-NEXT: andl $-8, %esp 1537; X86-NEXT: subl $8, %esp 1538; X86-NEXT: movl 8(%ebp), %eax 1539; X86-NEXT: movl 12(%ebp), %ecx 1540; X86-NEXT: movl %ecx, 
{{[0-9]+}}(%esp) 1541; X86-NEXT: movl %eax, (%esp) 1542; X86-NEXT: movq (%esp), %mm0 1543; X86-NEXT: pextrw $2, %mm0, %eax 1544; X86-NEXT: movl %ebp, %esp 1545; X86-NEXT: popl %ebp 1546; X86-NEXT: retl 1547; 1548; X64-LABEL: test_mm_extract_pi16: 1549; X64: # %bb.0: # %entry 1550; X64-NEXT: movq %rdi, %mm0 1551; X64-NEXT: pextrw $2, %mm0, %eax 1552; X64-NEXT: retq 1553entry: 1554 %0 = bitcast <1 x i64> %a.coerce to x86_mmx 1555 %1 = tail call i32 @llvm.x86.mmx.pextr.w(x86_mmx %0, i32 2) 1556 ret i32 %1 1557} 1558 1559declare i32 @llvm.x86.mmx.pextr.w(x86_mmx, i32 immarg) 1560