; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core2 < %s | FileCheck --check-prefix=SSE2-CODEGEN %s
; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s

; Every function in this group performs a single vector shl whose shift amount
; is the second argument. The directives in front of each function pin the cost
; printed by the cost-model analysis and one mnemonic expected in the llc output.

; shl of <2 x i16> by a variable vector amount.
; SSE2: shift2i16
; SSE2: cost of 4 {{.*}} shl
; SSE2-CODEGEN: shift2i16
; SSE2-CODEGEN: psllq
%shifttype = type <2 x i16>
define %shifttype @shift2i16(%shifttype %a, %shifttype %b) {
entry:
  %res = shl %shifttype %a, %b
  ret %shifttype %res
}

; shl of <4 x i16> by a variable vector amount.
; SSE2: shift4i16
; SSE2: cost of 10 {{.*}} shl
; SSE2-CODEGEN: shift4i16
; SSE2-CODEGEN: pmuludq
%shifttype4i16 = type <4 x i16>
define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) {
entry:
  %res = shl %shifttype4i16 %a, %b
  ret %shifttype4i16 %res
}

; shl of <8 x i16> by a variable vector amount.
; SSE2: shift8i16
; SSE2: cost of 32 {{.*}} shl
; SSE2-CODEGEN: shift8i16
; SSE2-CODEGEN: psllw
%shifttype8i16 = type <8 x i16>
define %shifttype8i16 @shift8i16(%shifttype8i16 %a, %shifttype8i16 %b) {
entry:
  %res = shl %shifttype8i16 %a, %b
  ret %shifttype8i16 %res
}

; shl of <16 x i16> by a variable vector amount.
; SSE2: shift16i16
; SSE2: cost of 64 {{.*}} shl
; SSE2-CODEGEN: shift16i16
; SSE2-CODEGEN: psllw
%shifttype16i16 = type <16 x i16>
define %shifttype16i16 @shift16i16(%shifttype16i16 %a, %shifttype16i16 %b) {
entry:
  %res = shl %shifttype16i16 %a, %b
  ret %shifttype16i16 %res
}

; shl of <32 x i16> by a variable vector amount.
; SSE2: shift32i16
; SSE2: cost of 128 {{.*}} shl
; SSE2-CODEGEN: shift32i16
; SSE2-CODEGEN: psllw
%shifttype32i16 = type <32 x i16>
define %shifttype32i16 @shift32i16(%shifttype32i16 %a, %shifttype32i16 %b) {
entry:
  %res = shl %shifttype32i16 %a, %b
  ret %shifttype32i16 %res
}

; shl of <2 x i32> by a variable vector amount.
; SSE2: shift2i32
; SSE2: cost of 4 {{.*}} shl
; SSE2-CODEGEN: shift2i32
; SSE2-CODEGEN: psllq
%shifttype2i32 = type <2 x i32>
define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) {
entry:
  %res = shl %shifttype2i32 %a, %b
  ret %shifttype2i32 %res
}

; shl of <4 x i32> by a variable vector amount.
; SSE2: shift4i32
; SSE2: cost of 10 {{.*}} shl
; SSE2-CODEGEN: shift4i32
; SSE2-CODEGEN: pmuludq
%shifttype4i32 = type <4 x i32>
define %shifttype4i32 @shift4i32(%shifttype4i32 %a, %shifttype4i32 %b) {
entry:
  %res = shl %shifttype4i32 %a, %b
  ret %shifttype4i32 %res
}

; shl of <8 x i32> by a variable vector amount.
; SSE2: shift8i32
; SSE2: cost of 20 {{.*}} shl
; SSE2-CODEGEN: shift8i32
; SSE2-CODEGEN: pmuludq
%shifttype8i32 = type <8 x i32>
define %shifttype8i32 @shift8i32(%shifttype8i32 %a, %shifttype8i32 %b) {
entry:
  %res = shl %shifttype8i32 %a, %b
  ret %shifttype8i32 %res
}

; shl of <16 x i32> by a variable vector amount.
; SSE2: shift16i32
; SSE2: cost of 40 {{.*}} shl
; SSE2-CODEGEN: shift16i32
; SSE2-CODEGEN: pmuludq
%shifttype16i32 = type <16 x i32>
define %shifttype16i32 @shift16i32(%shifttype16i32 %a, %shifttype16i32 %b) {
entry:
  %res = shl %shifttype16i32 %a, %b
  ret %shifttype16i32 %res
}

; shl of <32 x i32> by a variable vector amount.
; SSE2: shift32i32
; SSE2: cost of 80 {{.*}} shl
; SSE2-CODEGEN: shift32i32
; SSE2-CODEGEN: pmuludq
%shifttype32i32 = type <32 x i32>
define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
entry:
  %res = shl %shifttype32i32 %a, %b
  ret %shifttype32i32 %res
}

; shl of <2 x i64> by a variable vector amount.
; SSE2: shift2i64
; SSE2: cost of 4 {{.*}} shl
; SSE2-CODEGEN: shift2i64
; SSE2-CODEGEN: psllq
%shifttype2i64 = type <2 x i64>
define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) {
entry:
  %res = shl %shifttype2i64 %a, %b
  ret %shifttype2i64 %res
}

; shl of <4 x i64> by a variable vector amount.
; SSE2: shift4i64
; SSE2: cost of 8 {{.*}} shl
; SSE2-CODEGEN: shift4i64
; SSE2-CODEGEN: psllq
%shifttype4i64 = type <4 x i64>
define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) {
entry:
  %res = shl %shifttype4i64 %a, %b
  ret %shifttype4i64 %res
}

; shl of <8 x i64> by a variable vector amount.
; SSE2: shift8i64
; SSE2: cost of 16 {{.*}} shl
; SSE2-CODEGEN: shift8i64
; SSE2-CODEGEN: psllq
%shifttype8i64 = type <8 x i64>
define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) {
entry:
  %res = shl %shifttype8i64 %a, %b
  ret %shifttype8i64 %res
}

; shl of <16 x i64> by a variable vector amount.
; SSE2: shift16i64
; SSE2: cost of 32 {{.*}} shl
; SSE2-CODEGEN: shift16i64
; SSE2-CODEGEN: psllq
%shifttype16i64 = type <16 x i64>
define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) {
entry:
  %res = shl %shifttype16i64 %a, %b
  ret %shifttype16i64 %res
}

; shl of <32 x i64> by a variable vector amount.
; SSE2: shift32i64
; SSE2: cost of 64 {{.*}} shl
; SSE2-CODEGEN: shift32i64
; SSE2-CODEGEN: psllq
%shifttype32i64 = type <32 x i64>
define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
entry:
  %res = shl %shifttype32i64 %a, %b
  ret %shifttype32i64 %res
}

; shl of <2 x i8> by a variable vector amount.
; SSE2: shift2i8
; SSE2: cost of 4 {{.*}} shl
; SSE2-CODEGEN: shift2i8
; SSE2-CODEGEN: psllq
%shifttype2i8 = type <2 x i8>
define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) {
entry:
  %res = shl %shifttype2i8 %a, %b
  ret %shifttype2i8 %res
}

; shl of <4 x i8> by a variable vector amount.
; SSE2: shift4i8
; SSE2: cost of 10 {{.*}} shl
; SSE2-CODEGEN: shift4i8
; SSE2-CODEGEN: pmuludq
%shifttype4i8 = type <4 x i8>
define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) {
entry:
  %res = shl %shifttype4i8 %a, %b
  ret %shifttype4i8 %res
}

; shl of <8 x i8> by a variable vector amount.
; SSE2: shift8i8
; SSE2: cost of 32 {{.*}} shl
; SSE2-CODEGEN: shift8i8
; SSE2-CODEGEN: psllw
%shifttype8i8 = type <8 x i8>
define %shifttype8i8 @shift8i8(%shifttype8i8 %a, %shifttype8i8 %b) {
entry:
  %res = shl %shifttype8i8 %a, %b
  ret %shifttype8i8 %res
}

; shl of <16 x i8> by a variable vector amount.
; SSE2: shift16i8
; SSE2: cost of 26 {{.*}} shl
; SSE2-CODEGEN: shift16i8
; SSE2-CODEGEN: psllw
%shifttype16i8 = type <16 x i8>
define %shifttype16i8 @shift16i8(%shifttype16i8 %a, %shifttype16i8 %b) {
entry:
  %res = shl %shifttype16i8 %a, %b
  ret %shifttype16i8 %res
}

; shl of <32 x i8> by a variable vector amount.
; SSE2: shift32i8
; SSE2: cost of 52 {{.*}} shl
; SSE2-CODEGEN: shift32i8
; SSE2-CODEGEN: psllw
%shifttype32i8 = type <32 x i8>
define %shifttype32i8 @shift32i8(%shifttype32i8 %a, %shifttype32i8 %b) {
entry:
  %res = shl %shifttype32i8 %a, %b
  ret %shifttype32i8 %res
}

; The functions that follow shift by a constant vector: every lane of the shift
; amount is the literal 3, and the %b argument is declared but never read.

; shl of <2 x i16> by a constant vector of 3s.
; SSE2: shift2i16const
; SSE2: cost of 1 {{.*}} shl
; SSE2-CODEGEN: shift2i16const
; SSE2-CODEGEN: psllq $3
%shifttypec = type <2 x i16>
define %shifttypec @shift2i16const(%shifttypec %a, %shifttypec %b) {
entry:
  %res = shl %shifttypec %a, <i16 3, i16 3>
  ret %shifttypec %res
}

; shl of <4 x i16> by a constant vector of 3s.
; SSE2: shift4i16const
; SSE2: cost of 1 {{.*}} shl
; SSE2-CODEGEN: shift4i16const
; SSE2-CODEGEN: pslld $3
%shifttypec4i16 = type <4 x i16>
define %shifttypec4i16 @shift4i16const(%shifttypec4i16 %a, %shifttypec4i16 %b) {
entry:
  %res = shl %shifttypec4i16 %a, <i16 3, i16 3, i16 3, i16 3>
  ret %shifttypec4i16 %res
}

; shl of <8 x i16> by a constant vector of 3s.
; SSE2: shift8i16const
; SSE2: cost of 1 {{.*}} shl
; SSE2-CODEGEN: shift8i16const
; SSE2-CODEGEN: psllw $3
%shifttypec8i16 = type <8 x i16>
define %shifttypec8i16 @shift8i16const(%shifttypec8i16 %a, %shifttypec8i16 %b) {
entry:
  %res = shl %shifttypec8i16 %a,
             <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret %shifttypec8i16 %res
}

; shl of <16 x i16> by a constant vector of 3s.
; SSE2: shift16i16const
; SSE2: cost of 2 {{.*}} shl
; SSE2-CODEGEN: shift16i16const
; SSE2-CODEGEN: psllw $3
%shifttypec16i16 = type <16 x i16>
define %shifttypec16i16 @shift16i16const(%shifttypec16i16 %a, %shifttypec16i16 %b) {
entry:
  %res = shl %shifttypec16i16 %a,
             <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3,
              i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret %shifttypec16i16 %res
}

; shl of <32 x i16> by a constant vector of 3s.
; SSE2: shift32i16const
; SSE2: cost of 4 {{.*}} shl
; SSE2-CODEGEN: shift32i16const
; SSE2-CODEGEN: psllw $3
%shifttypec32i16 = type <32 x i16>
define %shifttypec32i16 @shift32i16const(%shifttypec32i16 %a, %shifttypec32i16 %b) {
entry:
  %res = shl %shifttypec32i16 %a,
             <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3,
              i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3,
              i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3,
              i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret %shifttypec32i16 %res
}

; shl of <2 x i32> by a constant vector of 3s.
; SSE2: shift2i32c
; SSE2: cost of 1 {{.*}} shl
; SSE2-CODEGEN: shift2i32c
; SSE2-CODEGEN: psllq $3
%shifttypec2i32 = type <2 x i32>
define %shifttypec2i32 @shift2i32c(%shifttypec2i32 %a, %shifttypec2i32 %b) {
entry:
  %res = shl %shifttypec2i32 %a, <i32 3, i32 3>
  ret %shifttypec2i32 %res
}

; shl of <4 x i32> by a constant vector of 3s.
; SSE2: shift4i32c
; SSE2: cost of 1 {{.*}} shl
; SSE2-CODEGEN: shift4i32c
; SSE2-CODEGEN: pslld $3
%shifttypec4i32 = type <4 x i32>
define %shifttypec4i32 @shift4i32c(%shifttypec4i32 %a, %shifttypec4i32 %b) {
entry:
  %res = shl %shifttypec4i32 %a, <i32 3, i32 3, i32 3, i32 3>
  ret %shifttypec4i32 %res
}

; shl of <8 x i32> by a constant vector of 3s.
; SSE2: shift8i32c
; SSE2: cost of 2 {{.*}} shl
; SSE2-CODEGEN: shift8i32c
; SSE2-CODEGEN: pslld $3
%shifttypec8i32 = type <8 x i32>
define %shifttypec8i32 @shift8i32c(%shifttypec8i32 %a, %shifttypec8i32 %b) {
entry:
  %res = shl %shifttypec8i32 %a,
             <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret %shifttypec8i32 %res
}

; shl of <16 x i32> by a constant vector of 3s.
; SSE2: shift16i32c
; SSE2: cost of 4 {{.*}} shl
; SSE2-CODEGEN: shift16i32c
; SSE2-CODEGEN: pslld $3
%shifttypec16i32 = type <16 x i32>
define %shifttypec16i32 @shift16i32c(%shifttypec16i32 %a, %shifttypec16i32 %b) {
entry:
  %res = shl %shifttypec16i32 %a,
             <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3,
              i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret %shifttypec16i32 %res
}

; shl of <32 x i32> by a constant vector of 3s; %b is declared but not read.
; SSE2: shift32i32c
; SSE2: cost of 8 {{.*}} shl
; SSE2-CODEGEN: shift32i32c
; SSE2-CODEGEN: pslld $3
%shifttypec32i32 = type <32 x i32>
define %shifttypec32i32 @shift32i32c(%shifttypec32i32 %a, %shifttypec32i32 %b) {
entry:
  %res = shl %shifttypec32i32 %a,
             <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3,
              i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3,
              i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3,
              i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret %shifttypec32i32 %res
}

; shl of <2 x i64> by a constant vector of 3s; %b is declared but not read.
; SSE2: shift2i64c
; SSE2: cost of 1 {{.*}} shl
; SSE2-CODEGEN: shift2i64c
; SSE2-CODEGEN: psllq $3
%shifttypec2i64 = type <2 x i64>
define %shifttypec2i64 @shift2i64c(%shifttypec2i64 %a, %shifttypec2i64 %b) {
entry:
  %res = shl %shifttypec2i64 %a, <i64 3, i64 3>
  ret %shifttypec2i64 %res
}

; shl of <4 x i64> by a constant vector of 3s; %b is declared but not read.
; SSE2: shift4i64c
; SSE2: cost of 2 {{.*}} shl
; SSE2-CODEGEN: shift4i64c
; SSE2-CODEGEN: psllq $3
%shifttypec4i64 = type <4 x i64>
define %shifttypec4i64 @shift4i64c(%shifttypec4i64 %a, %shifttypec4i64 %b) {
entry:
  %res = shl %shifttypec4i64 %a, <i64 3, i64 3, i64 3, i64 3>
  ret %shifttypec4i64 %res
}

; shl of <8 x i64> by a constant vector of 3s; %b is declared but not read.
; SSE2: shift8i64c
; SSE2: cost of 4 {{.*}} shl
; SSE2-CODEGEN: shift8i64c
; SSE2-CODEGEN: psllq $3
%shifttypec8i64 = type <8 x i64>
define %shifttypec8i64 @shift8i64c(%shifttypec8i64 %a, %shifttypec8i64 %b) {
entry:
  %res = shl %shifttypec8i64 %a,
             <i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3>
  ret %shifttypec8i64 %res
}

; shl of <16 x i64> by a constant vector of 3s; %b is declared but not read.
; SSE2: shift16i64c
; SSE2: cost of 8 {{.*}} shl
; SSE2-CODEGEN: shift16i64c
; SSE2-CODEGEN: psllq $3
%shifttypec16i64 = type <16 x i64>
define %shifttypec16i64 @shift16i64c(%shifttypec16i64 %a, %shifttypec16i64 %b) {
entry:
  %res = shl %shifttypec16i64 %a,
             <i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3,
              i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3>
  ret %shifttypec16i64 %res
}

; shl of <32 x i64> by a constant vector of 3s; %b is declared but not read.
; SSE2: shift32i64c
; SSE2: cost of 16 {{.*}} shl
; SSE2-CODEGEN: shift32i64c
; SSE2-CODEGEN: psllq $3
%shifttypec32i64 = type <32 x i64>
define %shifttypec32i64 @shift32i64c(%shifttypec32i64 %a, %shifttypec32i64 %b) {
entry:
  %res = shl %shifttypec32i64 %a,
             <i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3,
              i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3,
              i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3,
              i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3>
  ret %shifttypec32i64 %res
}

; shl of <2 x i8> by a constant vector of 3s; %b is declared but not read.
; SSE2: shift2i8c
; SSE2: cost of 1 {{.*}} shl
; SSE2-CODEGEN: shift2i8c
; SSE2-CODEGEN: psllq $3
%shifttypec2i8 = type <2 x i8>
define %shifttypec2i8 @shift2i8c(%shifttypec2i8 %a, %shifttypec2i8 %b) {
entry:
  %res = shl %shifttypec2i8 %a, <i8 3, i8 3>
  ret %shifttypec2i8 %res
}

; shl of <4 x i8> by a constant vector of 3s; %b is declared but not read.
; SSE2: shift4i8c
; SSE2: cost of 1 {{.*}} shl
; SSE2-CODEGEN: shift4i8c
; SSE2-CODEGEN: pslld $3
%shifttypec4i8 = type <4 x i8>
define %shifttypec4i8 @shift4i8c(%shifttypec4i8 %a, %shifttypec4i8 %b) {
entry:
  %res = shl %shifttypec4i8 %a, <i8 3, i8 3, i8 3, i8 3>
  ret %shifttypec4i8 %res
}

; shl of <8 x i8> by a constant vector of 3s; %b is declared but not read.
; SSE2: shift8i8c
; SSE2: cost of 1 {{.*}} shl
; SSE2-CODEGEN: shift8i8c
; SSE2-CODEGEN: psllw $3
%shifttypec8i8 = type <8 x i8>
define %shifttypec8i8 @shift8i8c(%shifttypec8i8 %a, %shifttypec8i8 %b) {
entry:
  %res = shl %shifttypec8i8 %a,
             <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret %shifttypec8i8 %res
}

; shl of <16 x i8> by a constant vector of 3s; %b is declared but not read.
; SSE2: shift16i8c
; SSE2: cost of 1 {{.*}} shl
; SSE2-CODEGEN: shift16i8c
; SSE2-CODEGEN: psllw $3
%shifttypec16i8 = type <16 x i8>
define %shifttypec16i8 @shift16i8c(%shifttypec16i8 %a, %shifttypec16i8 %b) {
entry:
  %res = shl %shifttypec16i8 %a,
             <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
              i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret %shifttypec16i8 %res
}

; shl of <32 x i8> by a constant vector of 3s; %b is declared but not read.
; SSE2: shift32i8c
; SSE2: cost of 2 {{.*}} shl
; SSE2-CODEGEN: shift32i8c
; SSE2-CODEGEN: psllw $3
%shifttypec32i8 = type <32 x i8>
define %shifttypec32i8 @shift32i8c(%shifttypec32i8 %a, %shifttypec32i8 %b) {
entry:
  %res = shl %shifttypec32i8 %a,
             <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
              i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
              i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
              i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret %shifttypec32i8 %res
}