; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core2 < %s | FileCheck --check-prefix=SSE2-CODEGEN %s
; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s

; Each function below performs a single vector 'ashr' of %a by %b. The opt
; invocation checks the cost the cost model reports for that instruction;
; the llc invocation checks which shift mnemonic appears in the generated
; code for the same function.

%shifttype = type <2 x i16>
define %shifttype @shift2i16(%shifttype %a, %shifttype %b) {
entry:
  ; SSE2: shift2i16
  ; SSE2: cost of 12 {{.*}} ashr
  ; SSE2-CODEGEN: shift2i16
  ; SSE2-CODEGEN: psrlq

  %0 = ashr %shifttype %a, %b
  ret %shifttype %0
}

%shifttype4i16 = type <4 x i16>
define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) {
entry:
  ; SSE2: shift4i16
  ; SSE2: cost of 16 {{.*}} ashr
  ; SSE2-CODEGEN: shift4i16
  ; SSE2-CODEGEN: psrad

  %0 = ashr %shifttype4i16 %a, %b
  ret %shifttype4i16 %0
}

%shifttype8i16 = type <8 x i16>
define %shifttype8i16 @shift8i16(%shifttype8i16 %a, %shifttype8i16 %b) {
entry:
  ; SSE2: shift8i16
  ; SSE2: cost of 32 {{.*}} ashr
  ; SSE2-CODEGEN: shift8i16
  ; SSE2-CODEGEN: psraw

  %0 = ashr %shifttype8i16 %a, %b
  ret %shifttype8i16 %0
}

%shifttype16i16 = type <16 x i16>
define %shifttype16i16 @shift16i16(%shifttype16i16 %a, %shifttype16i16 %b) {
entry:
  ; SSE2: shift16i16
  ; SSE2: cost of 64 {{.*}} ashr
  ; SSE2-CODEGEN: shift16i16
  ; SSE2-CODEGEN: psraw

  %0 = ashr %shifttype16i16 %a, %b
  ret %shifttype16i16 %0
}

%shifttype32i16 = type <32 x i16>
define %shifttype32i16 @shift32i16(%shifttype32i16 %a, %shifttype32i16 %b) {
entry:
  ; SSE2: shift32i16
  ; SSE2: cost of 128 {{.*}} ashr
  ; SSE2-CODEGEN: shift32i16
  ; SSE2-CODEGEN: psraw

  %0 = ashr %shifttype32i16 %a, %b
  ret %shifttype32i16 %0
}

%shifttype2i32 = type <2 x i32>
define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) {
entry:
  ; SSE2: shift2i32
  ; SSE2: cost of 12 {{.*}} ashr
  ; SSE2-CODEGEN: shift2i32
  ; SSE2-CODEGEN: psrlq

  %0 = ashr %shifttype2i32 %a, %b
  ret %shifttype2i32 %0
}

; Variable shift amount (%b), remaining i32 element vectors.

%shifttype4i32 = type <4 x i32>
define %shifttype4i32 @shift4i32(%shifttype4i32 %a, %shifttype4i32 %b) {
entry:
  ; SSE2: shift4i32
  ; SSE2: cost of 16 {{.*}} ashr
  ; SSE2-CODEGEN: shift4i32
  ; SSE2-CODEGEN: psrad

  %0 = ashr %shifttype4i32 %a, %b
  ret %shifttype4i32 %0
}

%shifttype8i32 = type <8 x i32>
define %shifttype8i32 @shift8i32(%shifttype8i32 %a, %shifttype8i32 %b) {
entry:
  ; SSE2: shift8i32
  ; SSE2: cost of 32 {{.*}} ashr
  ; SSE2-CODEGEN: shift8i32
  ; SSE2-CODEGEN: psrad

  %0 = ashr %shifttype8i32 %a, %b
  ret %shifttype8i32 %0
}

%shifttype16i32 = type <16 x i32>
define %shifttype16i32 @shift16i32(%shifttype16i32 %a, %shifttype16i32 %b) {
entry:
  ; SSE2: shift16i32
  ; SSE2: cost of 64 {{.*}} ashr
  ; SSE2-CODEGEN: shift16i32
  ; SSE2-CODEGEN: psrad

  %0 = ashr %shifttype16i32 %a, %b
  ret %shifttype16i32 %0
}

%shifttype32i32 = type <32 x i32>
define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
entry:
  ; SSE2: shift32i32
  ; SSE2: cost of 128 {{.*}} ashr
  ; SSE2-CODEGEN: shift32i32
  ; SSE2-CODEGEN: psrad

  %0 = ashr %shifttype32i32 %a, %b
  ret %shifttype32i32 %0
}

; Variable shift amount (%b), i64 element vectors.

%shifttype2i64 = type <2 x i64>
define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) {
entry:
  ; SSE2: shift2i64
  ; SSE2: cost of 12 {{.*}} ashr
  ; SSE2-CODEGEN: shift2i64
  ; SSE2-CODEGEN: psrlq

  %0 = ashr %shifttype2i64 %a, %b
  ret %shifttype2i64 %0
}

%shifttype4i64 = type <4 x i64>
define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) {
entry:
  ; SSE2: shift4i64
  ; SSE2: cost of 24 {{.*}} ashr
  ; SSE2-CODEGEN: shift4i64
  ; SSE2-CODEGEN: psrlq

  %0 = ashr %shifttype4i64 %a, %b
  ret %shifttype4i64 %0
}

%shifttype8i64 = type <8 x i64>
define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) {
entry:
  ; SSE2: shift8i64
  ; SSE2: cost of 48 {{.*}} ashr
  ; SSE2-CODEGEN: shift8i64
  ; SSE2-CODEGEN: psrlq

  %0 = ashr %shifttype8i64 %a, %b
  ret %shifttype8i64 %0
}

%shifttype16i64 = type <16 x i64>
define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) {
entry:
  ; SSE2: shift16i64
  ; SSE2: cost of 96 {{.*}} ashr
  ; SSE2-CODEGEN: shift16i64
  ; SSE2-CODEGEN: psrlq

  %0 = ashr %shifttype16i64 %a, %b
  ret %shifttype16i64 %0
}

%shifttype32i64 = type <32 x i64>
define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
entry:
  ; SSE2: shift32i64
  ; SSE2: cost of 192 {{.*}} ashr
  ; SSE2-CODEGEN: shift32i64
  ; SSE2-CODEGEN: psrlq

  %0 = ashr %shifttype32i64 %a, %b
  ret %shifttype32i64 %0
}

; Variable shift amount (%b), i8 element vectors.

%shifttype2i8 = type <2 x i8>
define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) {
entry:
  ; SSE2: shift2i8
  ; SSE2: cost of 12 {{.*}} ashr
  ; SSE2-CODEGEN: shift2i8
  ; SSE2-CODEGEN: psrlq

  %0 = ashr %shifttype2i8 %a, %b
  ret %shifttype2i8 %0
}

%shifttype4i8 = type <4 x i8>
define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) {
entry:
  ; SSE2: shift4i8
  ; SSE2: cost of 16 {{.*}} ashr
  ; SSE2-CODEGEN: shift4i8
  ; SSE2-CODEGEN: psrad

  %0 = ashr %shifttype4i8 %a, %b
  ret %shifttype4i8 %0
}

%shifttype8i8 = type <8 x i8>
define %shifttype8i8 @shift8i8(%shifttype8i8 %a, %shifttype8i8 %b) {
entry:
  ; SSE2: shift8i8
  ; SSE2: cost of 32 {{.*}} ashr
  ; SSE2-CODEGEN: shift8i8
  ; SSE2-CODEGEN: psraw

  %0 = ashr %shifttype8i8 %a, %b
  ret %shifttype8i8 %0
}

%shifttype16i8 = type <16 x i8>
define %shifttype16i8 @shift16i8(%shifttype16i8 %a, %shifttype16i8 %b) {
entry:
  ; SSE2: shift16i8
  ; SSE2: cost of 54 {{.*}} ashr
  ; SSE2-CODEGEN: shift16i8
  ; SSE2-CODEGEN: psraw

  %0 = ashr %shifttype16i8 %a, %b
  ret %shifttype16i8 %0
}

%shifttype32i8 = type <32 x i8>
define %shifttype32i8 @shift32i8(%shifttype32i8 %a, %shifttype32i8 %b) {
entry:
  ; SSE2: shift32i8
  ; SSE2: cost of 108 {{.*}} ashr
  ; SSE2-CODEGEN: shift32i8
  ; SSE2-CODEGEN: psraw

  %0 = ashr %shifttype32i8 %a, %b
  ret %shifttype32i8 %0
}

; Test shift by a constant value.
; In the functions below every lane is shifted by the constant 3; the %b
; argument is declared but not used.

%shifttypec = type <2 x i16>
define %shifttypec @shift2i16const(%shifttypec %a, %shifttypec %b) {
entry:
  ; SSE2: shift2i16const
  ; SSE2: cost of 4 {{.*}} ashr
  ; SSE2-CODEGEN: shift2i16const
  ; SSE2-CODEGEN: psrad $3

  %0 = ashr %shifttypec %a, <i16 3, i16 3>
  ret %shifttypec %0
}

%shifttypec4i16 = type <4 x i16>
define %shifttypec4i16 @shift4i16const(%shifttypec4i16 %a, %shifttypec4i16 %b) {
entry:
  ; SSE2: shift4i16const
  ; SSE2: cost of 1 {{.*}} ashr
  ; SSE2-CODEGEN: shift4i16const
  ; SSE2-CODEGEN: psrad $3

  %0 = ashr %shifttypec4i16 %a, <i16 3, i16 3, i16 3, i16 3>
  ret %shifttypec4i16 %0
}

%shifttypec8i16 = type <8 x i16>
define %shifttypec8i16 @shift8i16const(%shifttypec8i16 %a, %shifttypec8i16 %b) {
entry:
  ; SSE2: shift8i16const
  ; SSE2: cost of 1 {{.*}} ashr
  ; SSE2-CODEGEN: shift8i16const
  ; SSE2-CODEGEN: psraw $3

  %0 = ashr %shifttypec8i16 %a, <i16 3, i16 3, i16 3, i16 3,
                                 i16 3, i16 3, i16 3, i16 3>
  ret %shifttypec8i16 %0
}

%shifttypec16i16 = type <16 x i16>
define %shifttypec16i16 @shift16i16const(%shifttypec16i16 %a,
                                         %shifttypec16i16 %b) {
entry:
  ; SSE2: shift16i16const
  ; SSE2: cost of 2 {{.*}} ashr
  ; SSE2-CODEGEN: shift16i16const
  ; SSE2-CODEGEN: psraw $3

  %0 = ashr %shifttypec16i16 %a, <i16 3, i16 3, i16 3, i16 3,
                                  i16 3, i16 3, i16 3, i16 3,
                                  i16 3, i16 3, i16 3, i16 3,
                                  i16 3, i16 3, i16 3, i16 3>
  ret %shifttypec16i16 %0
}

%shifttypec32i16 = type <32 x i16>
define %shifttypec32i16 @shift32i16const(%shifttypec32i16 %a,
                                         %shifttypec32i16 %b) {
entry:
  ; SSE2: shift32i16const
  ; SSE2: cost of 4 {{.*}} ashr
  ; SSE2-CODEGEN: shift32i16const
  ; SSE2-CODEGEN: psraw $3

  %0 = ashr %shifttypec32i16 %a, <i16 3, i16 3, i16 3, i16 3,
                                  i16 3, i16 3, i16 3, i16 3,
                                  i16 3, i16 3, i16 3, i16 3,
                                  i16 3, i16 3, i16 3, i16 3,
                                  i16 3, i16 3, i16 3, i16 3,
                                  i16 3, i16 3, i16 3, i16 3,
                                  i16 3, i16 3, i16 3, i16 3,
                                  i16 3, i16 3, i16 3, i16 3>
  ret %shifttypec32i16 %0
}

%shifttypec2i32 = type <2 x i32>
define %shifttypec2i32 @shift2i32c(%shifttypec2i32 %a, %shifttypec2i32 %b) {
entry:
  ; SSE2: shift2i32c
  ; SSE2: cost of 4 {{.*}} ashr
  ; SSE2-CODEGEN: shift2i32c
  ; SSE2-CODEGEN: psrad $3

  %0 = ashr %shifttypec2i32 %a, <i32 3, i32 3>
  ret %shifttypec2i32 %0
}

%shifttypec4i32 = type <4 x i32>
define %shifttypec4i32 @shift4i32c(%shifttypec4i32 %a, %shifttypec4i32 %b) {
entry:
  ; SSE2: shift4i32c
  ; SSE2: cost of 1 {{.*}} ashr
  ; SSE2-CODEGEN: shift4i32c
  ; SSE2-CODEGEN: psrad $3

  %0 = ashr %shifttypec4i32 %a, <i32 3, i32 3, i32 3, i32 3>
  ret %shifttypec4i32 %0
}

%shifttypec8i32 = type <8 x i32>
define %shifttypec8i32 @shift8i32c(%shifttypec8i32 %a, %shifttypec8i32 %b) {
entry:
  ; SSE2: shift8i32c
  ; SSE2: cost of 2 {{.*}} ashr
  ; SSE2-CODEGEN: shift8i32c
  ; SSE2-CODEGEN: psrad $3

  %0 = ashr %shifttypec8i32 %a, <i32 3, i32 3, i32 3, i32 3,
                                 i32 3, i32 3, i32 3, i32 3>
  ret %shifttypec8i32 %0
}

%shifttypec16i32 = type <16 x i32>
define %shifttypec16i32 @shift16i32c(%shifttypec16i32 %a, %shifttypec16i32 %b) {
entry:
  ; SSE2: shift16i32c
  ; SSE2: cost of 4 {{.*}} ashr
  ; SSE2-CODEGEN: shift16i32c
  ; SSE2-CODEGEN: psrad $3

  %0 = ashr %shifttypec16i32 %a, <i32 3, i32 3, i32 3, i32 3,
                                  i32 3, i32 3, i32 3, i32 3,
                                  i32 3, i32 3, i32 3, i32 3,
                                  i32 3, i32 3, i32 3, i32 3>
  ret %shifttypec16i32 %0
}

%shifttypec32i32 = type <32 x i32>
define %shifttypec32i32 @shift32i32c(%shifttypec32i32 %a, %shifttypec32i32 %b) {
entry:
  ; SSE2: shift32i32c
  ; getTypeConversion fails here and promotes this to an i64.
  ; SSE2: cost of 8 {{.*}} ashr
  ; SSE2-CODEGEN: shift32i32c
  ; SSE2-CODEGEN: psrad $3
  %0 = ashr %shifttypec32i32 %a, <i32 3, i32 3, i32 3, i32 3,
                                  i32 3, i32 3, i32 3, i32 3,
                                  i32 3, i32 3, i32 3, i32 3,
                                  i32 3, i32 3, i32 3, i32 3,
                                  i32 3, i32 3, i32 3, i32 3,
                                  i32 3, i32 3, i32 3, i32 3,
                                  i32 3, i32 3, i32 3, i32 3,
                                  i32 3, i32 3, i32 3, i32 3>
  ret %shifttypec32i32 %0
}

%shifttypec2i64 = type <2 x i64>
define %shifttypec2i64 @shift2i64c(%shifttypec2i64 %a, %shifttypec2i64 %b) {
entry:
  ; SSE2: shift2i64c
  ; SSE2: cost of 4 {{.*}} ashr
  ; SSE2-CODEGEN: shift2i64c
  ; SSE2-CODEGEN: psrad $3

  %0 = ashr %shifttypec2i64 %a, <i64 3, i64 3>
  ret %shifttypec2i64 %0
}

%shifttypec4i64 = type <4 x i64>
define %shifttypec4i64 @shift4i64c(%shifttypec4i64 %a, %shifttypec4i64 %b) {
entry:
  ; SSE2: shift4i64c
  ; SSE2: cost of 8 {{.*}} ashr
  ; SSE2-CODEGEN: shift4i64c
  ; SSE2-CODEGEN: psrad $3

  %0 = ashr %shifttypec4i64 %a, <i64 3, i64 3, i64 3, i64 3>
  ret %shifttypec4i64 %0
}

%shifttypec8i64 = type <8 x i64>
define %shifttypec8i64 @shift8i64c(%shifttypec8i64 %a, %shifttypec8i64 %b) {
entry:
  ; SSE2: shift8i64c
  ; SSE2: cost of 16 {{.*}} ashr
  ; SSE2-CODEGEN: shift8i64c
  ; SSE2-CODEGEN: psrad $3

  %0 = ashr %shifttypec8i64 %a, <i64 3, i64 3, i64 3, i64 3,
                                 i64 3, i64 3, i64 3, i64 3>
  ret %shifttypec8i64 %0
}

%shifttypec16i64 = type <16 x i64>
define %shifttypec16i64 @shift16i64c(%shifttypec16i64 %a, %shifttypec16i64 %b) {
entry:
  ; SSE2: shift16i64c
  ; SSE2: cost of 32 {{.*}} ashr
  ; SSE2-CODEGEN: shift16i64c
  ; SSE2-CODEGEN: psrad $3

  %0 = ashr %shifttypec16i64 %a, <i64 3, i64 3, i64 3, i64 3,
                                  i64 3, i64 3, i64 3, i64 3,
                                  i64 3, i64 3, i64 3, i64 3,
                                  i64 3, i64 3, i64 3, i64 3>
  ret %shifttypec16i64 %0
}

%shifttypec32i64 = type <32 x i64>
define %shifttypec32i64 @shift32i64c(%shifttypec32i64 %a, %shifttypec32i64 %b) {
entry:
  ; SSE2: shift32i64c
  ; SSE2: cost of 64 {{.*}} ashr
  ; SSE2-CODEGEN: shift32i64c
  ; SSE2-CODEGEN: psrad $3

  %0 = ashr %shifttypec32i64 %a, <i64 3, i64 3, i64 3, i64 3,
                                  i64 3, i64 3, i64 3, i64 3,
                                  i64 3, i64 3, i64 3, i64 3,
                                  i64 3, i64 3, i64 3, i64 3,
                                  i64 3, i64 3, i64 3, i64 3,
                                  i64 3, i64 3, i64 3, i64 3,
                                  i64 3, i64 3, i64 3, i64 3,
                                  i64 3, i64 3, i64 3, i64 3>
  ret %shifttypec32i64 %0
}

%shifttypec2i8 = type <2 x i8>
define %shifttypec2i8 @shift2i8c(%shifttypec2i8 %a, %shifttypec2i8 %b) {
entry:
  ; SSE2: shift2i8c
  ; SSE2: cost of 4 {{.*}} ashr
  ; SSE2-CODEGEN: shift2i8c
  ; SSE2-CODEGEN: psrad $3

  %0 = ashr %shifttypec2i8 %a, <i8 3, i8 3>
  ret %shifttypec2i8 %0
}

%shifttypec4i8 = type <4 x i8>
define %shifttypec4i8 @shift4i8c(%shifttypec4i8 %a, %shifttypec4i8 %b) {
entry:
  ; SSE2: shift4i8c
  ; SSE2: cost of 1 {{.*}} ashr
  ; SSE2-CODEGEN: shift4i8c
  ; SSE2-CODEGEN: psrad $3

  %0 = ashr %shifttypec4i8 %a, <i8 3, i8 3, i8 3, i8 3>
  ret %shifttypec4i8 %0
}

%shifttypec8i8 = type <8 x i8>
define %shifttypec8i8 @shift8i8c(%shifttypec8i8 %a, %shifttypec8i8 %b) {
entry:
  ; SSE2: shift8i8c
  ; SSE2: cost of 1 {{.*}} ashr
  ; SSE2-CODEGEN: shift8i8c
  ; SSE2-CODEGEN: psraw $3

  %0 = ashr %shifttypec8i8 %a, <i8 3, i8 3, i8 3, i8 3,
                                i8 3, i8 3, i8 3, i8 3>
  ret %shifttypec8i8 %0
}

%shifttypec16i8 = type <16 x i8>
define %shifttypec16i8 @shift16i8c(%shifttypec16i8 %a, %shifttypec16i8 %b) {
entry:
  ; SSE2: shift16i8c
  ; SSE2: cost of 4 {{.*}} ashr
  ; SSE2-CODEGEN: shift16i8c
  ; SSE2-CODEGEN: psrlw $3

  %0 = ashr %shifttypec16i8 %a, <i8 3, i8 3, i8 3, i8 3,
                                 i8 3, i8 3, i8 3, i8 3,
                                 i8 3, i8 3, i8 3, i8 3,
                                 i8 3, i8 3, i8 3, i8 3>
  ret %shifttypec16i8 %0
}

%shifttypec32i8 = type <32 x i8>
define %shifttypec32i8 @shift32i8c(%shifttypec32i8 %a, %shifttypec32i8 %b) {
entry:
  ; SSE2: shift32i8c
  ; SSE2: cost of 8 {{.*}} ashr
  ; SSE2-CODEGEN: shift32i8c
  ; SSE2-CODEGEN: psrlw $3

  %0 = ashr %shifttypec32i8 %a, <i8 3, i8 3, i8 3, i8 3,
                                 i8 3, i8 3, i8 3, i8 3,
                                 i8 3, i8 3, i8 3, i8 3,
                                 i8 3, i8 3, i8 3, i8 3,
                                 i8 3, i8 3, i8 3, i8 3,
                                 i8 3, i8 3, i8 3, i8 3,
                                 i8 3, i8 3, i8 3, i8 3,
                                 i8 3, i8 3, i8 3, i8 3>
  ret %shifttypec32i8 %0
}
