; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2

; Verify the cost of vector logical shift right instructions.
;
; Prefix to subtarget mapping, as selected by the run lines above:
;   SSE2    -> -mcpu=corei7 with -mattr=+sse2,-sse4.1
;   SSE41   -> -mcpu=corei7
;   AVX     -> -mcpu=corei7-avx
;   AVX2    -> -mcpu=core-avx2
;   XOPAVX  -> -mcpu=bdver2 (also enables the shared XOP prefix)
;   XOPAVX2 -> -mcpu=bdver4 (also enables the shared XOP prefix)
;
; Every function header is matched with a label directive so that the
; per-prefix cost checks of one function can never be satisfied by the
; analysis output of a later function.

;
; Variable Shifts
;
; The shift amount is the second vector argument; each lane may shift by a
; different, non-constant amount.
;

define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_shift_v2i64':
; SSE2: Found an estimated cost of 4 for instruction: %shift
; SSE41: Found an estimated cost of 4 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = lshr <2 x i64> %a, %b
  ret <2 x i64> %shift
}

define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_shift_v4i64':
; SSE2: Found an estimated cost of 8 for instruction: %shift
; SSE41: Found an estimated cost of 8 for instruction: %shift
; AVX: Found an estimated cost of 8 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = lshr <4 x i64> %a, %b
  ret <4 x i64> %shift
}

define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
; SSE41: Found an estimated cost of 16 for instruction: %shift
; AVX: Found an estimated cost of 16 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = lshr <4 x i32> %a, %b
  ret <4 x i32> %shift
}

define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_shift_v8i32':
; SSE2: Found an estimated cost of 32 for instruction: %shift
; SSE41: Found an estimated cost of 32 for instruction: %shift
; AVX: Found an estimated cost of 32 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = lshr <8 x i32> %a, %b
  ret <8 x i32> %shift
}

define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
; SSE41: Found an estimated cost of 32 for instruction: %shift
; AVX: Found an estimated cost of 32 for instruction: %shift
; AVX2: Found an estimated cost of 32 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
  %shift = lshr <8 x i16> %a, %b
  ret <8 x i16> %shift
}

define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
; SSE41: Found an estimated cost of 64 for instruction: %shift
; AVX: Found an estimated cost of 64 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
  %shift = lshr <16 x i16> %a, %b
  ret <16 x i16> %shift
}

define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
; SSE41: Found an estimated cost of 26 for instruction: %shift
; AVX: Found an estimated cost of 26 for instruction: %shift
; AVX2: Found an estimated cost of 26 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
  %shift = lshr <16 x i8> %a, %b
  ret <16 x i8> %shift
}

define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_shift_v32i8':
; SSE2: Found an estimated cost of 52 for instruction: %shift
; SSE41: Found an estimated cost of 52 for instruction: %shift
; AVX: Found an estimated cost of 52 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
  %shift = lshr <32 x i8> %a, %b
  ret <32 x i8> %shift
}

;
; Uniform Variable Shifts
;
; The shift amount is non-constant, but element 0 of %b is broadcast to all
; lanes with a zero-mask shufflevector before the shift. Only the cost of
; %shift is checked, not the cost of %splat.
;

define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'splatvar_shift_v2i64':
; SSE2: Found an estimated cost of 4 for instruction: %shift
; SSE41: Found an estimated cost of 4 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
  %shift = lshr <2 x i64> %a, %splat
  ret <2 x i64> %shift
}

define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'splatvar_shift_v4i64':
; SSE2: Found an estimated cost of 8 for instruction: %shift
; SSE41: Found an estimated cost of 8 for instruction: %shift
; AVX: Found an estimated cost of 8 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
  %shift = lshr <4 x i64> %a, %splat
  ret <4 x i64> %shift
}

define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
; SSE41: Found an estimated cost of 16 for instruction: %shift
; AVX: Found an estimated cost of 16 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
  %shift = lshr <4 x i32> %a, %splat
  ret <4 x i32> %shift
}

define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
; SSE2: Found an estimated cost of 32 for instruction: %shift
; SSE41: Found an estimated cost of 32 for instruction: %shift
; AVX: Found an estimated cost of 32 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
  %shift = lshr <8 x i32> %a, %splat
  ret <8 x i32> %shift
}

define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
; SSE41: Found an estimated cost of 32 for instruction: %shift
; AVX: Found an estimated cost of 32 for instruction: %shift
; AVX2: Found an estimated cost of 32 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
  %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
  %shift = lshr <8 x i16> %a, %splat
  ret <8 x i16> %shift
}

define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
; SSE41: Found an estimated cost of 64 for instruction: %shift
; AVX: Found an estimated cost of 64 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
  %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
  %shift = lshr <16 x i16> %a, %splat
  ret <16 x i16> %shift
}

define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
; SSE41: Found an estimated cost of 26 for instruction: %shift
; AVX: Found an estimated cost of 26 for instruction: %shift
; AVX2: Found an estimated cost of 26 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
  %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
  %shift = lshr <16 x i8> %a, %splat
  ret <16 x i8> %shift
}

define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
; SSE2: Found an estimated cost of 52 for instruction: %shift
; SSE41: Found an estimated cost of 52 for instruction: %shift
; AVX: Found an estimated cost of 52 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
  %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
  %shift = lshr <32 x i8> %a, %splat
  ret <32 x i8> %shift
}

;
; Constant Shifts
;
; The shift amount is a constant vector whose lanes hold different values.
;

define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'constant_shift_v2i64':
; SSE2: Found an estimated cost of 4 for instruction: %shift
; SSE41: Found an estimated cost of 4 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = lshr <2 x i64> %a, <i64 1, i64 7>
  ret <2 x i64> %shift
}

define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'constant_shift_v4i64':
; SSE2: Found an estimated cost of 8 for instruction: %shift
; SSE41: Found an estimated cost of 8 for instruction: %shift
; AVX: Found an estimated cost of 8 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = lshr <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
  ret <4 x i64> %shift
}

define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'constant_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
; SSE41: Found an estimated cost of 16 for instruction: %shift
; AVX: Found an estimated cost of 16 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = lshr <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
  ret <4 x i32> %shift
}

define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'constant_shift_v8i32':
; SSE2: Found an estimated cost of 32 for instruction: %shift
; SSE41: Found an estimated cost of 32 for instruction: %shift
; AVX: Found an estimated cost of 32 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = lshr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %shift
}

define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'constant_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
; SSE41: Found an estimated cost of 32 for instruction: %shift
; AVX: Found an estimated cost of 32 for instruction: %shift
; AVX2: Found an estimated cost of 32 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
  %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
  ret <8 x i16> %shift
}

define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'constant_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
; SSE41: Found an estimated cost of 64 for instruction: %shift
; AVX: Found an estimated cost of 64 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
  %shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
  ret <16 x i16> %shift
}

define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'constant_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
; SSE41: Found an estimated cost of 26 for instruction: %shift
; AVX: Found an estimated cost of 26 for instruction: %shift
; AVX2: Found an estimated cost of 26 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
  %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <16 x i8> %shift
}

define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'constant_shift_v32i8':
; SSE2: Found an estimated cost of 52 for instruction: %shift
; SSE41: Found an estimated cost of 52 for instruction: %shift
; AVX: Found an estimated cost of 52 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
  %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <32 x i8> %shift
}

;
; Uniform Constant Shifts
;
; The shift amount is a constant vector with the same value in every lane.
;

define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'splatconstant_shift_v2i64':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = lshr <2 x i64> %a, <i64 7, i64 7>
  ret <2 x i64> %shift
}

define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'splatconstant_shift_v4i64':
; SSE2: Found an estimated cost of 2 for instruction: %shift
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
  ret <4 x i64> %shift
}

define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'splatconstant_shift_v4i32':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %shift
}

define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'splatconstant_shift_v8i32':
; SSE2: Found an estimated cost of 2 for instruction: %shift
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i32> %shift
}

define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'splatconstant_shift_v8i16':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
  %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %shift
}

define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'splatconstant_shift_v16i16':
; SSE2: Found an estimated cost of 2 for instruction: %shift
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
  %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <16 x i16> %shift
}

define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
  %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %shift
}

define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8':
; SSE2: Found an estimated cost of 2 for instruction: %shift
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
  %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %shift
}