; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

;
; DemandedBits - MOVMSK zeros the upper bits of the result.
;

define i32 @test_upper_x86_mmx_pmovmskb(x86_mmx %a0) {
; CHECK-LABEL: @test_upper_x86_mmx_pmovmskb(
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx [[A0:%.*]])
; CHECK-NEXT:    ret i32 [[TMP1]]
;
  %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0)
  %2 = and i32 %1, 255
  ret i32 %2
}

define i32 @test_upper_x86_sse_movmsk_ps(<4 x float> %a0) {
; CHECK-LABEL: @test_upper_x86_sse_movmsk_ps(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x float> [[A0:%.*]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4
; CHECK-NEXT:    [[TMP4:%.*]] = zext i4 [[TMP3]] to i32
; CHECK-NEXT:    ret i32 [[TMP4]]
;
  %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
  %2 = and i32 %1, 15
  ret i32 %2
}

define i32 @test_upper_x86_sse2_movmsk_pd(<2 x double> %a0) {
; CHECK-LABEL: @test_upper_x86_sse2_movmsk_pd(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x double> [[A0:%.*]] to <2 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <2 x i64> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i1> [[TMP2]] to i2
; CHECK-NEXT:    [[TMP4:%.*]] = zext i2 [[TMP3]] to i32
; CHECK-NEXT:    ret i32 [[TMP4]]
;
  %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  %2 = and i32 %1, 3
  ret i32 %2
}

define i32 @test_upper_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; CHECK-LABEL: @test_upper_x86_sse2_pmovmskb_128(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <16 x i8> [[A0:%.*]], zeroinitializer
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i1> [[TMP1]] to i16
; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
  %2 = and i32 %1, 65535
  ret i32 %2
}

define i32 @test_upper_x86_avx_movmsk_ps_256(<8 x float> %a0) {
; CHECK-LABEL: @test_upper_x86_avx_movmsk_ps_256(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x float> [[A0:%.*]] to <8 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <8 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i1> [[TMP2]] to i8
; CHECK-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP3]] to i32
; CHECK-NEXT:    ret i32 [[TMP4]]
;
  %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
  %2 = and i32 %1, 255
  ret i32 %2
}

define i32 @test_upper_x86_avx_movmsk_pd_256(<4 x double> %a0) {
; CHECK-LABEL: @test_upper_x86_avx_movmsk_pd_256(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x double> [[A0:%.*]] to <4 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <4 x i64> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4
; CHECK-NEXT:    [[TMP4:%.*]] = zext i4 [[TMP3]] to i32
; CHECK-NEXT:    ret i32 [[TMP4]]
;
  %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
  %2 = and i32 %1, 15
  ret i32 %2
}

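; Note: in the tests above the 'and' mask covers exactly one bit per source
; element, which is all the movmsk result can ever set, so the mask demands
; nothing beyond the known-zero upper bits and InstCombine removes it; as the
; checks show, the non-MMX variants are additionally rewritten as a generic
; sign-bit test (icmp slt zero, bitcast to iN, zext to i32).
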
; llvm.x86.avx2.pmovmskb uses the whole of the 32-bit register.

;
; DemandedBits - If we don't use the lower bits then we just return zero.
;

define i32 @test_lower_x86_mmx_pmovmskb(x86_mmx %a0) {
; CHECK-LABEL: @test_lower_x86_mmx_pmovmskb(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0)
  %2 = and i32 %1, -256
  ret i32 %2
}

define i32 @test_lower_x86_sse_movmsk_ps(<4 x float> %a0) {
; CHECK-LABEL: @test_lower_x86_sse_movmsk_ps(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
  %2 = and i32 %1, -16
  ret i32 %2
}

define i32 @test_lower_x86_sse2_movmsk_pd(<2 x double> %a0) {
; CHECK-LABEL: @test_lower_x86_sse2_movmsk_pd(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  %2 = and i32 %1, -4
  ret i32 %2
}

define i32 @test_lower_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; CHECK-LABEL: @test_lower_x86_sse2_pmovmskb_128(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
  %2 = and i32 %1, -65536
  ret i32 %2
}

define i32 @test_lower_x86_avx_movmsk_ps_256(<8 x float> %a0) {
; CHECK-LABEL: @test_lower_x86_avx_movmsk_ps_256(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
  %2 = and i32 %1, -256
  ret i32 %2
}

define i32 @test_lower_x86_avx_movmsk_pd_256(<4 x double> %a0) {
; CHECK-LABEL: @test_lower_x86_avx_movmsk_pd_256(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
  %2 = and i32 %1, -16
  ret i32 %2
}

; llvm.x86.avx2.pmovmskb uses the whole of the 32-bit register.

;
; Constant Folding (UNDEF -> ZERO)
;

define i32 @undef_x86_mmx_pmovmskb() {
; CHECK-LABEL: @undef_x86_mmx_pmovmskb(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx undef)
  ret i32 %1
}

define i32 @undef_x86_sse_movmsk_ps() {
; CHECK-LABEL: @undef_x86_sse_movmsk_ps(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> undef)
  ret i32 %1
}

define i32 @undef_x86_sse2_movmsk_pd() {
; CHECK-LABEL: @undef_x86_sse2_movmsk_pd(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> undef)
  ret i32 %1
}

define i32 @undef_x86_sse2_pmovmskb_128() {
; CHECK-LABEL: @undef_x86_sse2_pmovmskb_128(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> undef)
  ret i32 %1
}

define i32 @undef_x86_avx_movmsk_ps_256() {
; CHECK-LABEL: @undef_x86_avx_movmsk_ps_256(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> undef)
  ret i32 %1
}

define i32 @undef_x86_avx_movmsk_pd_256() {
; CHECK-LABEL: @undef_x86_avx_movmsk_pd_256(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> undef)
  ret i32 %1
}

define i32 @undef_x86_avx2_pmovmskb() {
; CHECK-LABEL: @undef_x86_avx2_pmovmskb(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> undef)
  ret i32 %1
}

;
; Constant Folding (ZERO -> ZERO)
;

define i32 @zero_x86_mmx_pmovmskb() {
; CHECK-LABEL: @zero_x86_mmx_pmovmskb(
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx bitcast (<1 x i64> zeroinitializer to x86_mmx))
; CHECK-NEXT:    ret i32 [[TMP1]]
;
  %1 = bitcast <1 x i64> zeroinitializer to x86_mmx
  %2 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %1)
  ret i32 %2
}

define i32 @zero_x86_sse_movmsk_ps() {
; CHECK-LABEL: @zero_x86_sse_movmsk_ps(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> zeroinitializer)
  ret i32 %1
}

define i32 @zero_x86_sse2_movmsk_pd() {
; CHECK-LABEL: @zero_x86_sse2_movmsk_pd(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> zeroinitializer)
  ret i32 %1
}

define i32 @zero_x86_sse2_pmovmskb_128() {
; CHECK-LABEL: @zero_x86_sse2_pmovmskb_128(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> zeroinitializer)
  ret i32 %1
}

define i32 @zero_x86_avx_movmsk_ps_256() {
; CHECK-LABEL: @zero_x86_avx_movmsk_ps_256(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> zeroinitializer)
  ret i32 %1
}

define i32 @zero_x86_avx_movmsk_pd_256() {
; CHECK-LABEL: @zero_x86_avx_movmsk_pd_256(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> zeroinitializer)
  ret i32 %1
}

define i32 @zero_x86_avx2_pmovmskb() {
; CHECK-LABEL: @zero_x86_avx2_pmovmskb(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> zeroinitializer)
  ret i32 %1
}

;
; Constant Folding
;

define i32 @fold_x86_mmx_pmovmskb() {
; CHECK-LABEL: @fold_x86_mmx_pmovmskb(
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx bitcast (<8 x i8> <i8 0, i8 -1, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 0> to x86_mmx))
; CHECK-NEXT:    ret i32 [[TMP1]]
;
  %1 = bitcast <8 x i8> <i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256> to x86_mmx
  %2 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %1)
  ret i32 %2
}

define i32 @fold_x86_sse_movmsk_ps() {
; CHECK-LABEL: @fold_x86_sse_movmsk_ps(
; CHECK-NEXT:    ret i32 10
;
  %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> <float 1.0, float -1.0, float 100.0, float -200.0>)
  ret i32 %1
}

define i32 @fold_x86_sse2_movmsk_pd() {
; CHECK-LABEL: @fold_x86_sse2_movmsk_pd(
; CHECK-NEXT:    ret i32 2
;
  %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> <double 1.0, double -1.0>)
  ret i32 %1
}

define i32 @fold_x86_sse2_pmovmskb_128() {
; CHECK-LABEL: @fold_x86_sse2_pmovmskb_128(
; CHECK-NEXT:    ret i32 5654
;
  %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> <i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256>)
  ret i32 %1
}

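; The fold to 5654 above follows from the sign bits: after i8 wrapping the
; vector is <0, -1, -1, 127, -127, 63, 64, 0> repeated twice, so the negative
; bytes sit at indices 1, 2, 4, 9, 10 and 12, giving
; 2 + 4 + 16 + 512 + 1024 + 4096 = 5654 (0b0001011000010110).
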
define i32 @fold_x86_avx_movmsk_ps_256() {
; CHECK-LABEL: @fold_x86_avx_movmsk_ps_256(
; CHECK-NEXT:    ret i32 170
;
  %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> <float 1.0, float -1.0, float 100.0, float -200.0, float +0.0, float -0.0, float 100000.0, float -5000000.0>)
  ret i32 %1
}

define i32 @fold_x86_avx_movmsk_pd_256() {
; CHECK-LABEL: @fold_x86_avx_movmsk_pd_256(
; CHECK-NEXT:    ret i32 10
;
  %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> <double 1.0, double -1.0, double 100.0, double -200.0>)
  ret i32 %1
}

define i32 @fold_x86_avx2_pmovmskb() {
; CHECK-LABEL: @fold_x86_avx2_pmovmskb(
; CHECK-NEXT:    ret i32 370546176
;
  %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256>)
  ret i32 %1
}

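; Two arithmetic notes on the folds above: in @fold_x86_avx_movmsk_ps_256 the
; -0.0 lane still has its sign bit set, so lanes 1, 3, 5 and 7 are negative and
; give 0b10101010 = 170; in @fold_x86_avx2_pmovmskb the undef bytes fold to 0,
; leaving negative bytes at indices 9, 10, 12, 17, 18, 20, 25, 26 and 28,
; which sums to 370546176.
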
define i32 @sext_sse_movmsk_ps(<4 x i1> %x) {
; CHECK-LABEL: @sext_sse_movmsk_ps(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i1> [[X:%.*]] to i4
; CHECK-NEXT:    [[TMP2:%.*]] = zext i4 [[TMP1]] to i32
; CHECK-NEXT:    ret i32 [[TMP2]]
;
  %sext = sext <4 x i1> %x to <4 x i32>
  %bc = bitcast <4 x i32> %sext to <4 x float>
  %r = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %bc)
  ret i32 %r
}

define i32 @sext_sse2_movmsk_pd(<2 x i1> %x) {
; CHECK-LABEL: @sext_sse2_movmsk_pd(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i1> [[X:%.*]] to i2
; CHECK-NEXT:    [[TMP2:%.*]] = zext i2 [[TMP1]] to i32
; CHECK-NEXT:    ret i32 [[TMP2]]
;
  %sext = sext <2 x i1> %x to <2 x i64>
  %bc = bitcast <2 x i64> %sext to <2 x double>
  %r = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %bc)
  ret i32 %r
}

define i32 @sext_sse2_pmovmskb_128(<16 x i1> %x) {
; CHECK-LABEL: @sext_sse2_pmovmskb_128(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i1> [[X:%.*]] to i16
; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
; CHECK-NEXT:    ret i32 [[TMP2]]
;
  %sext = sext <16 x i1> %x to <16 x i8>
  %r = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %sext)
  ret i32 %r
}

define i32 @sext_avx_movmsk_ps_256(<8 x i1> %x) {
; CHECK-LABEL: @sext_avx_movmsk_ps_256(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i1> [[X:%.*]] to i8
; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
; CHECK-NEXT:    ret i32 [[TMP2]]
;
  %sext = sext <8 x i1> %x to <8 x i32>
  %bc = bitcast <8 x i32> %sext to <8 x float>
  %r = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %bc)
  ret i32 %r
}

define i32 @sext_avx_movmsk_pd_256(<4 x i1> %x) {
; CHECK-LABEL: @sext_avx_movmsk_pd_256(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i1> [[X:%.*]] to i4
; CHECK-NEXT:    [[TMP2:%.*]] = zext i4 [[TMP1]] to i32
; CHECK-NEXT:    ret i32 [[TMP2]]
;
  %sext = sext <4 x i1> %x to <4 x i64>
  %bc = bitcast <4 x i64> %sext to <4 x double>
  %r = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %bc)
  ret i32 %r
}

define i32 @sext_avx2_pmovmskb(<32 x i1> %x) {
; CHECK-LABEL: @sext_avx2_pmovmskb(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <32 x i1> [[X:%.*]] to i32
; CHECK-NEXT:    ret i32 [[TMP1]]
;
  %sext = sext <32 x i1> %x to <32 x i8>
  %r = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %sext)
  ret i32 %r
}

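; In the sext tests above each sign-extended lane is all-ones or all-zero, so
; its sign bit equals the original i1 value; as the checks show, the movmsk
; therefore reduces to a bitcast of the bool vector to an iN mask plus a zext
; to i32 (the 32-element case needs no zext at all).
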
; Bitcast from sign-extended scalar.

define i32 @sext_sse_movmsk_ps_scalar_source(i1 %x) {
; CHECK-LABEL: @sext_sse_movmsk_ps_scalar_source(
; CHECK-NEXT:    [[SEXT:%.*]] = sext i1 [[X:%.*]] to i128
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[SEXT]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4
; CHECK-NEXT:    [[TMP4:%.*]] = zext i4 [[TMP3]] to i32
; CHECK-NEXT:    ret i32 [[TMP4]]
;
  %sext = sext i1 %x to i128
  %bc = bitcast i128 %sext to <4 x float>
  %r = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %bc)
  ret i32 %r
}

; Bitcast from vector type with more elements.

define i32 @sext_sse_movmsk_ps_too_many_elts(<8 x i1> %x) {
; CHECK-LABEL: @sext_sse_movmsk_ps_too_many_elts(
; CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[X:%.*]] to <8 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[SEXT]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4
; CHECK-NEXT:    [[TMP4:%.*]] = zext i4 [[TMP3]] to i32
; CHECK-NEXT:    ret i32 [[TMP4]]
;
  %sext = sext <8 x i1> %x to <8 x i16>
  %bc = bitcast <8 x i16> %sext to <4 x float>
  %r = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %bc)
  ret i32 %r
}

; Handle this by doing a bitcasted sign-bit test after the sext.

define i32 @sext_sse_movmsk_ps_must_replicate_bits(<2 x i1> %x) {
; CHECK-LABEL: @sext_sse_movmsk_ps_must_replicate_bits(
; CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[X:%.*]] to <2 x i64>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[SEXT]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4
; CHECK-NEXT:    [[TMP4:%.*]] = zext i4 [[TMP3]] to i32
; CHECK-NEXT:    ret i32 [[TMP4]]
;
  %sext = sext <2 x i1> %x to <2 x i64>
  %bc = bitcast <2 x i64> %sext to <4 x float>
  %r = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %bc)
  ret i32 %r
}

declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx)

declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>)
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>)
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>)

declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>)
declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>)
declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>)