; RUN: opt -slp-vectorizer < %s -S | FileCheck %s

; Verify that the SLP vectorizer is able to figure out that commutativity
; offers the possibility to splat/broadcast %c and thus make it profitable
; to vectorize this case.


; ModuleID = 'bugpoint-reduced-simplified.bc'
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.11.0"

@cle = external unnamed_addr global [32 x i8], align 16
@cle32 = external unnamed_addr global [32 x i32], align 16


; Check that we correctly detect a splat/broadcast by leveraging the
; commutativity property of `xor`.

; CHECK-LABEL: @splat
; CHECK: store <16 x i8>
define void @splat(i8 %a, i8 %b, i8 %c) {
  %1 = xor i8 %c, %a
  store i8 %1, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 0), align 16
  %2 = xor i8 %a, %c
  store i8 %2, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 1)
  %3 = xor i8 %a, %c
  store i8 %3, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 2)
  %4 = xor i8 %a, %c
  store i8 %4, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 3)
  %5 = xor i8 %c, %a
  store i8 %5, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 4)
  %6 = xor i8 %c, %b
  store i8 %6, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 5)
  %7 = xor i8 %c, %a
  store i8 %7, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 6)
  %8 = xor i8 %c, %b
  store i8 %8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 7)
  %9 = xor i8 %a, %c
  store i8 %9, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 8)
  %10 = xor i8 %a, %c
  store i8 %10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 9)
  %11 = xor i8 %a, %c
  store i8 %11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 10)
  %12 = xor i8 %a, %c
  store i8 %12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 11)
  %13 = xor i8 %a, %c
  store i8 %13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 12)
  %14 = xor i8 %a, %c
  store i8 %14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 13)
  %15 = xor i8 %a, %c
  store i8 %15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 14)
  %16 = xor i8 %a, %c
  store i8 %16, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 15)
  ret void
}



; Check that we correctly detect that we can have the same opcode on one side by
; leveraging the commutativity property of `xor`.

; CHECK-LABEL: @same_opcode_on_one_side
; CHECK: store <4 x i32>
define void @same_opcode_on_one_side(i32 %a, i32 %b, i32 %c) {
  %add1 = add i32 %c, %a
  %add2 = add i32 %c, %a
  %add3 = add i32 %a, %c
  %add4 = add i32 %c, %a
  %1 = xor i32 %add1, %a
  store i32 %1, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 0), align 16
  %2 = xor i32 %b, %add2
  store i32 %2, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 1)
  %3 = xor i32 %c, %add3
  store i32 %3, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 2)
  %4 = xor i32 %a, %add4
  store i32 %4, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 3)
  ret void
}