; RUN: llc < %s -o - -march=x86-64 -mattr=+sse42 | FileCheck %s

; Test widening of loads and stores of oddly sized vectors, based on pr5626.
; Each vector type below is illegal for SSE and must be widened to a legal
; type, while the stores must still write exactly the declared number of bytes.

%i32vec3 = type <3 x i32>

; <3 x i32> is widened to <4 x i32>; the 12-byte store is split into an
; 8-byte movq plus a pextrd of the third element.
define void @add3i32(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
; CHECK: movdqa
; CHECK: paddd
; CHECK: pextrd
; CHECK: movq
  %a = load %i32vec3* %ap, align 16
  %b = load %i32vec3* %bp, align 16
  %x = add %i32vec3 %a, %b
  store %i32vec3 %x, %i32vec3* %ret, align 16
  ret void
}

; With only 8-byte alignment, each load is assembled from movq + pinsrd
; instead of a single movdqa.
define void @add3i32_2(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
; CHECK: movq
; CHECK: pinsrd
; CHECK: movq
; CHECK: pinsrd
; CHECK: paddd
; CHECK: pextrd
; CHECK: movq
  %a = load %i32vec3* %ap, align 8
  %b = load %i32vec3* %bp, align 8
  %x = add %i32vec3 %a, %b
  store %i32vec3 %x, %i32vec3* %ret, align 8
  ret void
}

%i32vec7 = type <7 x i32>

; <7 x i32> (28 bytes) is processed as two <4 x i32> halves; the trailing
; 12 bytes are stored via movq + pextrd.
define void @add7i32(%i32vec7* sret %ret, %i32vec7* %ap, %i32vec7* %bp) {
; CHECK: movdqa
; CHECK: movdqa
; CHECK: paddd
; CHECK: paddd
; CHECK: pextrd
; CHECK: movq
; CHECK: movdqa
  %a = load %i32vec7* %ap, align 16
  %b = load %i32vec7* %bp, align 16
  %x = add %i32vec7 %a, %b
  store %i32vec7 %x, %i32vec7* %ret, align 16
  ret void
}

%i32vec12 = type <12 x i32>

; <12 x i32> is exactly three <4 x i32> registers, so no tail handling is
; needed.
define void @add12i32(%i32vec12* sret %ret, %i32vec12* %ap, %i32vec12* %bp) {
; CHECK: movdqa
; CHECK: movdqa
; CHECK: movdqa
; CHECK: paddd
; CHECK: paddd
; CHECK: paddd
; CHECK: movdqa
; CHECK: movdqa
; CHECK: movdqa
  %a = load %i32vec12* %ap, align 16
  %b = load %i32vec12* %bp, align 16
  %x = add %i32vec12 %a, %b
  store %i32vec12 %x, %i32vec12* %ret, align 16
  ret void
}


%i16vec3 = type <3 x i16>

; <3 x i16> (6 bytes) is stored as a 4-byte movd plus a pextrw.
define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp) nounwind {
; CHECK: movdqa
; CHECK: paddw
; CHECK: movd
; CHECK: pextrw
  %a = load %i16vec3* %ap, align 16
  %b = load %i16vec3* %bp, align 16
  %x = add %i16vec3 %a, %b
  store %i16vec3 %x, %i16vec3* %ret, align 16
  ret void
}

%i16vec4 = type <4 x i16>

; <4 x i16> (8 bytes) is stored with a single movq.
define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp) nounwind {
; CHECK: movdqa
; CHECK: paddw
; CHECK: movq
  %a = load %i16vec4* %ap, align 16
  %b = load %i16vec4* %bp, align 16
  %x = add %i16vec4 %a, %b
  store %i16vec4 %x, %i16vec4* %ret, align 16
  ret void
}

%i16vec12 = type <12 x i16>

; <12 x i16> (24 bytes) splits into a 16-byte movdqa plus an 8-byte movq.
define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12* %bp) nounwind {
; CHECK: movdqa
; CHECK: movdqa
; CHECK: paddw
; CHECK: paddw
; CHECK: movq
; CHECK: movdqa
  %a = load %i16vec12* %ap, align 16
  %b = load %i16vec12* %bp, align 16
  %x = add %i16vec12 %a, %b
  store %i16vec12 %x, %i16vec12* %ret, align 16
  ret void
}

%i16vec18 = type <18 x i16>

; <18 x i16> (36 bytes) splits into two movdqa plus a 4-byte movd.
define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18* %bp) nounwind {
; CHECK: movdqa
; CHECK: movdqa
; CHECK: movdqa
; CHECK: paddw
; CHECK: paddw
; CHECK: paddw
; CHECK: movd
; CHECK: movdqa
; CHECK: movdqa
  %a = load %i16vec18* %ap, align 16
  %b = load %i16vec18* %bp, align 16
  %x = add %i16vec18 %a, %b
  store %i16vec18 %x, %i16vec18* %ret, align 16
  ret void
}


%i8vec3 = type <3 x i8>

; <3 x i8> (3 bytes) is stored piecewise with pextrb and movb.
define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) nounwind {
; CHECK: movdqa
; CHECK: paddb
; CHECK: pextrb
; CHECK: movb
  %a = load %i8vec3* %ap, align 16
  %b = load %i8vec3* %bp, align 16
  %x = add %i8vec3 %a, %b
  store %i8vec3 %x, %i8vec3* %ret, align 16
  ret void
}

%i8vec31 = type <31 x i8>

; <31 x i8> is processed as two 16-byte halves; the 15-byte tail is stored
; piecewise (movq, pextrw, pextrb).
define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp) nounwind {
; CHECK: movdqa
; CHECK: movdqa
; CHECK: paddb
; CHECK: paddb
; CHECK: movq
; CHECK: pextrb
; CHECK: pextrw
  %a = load %i8vec31* %ap, align 16
  %b = load %i8vec31* %bp, align 16
  %x = add %i8vec31 %a, %b
  store %i8vec31 %x, %i8vec31* %ret, align 16
  ret void
}


%i8vec3pack = type { <3 x i8>, i8 }

; An lshr of a <3 x i8> must survive widening legalization; the shift is
; lowered using scalar byte shifts.
define %i8vec3pack @rot() nounwind {
; CHECK: shrb
entry:
  %X = alloca %i8vec3pack, align 4
  %rot = alloca %i8vec3pack, align 4
  %result = alloca %i8vec3pack, align 4
  %storetmp = bitcast %i8vec3pack* %X to <3 x i8>*
  store <3 x i8> <i8 -98, i8 -98, i8 -98>, <3 x i8>* %storetmp
  %storetmp1 = bitcast %i8vec3pack* %rot to <3 x i8>*
  store <3 x i8> <i8 1, i8 1, i8 1>, <3 x i8>* %storetmp1
  %tmp = load %i8vec3pack* %X
  %extractVec = extractvalue %i8vec3pack %tmp, 0
  %tmp2 = load %i8vec3pack* %rot
  %extractVec3 = extractvalue %i8vec3pack %tmp2, 0
  %shr = lshr <3 x i8> %extractVec, %extractVec3
  %storetmp4 = bitcast %i8vec3pack* %result to <3 x i8>*
  store <3 x i8> %shr, <3 x i8>* %storetmp4
  %tmp5 = load %i8vec3pack* %result
  ret %i8vec3pack %tmp5
}