; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

; Scalar splat tests. Every function in this group fills all lanes of a vector
; with the scalar argument %A through a chain of insertelement instructions
; (one per lane, starting from zeroinitializer). The patterns attached to each
; function require the function label followed by the vdup mnemonic of the
; matching element width in the llc output.

; 8 x i8 (64-bit vector): expects vdup.8.
define <8 x i8> @v_dup8(i8 %A) nounwind {
;CHECK: v_dup8:
;CHECK: vdup.8
  %tmp1 = insertelement <8 x i8> zeroinitializer, i8 %A, i32 0
  %tmp2 = insertelement <8 x i8> %tmp1, i8 %A, i32 1
  %tmp3 = insertelement <8 x i8> %tmp2, i8 %A, i32 2
  %tmp4 = insertelement <8 x i8> %tmp3, i8 %A, i32 3
  %tmp5 = insertelement <8 x i8> %tmp4, i8 %A, i32 4
  %tmp6 = insertelement <8 x i8> %tmp5, i8 %A, i32 5
  %tmp7 = insertelement <8 x i8> %tmp6, i8 %A, i32 6
  %tmp8 = insertelement <8 x i8> %tmp7, i8 %A, i32 7
  ret <8 x i8> %tmp8
}

; 4 x i16 (64-bit vector): expects vdup.16.
define <4 x i16> @v_dup16(i16 %A) nounwind {
;CHECK: v_dup16:
;CHECK: vdup.16
  %tmp1 = insertelement <4 x i16> zeroinitializer, i16 %A, i32 0
  %tmp2 = insertelement <4 x i16> %tmp1, i16 %A, i32 1
  %tmp3 = insertelement <4 x i16> %tmp2, i16 %A, i32 2
  %tmp4 = insertelement <4 x i16> %tmp3, i16 %A, i32 3
  ret <4 x i16> %tmp4
}

; 2 x i32 (64-bit vector): expects vdup.32.
define <2 x i32> @v_dup32(i32 %A) nounwind {
;CHECK: v_dup32:
;CHECK: vdup.32
  %tmp1 = insertelement <2 x i32> zeroinitializer, i32 %A, i32 0
  %tmp2 = insertelement <2 x i32> %tmp1, i32 %A, i32 1
  ret <2 x i32> %tmp2
}

; 2 x float (64-bit vector): the float splat is also expected as vdup.32.
define <2 x float> @v_dupfloat(float %A) nounwind {
;CHECK: v_dupfloat:
;CHECK: vdup.32
  %tmp1 = insertelement <2 x float> zeroinitializer, float %A, i32 0
  %tmp2 = insertelement <2 x float> %tmp1, float %A, i32 1
  ret <2 x float> %tmp2
}

; 16 x i8 (128-bit vector): expects vdup.8.
define <16 x i8> @v_dupQ8(i8 %A) nounwind {
;CHECK: v_dupQ8:
;CHECK: vdup.8
  %tmp1 = insertelement <16 x i8> zeroinitializer, i8 %A, i32 0
  %tmp2 = insertelement <16 x i8> %tmp1, i8 %A, i32 1
  %tmp3 = insertelement <16 x i8> %tmp2, i8 %A, i32 2
  %tmp4 = insertelement <16 x i8> %tmp3, i8 %A, i32 3
  %tmp5 = insertelement <16 x i8> %tmp4, i8 %A, i32 4
  %tmp6 = insertelement <16 x i8> %tmp5, i8 %A, i32 5
  %tmp7 = insertelement <16 x i8> %tmp6, i8 %A, i32 6
  %tmp8 = insertelement <16 x i8> %tmp7, i8 %A, i32 7
  %tmp9 = insertelement <16 x i8> %tmp8, i8 %A, i32 8
  %tmp10 = insertelement <16 x i8> %tmp9, i8 %A, i32 9
  %tmp11 = insertelement <16 x i8> %tmp10, i8 %A, i32 10
  %tmp12 = insertelement <16 x i8> %tmp11, i8 %A, i32 11
  %tmp13 = insertelement <16 x i8> %tmp12, i8 %A, i32 12
  %tmp14 = insertelement <16 x i8> %tmp13, i8 %A, i32 13
  %tmp15 = insertelement <16 x i8> %tmp14, i8 %A, i32 14
  %tmp16 = insertelement <16 x i8> %tmp15, i8 %A, i32 15
  ret <16 x i8> %tmp16
}

; 8 x i16 (128-bit vector): expects vdup.16.
define <8 x i16> @v_dupQ16(i16 %A) nounwind {
;CHECK: v_dupQ16:
;CHECK: vdup.16
  %tmp1 = insertelement <8 x i16> zeroinitializer, i16 %A, i32 0
  %tmp2 = insertelement <8 x i16> %tmp1, i16 %A, i32 1
  %tmp3 = insertelement <8 x i16> %tmp2, i16 %A, i32 2
  %tmp4 = insertelement <8 x i16> %tmp3, i16 %A, i32 3
  %tmp5 = insertelement <8 x i16> %tmp4, i16 %A, i32 4
  %tmp6 = insertelement <8 x i16> %tmp5, i16 %A, i32 5
  %tmp7 = insertelement <8 x i16> %tmp6, i16 %A, i32 6
  %tmp8 = insertelement <8 x i16> %tmp7, i16 %A, i32 7
  ret <8 x i16> %tmp8
}

; 4 x i32 (128-bit vector): expects vdup.32.
define <4 x i32> @v_dupQ32(i32 %A) nounwind {
;CHECK: v_dupQ32:
;CHECK: vdup.32
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %A, i32 0
  %tmp2 = insertelement <4 x i32> %tmp1, i32 %A, i32 1
  %tmp3 = insertelement <4 x i32> %tmp2, i32 %A, i32 2
  %tmp4 = insertelement <4 x i32> %tmp3, i32 %A, i32 3
  ret <4 x i32> %tmp4
}

; 4 x float (128-bit vector): the float splat is also expected as vdup.32.
define <4 x float> @v_dupQfloat(float %A) nounwind {
;CHECK: v_dupQfloat:
;CHECK: vdup.32
  %tmp1 = insertelement <4 x float> zeroinitializer, float %A, i32 0
  %tmp2 = insertelement <4 x float> %tmp1, float %A, i32 1
  %tmp3 = insertelement <4 x float> %tmp2, float %A, i32 2
  %tmp4 = insertelement <4 x float> %tmp3, float %A, i32 3
  ret <4 x float> %tmp4
}

; Check to make sure it works with shuffles, too.

; Splat-by-shuffle tests. Each function inserts the scalar %A into lane 0 of
; an undef vector and then broadcasts lane 0 with a shufflevector whose mask is
; zeroinitializer. The attached patterns require the vdup mnemonic of the
; matching element width.

define <8 x i8> @v_shuffledup8(i8 %A) nounwind {
;CHECK: v_shuffledup8:
;CHECK: vdup.8
  %tmp1 = insertelement <8 x i8> undef, i8 %A, i32 0
  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
  ret <8 x i8> %tmp2
}

define <4 x i16> @v_shuffledup16(i16 %A) nounwind {
;CHECK: v_shuffledup16:
;CHECK: vdup.16
  %tmp1 = insertelement <4 x i16> undef, i16 %A, i32 0
  %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
  ret <4 x i16> %tmp2
}

define <2 x i32> @v_shuffledup32(i32 %A) nounwind {
;CHECK: v_shuffledup32:
;CHECK: vdup.32
  %tmp1 = insertelement <2 x i32> undef, i32 %A, i32 0
  %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
  ret <2 x i32> %tmp2
}

define <2 x float> @v_shuffledupfloat(float %A) nounwind {
;CHECK: v_shuffledupfloat:
;CHECK: vdup.32
  %tmp1 = insertelement <2 x float> undef, float %A, i32 0
  %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
  ret <2 x float> %tmp2
}

define <16 x i8> @v_shuffledupQ8(i8 %A) nounwind {
;CHECK: v_shuffledupQ8:
;CHECK: vdup.8
  %tmp1 = insertelement <16 x i8> undef, i8 %A, i32 0
  %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %tmp2
}

define <8 x i16> @v_shuffledupQ16(i16 %A) nounwind {
;CHECK: v_shuffledupQ16:
;CHECK: vdup.16
  %tmp1 = insertelement <8 x i16> undef, i16 %A, i32 0
  %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %tmp2
}

define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind {
;CHECK: v_shuffledupQ32:
;CHECK: vdup.32
  %tmp1 = insertelement <4 x i32> undef, i32 %A, i32 0
  %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %tmp2
}

define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
;CHECK: v_shuffledupQfloat:
;CHECK: vdup.32
  %tmp1 = insertelement <4 x float> undef, float %A, i32 0
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %tmp2
}

; Lane-duplication tests. Each function loads a vector through %A and
; broadcasts lane 1 of it with an all-ones shuffle mask. The result has the
; same number of lanes as the loaded vector.

define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
;CHECK: vduplane8:
;CHECK: vdup.8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
  ret <8 x i8> %tmp2
}

define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
;CHECK: vduplane16:
;CHECK: vdup.16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
  ret <4 x i16> %tmp2
}

define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
;CHECK: vduplane32:
;CHECK: vdup.32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
  ret <2 x i32> %tmp2
}

define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
;CHECK: vduplanefloat:
;CHECK: vdup.32
  %tmp1 = load <2 x float>* %A
  %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
  ret <2 x float> %tmp2
}

; Widening lane-duplication tests. The loaded vector is 64 bits wide, and the
; shuffle mask has twice as many entries as the input has lanes, so the result
; is a 128-bit vector in which every lane is a copy of input lane 1.

define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
;CHECK: vduplaneQ8:
;CHECK: vdup.8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
  ret <16 x i8> %tmp2
}

define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
;CHECK: vduplaneQ16:
;CHECK: vdup.16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
  ret <8 x i16> %tmp2
}

define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
;CHECK: vduplaneQ32:
;CHECK: vdup.32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
  ret <4 x i32> %tmp2
}

define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind {
;CHECK: vduplaneQfloat:
;CHECK: vdup.32
  %tmp1 = load <2 x float>* %A
  %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
  ret <4 x float> %tmp2
}

; Splats of 64-bit elements (lane 1 and lane 0 of <2 x i64> and <2 x double>).
; These four functions carry no output patterns of their own.
; NOTE(review): presumably they only need to get through llc without an
; error; confirm against the commit that introduced them.

define <2 x i64> @foo(<2 x i64> %arg0_int64x1_t) nounwind readnone {
entry:
  %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %0
}

define <2 x i64> @bar(<2 x i64> %arg0_int64x1_t) nounwind readnone {
entry:
  %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x i64> %0
}

define <2 x double> @baz(<2 x double> %arg0_int64x1_t) nounwind readnone {
entry:
  %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %0
}

define <2 x double> @qux(<2 x double> %arg0_int64x1_t) nounwind readnone {
entry:
  %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %0
}

; Radar 7373643
; Splat of the constant -128 stored through %ptr. The patterns below require a
; vmov.i8 followed by a vstr.64, and forbid a vdup.8 between the two.
;CHECK: redundantVdup:
;CHECK: vmov.i8
;CHECK-NOT: vdup.8
;CHECK: vstr.64
define void @redundantVdup(<8 x i8>* %ptr) nounwind {
  %1 = insertelement <8 x i8> undef, i8 -128, i32 0
  %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
  store <8 x i8> %2, <8 x i8>* %ptr, align 8
  ret void
}