; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s

; rdar://9428579

%type1 = type { <16 x i8> }
%type2 = type { <8 x i8> }
%type3 = type { <4 x i16> }


; t1: stores a zero <16 x i8> through the pointer loaded from %argtable.
; The patterns expect the pointer to be loaded from [x0] and the vector to
; be stored from q0 through that pointer.
define hidden fastcc void @t1(%type1** %argtable) nounwind {
entry:
; CHECK-LABEL: t1:
; CHECK: ldr x[[REG:[0-9]+]], [x0]
; CHECK: str q0, [x[[REG]]]
  %tmp1 = load %type1*, %type1** %argtable, align 8
  %tmp2 = getelementptr inbounds %type1, %type1* %tmp1, i64 0, i32 0
  store <16 x i8> zeroinitializer, <16 x i8>* %tmp2, align 16
  ret void
}

; t2: same as t1 for a 64-bit vector; the store is expected to use d0.
define hidden fastcc void @t2(%type2** %argtable) nounwind {
entry:
; CHECK-LABEL: t2:
; CHECK: ldr x[[REG:[0-9]+]], [x0]
; CHECK: str d0, [x[[REG]]]
  %tmp1 = load %type2*, %type2** %argtable, align 8
  %tmp2 = getelementptr inbounds %type2, %type2* %tmp1, i64 0, i32 0
  store <8 x i8> zeroinitializer, <8 x i8>* %tmp2, align 8
  ret void
}

; Add a bunch of tests for rdar://11246289
;
; Each fct1_<elt>x<n> test copies vector number %offset of %array to the same
; index of the matching global array; the patterns expect one shifted offset
; register to be shared by the load and the store.
; Each fct2_<elt>x<n> test copies vector 3 of %array to index 5 of the global
; array; the patterns expect scaled immediate offsets on both accesses.

@globalArray64x2 = common global <2 x i64>* null, align 8
@globalArray32x4 = common global <4 x i32>* null, align 8
@globalArray16x8 = common global <8 x i16>* null, align 8
@globalArray8x16 = common global <16 x i8>* null, align 8
@globalArray64x1 = common global <1 x i64>* null, align 8
@globalArray32x2 = common global <2 x i32>* null, align 8
@globalArray16x4 = common global <4 x i16>* null, align 8
@globalArray8x8 = common global <8 x i8>* null, align 8
@floatglobalArray64x2 = common global <2 x double>* null, align 8
@floatglobalArray32x4 = common global <4 x float>* null, align 8
@floatglobalArray64x1 = common global <1 x double>* null, align 8
@floatglobalArray32x2 = common global <2 x float>* null, align 8

; 128-bit vectors: the offset is shifted by 4 (16-byte elements).
; The load pattern spells out the full address operand, closing bracket
; included, like the other fct1_* tests in this file.
define void @fct1_64x2(<2 x i64>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_64x2:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <2 x i64>, <2 x i64>* %array, i64 %offset
  %tmp = load <2 x i64>, <2 x i64>* %arrayidx, align 16
  %tmp1 = load <2 x i64>*, <2 x i64>** @globalArray64x2, align 8
  %arrayidx1 = getelementptr inbounds <2 x i64>, <2 x i64>* %tmp1, i64 %offset
  store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
  ret void
}

; Constant indices 3 and 5 of 16-byte vectors: byte offsets #48 and #80.
define void @fct2_64x2(<2 x i64>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_64x2:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
  %arrayidx = getelementptr inbounds <2 x i64>, <2 x i64>* %array, i64 3
  %tmp = load <2 x i64>, <2 x i64>* %arrayidx, align 16
  %tmp1 = load <2 x i64>*, <2 x i64>** @globalArray64x2, align 8
  %arrayidx1 = getelementptr inbounds <2 x i64>, <2 x i64>* %tmp1, i64 5
  store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
  ret void
}

define void @fct1_32x4(<4 x i32>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_32x4:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <4 x i32>, <4 x i32>* %array, i64 %offset
  %tmp = load <4 x i32>, <4 x i32>* %arrayidx, align 16
  %tmp1 = load <4 x i32>*, <4 x i32>** @globalArray32x4, align 8
  %arrayidx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %tmp1, i64 %offset
  store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
  ret void
}

define void @fct2_32x4(<4 x i32>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_32x4:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
  %arrayidx = getelementptr inbounds <4 x i32>, <4 x i32>* %array, i64 3
  %tmp = load <4 x i32>, <4 x i32>* %arrayidx, align 16
  %tmp1 = load <4 x i32>*, <4 x i32>** @globalArray32x4, align 8
  %arrayidx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %tmp1, i64 5
  store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
  ret void
}

define void @fct1_16x8(<8 x i16>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_16x8:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <8 x i16>, <8 x i16>* %array, i64 %offset
  %tmp = load <8 x i16>, <8 x i16>* %arrayidx, align 16
  %tmp1 = load <8 x i16>*, <8 x i16>** @globalArray16x8, align 8
  %arrayidx1 = getelementptr inbounds <8 x i16>, <8 x i16>* %tmp1, i64 %offset
  store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
  ret void
}

define void @fct2_16x8(<8 x i16>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_16x8:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
  %arrayidx = getelementptr inbounds <8 x i16>, <8 x i16>* %array, i64 3
  %tmp = load <8 x i16>, <8 x i16>* %arrayidx, align 16
  %tmp1 = load <8 x i16>*, <8 x i16>** @globalArray16x8, align 8
  %arrayidx1 = getelementptr inbounds <8 x i16>, <8 x i16>* %tmp1, i64 5
  store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
  ret void
}

define void @fct1_8x16(<16 x i8>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_8x16:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <16 x i8>, <16 x i8>* %array, i64 %offset
  %tmp = load <16 x i8>, <16 x i8>* %arrayidx, align 16
  %tmp1 = load <16 x i8>*, <16 x i8>** @globalArray8x16, align 8
  %arrayidx1 = getelementptr inbounds <16 x i8>, <16 x i8>* %tmp1, i64 %offset
  store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
  ret void
}

define void @fct2_8x16(<16 x i8>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_8x16:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
  %arrayidx = getelementptr inbounds <16 x i8>, <16 x i8>* %array, i64 3
  %tmp = load <16 x i8>, <16 x i8>* %arrayidx, align 16
  %tmp1 = load <16 x i8>*, <16 x i8>** @globalArray8x16, align 8
  %arrayidx1 = getelementptr inbounds <16 x i8>, <16 x i8>* %tmp1, i64 5
  store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
  ret void
}

; 64-bit vectors: the offset is shifted by 3 (8-byte elements) and the data
; register is a d register.
define void @fct1_64x1(<1 x i64>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_64x1:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <1 x i64>, <1 x i64>* %array, i64 %offset
  %tmp = load <1 x i64>, <1 x i64>* %arrayidx, align 8
  %tmp1 = load <1 x i64>*, <1 x i64>** @globalArray64x1, align 8
  %arrayidx1 = getelementptr inbounds <1 x i64>, <1 x i64>* %tmp1, i64 %offset
  store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
  ret void
}

; Constant indices 3 and 5 of 8-byte vectors: byte offsets #24 and #40.
define void @fct2_64x1(<1 x i64>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_64x1:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
  %arrayidx = getelementptr inbounds <1 x i64>, <1 x i64>* %array, i64 3
  %tmp = load <1 x i64>, <1 x i64>* %arrayidx, align 8
  %tmp1 = load <1 x i64>*, <1 x i64>** @globalArray64x1, align 8
  %arrayidx1 = getelementptr inbounds <1 x i64>, <1 x i64>* %tmp1, i64 5
  store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
  ret void
}

define void @fct1_32x2(<2 x i32>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_32x2:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <2 x i32>, <2 x i32>* %array, i64 %offset
  %tmp = load <2 x i32>, <2 x i32>* %arrayidx, align 8
  %tmp1 = load <2 x i32>*, <2 x i32>** @globalArray32x2, align 8
  %arrayidx1 = getelementptr inbounds <2 x i32>, <2 x i32>* %tmp1, i64 %offset
  store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
  ret void
}

define void @fct2_32x2(<2 x i32>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_32x2:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
  %arrayidx = getelementptr inbounds <2 x i32>, <2 x i32>* %array, i64 3
  %tmp = load <2 x i32>, <2 x i32>* %arrayidx, align 8
  %tmp1 = load <2 x i32>*, <2 x i32>** @globalArray32x2, align 8
  %arrayidx1 = getelementptr inbounds <2 x i32>, <2 x i32>* %tmp1, i64 5
  store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
  ret void
}

define void @fct1_16x4(<4 x i16>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_16x4:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <4 x i16>, <4 x i16>* %array, i64 %offset
  %tmp = load <4 x i16>, <4 x i16>* %arrayidx, align 8
  %tmp1 = load <4 x i16>*, <4 x i16>** @globalArray16x4, align 8
  %arrayidx1 = getelementptr inbounds <4 x i16>, <4 x i16>* %tmp1, i64 %offset
  store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
  ret void
}

define void @fct2_16x4(<4 x i16>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_16x4:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
  %arrayidx = getelementptr inbounds <4 x i16>, <4 x i16>* %array, i64 3
  %tmp = load <4 x i16>, <4 x i16>* %arrayidx, align 8
  %tmp1 = load <4 x i16>*, <4 x i16>** @globalArray16x4, align 8
  %arrayidx1 = getelementptr inbounds <4 x i16>, <4 x i16>* %tmp1, i64 5
  store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
  ret void
}

define void @fct1_8x8(<8 x i8>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_8x8:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <8 x i8>, <8 x i8>* %array, i64 %offset
  %tmp = load <8 x i8>, <8 x i8>* %arrayidx, align 8
  %tmp1 = load <8 x i8>*, <8 x i8>** @globalArray8x8, align 8
  %arrayidx1 = getelementptr inbounds <8 x i8>, <8 x i8>* %tmp1, i64 %offset
  store <8 x i8> %tmp, <8 x i8>* %arrayidx1, align 8
  ret void
}

; Add a bunch of tests for rdar://13258794: Match LDUR/STUR for D and Q
; registers for unscaled vector accesses
;
; fct0-fct7 load a vector from %str + 3 bytes, an offset that is not a
; multiple of the vector size; the patterns expect a single ldur.

define <1 x i64> @fct0(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct0:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <1 x i64>*
  %0 = load <1 x i64>, <1 x i64>* %q, align 8
  ret <1 x i64> %0
}

define <2 x i32> @fct1(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct1:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <2 x i32>*
  %0 = load <2 x i32>, <2 x i32>* %q, align 8
  ret <2 x i32> %0
}

define <4 x i16> @fct2(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct2:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <4 x i16>*
  %0 = load <4 x i16>, <4 x i16>* %q, align 8
  ret <4 x i16> %0
}

define <8 x i8> @fct3(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct3:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <8 x i8>*
  %0 = load <8 x i8>, <8 x i8>* %q, align 8
  ret <8 x i8> %0
}

define <2 x i64> @fct4(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct4:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <2 x i64>*
  %0 = load <2 x i64>, <2 x i64>* %q, align 16
  ret <2 x i64> %0
}

define <4 x i32> @fct5(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct5:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <4 x i32>*
  %0 = load <4 x i32>, <4 x i32>* %q, align 16
  ret <4 x i32> %0
}

define <8 x i16> @fct6(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct6:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <8 x i16>*
  %0 = load <8 x i16>, <8 x i16>* %q, align 16
  ret <8 x i16> %0
}

define <16 x i8> @fct7(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct7:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <16 x i8>*
  %0 = load <16 x i8>, <16 x i8>* %q, align 16
  ret <16 x i8> %0
}

; fct8-fct15 load a vector from %str + 3 and store it to %str + 4; the
; patterns expect an ldur/stur pair that shares the data and base registers.

define void @fct8(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct8:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <1 x i64>*
  %0 = load <1 x i64>, <1 x i64>* %q, align 8
  %p2 = getelementptr inbounds i8, i8* %str, i64 4
  %q2 = bitcast i8* %p2 to <1 x i64>*
  store <1 x i64> %0, <1 x i64>* %q2, align 8
  ret void
}

define void @fct9(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct9:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <2 x i32>*
  %0 = load <2 x i32>, <2 x i32>* %q, align 8
  %p2 = getelementptr inbounds i8, i8* %str, i64 4
  %q2 = bitcast i8* %p2 to <2 x i32>*
  store <2 x i32> %0, <2 x i32>* %q2, align 8
  ret void
}

define void @fct10(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct10:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <4 x i16>*
  %0 = load <4 x i16>, <4 x i16>* %q, align 8
  %p2 = getelementptr inbounds i8, i8* %str, i64 4
  %q2 = bitcast i8* %p2 to <4 x i16>*
  store <4 x i16> %0, <4 x i16>* %q2, align 8
  ret void
}

define void @fct11(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct11:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <8 x i8>*
  %0 = load <8 x i8>, <8 x i8>* %q, align 8
  %p2 = getelementptr inbounds i8, i8* %str, i64 4
  %q2 = bitcast i8* %p2 to <8 x i8>*
  store <8 x i8> %0, <8 x i8>* %q2, align 8
  ret void
}

define void @fct12(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct12:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <2 x i64>*
  %0 = load <2 x i64>, <2 x i64>* %q, align 16
  %p2 = getelementptr inbounds i8, i8* %str, i64 4
  %q2 = bitcast i8* %p2 to <2 x i64>*
  store <2 x i64> %0, <2 x i64>* %q2, align 16
  ret void
}

define void @fct13(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct13:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <4 x i32>*
  %0 = load <4 x i32>, <4 x i32>* %q, align 16
  %p2 = getelementptr inbounds i8, i8* %str, i64 4
  %q2 = bitcast i8* %p2 to <4 x i32>*
  store <4 x i32> %0, <4 x i32>* %q2, align 16
  ret void
}

define void @fct14(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct14:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <8 x i16>*
  %0 = load <8 x i16>, <8 x i16>* %q, align 16
  %p2 = getelementptr inbounds i8, i8* %str, i64 4
  %q2 = bitcast i8* %p2 to <8 x i16>*
  store <8 x i16> %0, <8 x i16>* %q2, align 16
  ret void
}

define void @fct15(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct15:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <16 x i8>*
  %0 = load <16 x i8>, <16 x i8>* %q, align 16
  %p2 = getelementptr inbounds i8, i8* %str, i64 4
  %q2 = bitcast i8* %p2 to <16 x i8>*
  store <16 x i8> %0, <16 x i8>* %q2, align 16
  ret void
}

; Check the
; building of vector from a single loaded value.
; Part of <rdar://problem/14170854>
;
; Single loads with immediate offset.
;
; fct16-fct21 load the scalar at element index 1 of %sp0, place it in lane 0
; of an otherwise undef vector and multiply that vector by itself. The
; patterns expect the scalar to be loaded straight into an FP/SIMD register
; at the scaled immediate offset, with the multiply as the next instruction.

; i8 element into <8 x i8>; immediate offset #1.
define <8 x i8> @fct16(i8* nocapture %sp0) {
; CHECK-LABEL: fct16:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.ptr = getelementptr i8, i8* %sp0, i64 1
  %elt = load i8, i8* %elt.ptr, align 1
  %lane0 = insertelement <8 x i8> undef, i8 %elt, i32 0
  %sq = mul <8 x i8> %lane0, %lane0
  ret <8 x i8> %sq
}

; i8 element into <16 x i8>; immediate offset #1.
define <16 x i8> @fct17(i8* nocapture %sp0) {
; CHECK-LABEL: fct17:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.ptr = getelementptr i8, i8* %sp0, i64 1
  %elt = load i8, i8* %elt.ptr, align 1
  %lane0 = insertelement <16 x i8> undef, i8 %elt, i32 0
  %sq = mul <16 x i8> %lane0, %lane0
  ret <16 x i8> %sq
}

; i16 element into <4 x i16>; immediate offset #2.
define <4 x i16> @fct18(i16* nocapture %sp0) {
; CHECK-LABEL: fct18:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.ptr = getelementptr i16, i16* %sp0, i64 1
  %elt = load i16, i16* %elt.ptr, align 1
  %lane0 = insertelement <4 x i16> undef, i16 %elt, i32 0
  %sq = mul <4 x i16> %lane0, %lane0
  ret <4 x i16> %sq
}

; i16 element into <8 x i16>; immediate offset #2.
define <8 x i16> @fct19(i16* nocapture %sp0) {
; CHECK-LABEL: fct19:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.ptr = getelementptr i16, i16* %sp0, i64 1
  %elt = load i16, i16* %elt.ptr, align 1
  %lane0 = insertelement <8 x i16> undef, i16 %elt, i32 0
  %sq = mul <8 x i16> %lane0, %lane0
  ret <8 x i16> %sq
}

; i32 element into <2 x i32>; immediate offset #4.
define <2 x i32> @fct20(i32* nocapture %sp0) {
; CHECK-LABEL: fct20:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.ptr = getelementptr i32, i32* %sp0, i64 1
  %elt = load i32, i32* %elt.ptr, align 1
  %lane0 = insertelement <2 x i32> undef, i32 %elt, i32 0
  %sq = mul <2 x i32> %lane0, %lane0
  ret <2 x i32> %sq
}

; i32 element into <4 x i32>; immediate offset #4.
define <4 x i32> @fct21(i32* nocapture %sp0) {
; CHECK-LABEL: fct21:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.ptr = getelementptr i32, i32* %sp0, i64 1
  %elt = load i32, i32* %elt.ptr, align 1
  %lane0 = insertelement <4 x i32> undef, i32 %elt, i32 0
  %sq = mul <4 x i32> %lane0, %lane0
  ret <4 x i32> %sq
}

; i64 element into <1 x i64>, returned without a multiply; the pattern
; expects the load to target d0 directly at offset #8.
define <1 x i64> @fct22(i64* nocapture %sp0) {
; CHECK-LABEL: fct22:
; CHECK: ldr d0, [x0, #8]
entry:
  %elt.ptr = getelementptr i64, i64* %sp0, i64 1
  %elt = load i64, i64* %elt.ptr, align 1
  %lane0 = insertelement <1 x i64> undef, i64 %elt, i32 0
  ret <1 x i64> %lane0
}

; i64 element into lane 0 of <2 x i64>, returned without a multiply; the
; pattern accepts any d register for the load at offset #8.
define <2 x i64> @fct23(i64* nocapture %sp0) {
; CHECK-LABEL: fct23:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
entry:
  %elt.ptr = getelementptr i64, i64* %sp0, i64 1
  %elt = load i64, i64* %elt.ptr, align 1
  %lane0 = insertelement <2 x i64> undef, i64 %elt, i32 0
  ret <2 x i64> %lane0
}

;
; Single loads with register offset.
; fct24-fct29 load the scalar at element index %offset of %sp0, place it in
; lane 0 of an otherwise undef vector and multiply that vector by itself.
; The patterns expect a register-offset load (x0 base, x1 index, shifted by
; the element size where it is larger than a byte) with the multiply as the
; next instruction.

; i8 element into <8 x i8>; unshifted index register.
define <8 x i8> @fct24(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct24:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.ptr = getelementptr i8, i8* %sp0, i64 %offset
  %elt = load i8, i8* %elt.ptr, align 1
  %lane0 = insertelement <8 x i8> undef, i8 %elt, i32 0
  %sq = mul <8 x i8> %lane0, %lane0
  ret <8 x i8> %sq
}

; i8 element into <16 x i8>; unshifted index register.
define <16 x i8> @fct25(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct25:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.ptr = getelementptr i8, i8* %sp0, i64 %offset
  %elt = load i8, i8* %elt.ptr, align 1
  %lane0 = insertelement <16 x i8> undef, i8 %elt, i32 0
  %sq = mul <16 x i8> %lane0, %lane0
  ret <16 x i8> %sq
}

; i16 element into <4 x i16>; index shifted left by 1.
define <4 x i16> @fct26(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct26:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.ptr = getelementptr i16, i16* %sp0, i64 %offset
  %elt = load i16, i16* %elt.ptr, align 1
  %lane0 = insertelement <4 x i16> undef, i16 %elt, i32 0
  %sq = mul <4 x i16> %lane0, %lane0
  ret <4 x i16> %sq
}

; i16 element into <8 x i16>; index shifted left by 1.
define <8 x i16> @fct27(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct27:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.ptr = getelementptr i16, i16* %sp0, i64 %offset
  %elt = load i16, i16* %elt.ptr, align 1
  %lane0 = insertelement <8 x i16> undef, i16 %elt, i32 0
  %sq = mul <8 x i16> %lane0, %lane0
  ret <8 x i16> %sq
}

; i32 element into <2 x i32>; index shifted left by 2.
define <2 x i32> @fct28(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct28:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.ptr = getelementptr i32, i32* %sp0, i64 %offset
  %elt = load i32, i32* %elt.ptr, align 1
  %lane0 = insertelement <2 x i32> undef, i32 %elt, i32 0
  %sq = mul <2 x i32> %lane0, %lane0
  ret <2 x i32> %sq
}

; i32 element into <4 x i32>; index shifted left by 2.
define <4 x i32> @fct29(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct29:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.ptr = getelementptr i32, i32* %sp0, i64 %offset
  %elt = load i32, i32* %elt.ptr, align 1
  %lane0 = insertelement <4 x i32> undef, i32 %elt, i32 0
  %sq = mul <4 x i32> %lane0, %lane0
  ret <4 x i32> %sq
}

; i64 element into <1 x i64>, returned without a multiply; the pattern
; expects the load to target d0 with the index shifted left by 3.
define <1 x i64> @fct30(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct30:
; CHECK: ldr d0, [x0, x1, lsl #3]
entry:
  %elt.ptr = getelementptr i64, i64* %sp0, i64 %offset
  %elt = load i64, i64* %elt.ptr, align 1
  %lane0 = insertelement <1 x i64> undef, i64 %elt, i32 0
  ret <1 x i64> %lane0
}

; i64 element into lane 0 of <2 x i64>, returned without a multiply; the
; pattern expects the load to target d0 with the index shifted left by 3.
define <2 x i64> @fct31(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct31:
; CHECK: ldr d0, [x0, x1, lsl #3]
entry:
  %elt.ptr = getelementptr i64, i64* %sp0, i64 %offset
  %elt = load i64, i64* %elt.ptr, align 1
  %lane0 = insertelement <2 x i64> undef, i64 %elt, i32 0
  ret <2 x i64> %lane0
}