; RUN: llc < %s -march=arm64 -verify-machineinstrs | FileCheck %s
; RUN: llc < %s -march=arm64 -aarch64-unscaled-mem-op=true\
; RUN:   -verify-machineinstrs | FileCheck -check-prefix=LDUR_CHK %s

; Tests that the AArch64 load/store optimizer combines adjacent loads into
; ldp/ldpsw (and, with unscaled mem ops enabled, pairs of ldurs into ldp),
; including boundary offsets and misaligned cases where pairing must not occur.

; CHECK: ldp_int
; CHECK: ldp
define i32 @ldp_int(i32* %p) nounwind {
  %tmp = load i32, i32* %p, align 4
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
  %tmp1 = load i32, i32* %add.ptr, align 4
  %add = add nsw i32 %tmp1, %tmp
  ret i32 %add
}

; CHECK: ldp_sext_int
; CHECK: ldpsw
define i64 @ldp_sext_int(i32* %p) nounwind {
  %tmp = load i32, i32* %p, align 4
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
  %tmp1 = load i32, i32* %add.ptr, align 4
  %sexttmp = sext i32 %tmp to i64
  %sexttmp1 = sext i32 %tmp1 to i64
  %add = add nsw i64 %sexttmp1, %sexttmp
  ret i64 %add
}

; CHECK-LABEL: ldp_half_sext_res0_int:
; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0]
; CHECK: sxtw x[[DST1]], w[[DST1]]
define i64 @ldp_half_sext_res0_int(i32* %p) nounwind {
  %tmp = load i32, i32* %p, align 4
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
  %tmp1 = load i32, i32* %add.ptr, align 4
  %sexttmp = sext i32 %tmp to i64
  %sexttmp1 = zext i32 %tmp1 to i64
  %add = add nsw i64 %sexttmp1, %sexttmp
  ret i64 %add
}

; CHECK-LABEL: ldp_half_sext_res1_int:
; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0]
; CHECK: sxtw x[[DST2]], w[[DST2]]
define i64 @ldp_half_sext_res1_int(i32* %p) nounwind {
  %tmp = load i32, i32* %p, align 4
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
  %tmp1 = load i32, i32* %add.ptr, align 4
  %sexttmp = zext i32 %tmp to i64
  %sexttmp1 = sext i32 %tmp1 to i64
  %add = add nsw i64 %sexttmp1, %sexttmp
  ret i64 %add
}


; CHECK: ldp_long
; CHECK: ldp
define i64 @ldp_long(i64* %p) nounwind {
  %tmp = load i64, i64* %p, align 8
  %add.ptr = getelementptr inbounds i64, i64* %p, i64 1
  %tmp1 = load i64, i64* %add.ptr, align 8
  %add = add nsw i64 %tmp1, %tmp
  ret i64 %add
}

; CHECK: ldp_float
; CHECK: ldp
define float @ldp_float(float* %p) nounwind {
  %tmp = load float, float* %p, align 4
  %add.ptr = getelementptr inbounds float, float* %p, i64 1
  %tmp1 = load float, float* %add.ptr, align 4
  %add = fadd float %tmp, %tmp1
  ret float %add
}

; CHECK: ldp_double
; CHECK: ldp
define double @ldp_double(double* %p) nounwind {
  %tmp = load double, double* %p, align 8
  %add.ptr = getelementptr inbounds double, double* %p, i64 1
  %tmp1 = load double, double* %add.ptr, align 8
  %add = fadd double %tmp, %tmp1
  ret double %add
}

; Test the load/store optimizer---combine ldurs into a ldp, if appropriate
define i32 @ldur_int(i32* %a) nounwind {
; LDUR_CHK: ldur_int
; LDUR_CHK: ldp [[DST1:w[0-9]+]], [[DST2:w[0-9]+]], [x0, #-8]
; LDUR_CHK-NEXT: add w{{[0-9]+}}, [[DST2]], [[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i32 -1
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i32 -2
  %tmp2 = load i32, i32* %p2, align 2
  %tmp3 = add i32 %tmp1, %tmp2
  ret i32 %tmp3
}

define i64 @ldur_sext_int(i32* %a) nounwind {
; LDUR_CHK: ldur_sext_int
; LDUR_CHK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-8]
; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i32 -1
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i32 -2
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

define i64 @ldur_half_sext_int_res0(i32* %a) nounwind {
; LDUR_CHK: ldur_half_sext_int_res0
; LDUR_CHK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
; LDUR_CHK: sxtw x[[DST1]], w[[DST1]]
; LDUR_CHK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i32 -1
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i32 -2
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = zext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

define i64 @ldur_half_sext_int_res1(i32* %a) nounwind {
; LDUR_CHK: ldur_half_sext_int_res1
; LDUR_CHK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
; LDUR_CHK: sxtw x[[DST2]], w[[DST2]]
; LDUR_CHK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i32 -1
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i32 -2
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = zext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}


define i64 @ldur_long(i64* %a) nounwind ssp {
; LDUR_CHK: ldur_long
; LDUR_CHK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-16]
; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i64, i64* %a, i64 -1
  %tmp1 = load i64, i64* %p1, align 2
  %p2 = getelementptr inbounds i64, i64* %a, i64 -2
  %tmp2 = load i64, i64* %p2, align 2
  %tmp3 = add i64 %tmp1, %tmp2
  ret i64 %tmp3
}

define float @ldur_float(float* %a) {
; LDUR_CHK: ldur_float
; LDUR_CHK: ldp [[DST1:s[0-9]+]], [[DST2:s[0-9]+]], [x0, #-8]
; LDUR_CHK-NEXT: add s{{[0-9]+}}, [[DST2]], [[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds float, float* %a, i64 -1
  %tmp1 = load float, float* %p1, align 2
  %p2 = getelementptr inbounds float, float* %a, i64 -2
  %tmp2 = load float, float* %p2, align 2
  %tmp3 = fadd float %tmp1, %tmp2
  ret float %tmp3
}

define double @ldur_double(double* %a) {
; LDUR_CHK: ldur_double
; LDUR_CHK: ldp [[DST1:d[0-9]+]], [[DST2:d[0-9]+]], [x0, #-16]
; LDUR_CHK-NEXT: add d{{[0-9]+}}, [[DST2]], [[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds double, double* %a, i64 -1
  %tmp1 = load double, double* %p1, align 2
  %p2 = getelementptr inbounds double, double* %a, i64 -2
  %tmp2 = load double, double* %p2, align 2
  %tmp3 = fadd double %tmp1, %tmp2
  ret double %tmp3
}

; Now check some boundary conditions
define i64 @pairUpBarelyIn(i64* %a) nounwind ssp {
; LDUR_CHK: pairUpBarelyIn
; LDUR_CHK-NOT: ldur
; LDUR_CHK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i64, i64* %a, i64 -31
  %tmp1 = load i64, i64* %p1, align 2
  %p2 = getelementptr inbounds i64, i64* %a, i64 -32
  %tmp2 = load i64, i64* %p2, align 2
  %tmp3 = add i64 %tmp1, %tmp2
  ret i64 %tmp3
}

define i64 @pairUpBarelyInSext(i32* %a) nounwind ssp {
; LDUR_CHK: pairUpBarelyInSext
; LDUR_CHK-NOT: ldur
; LDUR_CHK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i64 -63
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i64 -64
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

define i64 @pairUpBarelyInHalfSextRes0(i32* %a) nounwind ssp {
; LDUR_CHK: pairUpBarelyInHalfSextRes0
; LDUR_CHK-NOT: ldur
; LDUR_CHK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
; LDUR_CHK: sxtw x[[DST1]], w[[DST1]]
; LDUR_CHK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i64 -63
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i64 -64
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = zext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

define i64 @pairUpBarelyInHalfSextRes1(i32* %a) nounwind ssp {
; LDUR_CHK: pairUpBarelyInHalfSextRes1
; LDUR_CHK-NOT: ldur
; LDUR_CHK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
; LDUR_CHK: sxtw x[[DST2]], w[[DST2]]
; LDUR_CHK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i64 -63
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i64 -64
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = zext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

define i64 @pairUpBarelyOut(i64* %a) nounwind ssp {
; LDUR_CHK: pairUpBarelyOut
; LDUR_CHK-NOT: ldp
; Don't be fragile about which loads or manipulations of the base register
; are used---just check that there isn't an ldp before the add
; LDUR_CHK: add
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i64, i64* %a, i64 -32
  %tmp1 = load i64, i64* %p1, align 2
  %p2 = getelementptr inbounds i64, i64* %a, i64 -33
  %tmp2 = load i64, i64* %p2, align 2
  %tmp3 = add i64 %tmp1, %tmp2
  ret i64 %tmp3
}

define i64 @pairUpBarelyOutSext(i32* %a) nounwind ssp {
; LDUR_CHK: pairUpBarelyOutSext
; LDUR_CHK-NOT: ldp
; Don't be fragile about which loads or manipulations of the base register
; are used---just check that there isn't an ldp before the add
; LDUR_CHK: add
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i64 -64
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i64 -65
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

define i64 @pairUpNotAligned(i64* %a) nounwind ssp {
; LDUR_CHK: pairUpNotAligned
; LDUR_CHK-NOT: ldp
; LDUR_CHK: ldur
; LDUR_CHK-NEXT: ldur
; LDUR_CHK-NEXT: add
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i64, i64* %a, i64 -18
  %bp1 = bitcast i64* %p1 to i8*
  %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
  %dp1 = bitcast i8* %bp1p1 to i64*
  %tmp1 = load i64, i64* %dp1, align 1

  %p2 = getelementptr inbounds i64, i64* %a, i64 -17
  %bp2 = bitcast i64* %p2 to i8*
  %bp2p1 = getelementptr inbounds i8, i8* %bp2, i64 1
  %dp2 = bitcast i8* %bp2p1 to i64*
  %tmp2 = load i64, i64* %dp2, align 1

  %tmp3 = add i64 %tmp1, %tmp2
  ret i64 %tmp3
}

define i64 @pairUpNotAlignedSext(i32* %a) nounwind ssp {
; LDUR_CHK: pairUpNotAlignedSext
; LDUR_CHK-NOT: ldp
; LDUR_CHK: ldursw
; LDUR_CHK-NEXT: ldursw
; LDUR_CHK-NEXT: add
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i64 -18
  %bp1 = bitcast i32* %p1 to i8*
  %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
  %dp1 = bitcast i8* %bp1p1 to i32*
  %tmp1 = load i32, i32* %dp1, align 1

  %p2 = getelementptr inbounds i32, i32* %a, i64 -17
  %bp2 = bitcast i32* %p2 to i8*
  %bp2p1 = getelementptr inbounds i8, i8* %bp2, i64 1
  %dp2 = bitcast i8* %bp2p1 to i32*
  %tmp2 = load i32, i32* %dp2, align 1

  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}