1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; FIXME: The CHECK-THUMBv7 RUN line below reuses -mtriple=thumbv6meb-none-eabi, so the CHECK-THUMBv7 assertions in this file are duplicates of the thumbv6m output (note Thumb1-style sequences such as "movs r1, #1" under CHECK-THUMBv7). It should use -mtriple=thumbv7eb-none-eabi and the CHECK-THUMBv7 lines be regenerated with update_llc_test_checks.py in the same change. 2; RUN: llc < %s -mtriple=armeb-unknown | FileCheck %s 3; RUN: llc < %s -mtriple=armv6eb-unknown | FileCheck %s --check-prefix=CHECK-ARMv6 4; RUN: llc < %s -mtriple=thumbv6meb-none-eabi | FileCheck %s --check-prefix=CHECK-THUMBv6 5; RUN: llc < %s -mtriple=thumbv6meb-none-eabi | FileCheck %s --check-prefix=CHECK-THUMBv7 6 7; i8* p; // p is 4 byte aligned 8; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3] 9define i32 @load_i32_by_i8_big_endian(i32* %arg) { 10; CHECK-LABEL: load_i32_by_i8_big_endian: 11; CHECK: @ %bb.0: 12; CHECK-NEXT: ldr r0, [r0] 13; CHECK-NEXT: mov pc, lr 14; 15; CHECK-ARMv6-LABEL: load_i32_by_i8_big_endian: 16; CHECK-ARMv6: @ %bb.0: 17; CHECK-ARMv6-NEXT: ldr r0, [r0] 18; CHECK-ARMv6-NEXT: bx lr 19; 20; CHECK-THUMBv6-LABEL: load_i32_by_i8_big_endian: 21; CHECK-THUMBv6: @ %bb.0: 22; CHECK-THUMBv6-NEXT: ldr r0, [r0] 23; CHECK-THUMBv6-NEXT: bx lr 24; 25; CHECK-THUMBv7-LABEL: load_i32_by_i8_big_endian: 26; CHECK-THUMBv7: @ %bb.0: 27; CHECK-THUMBv7-NEXT: ldr r0, [r0] 28; CHECK-THUMBv7-NEXT: bx lr 29 30 %tmp = bitcast i32* %arg to i8* 31 %tmp1 = load i8, i8* %tmp, align 4 32 %tmp2 = zext i8 %tmp1 to i32 33 %tmp3 = shl nuw nsw i32 %tmp2, 24 34 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 35 %tmp5 = load i8, i8* %tmp4, align 1 36 %tmp6 = zext i8 %tmp5 to i32 37 %tmp7 = shl nuw nsw i32 %tmp6, 16 38 %tmp8 = or i32 %tmp7, %tmp3 39 %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2 40 %tmp10 = load i8, i8* %tmp9, align 1 41 %tmp11 = zext i8 %tmp10 to i32 42 %tmp12 = shl nuw nsw i32 %tmp11, 8 43 %tmp13 = or i32 %tmp8, %tmp12 44 %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3 45 %tmp15 = load i8, i8* %tmp14, align 1 46 %tmp16 = zext i8 %tmp15 to i32 47 %tmp17 = or i32 %tmp13, %tmp16 48 ret i32 %tmp17 49} 50 51; i8* p; // p is 4 byte aligned 52; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24) 53define i32 
@load_i32_by_i8_bswap(i32* %arg) { 54; BSWAP is not supported by 32 bit target 55; CHECK-LABEL: load_i32_by_i8_bswap: 56; CHECK: @ %bb.0: 57; CHECK-NEXT: ldr r0, [r0] 58; CHECK-NEXT: mov r1, #65280 59; CHECK-NEXT: mov r2, #16711680 60; CHECK-NEXT: and r1, r1, r0, lsr #8 61; CHECK-NEXT: and r2, r2, r0, lsl #8 62; CHECK-NEXT: orr r1, r1, r0, lsr #24 63; CHECK-NEXT: orr r0, r2, r0, lsl #24 64; CHECK-NEXT: orr r0, r0, r1 65; CHECK-NEXT: mov pc, lr 66; 67; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap: 68; CHECK-ARMv6: @ %bb.0: 69; CHECK-ARMv6-NEXT: ldr r0, [r0] 70; CHECK-ARMv6-NEXT: rev r0, r0 71; CHECK-ARMv6-NEXT: bx lr 72; 73; CHECK-THUMBv6-LABEL: load_i32_by_i8_bswap: 74; CHECK-THUMBv6: @ %bb.0: 75; CHECK-THUMBv6-NEXT: ldr r0, [r0] 76; CHECK-THUMBv6-NEXT: rev r0, r0 77; CHECK-THUMBv6-NEXT: bx lr 78; 79; CHECK-THUMBv7-LABEL: load_i32_by_i8_bswap: 80; CHECK-THUMBv7: @ %bb.0: 81; CHECK-THUMBv7-NEXT: ldr r0, [r0] 82; CHECK-THUMBv7-NEXT: rev r0, r0 83; CHECK-THUMBv7-NEXT: bx lr 84 85 %tmp = bitcast i32* %arg to i8* 86 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 87 %tmp2 = load i8, i8* %tmp1, align 4 88 %tmp3 = zext i8 %tmp2 to i32 89 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 90 %tmp5 = load i8, i8* %tmp4, align 1 91 %tmp6 = zext i8 %tmp5 to i32 92 %tmp7 = shl nuw nsw i32 %tmp6, 8 93 %tmp8 = or i32 %tmp7, %tmp3 94 %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2 95 %tmp10 = load i8, i8* %tmp9, align 1 96 %tmp11 = zext i8 %tmp10 to i32 97 %tmp12 = shl nuw nsw i32 %tmp11, 16 98 %tmp13 = or i32 %tmp8, %tmp12 99 %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3 100 %tmp15 = load i8, i8* %tmp14, align 1 101 %tmp16 = zext i8 %tmp15 to i32 102 %tmp17 = shl nuw nsw i32 %tmp16, 24 103 %tmp18 = or i32 %tmp13, %tmp17 104 ret i32 %tmp18 105} 106 107; i8* p; // p is 4 byte aligned 108; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[3] << 8) | (i16) p[4]) 109define i32 @load_i32_by_i16_by_i8_big_endian(i32* %arg) { 110; CHECK-LABEL: 
load_i32_by_i16_by_i8_big_endian: 111; CHECK: @ %bb.0: 112; CHECK-NEXT: ldr r0, [r0] 113; CHECK-NEXT: mov pc, lr 114; 115; CHECK-ARMv6-LABEL: load_i32_by_i16_by_i8_big_endian: 116; CHECK-ARMv6: @ %bb.0: 117; CHECK-ARMv6-NEXT: ldr r0, [r0] 118; CHECK-ARMv6-NEXT: bx lr 119; 120; CHECK-THUMBv6-LABEL: load_i32_by_i16_by_i8_big_endian: 121; CHECK-THUMBv6: @ %bb.0: 122; CHECK-THUMBv6-NEXT: ldr r0, [r0] 123; CHECK-THUMBv6-NEXT: bx lr 124; 125; CHECK-THUMBv7-LABEL: load_i32_by_i16_by_i8_big_endian: 126; CHECK-THUMBv7: @ %bb.0: 127; CHECK-THUMBv7-NEXT: ldr r0, [r0] 128; CHECK-THUMBv7-NEXT: bx lr 129 130 %tmp = bitcast i32* %arg to i8* 131 %tmp1 = load i8, i8* %tmp, align 4 132 %tmp2 = zext i8 %tmp1 to i16 133 %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1 134 %tmp4 = load i8, i8* %tmp3, align 1 135 %tmp5 = zext i8 %tmp4 to i16 136 %tmp6 = shl nuw nsw i16 %tmp2, 8 137 %tmp7 = or i16 %tmp6, %tmp5 138 %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2 139 %tmp9 = load i8, i8* %tmp8, align 1 140 %tmp10 = zext i8 %tmp9 to i16 141 %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3 142 %tmp12 = load i8, i8* %tmp11, align 1 143 %tmp13 = zext i8 %tmp12 to i16 144 %tmp14 = shl nuw nsw i16 %tmp10, 8 145 %tmp15 = or i16 %tmp14, %tmp13 146 %tmp16 = zext i16 %tmp7 to i32 147 %tmp17 = zext i16 %tmp15 to i32 148 %tmp18 = shl nuw nsw i32 %tmp16, 16 149 %tmp19 = or i32 %tmp18, %tmp17 150 ret i32 %tmp19 151} 152 153; i16* p; // p is 4 byte aligned 154; ((i32) p[0] << 16) | (i32) p[1] 155define i32 @load_i32_by_i16(i32* %arg) { 156; CHECK-LABEL: load_i32_by_i16: 157; CHECK: @ %bb.0: 158; CHECK-NEXT: ldr r0, [r0] 159; CHECK-NEXT: mov pc, lr 160; 161; CHECK-ARMv6-LABEL: load_i32_by_i16: 162; CHECK-ARMv6: @ %bb.0: 163; CHECK-ARMv6-NEXT: ldr r0, [r0] 164; CHECK-ARMv6-NEXT: bx lr 165; 166; CHECK-THUMBv6-LABEL: load_i32_by_i16: 167; CHECK-THUMBv6: @ %bb.0: 168; CHECK-THUMBv6-NEXT: ldr r0, [r0] 169; CHECK-THUMBv6-NEXT: bx lr 170; 171; CHECK-THUMBv7-LABEL: load_i32_by_i16: 172; CHECK-THUMBv7: @ 
%bb.0: 173; CHECK-THUMBv7-NEXT: ldr r0, [r0] 174; CHECK-THUMBv7-NEXT: bx lr 175 176 %tmp = bitcast i32* %arg to i16* 177 %tmp1 = load i16, i16* %tmp, align 4 178 %tmp2 = zext i16 %tmp1 to i32 179 %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1 180 %tmp4 = load i16, i16* %tmp3, align 1 181 %tmp5 = zext i16 %tmp4 to i32 182 %tmp6 = shl nuw nsw i32 %tmp2, 16 183 %tmp7 = or i32 %tmp6, %tmp5 184 ret i32 %tmp7 185} 186 187; i16* p_16; // p_16 is 4 byte aligned 188; i8* p_8 = (i8*) p_16; 189; (i32) (p_16[0] << 16) | ((i32) p[2] << 8) | (i32) p[3] 190define i32 @load_i32_by_i16_i8(i32* %arg) { 191; CHECK-LABEL: load_i32_by_i16_i8: 192; CHECK: @ %bb.0: 193; CHECK-NEXT: ldr r0, [r0] 194; CHECK-NEXT: mov pc, lr 195; 196; CHECK-ARMv6-LABEL: load_i32_by_i16_i8: 197; CHECK-ARMv6: @ %bb.0: 198; CHECK-ARMv6-NEXT: ldr r0, [r0] 199; CHECK-ARMv6-NEXT: bx lr 200; 201; CHECK-THUMBv6-LABEL: load_i32_by_i16_i8: 202; CHECK-THUMBv6: @ %bb.0: 203; CHECK-THUMBv6-NEXT: ldr r0, [r0] 204; CHECK-THUMBv6-NEXT: bx lr 205; 206; CHECK-THUMBv7-LABEL: load_i32_by_i16_i8: 207; CHECK-THUMBv7: @ %bb.0: 208; CHECK-THUMBv7-NEXT: ldr r0, [r0] 209; CHECK-THUMBv7-NEXT: bx lr 210 211 %tmp = bitcast i32* %arg to i16* 212 %tmp1 = bitcast i32* %arg to i8* 213 %tmp2 = load i16, i16* %tmp, align 4 214 %tmp3 = zext i16 %tmp2 to i32 215 %tmp4 = shl nuw nsw i32 %tmp3, 16 216 %tmp5 = getelementptr inbounds i8, i8* %tmp1, i32 2 217 %tmp6 = load i8, i8* %tmp5, align 1 218 %tmp7 = zext i8 %tmp6 to i32 219 %tmp8 = shl nuw nsw i32 %tmp7, 8 220 %tmp9 = getelementptr inbounds i8, i8* %tmp1, i32 3 221 %tmp10 = load i8, i8* %tmp9, align 1 222 %tmp11 = zext i8 %tmp10 to i32 223 %tmp12 = or i32 %tmp8, %tmp11 224 %tmp13 = or i32 %tmp12, %tmp4 225 ret i32 %tmp13 226} 227 228; i8* p; // p is 8 byte aligned 229; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56) 230define i64 @load_i64_by_i8_bswap(i64* %arg) { 231; 
CHECK-LABEL: load_i64_by_i8_bswap: 232; CHECK: @ %bb.0: 233; CHECK-NEXT: push {r11, lr} 234; CHECK-NEXT: ldr r1, [r0] 235; CHECK-NEXT: mov r12, #65280 236; CHECK-NEXT: ldr r0, [r0, #4] 237; CHECK-NEXT: mov lr, #16711680 238; CHECK-NEXT: and r3, r12, r0, lsr #8 239; CHECK-NEXT: and r2, lr, r0, lsl #8 240; CHECK-NEXT: orr r3, r3, r0, lsr #24 241; CHECK-NEXT: orr r0, r2, r0, lsl #24 242; CHECK-NEXT: and r2, r12, r1, lsr #8 243; CHECK-NEXT: orr r0, r0, r3 244; CHECK-NEXT: and r3, lr, r1, lsl #8 245; CHECK-NEXT: orr r2, r2, r1, lsr #24 246; CHECK-NEXT: orr r1, r3, r1, lsl #24 247; CHECK-NEXT: orr r1, r1, r2 248; CHECK-NEXT: pop {r11, lr} 249; CHECK-NEXT: mov pc, lr 250; 251; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap: 252; CHECK-ARMv6: @ %bb.0: 253; CHECK-ARMv6-NEXT: ldrd r2, r3, [r0] 254; CHECK-ARMv6-NEXT: rev r0, r3 255; CHECK-ARMv6-NEXT: rev r1, r2 256; CHECK-ARMv6-NEXT: bx lr 257; 258; CHECK-THUMBv6-LABEL: load_i64_by_i8_bswap: 259; CHECK-THUMBv6: @ %bb.0: 260; CHECK-THUMBv6-NEXT: ldr r1, [r0] 261; CHECK-THUMBv6-NEXT: ldr r0, [r0, #4] 262; CHECK-THUMBv6-NEXT: rev r0, r0 263; CHECK-THUMBv6-NEXT: rev r1, r1 264; CHECK-THUMBv6-NEXT: bx lr 265; 266; CHECK-THUMBv7-LABEL: load_i64_by_i8_bswap: 267; CHECK-THUMBv7: @ %bb.0: 268; CHECK-THUMBv7-NEXT: ldr r1, [r0] 269; CHECK-THUMBv7-NEXT: ldr r0, [r0, #4] 270; CHECK-THUMBv7-NEXT: rev r0, r0 271; CHECK-THUMBv7-NEXT: rev r1, r1 272; CHECK-THUMBv7-NEXT: bx lr 273 274 %tmp = bitcast i64* %arg to i8* 275 %tmp1 = load i8, i8* %tmp, align 8 276 %tmp2 = zext i8 %tmp1 to i64 277 %tmp3 = getelementptr inbounds i8, i8* %tmp, i64 1 278 %tmp4 = load i8, i8* %tmp3, align 1 279 %tmp5 = zext i8 %tmp4 to i64 280 %tmp6 = shl nuw nsw i64 %tmp5, 8 281 %tmp7 = or i64 %tmp6, %tmp2 282 %tmp8 = getelementptr inbounds i8, i8* %tmp, i64 2 283 %tmp9 = load i8, i8* %tmp8, align 1 284 %tmp10 = zext i8 %tmp9 to i64 285 %tmp11 = shl nuw nsw i64 %tmp10, 16 286 %tmp12 = or i64 %tmp7, %tmp11 287 %tmp13 = getelementptr inbounds i8, i8* %tmp, i64 3 288 %tmp14 = 
load i8, i8* %tmp13, align 1 289 %tmp15 = zext i8 %tmp14 to i64 290 %tmp16 = shl nuw nsw i64 %tmp15, 24 291 %tmp17 = or i64 %tmp12, %tmp16 292 %tmp18 = getelementptr inbounds i8, i8* %tmp, i64 4 293 %tmp19 = load i8, i8* %tmp18, align 1 294 %tmp20 = zext i8 %tmp19 to i64 295 %tmp21 = shl nuw nsw i64 %tmp20, 32 296 %tmp22 = or i64 %tmp17, %tmp21 297 %tmp23 = getelementptr inbounds i8, i8* %tmp, i64 5 298 %tmp24 = load i8, i8* %tmp23, align 1 299 %tmp25 = zext i8 %tmp24 to i64 300 %tmp26 = shl nuw nsw i64 %tmp25, 40 301 %tmp27 = or i64 %tmp22, %tmp26 302 %tmp28 = getelementptr inbounds i8, i8* %tmp, i64 6 303 %tmp29 = load i8, i8* %tmp28, align 1 304 %tmp30 = zext i8 %tmp29 to i64 305 %tmp31 = shl nuw nsw i64 %tmp30, 48 306 %tmp32 = or i64 %tmp27, %tmp31 307 %tmp33 = getelementptr inbounds i8, i8* %tmp, i64 7 308 %tmp34 = load i8, i8* %tmp33, align 1 309 %tmp35 = zext i8 %tmp34 to i64 310 %tmp36 = shl nuw i64 %tmp35, 56 311 %tmp37 = or i64 %tmp32, %tmp36 312 ret i64 %tmp37 313} 314 315; i8* p; // p is 8 byte aligned 316; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7] 317define i64 @load_i64_by_i8(i64* %arg) { 318; CHECK-LABEL: load_i64_by_i8: 319; CHECK: @ %bb.0: 320; CHECK-NEXT: ldr r2, [r0] 321; CHECK-NEXT: ldr r1, [r0, #4] 322; CHECK-NEXT: mov r0, r2 323; CHECK-NEXT: mov pc, lr 324; 325; CHECK-ARMv6-LABEL: load_i64_by_i8: 326; CHECK-ARMv6: @ %bb.0: 327; CHECK-ARMv6-NEXT: ldrd r0, r1, [r0] 328; CHECK-ARMv6-NEXT: bx lr 329; 330; CHECK-THUMBv6-LABEL: load_i64_by_i8: 331; CHECK-THUMBv6: @ %bb.0: 332; CHECK-THUMBv6-NEXT: ldr r2, [r0] 333; CHECK-THUMBv6-NEXT: ldr r1, [r0, #4] 334; CHECK-THUMBv6-NEXT: mov r0, r2 335; CHECK-THUMBv6-NEXT: bx lr 336; 337; CHECK-THUMBv7-LABEL: load_i64_by_i8: 338; CHECK-THUMBv7: @ %bb.0: 339; CHECK-THUMBv7-NEXT: ldr r2, [r0] 340; CHECK-THUMBv7-NEXT: ldr r1, [r0, #4] 341; CHECK-THUMBv7-NEXT: mov r0, r2 342; CHECK-THUMBv7-NEXT: bx lr 
343 344 %tmp = bitcast i64* %arg to i8* 345 %tmp1 = load i8, i8* %tmp, align 8 346 %tmp2 = zext i8 %tmp1 to i64 347 %tmp3 = shl nuw i64 %tmp2, 56 348 %tmp4 = getelementptr inbounds i8, i8* %tmp, i64 1 349 %tmp5 = load i8, i8* %tmp4, align 1 350 %tmp6 = zext i8 %tmp5 to i64 351 %tmp7 = shl nuw nsw i64 %tmp6, 48 352 %tmp8 = or i64 %tmp7, %tmp3 353 %tmp9 = getelementptr inbounds i8, i8* %tmp, i64 2 354 %tmp10 = load i8, i8* %tmp9, align 1 355 %tmp11 = zext i8 %tmp10 to i64 356 %tmp12 = shl nuw nsw i64 %tmp11, 40 357 %tmp13 = or i64 %tmp8, %tmp12 358 %tmp14 = getelementptr inbounds i8, i8* %tmp, i64 3 359 %tmp15 = load i8, i8* %tmp14, align 1 360 %tmp16 = zext i8 %tmp15 to i64 361 %tmp17 = shl nuw nsw i64 %tmp16, 32 362 %tmp18 = or i64 %tmp13, %tmp17 363 %tmp19 = getelementptr inbounds i8, i8* %tmp, i64 4 364 %tmp20 = load i8, i8* %tmp19, align 1 365 %tmp21 = zext i8 %tmp20 to i64 366 %tmp22 = shl nuw nsw i64 %tmp21, 24 367 %tmp23 = or i64 %tmp18, %tmp22 368 %tmp24 = getelementptr inbounds i8, i8* %tmp, i64 5 369 %tmp25 = load i8, i8* %tmp24, align 1 370 %tmp26 = zext i8 %tmp25 to i64 371 %tmp27 = shl nuw nsw i64 %tmp26, 16 372 %tmp28 = or i64 %tmp23, %tmp27 373 %tmp29 = getelementptr inbounds i8, i8* %tmp, i64 6 374 %tmp30 = load i8, i8* %tmp29, align 1 375 %tmp31 = zext i8 %tmp30 to i64 376 %tmp32 = shl nuw nsw i64 %tmp31, 8 377 %tmp33 = or i64 %tmp28, %tmp32 378 %tmp34 = getelementptr inbounds i8, i8* %tmp, i64 7 379 %tmp35 = load i8, i8* %tmp34, align 1 380 %tmp36 = zext i8 %tmp35 to i64 381 %tmp37 = or i64 %tmp33, %tmp36 382 ret i64 %tmp37 383} 384 385; i8* p; // p[1] is 4 byte aligned 386; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24) 387define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) { 388; CHECK-LABEL: load_i32_by_i8_nonzero_offset: 389; CHECK: @ %bb.0: 390; CHECK-NEXT: ldr r0, [r0, #1] 391; CHECK-NEXT: mov r1, #65280 392; CHECK-NEXT: mov r2, #16711680 393; CHECK-NEXT: and r1, r1, r0, lsr #8 394; CHECK-NEXT: and r2, r2, r0, lsl 
#8 395; CHECK-NEXT: orr r1, r1, r0, lsr #24 396; CHECK-NEXT: orr r0, r2, r0, lsl #24 397; CHECK-NEXT: orr r0, r0, r1 398; CHECK-NEXT: mov pc, lr 399; 400; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset: 401; CHECK-ARMv6: @ %bb.0: 402; CHECK-ARMv6-NEXT: ldr r0, [r0, #1] 403; CHECK-ARMv6-NEXT: rev r0, r0 404; CHECK-ARMv6-NEXT: bx lr 405; 406; CHECK-THUMBv6-LABEL: load_i32_by_i8_nonzero_offset: 407; CHECK-THUMBv6: @ %bb.0: 408; CHECK-THUMBv6-NEXT: movs r1, #1 409; CHECK-THUMBv6-NEXT: ldr r0, [r0, r1] 410; CHECK-THUMBv6-NEXT: rev r0, r0 411; CHECK-THUMBv6-NEXT: bx lr 412; 413; CHECK-THUMBv7-LABEL: load_i32_by_i8_nonzero_offset: 414; CHECK-THUMBv7: @ %bb.0: 415; CHECK-THUMBv7-NEXT: movs r1, #1 416; CHECK-THUMBv7-NEXT: ldr r0, [r0, r1] 417; CHECK-THUMBv7-NEXT: rev r0, r0 418; CHECK-THUMBv7-NEXT: bx lr 419 420 421 %tmp = bitcast i32* %arg to i8* 422 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 423 %tmp2 = load i8, i8* %tmp1, align 4 424 %tmp3 = zext i8 %tmp2 to i32 425 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2 426 %tmp5 = load i8, i8* %tmp4, align 1 427 %tmp6 = zext i8 %tmp5 to i32 428 %tmp7 = shl nuw nsw i32 %tmp6, 8 429 %tmp8 = or i32 %tmp7, %tmp3 430 %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3 431 %tmp10 = load i8, i8* %tmp9, align 1 432 %tmp11 = zext i8 %tmp10 to i32 433 %tmp12 = shl nuw nsw i32 %tmp11, 16 434 %tmp13 = or i32 %tmp8, %tmp12 435 %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4 436 %tmp15 = load i8, i8* %tmp14, align 1 437 %tmp16 = zext i8 %tmp15 to i32 438 %tmp17 = shl nuw nsw i32 %tmp16, 24 439 %tmp18 = or i32 %tmp13, %tmp17 440 ret i32 %tmp18 441} 442 443; i8* p; // p[-4] is 4 byte aligned 444; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24) 445define i32 @load_i32_by_i8_neg_offset(i32* %arg) { 446; CHECK-LABEL: load_i32_by_i8_neg_offset: 447; CHECK: @ %bb.0: 448; CHECK-NEXT: ldr r0, [r0, #-4] 449; CHECK-NEXT: mov r1, #65280 450; CHECK-NEXT: mov r2, #16711680 451; CHECK-NEXT: and r1, r1, r0, lsr 
#8 452; CHECK-NEXT: and r2, r2, r0, lsl #8 453; CHECK-NEXT: orr r1, r1, r0, lsr #24 454; CHECK-NEXT: orr r0, r2, r0, lsl #24 455; CHECK-NEXT: orr r0, r0, r1 456; CHECK-NEXT: mov pc, lr 457; 458; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset: 459; CHECK-ARMv6: @ %bb.0: 460; CHECK-ARMv6-NEXT: ldr r0, [r0, #-4] 461; CHECK-ARMv6-NEXT: rev r0, r0 462; CHECK-ARMv6-NEXT: bx lr 463; 464; CHECK-THUMBv6-LABEL: load_i32_by_i8_neg_offset: 465; CHECK-THUMBv6: @ %bb.0: 466; CHECK-THUMBv6-NEXT: subs r0, r0, #4 467; CHECK-THUMBv6-NEXT: ldr r0, [r0] 468; CHECK-THUMBv6-NEXT: rev r0, r0 469; CHECK-THUMBv6-NEXT: bx lr 470; 471; CHECK-THUMBv7-LABEL: load_i32_by_i8_neg_offset: 472; CHECK-THUMBv7: @ %bb.0: 473; CHECK-THUMBv7-NEXT: subs r0, r0, #4 474; CHECK-THUMBv7-NEXT: ldr r0, [r0] 475; CHECK-THUMBv7-NEXT: rev r0, r0 476; CHECK-THUMBv7-NEXT: bx lr 477 478 479 %tmp = bitcast i32* %arg to i8* 480 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4 481 %tmp2 = load i8, i8* %tmp1, align 4 482 %tmp3 = zext i8 %tmp2 to i32 483 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3 484 %tmp5 = load i8, i8* %tmp4, align 1 485 %tmp6 = zext i8 %tmp5 to i32 486 %tmp7 = shl nuw nsw i32 %tmp6, 8 487 %tmp8 = or i32 %tmp7, %tmp3 488 %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2 489 %tmp10 = load i8, i8* %tmp9, align 1 490 %tmp11 = zext i8 %tmp10 to i32 491 %tmp12 = shl nuw nsw i32 %tmp11, 16 492 %tmp13 = or i32 %tmp8, %tmp12 493 %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1 494 %tmp15 = load i8, i8* %tmp14, align 1 495 %tmp16 = zext i8 %tmp15 to i32 496 %tmp17 = shl nuw nsw i32 %tmp16, 24 497 %tmp18 = or i32 %tmp13, %tmp17 498 ret i32 %tmp18 499} 500 501; i8* p; // p[1] is 4 byte aligned 502; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24) 503define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) { 504; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap: 505; CHECK: @ %bb.0: 506; CHECK-NEXT: ldr r0, [r0, #1] 507; CHECK-NEXT: mov pc, lr 508; 509; CHECK-ARMv6-LABEL: 
load_i32_by_i8_nonzero_offset_bswap: 510; CHECK-ARMv6: @ %bb.0: 511; CHECK-ARMv6-NEXT: ldr r0, [r0, #1] 512; CHECK-ARMv6-NEXT: bx lr 513; 514; CHECK-THUMBv6-LABEL: load_i32_by_i8_nonzero_offset_bswap: 515; CHECK-THUMBv6: @ %bb.0: 516; CHECK-THUMBv6-NEXT: movs r1, #1 517; CHECK-THUMBv6-NEXT: ldr r0, [r0, r1] 518; CHECK-THUMBv6-NEXT: bx lr 519; 520; CHECK-THUMBv7-LABEL: load_i32_by_i8_nonzero_offset_bswap: 521; CHECK-THUMBv7: @ %bb.0: 522; CHECK-THUMBv7-NEXT: movs r1, #1 523; CHECK-THUMBv7-NEXT: ldr r0, [r0, r1] 524; CHECK-THUMBv7-NEXT: bx lr 525 526 527 %tmp = bitcast i32* %arg to i8* 528 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4 529 %tmp2 = load i8, i8* %tmp1, align 1 530 %tmp3 = zext i8 %tmp2 to i32 531 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3 532 %tmp5 = load i8, i8* %tmp4, align 1 533 %tmp6 = zext i8 %tmp5 to i32 534 %tmp7 = shl nuw nsw i32 %tmp6, 8 535 %tmp8 = or i32 %tmp7, %tmp3 536 %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2 537 %tmp10 = load i8, i8* %tmp9, align 1 538 %tmp11 = zext i8 %tmp10 to i32 539 %tmp12 = shl nuw nsw i32 %tmp11, 16 540 %tmp13 = or i32 %tmp8, %tmp12 541 %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1 542 %tmp15 = load i8, i8* %tmp14, align 4 543 %tmp16 = zext i8 %tmp15 to i32 544 %tmp17 = shl nuw nsw i32 %tmp16, 24 545 %tmp18 = or i32 %tmp13, %tmp17 546 ret i32 %tmp18 547} 548 549; i8* p; // p[-4] is 4 byte aligned 550; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24) 551define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) { 552; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap: 553; CHECK: @ %bb.0: 554; CHECK-NEXT: ldr r0, [r0, #-4] 555; CHECK-NEXT: mov pc, lr 556; 557; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset_bswap: 558; CHECK-ARMv6: @ %bb.0: 559; CHECK-ARMv6-NEXT: ldr r0, [r0, #-4] 560; CHECK-ARMv6-NEXT: bx lr 561; 562; CHECK-THUMBv6-LABEL: load_i32_by_i8_neg_offset_bswap: 563; CHECK-THUMBv6: @ %bb.0: 564; CHECK-THUMBv6-NEXT: subs r0, r0, #4 565; CHECK-THUMBv6-NEXT: ldr r0, 
[r0] 566; CHECK-THUMBv6-NEXT: bx lr 567; 568; CHECK-THUMBv7-LABEL: load_i32_by_i8_neg_offset_bswap: 569; CHECK-THUMBv7: @ %bb.0: 570; CHECK-THUMBv7-NEXT: subs r0, r0, #4 571; CHECK-THUMBv7-NEXT: ldr r0, [r0] 572; CHECK-THUMBv7-NEXT: bx lr 573 574 575 %tmp = bitcast i32* %arg to i8* 576 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1 577 %tmp2 = load i8, i8* %tmp1, align 1 578 %tmp3 = zext i8 %tmp2 to i32 579 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2 580 %tmp5 = load i8, i8* %tmp4, align 1 581 %tmp6 = zext i8 %tmp5 to i32 582 %tmp7 = shl nuw nsw i32 %tmp6, 8 583 %tmp8 = or i32 %tmp7, %tmp3 584 %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3 585 %tmp10 = load i8, i8* %tmp9, align 1 586 %tmp11 = zext i8 %tmp10 to i32 587 %tmp12 = shl nuw nsw i32 %tmp11, 16 588 %tmp13 = or i32 %tmp8, %tmp12 589 %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4 590 %tmp15 = load i8, i8* %tmp14, align 4 591 %tmp16 = zext i8 %tmp15 to i32 592 %tmp17 = shl nuw nsw i32 %tmp16, 24 593 %tmp18 = or i32 %tmp13, %tmp17 594 ret i32 %tmp18 595} 596 597declare i16 @llvm.bswap.i16(i16) 598 599; i16* p; // p is 4 byte aligned 600; (i32) bswap(p[0]) | (i32) bswap(p[1] << 16) 601define i32 @load_i32_by_bswap_i16(i32* %arg) { 602; CHECK-LABEL: load_i32_by_bswap_i16: 603; CHECK: @ %bb.0: 604; CHECK-NEXT: ldr r0, [r0] 605; CHECK-NEXT: mov r1, #65280 606; CHECK-NEXT: mov r2, #16711680 607; CHECK-NEXT: and r1, r1, r0, lsr #8 608; CHECK-NEXT: and r2, r2, r0, lsl #8 609; CHECK-NEXT: orr r1, r1, r0, lsr #24 610; CHECK-NEXT: orr r0, r2, r0, lsl #24 611; CHECK-NEXT: orr r0, r0, r1 612; CHECK-NEXT: mov pc, lr 613; 614; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16: 615; CHECK-ARMv6: @ %bb.0: 616; CHECK-ARMv6-NEXT: ldr r0, [r0] 617; CHECK-ARMv6-NEXT: rev r0, r0 618; CHECK-ARMv6-NEXT: bx lr 619; 620; CHECK-THUMBv6-LABEL: load_i32_by_bswap_i16: 621; CHECK-THUMBv6: @ %bb.0: 622; CHECK-THUMBv6-NEXT: ldr r0, [r0] 623; CHECK-THUMBv6-NEXT: rev r0, r0 624; CHECK-THUMBv6-NEXT: bx lr 625; 626; 
CHECK-THUMBv7-LABEL: load_i32_by_bswap_i16: 627; CHECK-THUMBv7: @ %bb.0: 628; CHECK-THUMBv7-NEXT: ldr r0, [r0] 629; CHECK-THUMBv7-NEXT: rev r0, r0 630; CHECK-THUMBv7-NEXT: bx lr 631 632 633 %tmp = bitcast i32* %arg to i16* 634 %tmp1 = load i16, i16* %tmp, align 4 635 %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1) 636 %tmp2 = zext i16 %tmp11 to i32 637 %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1 638 %tmp4 = load i16, i16* %tmp3, align 1 639 %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4) 640 %tmp5 = zext i16 %tmp41 to i32 641 %tmp6 = shl nuw nsw i32 %tmp5, 16 642 %tmp7 = or i32 %tmp6, %tmp2 643 ret i32 %tmp7 644} 645 646; i16* p; // p is 4 byte aligned 647; (i32) p[1] | (sext(p[0] << 16) to i32) 648define i32 @load_i32_by_sext_i16(i32* %arg) { 649; CHECK-LABEL: load_i32_by_sext_i16: 650; CHECK: @ %bb.0: 651; CHECK-NEXT: ldr r0, [r0] 652; CHECK-NEXT: mov pc, lr 653; 654; CHECK-ARMv6-LABEL: load_i32_by_sext_i16: 655; CHECK-ARMv6: @ %bb.0: 656; CHECK-ARMv6-NEXT: ldr r0, [r0] 657; CHECK-ARMv6-NEXT: bx lr 658; 659; CHECK-THUMBv6-LABEL: load_i32_by_sext_i16: 660; CHECK-THUMBv6: @ %bb.0: 661; CHECK-THUMBv6-NEXT: ldr r0, [r0] 662; CHECK-THUMBv6-NEXT: bx lr 663; 664; CHECK-THUMBv7-LABEL: load_i32_by_sext_i16: 665; CHECK-THUMBv7: @ %bb.0: 666; CHECK-THUMBv7-NEXT: ldr r0, [r0] 667; CHECK-THUMBv7-NEXT: bx lr 668 %tmp = bitcast i32* %arg to i16* 669 %tmp1 = load i16, i16* %tmp, align 4 670 %tmp2 = sext i16 %tmp1 to i32 671 %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1 672 %tmp4 = load i16, i16* %tmp3, align 1 673 %tmp5 = zext i16 %tmp4 to i32 674 %tmp6 = shl nuw nsw i32 %tmp2, 16 675 %tmp7 = or i32 %tmp6, %tmp5 676 ret i32 %tmp7 677} 678 679; i8* arg; i32 i; 680; p = arg + 12; 681; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24) 682define i32 @load_i32_by_i8_base_offset_index(i8* %arg, i32 %i) { 683; CHECK-LABEL: load_i32_by_i8_base_offset_index: 684; CHECK: @ %bb.0: 685; CHECK-NEXT: add r0, r0, r1 686; CHECK-NEXT: mov r1, #65280 
687; CHECK-NEXT: mov r2, #16711680 688; CHECK-NEXT: ldr r0, [r0, #12] 689; CHECK-NEXT: and r1, r1, r0, lsr #8 690; CHECK-NEXT: and r2, r2, r0, lsl #8 691; CHECK-NEXT: orr r1, r1, r0, lsr #24 692; CHECK-NEXT: orr r0, r2, r0, lsl #24 693; CHECK-NEXT: orr r0, r0, r1 694; CHECK-NEXT: mov pc, lr 695; 696; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index: 697; CHECK-ARMv6: @ %bb.0: 698; CHECK-ARMv6-NEXT: add r0, r0, r1 699; CHECK-ARMv6-NEXT: ldr r0, [r0, #12] 700; CHECK-ARMv6-NEXT: rev r0, r0 701; CHECK-ARMv6-NEXT: bx lr 702; 703; CHECK-THUMBv6-LABEL: load_i32_by_i8_base_offset_index: 704; CHECK-THUMBv6: @ %bb.0: 705; CHECK-THUMBv6-NEXT: adds r0, r0, r1 706; CHECK-THUMBv6-NEXT: ldr r0, [r0, #12] 707; CHECK-THUMBv6-NEXT: rev r0, r0 708; CHECK-THUMBv6-NEXT: bx lr 709; 710; CHECK-THUMBv7-LABEL: load_i32_by_i8_base_offset_index: 711; CHECK-THUMBv7: @ %bb.0: 712; CHECK-THUMBv7-NEXT: adds r0, r0, r1 713; CHECK-THUMBv7-NEXT: ldr r0, [r0, #12] 714; CHECK-THUMBv7-NEXT: rev r0, r0 715; CHECK-THUMBv7-NEXT: bx lr 716 %tmp = add nuw nsw i32 %i, 3 717 %tmp2 = add nuw nsw i32 %i, 2 718 %tmp3 = add nuw nsw i32 %i, 1 719 %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12 720 %tmp5 = zext i32 %i to i64 721 %tmp6 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp5 722 %tmp7 = load i8, i8* %tmp6, align 4 723 %tmp8 = zext i8 %tmp7 to i32 724 %tmp9 = zext i32 %tmp3 to i64 725 %tmp10 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp9 726 %tmp11 = load i8, i8* %tmp10, align 1 727 %tmp12 = zext i8 %tmp11 to i32 728 %tmp13 = shl nuw nsw i32 %tmp12, 8 729 %tmp14 = or i32 %tmp13, %tmp8 730 %tmp15 = zext i32 %tmp2 to i64 731 %tmp16 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp15 732 %tmp17 = load i8, i8* %tmp16, align 1 733 %tmp18 = zext i8 %tmp17 to i32 734 %tmp19 = shl nuw nsw i32 %tmp18, 16 735 %tmp20 = or i32 %tmp14, %tmp19 736 %tmp21 = zext i32 %tmp to i64 737 %tmp22 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp21 738 %tmp23 = load i8, i8* %tmp22, align 1 739 %tmp24 = zext i8 %tmp23 to 
i32 740 %tmp25 = shl nuw i32 %tmp24, 24 741 %tmp26 = or i32 %tmp20, %tmp25 742 ret i32 %tmp26 743} 744 745; i8* arg; i32 i; 746; p = arg + 12; 747; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24) 748define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) { 749; CHECK-LABEL: load_i32_by_i8_base_offset_index_2: 750; CHECK: @ %bb.0: 751; CHECK-NEXT: add r0, r1, r0 752; CHECK-NEXT: mov r1, #65280 753; CHECK-NEXT: mov r2, #16711680 754; CHECK-NEXT: ldr r0, [r0, #13] 755; CHECK-NEXT: and r1, r1, r0, lsr #8 756; CHECK-NEXT: and r2, r2, r0, lsl #8 757; CHECK-NEXT: orr r1, r1, r0, lsr #24 758; CHECK-NEXT: orr r0, r2, r0, lsl #24 759; CHECK-NEXT: orr r0, r0, r1 760; CHECK-NEXT: mov pc, lr 761; 762; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2: 763; CHECK-ARMv6: @ %bb.0: 764; CHECK-ARMv6-NEXT: add r0, r1, r0 765; CHECK-ARMv6-NEXT: ldr r0, [r0, #13] 766; CHECK-ARMv6-NEXT: rev r0, r0 767; CHECK-ARMv6-NEXT: bx lr 768; 769; CHECK-THUMBv6-LABEL: load_i32_by_i8_base_offset_index_2: 770; CHECK-THUMBv6: @ %bb.0: 771; CHECK-THUMBv6-NEXT: adds r0, r1, r0 772; CHECK-THUMBv6-NEXT: movs r1, #13 773; CHECK-THUMBv6-NEXT: ldr r0, [r0, r1] 774; CHECK-THUMBv6-NEXT: rev r0, r0 775; CHECK-THUMBv6-NEXT: bx lr 776; 777; CHECK-THUMBv7-LABEL: load_i32_by_i8_base_offset_index_2: 778; CHECK-THUMBv7: @ %bb.0: 779; CHECK-THUMBv7-NEXT: adds r0, r1, r0 780; CHECK-THUMBv7-NEXT: movs r1, #13 781; CHECK-THUMBv7-NEXT: ldr r0, [r0, r1] 782; CHECK-THUMBv7-NEXT: rev r0, r0 783; CHECK-THUMBv7-NEXT: bx lr 784 785 %tmp = add nuw nsw i32 %i, 4 786 %tmp2 = add nuw nsw i32 %i, 3 787 %tmp3 = add nuw nsw i32 %i, 2 788 %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12 789 %tmp5 = add nuw nsw i32 %i, 1 790 %tmp27 = zext i32 %tmp5 to i64 791 %tmp28 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp27 792 %tmp29 = load i8, i8* %tmp28, align 4 793 %tmp30 = zext i8 %tmp29 to i32 794 %tmp31 = zext i32 %tmp3 to i64 795 %tmp32 = getelementptr inbounds i8, i8* %tmp4, i64 
%tmp31 796 %tmp33 = load i8, i8* %tmp32, align 1 797 %tmp34 = zext i8 %tmp33 to i32 798 %tmp35 = shl nuw nsw i32 %tmp34, 8 799 %tmp36 = or i32 %tmp35, %tmp30 800 %tmp37 = zext i32 %tmp2 to i64 801 %tmp38 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp37 802 %tmp39 = load i8, i8* %tmp38, align 1 803 %tmp40 = zext i8 %tmp39 to i32 804 %tmp41 = shl nuw nsw i32 %tmp40, 16 805 %tmp42 = or i32 %tmp36, %tmp41 806 %tmp43 = zext i32 %tmp to i64 807 %tmp44 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp43 808 %tmp45 = load i8, i8* %tmp44, align 1 809 %tmp46 = zext i8 %tmp45 to i32 810 %tmp47 = shl nuw i32 %tmp46, 24 811 %tmp48 = or i32 %tmp42, %tmp47 812 ret i32 %tmp48 813} 814 815; i8* p; // p is 2 byte aligned 816; (i32) p[0] | ((i32) p[1] << 8) 817define i32 @zext_load_i32_by_i8(i32* %arg) { 818; CHECK-LABEL: zext_load_i32_by_i8: 819; CHECK: @ %bb.0: 820; CHECK-NEXT: ldrb r1, [r0] 821; CHECK-NEXT: ldrb r0, [r0, #1] 822; CHECK-NEXT: orr r0, r1, r0, lsl #8 823; CHECK-NEXT: mov pc, lr 824; 825; CHECK-ARMv6-LABEL: zext_load_i32_by_i8: 826; CHECK-ARMv6: @ %bb.0: 827; CHECK-ARMv6-NEXT: ldrh r0, [r0] 828; CHECK-ARMv6-NEXT: lsl r0, r0, #16 829; CHECK-ARMv6-NEXT: rev r0, r0 830; CHECK-ARMv6-NEXT: bx lr 831; 832; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8: 833; CHECK-THUMBv6: @ %bb.0: 834; CHECK-THUMBv6-NEXT: ldrh r0, [r0] 835; CHECK-THUMBv6-NEXT: lsls r0, r0, #16 836; CHECK-THUMBv6-NEXT: rev r0, r0 837; CHECK-THUMBv6-NEXT: bx lr 838; 839; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8: 840; CHECK-THUMBv7: @ %bb.0: 841; CHECK-THUMBv7-NEXT: ldrh r0, [r0] 842; CHECK-THUMBv7-NEXT: lsls r0, r0, #16 843; CHECK-THUMBv7-NEXT: rev r0, r0 844; CHECK-THUMBv7-NEXT: bx lr 845 846 %tmp = bitcast i32* %arg to i8* 847 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 848 %tmp2 = load i8, i8* %tmp1, align 2 849 %tmp3 = zext i8 %tmp2 to i32 850 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 851 %tmp5 = load i8, i8* %tmp4, align 1 852 %tmp6 = zext i8 %tmp5 to i32 853 %tmp7 = shl nuw nsw i32 %tmp6, 8 
854 %tmp8 = or i32 %tmp7, %tmp3 855 ret i32 %tmp8 856} 857 858; i8* p; // p is 2 byte aligned 859; ((i32) p[0] << 8) | ((i32) p[1] << 16) 860define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) { 861; CHECK-LABEL: zext_load_i32_by_i8_shl_8: 862; CHECK: @ %bb.0: 863; CHECK-NEXT: ldrb r1, [r0] 864; CHECK-NEXT: ldrb r0, [r0, #1] 865; CHECK-NEXT: lsl r0, r0, #16 866; CHECK-NEXT: orr r0, r0, r1, lsl #8 867; CHECK-NEXT: mov pc, lr 868; 869; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_shl_8: 870; CHECK-ARMv6: @ %bb.0: 871; CHECK-ARMv6-NEXT: ldrb r1, [r0] 872; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1] 873; CHECK-ARMv6-NEXT: lsl r0, r0, #16 874; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #8 875; CHECK-ARMv6-NEXT: bx lr 876; 877; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_shl_8: 878; CHECK-THUMBv6: @ %bb.0: 879; CHECK-THUMBv6-NEXT: ldrb r1, [r0] 880; CHECK-THUMBv6-NEXT: lsls r1, r1, #8 881; CHECK-THUMBv6-NEXT: ldrb r0, [r0, #1] 882; CHECK-THUMBv6-NEXT: lsls r0, r0, #16 883; CHECK-THUMBv6-NEXT: adds r0, r0, r1 884; CHECK-THUMBv6-NEXT: bx lr 885; 886; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_shl_8: 887; CHECK-THUMBv7: @ %bb.0: 888; CHECK-THUMBv7-NEXT: ldrb r1, [r0] 889; CHECK-THUMBv7-NEXT: lsls r1, r1, #8 890; CHECK-THUMBv7-NEXT: ldrb r0, [r0, #1] 891; CHECK-THUMBv7-NEXT: lsls r0, r0, #16 892; CHECK-THUMBv7-NEXT: adds r0, r0, r1 893; CHECK-THUMBv7-NEXT: bx lr 894 895 %tmp = bitcast i32* %arg to i8* 896 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 897 %tmp2 = load i8, i8* %tmp1, align 2 898 %tmp3 = zext i8 %tmp2 to i32 899 %tmp30 = shl nuw nsw i32 %tmp3, 8 900 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 901 %tmp5 = load i8, i8* %tmp4, align 1 902 %tmp6 = zext i8 %tmp5 to i32 903 %tmp7 = shl nuw nsw i32 %tmp6, 16 904 %tmp8 = or i32 %tmp7, %tmp30 905 ret i32 %tmp8 906} 907 908; i8* p; // p is 2 byte aligned 909; ((i32) p[0] << 16) | ((i32) p[1] << 24) 910define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) { 911; CHECK-LABEL: zext_load_i32_by_i8_shl_16: 912; CHECK: @ %bb.0: 913; CHECK-NEXT: 
ldrb r1, [r0] 914; CHECK-NEXT: ldrb r0, [r0, #1] 915; CHECK-NEXT: lsl r0, r0, #24 916; CHECK-NEXT: orr r0, r0, r1, lsl #16 917; CHECK-NEXT: mov pc, lr 918; 919; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_shl_16: 920; CHECK-ARMv6: @ %bb.0: 921; CHECK-ARMv6-NEXT: ldrb r1, [r0] 922; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1] 923; CHECK-ARMv6-NEXT: lsl r0, r0, #24 924; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #16 925; CHECK-ARMv6-NEXT: bx lr 926; 927; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_shl_16: 928; CHECK-THUMBv6: @ %bb.0: 929; CHECK-THUMBv6-NEXT: ldrb r1, [r0] 930; CHECK-THUMBv6-NEXT: lsls r1, r1, #16 931; CHECK-THUMBv6-NEXT: ldrb r0, [r0, #1] 932; CHECK-THUMBv6-NEXT: lsls r0, r0, #24 933; CHECK-THUMBv6-NEXT: adds r0, r0, r1 934; CHECK-THUMBv6-NEXT: bx lr 935; 936; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_shl_16: 937; CHECK-THUMBv7: @ %bb.0: 938; CHECK-THUMBv7-NEXT: ldrb r1, [r0] 939; CHECK-THUMBv7-NEXT: lsls r1, r1, #16 940; CHECK-THUMBv7-NEXT: ldrb r0, [r0, #1] 941; CHECK-THUMBv7-NEXT: lsls r0, r0, #24 942; CHECK-THUMBv7-NEXT: adds r0, r0, r1 943; CHECK-THUMBv7-NEXT: bx lr 944 945 %tmp = bitcast i32* %arg to i8* 946 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 947 %tmp2 = load i8, i8* %tmp1, align 2 948 %tmp3 = zext i8 %tmp2 to i32 949 %tmp30 = shl nuw nsw i32 %tmp3, 16 950 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 951 %tmp5 = load i8, i8* %tmp4, align 1 952 %tmp6 = zext i8 %tmp5 to i32 953 %tmp7 = shl nuw nsw i32 %tmp6, 24 954 %tmp8 = or i32 %tmp7, %tmp30 955 ret i32 %tmp8 956} 957 958; i8* p; // p is 2 byte aligned 959; (i32) p[1] | ((i32) p[0] << 8) 960define i32 @zext_load_i32_by_i8_bswap(i32* %arg) { 961; CHECK-LABEL: zext_load_i32_by_i8_bswap: 962; CHECK: @ %bb.0: 963; CHECK-NEXT: ldrh r0, [r0] 964; CHECK-NEXT: mov pc, lr 965; 966; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap: 967; CHECK-ARMv6: @ %bb.0: 968; CHECK-ARMv6-NEXT: ldrh r0, [r0] 969; CHECK-ARMv6-NEXT: bx lr 970; 971; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_bswap: 972; CHECK-THUMBv6: @ %bb.0: 
; CHECK-THUMBv6-NEXT: ldrh r0, [r0]
; CHECK-THUMBv6-NEXT: bx lr
;
; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_bswap:
; CHECK-THUMBv7: @ %bb.0:
; CHECK-THUMBv7-NEXT: ldrh r0, [r0]
; CHECK-THUMBv7-NEXT: bx lr

; Loads p[1] (align 1) and p[0] (align 2) and combines them as
; (p[0] << 8) | p[1]; the expected output above is a single ldrh.
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp5 = load i8, i8* %tmp4, align 2
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  ret i32 %tmp8
}

; i8* p; // p is 2 byte aligned
; ((i32) p[1] << 8) | ((i32) p[0] << 16)
;
; Equivalent to a big-endian halfword load shifted left by 8, but the expected
; output below shows that current codegen keeps two separate byte loads.
define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
; CHECK: @ %bb.0:
; CHECK-NEXT: ldrb r1, [r0]
; CHECK-NEXT: ldrb r0, [r0, #1]
; CHECK-NEXT: lsl r1, r1, #16
; CHECK-NEXT: orr r0, r1, r0, lsl #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap_shl_8:
; CHECK-ARMv6: @ %bb.0:
; CHECK-ARMv6-NEXT: ldrb r1, [r0]
; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1]
; CHECK-ARMv6-NEXT: lsl r1, r1, #16
; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #8
; CHECK-ARMv6-NEXT: bx lr
;
; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_bswap_shl_8:
; CHECK-THUMBv6: @ %bb.0:
; CHECK-THUMBv6-NEXT: ldrb r1, [r0, #1]
; CHECK-THUMBv6-NEXT: lsls r1, r1, #8
; CHECK-THUMBv6-NEXT: ldrb r0, [r0]
; CHECK-THUMBv6-NEXT: lsls r0, r0, #16
; CHECK-THUMBv6-NEXT: adds r0, r0, r1
; CHECK-THUMBv6-NEXT: bx lr
;
; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_bswap_shl_8:
; CHECK-THUMBv7: @ %bb.0:
; CHECK-THUMBv7-NEXT: ldrb r1, [r0, #1]
; CHECK-THUMBv7-NEXT: lsls r1, r1, #8
; CHECK-THUMBv7-NEXT: ldrb r0, [r0]
; CHECK-THUMBv7-NEXT: lsls r0, r0, #16
; CHECK-THUMBv7-NEXT: adds r0, r0, r1
; CHECK-THUMBv7-NEXT: bx lr

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 8
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp5 = load i8, i8* %tmp4, align 2
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; i8* p; // p is 2 byte aligned
; ((i32) p[1] << 16) | ((i32) p[0] << 24)
;
; Equivalent to a big-endian halfword load shifted left by 16; as with the
; shl-8 case, the expected output keeps two separate byte loads.
define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
; CHECK: @ %bb.0:
; CHECK-NEXT: ldrb r1, [r0]
; CHECK-NEXT: ldrb r0, [r0, #1]
; CHECK-NEXT: lsl r1, r1, #24
; CHECK-NEXT: orr r0, r1, r0, lsl #16
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap_shl_16:
; CHECK-ARMv6: @ %bb.0:
; CHECK-ARMv6-NEXT: ldrb r1, [r0]
; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1]
; CHECK-ARMv6-NEXT: lsl r1, r1, #24
; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #16
; CHECK-ARMv6-NEXT: bx lr
;
; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_bswap_shl_16:
; CHECK-THUMBv6: @ %bb.0:
; CHECK-THUMBv6-NEXT: ldrb r1, [r0, #1]
; CHECK-THUMBv6-NEXT: lsls r1, r1, #16
; CHECK-THUMBv6-NEXT: ldrb r0, [r0]
; CHECK-THUMBv6-NEXT: lsls r0, r0, #24
; CHECK-THUMBv6-NEXT: adds r0, r0, r1
; CHECK-THUMBv6-NEXT: bx lr
;
; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_bswap_shl_16:
; CHECK-THUMBv7: @ %bb.0:
; CHECK-THUMBv7-NEXT: ldrb r1, [r0, #1]
; CHECK-THUMBv7-NEXT: lsls r1, r1, #16
; CHECK-THUMBv7-NEXT: ldrb r0, [r0]
; CHECK-THUMBv7-NEXT: lsls r0, r0, #24
; CHECK-THUMBv7-NEXT: adds r0, r0, r1
; CHECK-THUMBv7-NEXT: bx lr

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 16
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp5 = load i8, i8* %tmp4, align 2
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 24
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; i8* p;
; i16* p1.i16 = (i16*) p;
; (p1.i16[0] << 8) | ((i16) p[2])
;
; This is essentially an i16 load from p[1], but we don't fold the pattern now
; because in the original DAG we don't have p[1] address available
define i16 @load_i16_from_nonzero_offset(i8* %p) {
; CHECK-LABEL: load_i16_from_nonzero_offset:
; CHECK: @ %bb.0:
; CHECK-NEXT: ldrh r1, [r0]
; CHECK-NEXT: ldrb r0, [r0, #2]
; CHECK-NEXT: orr r0, r0, r1, lsl #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i16_from_nonzero_offset:
; CHECK-ARMv6: @ %bb.0:
; CHECK-ARMv6-NEXT: ldrh r1, [r0]
; CHECK-ARMv6-NEXT: ldrb r0, [r0, #2]
; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #8
; CHECK-ARMv6-NEXT: bx lr
;
; CHECK-THUMBv6-LABEL: load_i16_from_nonzero_offset:
; CHECK-THUMBv6: @ %bb.0:
; CHECK-THUMBv6-NEXT: ldrb r1, [r0, #2]
; CHECK-THUMBv6-NEXT: ldrh r0, [r0]
; CHECK-THUMBv6-NEXT: lsls r0, r0, #8
; CHECK-THUMBv6-NEXT: adds r0, r0, r1
; CHECK-THUMBv6-NEXT: bx lr
;
; CHECK-THUMBv7-LABEL: load_i16_from_nonzero_offset:
; CHECK-THUMBv7: @ %bb.0:
; CHECK-THUMBv7-NEXT: ldrb r1, [r0, #2]
; CHECK-THUMBv7-NEXT: ldrh r0, [r0]
; CHECK-THUMBv7-NEXT: lsls r0, r0, #8
; CHECK-THUMBv7-NEXT: adds r0, r0, r1
; CHECK-THUMBv7-NEXT: bx lr

; The i16 load at p and the byte load at p+2 stay separate; the expected
; output above is a halfword load plus a byte load combined with shifts.
  %p1.i16 = bitcast i8* %p to i16*
  %p2.i8 = getelementptr i8, i8* %p, i64 2
  %v1 = load i16, i16* %p1.i16
  %v2.i8 = load i8, i8* %p2.i8
  %v2 = zext i8 %v2.i8 to i16
  %v1.shl = shl i16 %v1, 8
  %res = or i16 %v1.shl, %v2
  ret i16 %res
}