; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=BSWAP
; RUN: llc < %s -mtriple=i686-unknown -mattr=+movbe | FileCheck %s --check-prefix=CHECK --check-prefix=MOVBE
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK64 --check-prefix=BSWAP64
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+movbe | FileCheck %s --check-prefix=CHECK64 --check-prefix=MOVBE64

; Each test below assembles a wide integer from adjacent narrow loads using
; zext/shl/or. The comment above each function gives the C-like source pattern;
; the assertions record the code llc is expected to emit for it.

; Little-endian byte order: expected to fold into a single 32-bit load.
; i8* p;
; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
define i32 @load_i32_by_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 8
  %tmp7 = or i32 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i32
  %tmp11 = shl nuw nsw i32 %tmp10, 16
  %tmp12 = or i32 %tmp7, %tmp11
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i32
  %tmp16 = shl nuw nsw i32 %tmp15, 24
  %tmp17 = or i32 %tmp12, %tmp16
  ret i32 %tmp17
}

; Big-endian byte order: expected to fold into one 32-bit load plus bswap,
; or a single movbe load when +movbe is enabled.
; i8* p;
; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap(i32* %arg) {
; BSWAP-LABEL: load_i32_by_i8_bswap:
; BSWAP: # %bb.0:
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT: movl (%eax), %eax
; BSWAP-NEXT: bswapl %eax
; BSWAP-NEXT: retl
;
; MOVBE-LABEL: load_i32_by_i8_bswap:
; MOVBE: # %bb.0:
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT: movbel (%eax), %eax
; MOVBE-NEXT: retl
;
; BSWAP64-LABEL: load_i32_by_i8_bswap:
; BSWAP64: # %bb.0:
; BSWAP64-NEXT: movl (%rdi), %eax
; BSWAP64-NEXT: bswapl %eax
; BSWAP64-NEXT: retq
;
; MOVBE64-LABEL: load_i32_by_i8_bswap:
; MOVBE64: # %bb.0:
; MOVBE64-NEXT: movbel (%rdi), %eax
; MOVBE64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ret i32 %tmp17
}

; Two adjacent i16 halves: expected to fold into a single 32-bit load.
; i16* p;
; (i32) p[0] | ((i32) p[1] << 16)
define i32 @load_i32_by_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 1
  %tmp2 = zext i16 %tmp1 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp5 = zext i16 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 16
  %tmp7 = or i32 %tmp6, %tmp2
  ret i32 %tmp7
}

; Mixed widths (one i16 plus two i8): expected to fold into a single 32-bit load.
; i16* p_16;
; i8* p_8 = (i8*) p_16;
; (i32) p_16[0] | ((i32) p_8[2] << 16) | ((i32) p_8[3] << 24)
define i32 @load_i32_by_i16_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16_i8:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = bitcast i32* %arg to i8*
  %tmp2 = load i16, i16* %tmp, align 1
  %tmp3 = zext i16 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp1, i32 2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = getelementptr inbounds i8, i8* %tmp1, i32 3
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i32
  %tmp11 = shl nuw nsw i32 %tmp10, 24
  %tmp12 = or i32 %tmp7, %tmp11
  %tmp13 = or i32 %tmp12, %tmp3
  ret i32 %tmp13
}


; Two i16 halves, each itself assembled from two bytes (little-endian):
; expected to fold into a single 32-bit load.
; i8* p;
; (i32) ((i16) p[0] | ((i16) p[1] << 8)) | ((i32) ((i16) p[2] | ((i16) p[3] << 8)) << 16)
define i32 @load_i32_by_i16_by_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16_by_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16_by_i8:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i16
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i16
  %tmp6 = shl nuw nsw i16 %tmp5, 8
  %tmp7 = or i16 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i16
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i16
  %tmp14 = shl nuw nsw i16 %tmp13, 8
  %tmp15 = or i16 %tmp14, %tmp10
  %tmp16 = zext i16 %tmp7 to i32
  %tmp17 = zext i16 %tmp15 to i32
  %tmp18 = shl nuw nsw i32 %tmp17, 16
  %tmp19 = or i32 %tmp18, %tmp16
  ret i32 %tmp19
}

; Big-endian variant of the test above: expected to fold into one 32-bit load
; plus bswap, or a single movbe load when +movbe is enabled.
; i8* p;
; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[2] << 8) | (i16) p[3])
define i32 @load_i32_by_i16_by_i8_bswap(i32* %arg) {
; BSWAP-LABEL: load_i32_by_i16_by_i8_bswap:
; BSWAP: # %bb.0:
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT: movl (%eax), %eax
; BSWAP-NEXT: bswapl %eax
; BSWAP-NEXT: retl
;
; MOVBE-LABEL: load_i32_by_i16_by_i8_bswap:
; MOVBE: # %bb.0:
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT: movbel (%eax), %eax
; MOVBE-NEXT: retl
;
; BSWAP64-LABEL: load_i32_by_i16_by_i8_bswap:
; BSWAP64: # %bb.0:
; BSWAP64-NEXT: movl (%rdi), %eax
; BSWAP64-NEXT: bswapl %eax
; BSWAP64-NEXT: retq
;
; MOVBE64-LABEL: load_i32_by_i16_by_i8_bswap:
; MOVBE64: # %bb.0:
; MOVBE64-NEXT: movbel (%rdi), %eax
; MOVBE64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i16
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i16
  %tmp6 = shl nuw nsw i16 %tmp2, 8
  %tmp7 = or i16 %tmp6, %tmp5
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i16
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i16
  %tmp14 = shl nuw nsw i16 %tmp10, 8
  %tmp15 = or i16 %tmp14, %tmp13
  %tmp16 = zext i16 %tmp7 to i32
  %tmp17 = zext i16 %tmp15 to i32
  %tmp18 = shl nuw nsw i32 %tmp16, 16
  %tmp19 = or i32 %tmp18, %tmp17
  ret i32 %tmp19
}

; Little-endian i64 from eight bytes: expected to fold into one 64-bit load on
; x86-64 and into two 32-bit loads on i686.
; i8* p;
; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
define i64 @load_i64_by_i8(i64* %arg) {
; CHECK-LABEL: load_i64_by_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl (%ecx), %eax
; CHECK-NEXT: movl 4(%ecx), %edx
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i64_by_i8:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movq (%rdi), %rax
; CHECK64-NEXT: retq
  %tmp = bitcast i64* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i64 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i64
  %tmp6 = shl nuw nsw i64 %tmp5, 8
  %tmp7 = or i64 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i64 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i64
  %tmp11 = shl nuw nsw i64 %tmp10, 16
  %tmp12 = or i64 %tmp7, %tmp11
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i64 3
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i64
  %tmp16 = shl nuw nsw i64 %tmp15, 24
  %tmp17 = or i64 %tmp12, %tmp16
  %tmp18 = getelementptr inbounds i8, i8* %tmp, i64 4
  %tmp19 = load i8, i8* %tmp18, align 1
  %tmp20 = zext i8 %tmp19 to i64
  %tmp21 = shl nuw nsw i64 %tmp20, 32
  %tmp22 = or i64 %tmp17, %tmp21
  %tmp23 = getelementptr inbounds i8, i8* %tmp, i64 5
  %tmp24 = load i8, i8* %tmp23, align 1
  %tmp25 = zext i8 %tmp24 to i64
  %tmp26 = shl nuw nsw i64 %tmp25, 40
  %tmp27 = or i64 %tmp22, %tmp26
  %tmp28 = getelementptr inbounds i8, i8* %tmp, i64 6
  %tmp29 = load i8, i8* %tmp28, align 1
  %tmp30 = zext i8 %tmp29 to i64
  %tmp31 = shl nuw nsw i64 %tmp30, 48
  %tmp32 = or i64 %tmp27, %tmp31
  %tmp33 = getelementptr inbounds i8, i8* %tmp, i64 7
  %tmp34 = load i8, i8* %tmp33, align 1
  %tmp35 = zext i8 %tmp34 to i64
  %tmp36 = shl nuw i64 %tmp35, 56
  %tmp37 = or i64 %tmp32, %tmp36
  ret i64 %tmp37
}

; i8* p;
; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) |
;   ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
; Big-endian i64 from eight bytes: expected to fold into one 64-bit load plus
; bswap (or movbe) on x86-64, and into two byte-swapped 32-bit loads on i686.
define i64 @load_i64_by_i8_bswap(i64* %arg) {
; BSWAP-LABEL: load_i64_by_i8_bswap:
; BSWAP: # %bb.0:
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT: movl (%eax), %edx
; BSWAP-NEXT: movl 4(%eax), %eax
; BSWAP-NEXT: bswapl %eax
; BSWAP-NEXT: bswapl %edx
; BSWAP-NEXT: retl
;
; MOVBE-LABEL: load_i64_by_i8_bswap:
; MOVBE: # %bb.0:
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; MOVBE-NEXT: movbel 4(%ecx), %eax
; MOVBE-NEXT: movbel (%ecx), %edx
; MOVBE-NEXT: retl
;
; BSWAP64-LABEL: load_i64_by_i8_bswap:
; BSWAP64: # %bb.0:
; BSWAP64-NEXT: movq (%rdi), %rax
; BSWAP64-NEXT: bswapq %rax
; BSWAP64-NEXT: retq
;
; MOVBE64-LABEL: load_i64_by_i8_bswap:
; MOVBE64: # %bb.0:
; MOVBE64-NEXT: movbeq (%rdi), %rax
; MOVBE64-NEXT: retq
  %tmp = bitcast i64* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = shl nuw i64 %tmp2, 56
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i64 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i64
  %tmp7 = shl nuw nsw i64 %tmp6, 48
  %tmp8 = or i64 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i64 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i64
  %tmp12 = shl nuw nsw i64 %tmp11, 40
  %tmp13 = or i64 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i64 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i64
  %tmp17 = shl nuw nsw i64 %tmp16, 32
  %tmp18 = or i64 %tmp13, %tmp17
  %tmp19 = getelementptr inbounds i8, i8* %tmp, i64 4
  %tmp20 = load i8, i8* %tmp19, align 1
  %tmp21 = zext i8 %tmp20 to i64
  %tmp22 = shl nuw nsw i64 %tmp21, 24
  %tmp23 = or i64 %tmp18, %tmp22
  %tmp24 = getelementptr inbounds i8, i8* %tmp, i64 5
  %tmp25 = load i8, i8* %tmp24, align 1
  %tmp26 = zext i8 %tmp25 to i64
  %tmp27 = shl nuw nsw i64 %tmp26, 16
  %tmp28 = or i64 %tmp23, %tmp27
  %tmp29 = getelementptr inbounds i8, i8* %tmp, i64 6
  %tmp30 = load i8, i8* %tmp29, align 1
  %tmp31 = zext i8 %tmp30 to i64
  %tmp32 = shl nuw nsw i64 %tmp31, 8
  %tmp33 = or i64 %tmp28, %tmp32
  %tmp34 = getelementptr inbounds i8, i8* %tmp, i64 7
  %tmp35 = load i8, i8* %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i64
  %tmp37 = or i64 %tmp33, %tmp36
  ret i64 %tmp37
}

; Negative test: the byte loads are expected to stay separate.
; Part of the load by bytes pattern is used outside of the pattern
; i8* p;
; i32 x = (i32) p[1]
; res = ((i32) p[0] << 24) | (x << 16) | ((i32) p[2] << 8) | (i32) p[3]
; x | res
define i32 @load_i32_by_i8_bswap_uses(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap_uses:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl (%eax), %ecx
; CHECK-NEXT: shll $24, %ecx
; CHECK-NEXT: movzbl 1(%eax), %edx
; CHECK-NEXT: movl %edx, %esi
; CHECK-NEXT: shll $16, %esi
; CHECK-NEXT: orl %ecx, %esi
; CHECK-NEXT: movzbl 2(%eax), %ecx
; CHECK-NEXT: shll $8, %ecx
; CHECK-NEXT: orl %esi, %ecx
; CHECK-NEXT: movzbl 3(%eax), %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_uses:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: movl %ecx, %edx
; CHECK64-NEXT: shll $16, %edx
; CHECK64-NEXT: orl %eax, %edx
; CHECK64-NEXT: movzbl 2(%rdi), %esi
; CHECK64-NEXT: shll $8, %esi
; CHECK64-NEXT: orl %edx, %esi
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %esi, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ; Use individual part of the pattern outside of the pattern
  %tmp18 = or i32 %tmp6, %tmp17
  ret i32 %tmp18
}

; Negative test: the byte loads are expected to stay separate.
; One of the loads is volatile
; i8* p;
; p0 = volatile *p;
; ((i32) p0 << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap_volatile(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap_volatile:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl (%eax), %ecx
; CHECK-NEXT: shll $24, %ecx
; CHECK-NEXT: movzbl 1(%eax), %edx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: orl %ecx, %edx
; CHECK-NEXT: movzbl 2(%eax), %ecx
; CHECK-NEXT: shll $8, %ecx
; CHECK-NEXT: orl %edx, %ecx
; CHECK-NEXT: movzbl 3(%eax), %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_volatile:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load volatile i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ret i32 %tmp17
}

; Negative test: the byte loads are expected to stay separate.
; There is a store in between individual loads
; i8* p, q;
; res1 = ((i32) p[0] << 24) | ((i32) p[1] << 16)
; *q = 0;
; res2 = ((i32) p[2] << 8) | (i32) p[3]
; res1 | res2
define i32 @load_i32_by_i8_bswap_store_in_between(i32* %arg, i32* %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_store_in_between:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl (%ecx), %edx
; CHECK-NEXT: shll $24, %edx
; CHECK-NEXT: movzbl 1(%ecx), %esi
; CHECK-NEXT: movl $0, (%eax)
; CHECK-NEXT: shll $16, %esi
; CHECK-NEXT: orl %edx, %esi
; CHECK-NEXT: movzbl 2(%ecx), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %esi, %edx
; CHECK-NEXT: movzbl 3(%ecx), %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_store_in_between:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: movl $0, (%rsi)
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp2 = load i8, i8* %tmp, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = shl nuw nsw i32 %tmp3, 24
  %tmp5 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp6 = load i8, i8* %tmp5, align 1
  ; This store will prevent folding of the pattern
  store i32 0, i32* %arg1
  %tmp7 = zext i8 %tmp6 to i32
  %tmp8 = shl nuw nsw i32 %tmp7, 16
  %tmp9 = or i32 %tmp8, %tmp4
  %tmp10 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp11 = load i8, i8* %tmp10, align 1
  %tmp12 = zext i8 %tmp11 to i32
  %tmp13 = shl nuw nsw i32 %tmp12, 8
  %tmp14 = or i32 %tmp9, %tmp13
  %tmp15 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp16 = load i8, i8* %tmp15, align 1
  %tmp17 = zext i8 %tmp16 to i32
  %tmp18 = or i32 %tmp14, %tmp17
  ret i32 %tmp18
}

; Negative test: the byte loads are expected to stay separate.
; One of the loads is from an unrelated location
; i8* p, q;
; ((i32) p[0] << 24) | ((i32) q[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap_unrelated_load(i32* %arg, i32* %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_unrelated_load:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl (%ecx), %edx
; CHECK-NEXT: shll $24, %edx
; CHECK-NEXT: movzbl 1(%eax), %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: movzbl 2(%ecx), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %eax, %edx
; CHECK-NEXT: movzbl 3(%ecx), %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_unrelated_load:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rsi), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp2 = bitcast i32* %arg1 to i8*
  %tmp3 = load i8, i8* %tmp, align 1
  %tmp4 = zext i8 %tmp3 to i32
  %tmp5 = shl nuw nsw i32 %tmp4, 24
  ; Load from an unrelated address
  %tmp6 = getelementptr inbounds i8, i8* %tmp2, i32 1
  %tmp7 = load i8, i8* %tmp6, align 1
  %tmp8 = zext i8 %tmp7 to i32
  %tmp9 = shl nuw nsw i32 %tmp8, 16
  %tmp10 = or i32 %tmp9, %tmp5
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i32
  %tmp14 = shl nuw nsw i32 %tmp13, 8
  %tmp15 = or i32 %tmp10, %tmp14
  %tmp16 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp17 = load i8, i8* %tmp16, align 1
  %tmp18 = zext i8 %tmp17 to i32
  %tmp19 = or i32 %tmp15, %tmp18
  ret i32 %tmp19
}

; Pattern starting at byte offset 1: expected to fold into one 32-bit load at offset 1.
; i8* p;
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl 1(%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl 1(%rdi), %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; Pattern at negative byte offsets: expected to fold into one 32-bit load at offset -4.
; i8* p;
; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl -4(%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_neg_offset:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl -4(%rdi), %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; Byte-reversed pattern at offset 1: expected to fold into one 32-bit load at
; offset 1 plus bswap, or a single movbe load.
; i8* p;
; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
; BSWAP-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; BSWAP: # %bb.0:
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT: movl 1(%eax), %eax
; BSWAP-NEXT: bswapl %eax
; BSWAP-NEXT: retl
;
; MOVBE-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; MOVBE: # %bb.0:
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT: movbel 1(%eax), %eax
; MOVBE-NEXT: retl
;
; BSWAP64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; BSWAP64: # %bb.0:
; BSWAP64-NEXT: movl 1(%rdi), %eax
; BSWAP64-NEXT: bswapl %eax
; BSWAP64-NEXT: retq
;
; MOVBE64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; MOVBE64: # %bb.0:
; MOVBE64-NEXT: movbel 1(%rdi), %eax
; MOVBE64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; Byte-reversed pattern at negative offsets: expected to fold into one 32-bit
; load at offset -4 plus bswap, or a single movbe load.
; i8* p;
; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
; BSWAP-LABEL: load_i32_by_i8_neg_offset_bswap:
; BSWAP: # %bb.0:
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT: movl -4(%eax), %eax
; BSWAP-NEXT: bswapl %eax
; BSWAP-NEXT: retl
;
; MOVBE-LABEL: load_i32_by_i8_neg_offset_bswap:
; MOVBE: # %bb.0:
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT: movbel -4(%eax), %eax
; MOVBE-NEXT: retl
;
; BSWAP64-LABEL: load_i32_by_i8_neg_offset_bswap:
; BSWAP64: # %bb.0:
; BSWAP64-NEXT: movl -4(%rdi), %eax
; BSWAP64-NEXT: bswapl %eax
; BSWAP64-NEXT: retq
;
; MOVBE64-LABEL: load_i32_by_i8_neg_offset_bswap:
; MOVBE64: # %bb.0:
; MOVBE64-NEXT: movbel -4(%rdi), %eax
; MOVBE64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; Variable index: expected to fold into one base+index 32-bit load plus bswap,
; or a single movbe load.
; i8* p; i32 i;
; ((i32) p[i] << 24) | ((i32) p[i + 1] << 16) | ((i32) p[i + 2] << 8) | (i32) p[i + 3]
define i32 @load_i32_by_i8_bswap_base_index_offset(i32* %arg, i32 %arg1) {
; BSWAP-LABEL: load_i32_by_i8_bswap_base_index_offset:
; BSWAP: # %bb.0:
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %ecx
; BSWAP-NEXT: movl (%ecx,%eax), %eax
; BSWAP-NEXT: bswapl %eax
; BSWAP-NEXT: retl
;
; MOVBE-LABEL: load_i32_by_i8_bswap_base_index_offset:
; MOVBE: # %bb.0:
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; MOVBE-NEXT: movbel (%ecx,%eax), %eax
; MOVBE-NEXT: retl
;
; BSWAP64-LABEL: load_i32_by_i8_bswap_base_index_offset:
; BSWAP64: # %bb.0:
; BSWAP64-NEXT: movslq %esi, %rax
; BSWAP64-NEXT: movl (%rdi,%rax), %eax
; BSWAP64-NEXT: bswapl %eax
; BSWAP64-NEXT: retq
;
; MOVBE64-LABEL: load_i32_by_i8_bswap_base_index_offset:
; MOVBE64: # %bb.0:
; MOVBE64-NEXT: movslq %esi, %rax
; MOVBE64-NEXT: movbel (%rdi,%rax), %eax
; MOVBE64-NEXT: retq
  %tmp = bitcast i32* %arg to i8*
  %tmp2 = getelementptr inbounds i8, i8* %tmp, i32 %arg1
  %tmp3 = load i8, i8* %tmp2, align 1
  %tmp4 = zext i8 %tmp3 to i32
  %tmp5 = shl nuw nsw i32 %tmp4, 24
  %tmp6 = add nuw nsw i32 %arg1, 1
  %tmp7 = getelementptr inbounds i8, i8* %tmp, i32 %tmp6
  %tmp8 = load i8, i8* %tmp7, align 1
  %tmp9 = zext i8 %tmp8 to i32
  %tmp10 = shl nuw nsw i32 %tmp9, 16
  %tmp11 = or i32 %tmp10, %tmp5
  %tmp12 = add nuw nsw i32 %arg1, 2
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i32 %tmp12
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i32
  %tmp16 = shl nuw nsw i32 %tmp15, 8
  %tmp17 = or i32 %tmp11, %tmp16
  %tmp18 = add nuw nsw i32 %arg1, 3
  %tmp19 = getelementptr inbounds i8, i8* %tmp, i32 %tmp18
  %tmp20 = load i8, i8* %tmp19, align 1
  %tmp21 = zext i8 %tmp20 to i32
  %tmp22 = or i32 %tmp17, %tmp21
  ret i32 %tmp22
}

; Verify that we don't crash handling shl i32 %conv57, 32
; (both shift amounts below, 56 and 32, are at least the i32 bit width)
define void @shift_i32_by_32(i8* %src1, i8* %src2, i64* %dst) {
; CHECK-LABEL: shift_i32_by_32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl $-1, 4(%eax)
; CHECK-NEXT: movl $-1, (%eax)
; CHECK-NEXT: retl
;
; CHECK64-LABEL: shift_i32_by_32:
; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: movq $-1, (%rdx)
; CHECK64-NEXT: retq
entry:
  %load1 = load i8, i8* %src1, align 1
  %conv46 = zext i8 %load1 to i32
  %shl47 = shl i32 %conv46, 56
  %or55 = or i32 %shl47, 0
  %load2 = load i8, i8* %src2, align 1
  %conv57 = zext i8 %load2 to i32
  %shl58 = shl i32 %conv57, 32
  %or59 = or i32 %or55, %shl58
  %or74 = or i32 %or59, 0
  %conv75 = sext i32 %or74 to i64
  store i64 %conv75, i64* %dst, align 8
  ret void
}

declare i16 @llvm.bswap.i16(i16)

; i16* p;
; (i32) bswap(p[1]) |
;   ((i32) bswap(p[0]) << 16)
; Two byte-swapped i16 halves with the first half placed in the high bits:
; expected to fold into one 32-bit load plus bswap, or a single movbe load.
define i32 @load_i32_by_bswap_i16(i32* %arg) {
; BSWAP-LABEL: load_i32_by_bswap_i16:
; BSWAP: # %bb.0:
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT: movl (%eax), %eax
; BSWAP-NEXT: bswapl %eax
; BSWAP-NEXT: retl
;
; MOVBE-LABEL: load_i32_by_bswap_i16:
; MOVBE: # %bb.0:
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT: movbel (%eax), %eax
; MOVBE-NEXT: retl
;
; BSWAP64-LABEL: load_i32_by_bswap_i16:
; BSWAP64: # %bb.0:
; BSWAP64-NEXT: movl (%rdi), %eax
; BSWAP64-NEXT: bswapl %eax
; BSWAP64-NEXT: retq
;
; MOVBE64-LABEL: load_i32_by_bswap_i16:
; MOVBE64: # %bb.0:
; MOVBE64-NEXT: movbel (%rdi), %eax
; MOVBE64-NEXT: retq
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 4
  %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
  %tmp2 = zext i16 %tmp11 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
  %tmp5 = zext i16 %tmp41 to i32
  %tmp6 = shl nuw nsw i32 %tmp2, 16
  %tmp7 = or i32 %tmp6, %tmp5
  ret i32 %tmp7
}

; The high half is sign-extended before being shifted into place:
; still expected to fold into a single 32-bit load.
; i16* p;
; (i32) p[0] | ((sext(p[1]) to i32) << 16)
define i32 @load_i32_by_sext_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_sext_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_sext_i16:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 1
  %tmp2 = zext i16 %tmp1 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp5 = sext i16 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 16
  %tmp7 = or i32 %tmp6, %tmp2
  ret i32 %tmp7
}

; i8* arg; i32 i;
; p = arg + 12;
; (i32) p[i] | ((i32) p[i + 1] << 8)
;   | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
; Constant base offset plus variable index: expected to fold into one 32-bit
; load addressed as 12(base,index).
define i32 @load_i32_by_i8_base_offset_index(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl 12(%ecx,%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_base_offset_index:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl %esi, %eax
; CHECK64-NEXT: movl 12(%rdi,%rax), %eax
; CHECK64-NEXT: retq
  %tmp = add nuw nsw i32 %i, 3
  %tmp2 = add nuw nsw i32 %i, 2
  %tmp3 = add nuw nsw i32 %i, 1
  %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp5 = zext i32 %i to i64
  %tmp6 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp5
  %tmp7 = load i8, i8* %tmp6, align 1
  %tmp8 = zext i8 %tmp7 to i32
  %tmp9 = zext i32 %tmp3 to i64
  %tmp10 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp9
  %tmp11 = load i8, i8* %tmp10, align 1
  %tmp12 = zext i8 %tmp11 to i32
  %tmp13 = shl nuw nsw i32 %tmp12, 8
  %tmp14 = or i32 %tmp13, %tmp8
  %tmp15 = zext i32 %tmp2 to i64
  %tmp16 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp15
  %tmp17 = load i8, i8* %tmp16, align 1
  %tmp18 = zext i8 %tmp17 to i32
  %tmp19 = shl nuw nsw i32 %tmp18, 16
  %tmp20 = or i32 %tmp14, %tmp19
  %tmp21 = zext i32 %tmp to i64
  %tmp22 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp21
  %tmp23 = load i8, i8* %tmp22, align 1
  %tmp24 = zext i8 %tmp23 to i32
  %tmp25 = shl nuw i32 %tmp24, 24
  %tmp26 = or i32 %tmp20, %tmp25
  ret i32 %tmp26
}

; Same as above with the first byte at index i + 1: expected to fold into one
; 32-bit load addressed as 13(base,index).
; i8* arg; i32 i;
; p = arg + 12;
; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl 13(%ecx,%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl %esi, %eax
; CHECK64-NEXT: movl 13(%rdi,%rax), %eax
; CHECK64-NEXT: retq
  %tmp = add nuw nsw i32 %i, 4
  %tmp2 = add nuw nsw i32 %i, 3
  %tmp3 = add nuw nsw i32 %i, 2
  %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp5 = add nuw nsw i32 %i, 1
  %tmp27 = zext i32 %tmp5 to i64
  %tmp28 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp27
  %tmp29 = load i8, i8* %tmp28, align 1
  %tmp30 = zext i8 %tmp29 to i32
  %tmp31 = zext i32 %tmp3 to i64
  %tmp32 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp31
  %tmp33 = load i8, i8* %tmp32, align 1
  %tmp34 = zext i8 %tmp33 to i32
  %tmp35 = shl nuw nsw i32 %tmp34, 8
  %tmp36 = or i32 %tmp35, %tmp30
  %tmp37 = zext i32 %tmp2 to i64
  %tmp38 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp37
  %tmp39 = load i8, i8* %tmp38, align 1
  %tmp40 = zext i8 %tmp39 to i32
  %tmp41 = shl nuw nsw i32 %tmp40, 16
  %tmp42 = or i32 %tmp36, %tmp41
  %tmp43 = zext i32 %tmp to i64
  %tmp44 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp43
  %tmp45 = load i8, i8* %tmp44, align 1
  %tmp46 = zext i8 %tmp45 to i32
  %tmp47 = shl nuw i32 %tmp46, 24
  %tmp48 = or i32 %tmp42, %tmp47
  ret i32 %tmp48
}

; i8* arg; i32 i;
;
; p0 = arg + i;
; p1 = arg + i + 1;
; p2 = arg + i + 2;
; p3 = arg + i + 3;
;
; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24)
;
; This test exercises zero and any extend loads as a part of load combine pattern.
; In order to fold the pattern above we need to reassociate the address computation
; first. By the time the address computation is reassociated loads are combined to
; zext and aext loads.
1014define i32 @load_i32_by_i8_zaext_loads(i8* %arg, i32 %arg1) { 1015; CHECK-LABEL: load_i32_by_i8_zaext_loads: 1016; CHECK: # %bb.0: 1017; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 1018; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx 1019; CHECK-NEXT: movl 12(%ecx,%eax), %eax 1020; CHECK-NEXT: retl 1021; 1022; CHECK64-LABEL: load_i32_by_i8_zaext_loads: 1023; CHECK64: # %bb.0: 1024; CHECK64-NEXT: movl %esi, %eax 1025; CHECK64-NEXT: movl 12(%rdi,%rax), %eax 1026; CHECK64-NEXT: retq 1027 %tmp = add nuw nsw i32 %arg1, 3 1028 %tmp2 = add nuw nsw i32 %arg1, 2 1029 %tmp3 = add nuw nsw i32 %arg1, 1 1030 %tmp4 = zext i32 %tmp to i64 1031 %tmp5 = zext i32 %tmp2 to i64 1032 %tmp6 = zext i32 %tmp3 to i64 1033 %tmp24 = getelementptr inbounds i8, i8* %arg, i64 %tmp4 1034 %tmp30 = getelementptr inbounds i8, i8* %arg, i64 %tmp5 1035 %tmp31 = getelementptr inbounds i8, i8* %arg, i64 %tmp6 1036 %tmp32 = getelementptr inbounds i8, i8* %arg, i64 12 1037 %tmp33 = zext i32 %arg1 to i64 1038 %tmp34 = getelementptr inbounds i8, i8* %tmp32, i64 %tmp33 1039 %tmp35 = load i8, i8* %tmp34, align 1 1040 %tmp36 = zext i8 %tmp35 to i32 1041 %tmp37 = getelementptr inbounds i8, i8* %tmp31, i64 12 1042 %tmp38 = load i8, i8* %tmp37, align 1 1043 %tmp39 = zext i8 %tmp38 to i32 1044 %tmp40 = shl nuw nsw i32 %tmp39, 8 1045 %tmp41 = or i32 %tmp40, %tmp36 1046 %tmp42 = getelementptr inbounds i8, i8* %tmp30, i64 12 1047 %tmp43 = load i8, i8* %tmp42, align 1 1048 %tmp44 = zext i8 %tmp43 to i32 1049 %tmp45 = shl nuw nsw i32 %tmp44, 16 1050 %tmp46 = or i32 %tmp41, %tmp45 1051 %tmp47 = getelementptr inbounds i8, i8* %tmp24, i64 12 1052 %tmp48 = load i8, i8* %tmp47, align 1 1053 %tmp49 = zext i8 %tmp48 to i32 1054 %tmp50 = shl nuw i32 %tmp49, 24 ; nuw only: a byte value >= 0x80 shifted by 24 sets bit 31, so nsw is not claimed 1055 %tmp51 = or i32 %tmp46, %tmp50 1056 ret i32 %tmp51 1057} 1058 1059; The same as load_i32_by_i8_zaext_loads but the last load is combined to 1060; a sext load. 
1061; 1062; i8* arg; i32 i; 1063; 1064; p0 = arg + i; 1065; p1 = arg + i + 1; 1066; p2 = arg + i + 2; 1067; p3 = arg + i + 3; 1068; 1069; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24) 1070define i32 @load_i32_by_i8_zsext_loads(i8* %arg, i32 %arg1) { 1071; CHECK-LABEL: load_i32_by_i8_zsext_loads: 1072; CHECK: # %bb.0: 1073; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 1074; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx 1075; CHECK-NEXT: movl 12(%ecx,%eax), %eax 1076; CHECK-NEXT: retl 1077; 1078; CHECK64-LABEL: load_i32_by_i8_zsext_loads: 1079; CHECK64: # %bb.0: 1080; CHECK64-NEXT: movl %esi, %eax 1081; CHECK64-NEXT: movl 12(%rdi,%rax), %eax 1082; CHECK64-NEXT: retq 1083 %tmp = add nuw nsw i32 %arg1, 3 1084 %tmp2 = add nuw nsw i32 %arg1, 2 1085 %tmp3 = add nuw nsw i32 %arg1, 1 1086 %tmp4 = zext i32 %tmp to i64 1087 %tmp5 = zext i32 %tmp2 to i64 1088 %tmp6 = zext i32 %tmp3 to i64 1089 %tmp24 = getelementptr inbounds i8, i8* %arg, i64 %tmp4 1090 %tmp30 = getelementptr inbounds i8, i8* %arg, i64 %tmp5 1091 %tmp31 = getelementptr inbounds i8, i8* %arg, i64 %tmp6 1092 %tmp32 = getelementptr inbounds i8, i8* %arg, i64 12 1093 %tmp33 = zext i32 %arg1 to i64 1094 %tmp34 = getelementptr inbounds i8, i8* %tmp32, i64 %tmp33 1095 %tmp35 = load i8, i8* %tmp34, align 1 1096 %tmp36 = zext i8 %tmp35 to i32 1097 %tmp37 = getelementptr inbounds i8, i8* %tmp31, i64 12 1098 %tmp38 = load i8, i8* %tmp37, align 1 1099 %tmp39 = zext i8 %tmp38 to i32 1100 %tmp40 = shl nuw nsw i32 %tmp39, 8 1101 %tmp41 = or i32 %tmp40, %tmp36 1102 %tmp42 = getelementptr inbounds i8, i8* %tmp30, i64 12 1103 %tmp43 = load i8, i8* %tmp42, align 1 1104 %tmp44 = zext i8 %tmp43 to i32 1105 %tmp45 = shl nuw nsw i32 %tmp44, 16 1106 %tmp46 = or i32 %tmp41, %tmp45 1107 %tmp47 = getelementptr inbounds i8, i8* %tmp24, i64 12 1108 %tmp48 = load i8, i8* %tmp47, align 1 1109 %tmp49 = sext i8 %tmp48 to i16 1110 %tmp50 = zext i16 %tmp49 to i32 1111 %tmp51 = shl nuw i32 %tmp50, 24 1112 %tmp52 = or i32 
%tmp46, %tmp51 1113 ret i32 %tmp52 1114} 1115 1116; i8* p; 1117; (i32) p[0] | ((i32) p[1] << 8) 1118define i32 @zext_load_i32_by_i8(i32* %arg) { 1119; CHECK-LABEL: zext_load_i32_by_i8: 1120; CHECK: # %bb.0: 1121; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 1122; CHECK-NEXT: movzbl (%eax), %ecx 1123; CHECK-NEXT: movzbl 1(%eax), %eax 1124; CHECK-NEXT: shll $8, %eax 1125; CHECK-NEXT: orl %ecx, %eax 1126; CHECK-NEXT: retl 1127; 1128; CHECK64-LABEL: zext_load_i32_by_i8: 1129; CHECK64: # %bb.0: 1130; CHECK64-NEXT: movzbl (%rdi), %ecx 1131; CHECK64-NEXT: movzbl 1(%rdi), %eax 1132; CHECK64-NEXT: shll $8, %eax 1133; CHECK64-NEXT: orl %ecx, %eax 1134; CHECK64-NEXT: retq 1135 %tmp = bitcast i32* %arg to i8* 1136 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 1137 %tmp2 = load i8, i8* %tmp1, align 1 1138 %tmp3 = zext i8 %tmp2 to i32 1139 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 1140 %tmp5 = load i8, i8* %tmp4, align 1 1141 %tmp6 = zext i8 %tmp5 to i32 1142 %tmp7 = shl nuw nsw i32 %tmp6, 8 1143 %tmp8 = or i32 %tmp7, %tmp3 1144 ret i32 %tmp8 1145} 1146 1147; i8* p; 1148; ((i32) p[0] << 8) | ((i32) p[1] << 16) 1149define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) { 1150; CHECK-LABEL: zext_load_i32_by_i8_shl_8: 1151; CHECK: # %bb.0: 1152; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 1153; CHECK-NEXT: movzbl (%eax), %ecx 1154; CHECK-NEXT: shll $8, %ecx 1155; CHECK-NEXT: movzbl 1(%eax), %eax 1156; CHECK-NEXT: shll $16, %eax 1157; CHECK-NEXT: orl %ecx, %eax 1158; CHECK-NEXT: retl 1159; 1160; CHECK64-LABEL: zext_load_i32_by_i8_shl_8: 1161; CHECK64: # %bb.0: 1162; CHECK64-NEXT: movzbl (%rdi), %ecx 1163; CHECK64-NEXT: shll $8, %ecx 1164; CHECK64-NEXT: movzbl 1(%rdi), %eax 1165; CHECK64-NEXT: shll $16, %eax 1166; CHECK64-NEXT: orl %ecx, %eax 1167; CHECK64-NEXT: retq 1168 %tmp = bitcast i32* %arg to i8* 1169 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 1170 %tmp2 = load i8, i8* %tmp1, align 1 1171 %tmp3 = zext i8 %tmp2 to i32 1172 %tmp30 = shl nuw nsw i32 %tmp3, 8 1173 %tmp4 = 
getelementptr inbounds i8, i8* %tmp, i32 1 1174 %tmp5 = load i8, i8* %tmp4, align 1 1175 %tmp6 = zext i8 %tmp5 to i32 1176 %tmp7 = shl nuw nsw i32 %tmp6, 16 1177 %tmp8 = or i32 %tmp7, %tmp30 1178 ret i32 %tmp8 1179} 1180 1181; i8* p; 1182; ((i32) p[0] << 16) | ((i32) p[1] << 24) 1183define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) { 1184; CHECK-LABEL: zext_load_i32_by_i8_shl_16: 1185; CHECK: # %bb.0: 1186; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 1187; CHECK-NEXT: movzbl (%eax), %ecx 1188; CHECK-NEXT: shll $16, %ecx 1189; CHECK-NEXT: movzbl 1(%eax), %eax 1190; CHECK-NEXT: shll $24, %eax 1191; CHECK-NEXT: orl %ecx, %eax 1192; CHECK-NEXT: retl 1193; 1194; CHECK64-LABEL: zext_load_i32_by_i8_shl_16: 1195; CHECK64: # %bb.0: 1196; CHECK64-NEXT: movzbl (%rdi), %ecx 1197; CHECK64-NEXT: shll $16, %ecx 1198; CHECK64-NEXT: movzbl 1(%rdi), %eax 1199; CHECK64-NEXT: shll $24, %eax 1200; CHECK64-NEXT: orl %ecx, %eax 1201; CHECK64-NEXT: retq 1202 %tmp = bitcast i32* %arg to i8* 1203 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 1204 %tmp2 = load i8, i8* %tmp1, align 1 1205 %tmp3 = zext i8 %tmp2 to i32 1206 %tmp30 = shl nuw nsw i32 %tmp3, 16 1207 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 1208 %tmp5 = load i8, i8* %tmp4, align 1 1209 %tmp6 = zext i8 %tmp5 to i32 1210 %tmp7 = shl nuw nsw i32 %tmp6, 24 1211 %tmp8 = or i32 %tmp7, %tmp30 1212 ret i32 %tmp8 1213} 1214 1215; i8* p; 1216; (i32) p[1] | ((i32) p[0] << 8) 1217define i32 @zext_load_i32_by_i8_bswap(i32* %arg) { 1218; CHECK-LABEL: zext_load_i32_by_i8_bswap: 1219; CHECK: # %bb.0: 1220; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 1221; CHECK-NEXT: movzbl 1(%eax), %ecx 1222; CHECK-NEXT: movzbl (%eax), %eax 1223; CHECK-NEXT: shll $8, %eax 1224; CHECK-NEXT: orl %ecx, %eax 1225; CHECK-NEXT: retl 1226; 1227; CHECK64-LABEL: zext_load_i32_by_i8_bswap: 1228; CHECK64: # %bb.0: 1229; CHECK64-NEXT: movzbl 1(%rdi), %ecx 1230; CHECK64-NEXT: movzbl (%rdi), %eax 1231; CHECK64-NEXT: shll $8, %eax 1232; CHECK64-NEXT: orl %ecx, %eax 1233; 
CHECK64-NEXT: retq 1234 %tmp = bitcast i32* %arg to i8* 1235 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 1236 %tmp2 = load i8, i8* %tmp1, align 1 1237 %tmp3 = zext i8 %tmp2 to i32 1238 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0 1239 %tmp5 = load i8, i8* %tmp4, align 1 1240 %tmp6 = zext i8 %tmp5 to i32 1241 %tmp7 = shl nuw nsw i32 %tmp6, 8 1242 %tmp8 = or i32 %tmp7, %tmp3 1243 ret i32 %tmp8 1244} 1245 1246; i8* p; 1247; ((i32) p[1] << 8) | ((i32) p[0] << 16) 1248define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) { 1249; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8: 1250; CHECK: # %bb.0: 1251; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 1252; CHECK-NEXT: movzbl 1(%eax), %ecx 1253; CHECK-NEXT: shll $8, %ecx 1254; CHECK-NEXT: movzbl (%eax), %eax 1255; CHECK-NEXT: shll $16, %eax 1256; CHECK-NEXT: orl %ecx, %eax 1257; CHECK-NEXT: retl 1258; 1259; CHECK64-LABEL: zext_load_i32_by_i8_bswap_shl_8: 1260; CHECK64: # %bb.0: 1261; CHECK64-NEXT: movzbl 1(%rdi), %ecx 1262; CHECK64-NEXT: shll $8, %ecx 1263; CHECK64-NEXT: movzbl (%rdi), %eax 1264; CHECK64-NEXT: shll $16, %eax 1265; CHECK64-NEXT: orl %ecx, %eax 1266; CHECK64-NEXT: retq 1267 %tmp = bitcast i32* %arg to i8* 1268 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 1269 %tmp2 = load i8, i8* %tmp1, align 1 1270 %tmp3 = zext i8 %tmp2 to i32 1271 %tmp30 = shl nuw nsw i32 %tmp3, 8 1272 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0 1273 %tmp5 = load i8, i8* %tmp4, align 1 1274 %tmp6 = zext i8 %tmp5 to i32 1275 %tmp7 = shl nuw nsw i32 %tmp6, 16 1276 %tmp8 = or i32 %tmp7, %tmp30 1277 ret i32 %tmp8 1278} 1279 1280; i8* p; 1281; ((i32) p[1] << 16) | ((i32) p[0] << 24) 1282define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) { 1283; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16: 1284; CHECK: # %bb.0: 1285; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 1286; CHECK-NEXT: movzbl 1(%eax), %ecx 1287; CHECK-NEXT: shll $16, %ecx 1288; CHECK-NEXT: movzbl (%eax), %eax 1289; CHECK-NEXT: shll $24, %eax 1290; CHECK-NEXT: orl 
%ecx, %eax 1291; CHECK-NEXT: retl 1292; 1293; CHECK64-LABEL: zext_load_i32_by_i8_bswap_shl_16: 1294; CHECK64: # %bb.0: 1295; CHECK64-NEXT: movzbl 1(%rdi), %ecx 1296; CHECK64-NEXT: shll $16, %ecx 1297; CHECK64-NEXT: movzbl (%rdi), %eax 1298; CHECK64-NEXT: shll $24, %eax 1299; CHECK64-NEXT: orl %ecx, %eax 1300; CHECK64-NEXT: retq 1301 %tmp = bitcast i32* %arg to i8* 1302 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 1303 %tmp2 = load i8, i8* %tmp1, align 1 1304 %tmp3 = zext i8 %tmp2 to i32 1305 %tmp30 = shl nuw nsw i32 %tmp3, 16 1306 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0 1307 %tmp5 = load i8, i8* %tmp4, align 1 1308 %tmp6 = zext i8 %tmp5 to i32 1309 %tmp7 = shl nuw nsw i32 %tmp6, 24 1310 %tmp8 = or i32 %tmp7, %tmp30 1311 ret i32 %tmp8 1312} 1313