1; RUN: llc < %s -o - -mtriple=x86_64-linux | FileCheck %s
2; RUN: llc < %s -o - -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
3; PR4891
4
5; Both loads should happen before either store.
6
7; CHECK: movl  (%rdi), %[[R1:...]]
8; CHECK: movl  (%rsi), %[[R2:...]]
9; CHECK: movl  %[[R2]], (%rdi)
10; CHECK: movl  %[[R1]], (%rsi)
11
12; WIN64: movl  (%rcx), %[[R1:...]]
13; WIN64: movl  (%rdx), %[[R2:...]]
14; WIN64: movl  %[[R2]], (%rcx)
15; WIN64: movl  %[[R1]], (%rdx)
16
; short2_int_swap(%b, %c): exchange the 32 bits stored at %b with the 32 bits
; stored at %c.
;
;   %b  pointer to a <2 x i16> vector, accessed with 2-byte alignment
;   %c  pointer to an i32, accessed with 4-byte alignment
;
; Both pointers are marked nocapture and the function is nounwind; it returns
; nothing and only communicates through the two stores below.
;
; What the test pins down: the FileCheck patterns at the top of this file
; require the two movl loads (from the first and second argument registers)
; to be matched before the two movl stores, for both the x86_64-linux and the
; x86_64-win32 run of llc.
17define void @short2_int_swap(<2 x i16>* nocapture %b, i32* nocapture %c) nounwind {
18entry:
; Read both memory locations first; %0 is the old contents of %b and %1 is the
; old contents of %c.
19  %0 = load <2 x i16>* %b, align 2                ; <<2 x i16>> [#uses=1]
20  %1 = load i32* %c, align 4                      ; <i32> [#uses=1]
; Reinterpret the i32 read from %c as <2 x i16> and write it to %b.
21  %tmp1 = bitcast i32 %1 to <2 x i16>             ; <<2 x i16>> [#uses=1]
22  store <2 x i16> %tmp1, <2 x i16>* %b, align 2
; Reinterpret the vector read from %b as a one-element i32 vector, pull out
; element 0 as a scalar i32, and write it to %c. %0 was loaded before the store
; to %b above, so this is the original value of %b, not the one just written.
23  %tmp5 = bitcast <2 x i16> %0 to <1 x i32>       ; <<1 x i32>> [#uses=1]
24  %tmp3 = extractelement <1 x i32> %tmp5, i32 0   ; <i32> [#uses=1]
25  store i32 %tmp3, i32* %c, align 4
26  ret void
27}
28