1; RUN: llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu -mattr=+altivec < %s | FileCheck %s
2
3;; This test ensures that MergeConsecutiveStores does not attempt to
4;; merge stores or loads when doing so would result in unaligned
5;; memory operations (unless the target supports those, e.g. X86).
6
;; This issue happens in other situations for other targets, but PPC
;; with Altivec extensions was chosen for the test because it does not
;; support unaligned access with AltiVec instructions. If the 4
;; loads/stores get merged into a v4i32 vector type, severely bad code
;; gets generated: it painstakingly copies the values to a temporary
;; location on the stack, with vector ops, in order to then use
;; integer ops to load from the temporary stack location and store to
;; the final location. Yuck!
15
;; Four consecutive i32 fields: 16 bytes in total, i.e. the same size
;; as a single v4i32 vector.
%struct.X = type { i32, i32, i32, i32 }

;; Source (@fx) and destination (@fy) objects for the copy below.
;; Both are declared with only 4-byte alignment.
@fx = common global %struct.X zeroinitializer, align 4
@fy = common global %struct.X zeroinitializer, align 4
20
21;; In this test case, lvx and stvx instructions should NOT be
22;; generated, as the alignment is not sufficient for it to be
23;; worthwhile.
24
25;; CHECK-LABEL: f:
26;; CHECK:      lwzu
27;; CHECK-NEXT: lwz
28;; CHECK-NEXT: lwz
29;; CHECK-NEXT: lwz
30;; CHECK-NEXT: stwu
31;; CHECK-NEXT: stw
32;; CHECK-NEXT: stw
33;; CHECK-NEXT: stw
34;; CHECK-NEXT: blr
define void @f() {
entry:
  ;; Read all four fields of @fx first; every access is only 4-byte
  ;; aligned, matching the alignment of the global itself.
  %x0 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 0), align 4
  %x1 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 1), align 4
  %x2 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 2), align 4
  %x3 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 3), align 4
  ;; Then write them, field by field, into the matching slots of @fy.
  store i32 %x0, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 0), align 4
  store i32 %x1, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 1), align 4
  store i32 %x2, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 2), align 4
  store i32 %x3, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 3), align 4
  ret void
}
47
@gx = common global %struct.X zeroinitializer, align 16
@gy = common global %struct.X zeroinitializer, align 16

;; In this test, lvx and stvx instructions SHOULD be generated, as
;; the 16-byte alignment of the new load/store is acceptable.
;; CHECK-LABEL: g:
;; CHECK: lvx
;; CHECK: stvx
;; CHECK: blr
define void @g() {
entry:
  ;; Copy @gx to @gy field by field. Both globals are 16-byte aligned,
  ;; so the "align 16" on the first load and the first store is
  ;; actually true of the underlying objects; the remaining fields sit
  ;; at 4-byte offsets and are therefore only 4-byte aligned.
  %0 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @gx, i32 0, i32 0), align 16
  %1 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @gx, i32 0, i32 1), align 4
  %2 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @gx, i32 0, i32 2), align 4
  %3 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @gx, i32 0, i32 3), align 4
  store i32 %0, i32* getelementptr inbounds (%struct.X, %struct.X* @gy, i32 0, i32 0), align 16
  store i32 %1, i32* getelementptr inbounds (%struct.X, %struct.X* @gy, i32 0, i32 1), align 4
  store i32 %2, i32* getelementptr inbounds (%struct.X, %struct.X* @gy, i32 0, i32 2), align 4
  store i32 %3, i32* getelementptr inbounds (%struct.X, %struct.X* @gy, i32 0, i32 3), align 4
  ret void
}
69