Diffstat (limited to 'test/CodeGen/PowerPC/MergeConsecutiveStores.ll')
-rw-r--r--  test/CodeGen/PowerPC/MergeConsecutiveStores.ll  79
1 file changed, 79 insertions, 0 deletions
diff --git a/test/CodeGen/PowerPC/MergeConsecutiveStores.ll b/test/CodeGen/PowerPC/MergeConsecutiveStores.ll
new file mode 100644
index 000000000000..977b3b701cce
--- /dev/null
+++ b/test/CodeGen/PowerPC/MergeConsecutiveStores.ll
@@ -0,0 +1,79 @@
+; RUN: llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu -mattr=+altivec < %s | FileCheck %s
+
+;; This test ensures that MergeConsecutiveStores does not attempt to
+;; merge stores or loads when doing so would result in unaligned
+;; memory operations (unless the target supports those, e.g. X86).
+
+;; This issue happens in other situations and for other targets, but
+;; PPC with AltiVec extensions was chosen for the test because AltiVec
+;; instructions do not support unaligned accesses. If the four
+;; loads/stores are merged into a v4i32 vector type, severely bad code
+;; is generated: it painstakingly copies the values to a temporary
+;; stack location using vector ops, only to then use integer ops to
+;; load from that temporary location and store to the final
+;; destination. Yuck!
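+;;
+;; The unwanted merge would conceptually rewrite the four scalar
+;; accesses below into a single vector access, roughly like this
+;; sketch (illustrative IR only, not actual output of the combine):
+;;
+;;   %v = load <4 x i32>, <4 x i32>* bitcast (%struct.X* @fx to <4 x i32>*), align 4
+;;   store <4 x i32> %v, <4 x i32>* bitcast (%struct.X* @fy to <4 x i32>*), align 4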
+
+%struct.X = type { i32, i32, i32, i32 }
+
+@fx = common global %struct.X zeroinitializer, align 4
+@fy = common global %struct.X zeroinitializer, align 4
+
+;; In this test case, lvx and stvx instructions should NOT be
+;; generated: @fx and @fy are only 4-byte aligned, which is not
+;; sufficient for the 16-byte-aligned AltiVec load/store instructions.
+
+;; CHECK-LABEL: f:
+;; CHECK: lwzu
+;; CHECK-NEXT: lwz
+;; CHECK-NEXT: lwz
+;; CHECK-NEXT: lwz
+;; CHECK-NEXT: stwu
+;; CHECK-NEXT: stw
+;; CHECK-NEXT: stw
+;; CHECK-NEXT: stw
+;; CHECK-NEXT: blr
+define void @f() {
+entry:
+ %0 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 0), align 4
+ %1 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 1), align 4
+ %2 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 2), align 4
+ %3 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 3), align 4
+ store i32 %0, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 0), align 4
+ store i32 %1, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 1), align 4
+ store i32 %2, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 2), align 4
+ store i32 %3, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 3), align 4
+ ret void
+}
+
+@gx = common global %struct.X zeroinitializer, align 16
+@gy = common global %struct.X zeroinitializer, align 16
+
+;; In this test, lvx and stvx instructions SHOULD be generated: the
+;; 16-byte alignment of @gx and @gy makes the merged load/store legal.
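+;;
+;; Conceptually, the merged form is this (illustrative sketch only):
+;;   %v = load <4 x i32>, <4 x i32>* bitcast (%struct.X* @gx to <4 x i32>*), align 16
+;;   store <4 x i32> %v, <4 x i32>* bitcast (%struct.X* @gy to <4 x i32>*), align 16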
+;; CHECK-LABEL: g:
+;; CHECK: lvx
+;; CHECK: stvx
+;; CHECK: blr
+define void @g() {
+entry:
+ %0 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @gx, i32 0, i32 0), align 16
+ %1 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @gx, i32 0, i32 1), align 4
+ %2 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @gx, i32 0, i32 2), align 4
+ %3 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @gx, i32 0, i32 3), align 4
+ store i32 %0, i32* getelementptr inbounds (%struct.X, %struct.X* @gy, i32 0, i32 0), align 16
+ store i32 %1, i32* getelementptr inbounds (%struct.X, %struct.X* @gy, i32 0, i32 1), align 4
+ store i32 %2, i32* getelementptr inbounds (%struct.X, %struct.X* @gy, i32 0, i32 2), align 4
+ store i32 %3, i32* getelementptr inbounds (%struct.X, %struct.X* @gy, i32 0, i32 3), align 4
+ ret void
+}