Diffstat (limited to 'test/CodeGen/PowerPC/MergeConsecutiveStores.ll')
-rw-r--r-- | test/CodeGen/PowerPC/MergeConsecutiveStores.ll | 68 |
1 files changed, 68 insertions, 0 deletions
diff --git a/test/CodeGen/PowerPC/MergeConsecutiveStores.ll b/test/CodeGen/PowerPC/MergeConsecutiveStores.ll
new file mode 100644
index 000000000000..977b3b701cce
--- /dev/null
+++ b/test/CodeGen/PowerPC/MergeConsecutiveStores.ll
@@ -0,0 +1,68 @@
+; RUN: llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu -mattr=+altivec < %s | FileCheck %s
+
+;; This test ensures that MergeConsecutiveStores does not attempt to
+;; merge stores or loads when doing so would result in unaligned
+;; memory operations (unless the target supports those, e.g. X86).
+
+;; This issue happens in other situations for other targets, but PPC
+;; with AltiVec extensions was chosen for the test because it does not
+;; support unaligned access with AltiVec instructions. If the 4
+;; loads/stores get merged to a v4i32 vector type, severely bad code
+;; gets generated: it painstakingly copies the values to a temporary
+;; location on the stack, with vector ops, in order to then use
+;; integer ops to load from the temporary stack location and store to
+;; the final location. Yuck!
+
+%struct.X = type { i32, i32, i32, i32 }
+
+@fx = common global %struct.X zeroinitializer, align 4
+@fy = common global %struct.X zeroinitializer, align 4
+
+;; In this test case, lvx and stvx instructions should NOT be
+;; generated, as the alignment is not sufficient for it to be
+;; worthwhile.
+
+;; CHECK-LABEL: f:
+;; CHECK: lwzu
+;; CHECK-NEXT: lwz
+;; CHECK-NEXT: lwz
+;; CHECK-NEXT: lwz
+;; CHECK-NEXT: stwu
+;; CHECK-NEXT: stw
+;; CHECK-NEXT: stw
+;; CHECK-NEXT: stw
+;; CHECK-NEXT: blr
+define void @f() {
+entry:
+  %0 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 0), align 4
+  %1 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 1), align 4
+  %2 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 2), align 4
+  %3 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 3), align 4
+  store i32 %0, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 0), align 4
+  store i32 %1, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 1), align 4
+  store i32 %2, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 2), align 4
+  store i32 %3, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 3), align 4
+  ret void
+}
+
+@gx = common global %struct.X zeroinitializer, align 16
+@gy = common global %struct.X zeroinitializer, align 16
+
+;; In this test, lvx and stvx instructions SHOULD be generated, as
+;; the 16-byte alignment of the new load/store is acceptable.
+;; CHECK-LABEL: g:
+;; CHECK: lvx
+;; CHECK: stvx
+;; CHECK: blr
+define void @g() {
+entry:
+  %0 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 0), align 16
+  %1 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 1), align 4
+  %2 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 2), align 4
+  %3 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 3), align 4
+  store i32 %0, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 0), align 16
+  store i32 %1, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 1), align 4
+  store i32 %2, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 2), align 4
+  store i32 %3, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 3), align 4
+  ret void
+}
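For reference, below is a minimal sketch of the merged form that store merging aims to produce when alignment permits; it is not part of this commit, and the @hx/@hy globals and @h function are hypothetical names chosen for illustration. The four consecutive i32 loads/stores collapse into a single v4i32 load/store pair, which on PPC with AltiVec can lower to lvx/stvx only when the access is 16-byte aligned.

; Hypothetical merged form (illustration only, not in the test file).
%struct.X = type { i32, i32, i32, i32 }

@hx = common global %struct.X zeroinitializer, align 16
@hy = common global %struct.X zeroinitializer, align 16

define void @h() {
entry:
  ; Reinterpret the four-i32 struct as a single v4i32 value.
  %src = bitcast %struct.X* @hx to <4 x i32>*
  %dst = bitcast %struct.X* @hy to <4 x i32>*
  ; One aligned vector load/store replaces four scalar ones;
  ; with align 16 this can become lvx/stvx.
  %v = load <4 x i32>, <4 x i32>* %src, align 16
  store <4 x i32> %v, <4 x i32>* %dst, align 16
  ret void
}

If the same v4i32 access were only 4-byte aligned, AltiVec could not perform it directly, which is what produces the stack-copy sequence the test's comment describes and why @f must keep its scalar lwz/stw form.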