aboutsummaryrefslogtreecommitdiff
path: root/compiler-rt/lib/builtins/arm/udivsi3.S
diff options
context:
space:
mode:
Diffstat (limited to 'compiler-rt/lib/builtins/arm/udivsi3.S')
-rw-r--r--compiler-rt/lib/builtins/arm/udivsi3.S261
1 files changed, 261 insertions, 0 deletions
diff --git a/compiler-rt/lib/builtins/arm/udivsi3.S b/compiler-rt/lib/builtins/arm/udivsi3.S
new file mode 100644
index 000000000000..9b1b035b33d6
--- /dev/null
+++ b/compiler-rt/lib/builtins/arm/udivsi3.S
@@ -0,0 +1,261 @@
+//===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the __udivsi3 (32-bit unsigned integer divide)
+// function for the ARM 32-bit architecture.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+ .syntax unified
+ .text
+
+DEFINE_CODE_STATE
+
+ .p2align 2
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3)
+
+@ unsigned int __udivsi3(unsigned int divident, unsigned int divisor)
+@ Calculate and return the quotient of the (unsigned) division.
+
+DEFINE_COMPILERRT_FUNCTION(__udivsi3)
+#if __ARM_ARCH_EXT_IDIV__
+ tst r1, r1
+ beq LOCAL_LABEL(divby0)
+ udiv r0, r0, r1
+ bx lr
+
+LOCAL_LABEL(divby0):
+ mov r0, #0
+# ifdef __ARM_EABI__
+ b __aeabi_idiv0
+# else
+ JMP(lr)
+# endif
+
+#else // ! __ARM_ARCH_EXT_IDIV__
+ cmp r1, #1
+ bcc LOCAL_LABEL(divby0)
+#if defined(USE_THUMB_1)
+ bne LOCAL_LABEL(num_neq_denom)
+ JMP(lr)
+LOCAL_LABEL(num_neq_denom):
+#else
+ IT(eq)
+ JMPc(lr, eq)
+#endif
+ cmp r0, r1
+#if defined(USE_THUMB_1)
+ bhs LOCAL_LABEL(num_ge_denom)
+ movs r0, #0
+ JMP(lr)
+LOCAL_LABEL(num_ge_denom):
+#else
+ ITT(cc)
+ movcc r0, #0
+ JMPc(lr, cc)
+#endif
+
+ // Implement division using binary long division algorithm.
+ //
+ // r0 is the numerator, r1 the denominator.
+ //
+ // The code before JMP computes the correct shift I, so that
+ // r0 and (r1 << I) have the highest bit set in the same position.
+ // At the time of JMP, ip := .Ldiv0block - 12 * I.
+ // This depends on the fixed instruction size of block.
+ // For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes.
+ //
+ // block(shift) implements the test-and-update-quotient core.
+ // It assumes (r0 << shift) can be computed without overflow and
+ // that (r0 << shift) < 2 * r1. The quotient is stored in r3.
+
+# if defined(__ARM_FEATURE_CLZ)
+ clz ip, r0
+ clz r3, r1
+ // r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3.
+ sub r3, r3, ip
+# if defined(USE_THUMB_2)
+ adr ip, LOCAL_LABEL(div0block) + 1
+ sub ip, ip, r3, lsl #1
+# else
+ adr ip, LOCAL_LABEL(div0block)
+# endif
+ sub ip, ip, r3, lsl #2
+ sub ip, ip, r3, lsl #3
+ mov r3, #0
+ bx ip
+# else // No CLZ Feature
+# if defined(USE_THUMB_2)
+# error THUMB mode requires CLZ or UDIV
+# endif
+# if defined(USE_THUMB_1)
+# define BLOCK_SIZE 10
+# else
+# define BLOCK_SIZE 12
+# endif
+
+ mov r2, r0
+# if defined(USE_THUMB_1)
+ mov ip, r0
+ adr r0, LOCAL_LABEL(div0block)
+ adds r0, #1
+# else
+ adr ip, LOCAL_LABEL(div0block)
+# endif
+ lsrs r3, r2, #16
+ cmp r3, r1
+# if defined(USE_THUMB_1)
+ blo LOCAL_LABEL(skip_16)
+ movs r2, r3
+ subs r0, r0, #(16 * BLOCK_SIZE)
+LOCAL_LABEL(skip_16):
+# else
+ movhs r2, r3
+ subhs ip, ip, #(16 * BLOCK_SIZE)
+# endif
+
+ lsrs r3, r2, #8
+ cmp r3, r1
+# if defined(USE_THUMB_1)
+ blo LOCAL_LABEL(skip_8)
+ movs r2, r3
+ subs r0, r0, #(8 * BLOCK_SIZE)
+LOCAL_LABEL(skip_8):
+# else
+ movhs r2, r3
+ subhs ip, ip, #(8 * BLOCK_SIZE)
+# endif
+
+ lsrs r3, r2, #4
+ cmp r3, r1
+# if defined(USE_THUMB_1)
+ blo LOCAL_LABEL(skip_4)
+ movs r2, r3
+ subs r0, r0, #(4 * BLOCK_SIZE)
+LOCAL_LABEL(skip_4):
+# else
+ movhs r2, r3
+ subhs ip, #(4 * BLOCK_SIZE)
+# endif
+
+ lsrs r3, r2, #2
+ cmp r3, r1
+# if defined(USE_THUMB_1)
+ blo LOCAL_LABEL(skip_2)
+ movs r2, r3
+ subs r0, r0, #(2 * BLOCK_SIZE)
+LOCAL_LABEL(skip_2):
+# else
+ movhs r2, r3
+ subhs ip, ip, #(2 * BLOCK_SIZE)
+# endif
+
+ // Last block, no need to update r2 or r3.
+# if defined(USE_THUMB_1)
+ lsrs r3, r2, #1
+ cmp r3, r1
+ blo LOCAL_LABEL(skip_1)
+ subs r0, r0, #(1 * BLOCK_SIZE)
+LOCAL_LABEL(skip_1):
+ movs r2, r0
+ mov r0, ip
+ movs r3, #0
+ JMP (r2)
+
+# else
+ cmp r1, r2, lsr #1
+ subls ip, ip, #(1 * BLOCK_SIZE)
+
+ movs r3, #0
+
+ JMP(ip)
+# endif
+# endif // __ARM_FEATURE_CLZ
+
+
+#define IMM #
+ // due to the range limit of branch in Thumb1, we have to place the
+ // block closer
+LOCAL_LABEL(divby0):
+ movs r0, #0
+# if defined(__ARM_EABI__)
+ push {r7, lr}
+ bl __aeabi_idiv0 // due to relocation limit, can't use b.
+ pop {r7, pc}
+# else
+ JMP(lr)
+# endif
+
+
+#if defined(USE_THUMB_1)
+#define block(shift) \
+ lsls r2, r1, IMM shift; \
+ cmp r0, r2; \
+ blo LOCAL_LABEL(block_skip_##shift); \
+ subs r0, r0, r2; \
+ LOCAL_LABEL(block_skip_##shift) :; \
+ adcs r3, r3 // same as ((r3 << 1) | Carry). Carry is set if r0 >= r2.
+
+ // TODO: if current location counter is not not word aligned, we don't
+ // need the .p2align and nop
+ // Label div0block must be word-aligned. First align block 31
+ .p2align 2
+ nop // Padding to align div0block as 31 blocks = 310 bytes
+
+#else
+#define block(shift) \
+ cmp r0, r1, lsl IMM shift; \
+ ITT(hs); \
+ WIDE(addhs) r3, r3, IMM (1 << shift); \
+ WIDE(subhs) r0, r0, r1, lsl IMM shift
+#endif
+
+ block(31)
+ block(30)
+ block(29)
+ block(28)
+ block(27)
+ block(26)
+ block(25)
+ block(24)
+ block(23)
+ block(22)
+ block(21)
+ block(20)
+ block(19)
+ block(18)
+ block(17)
+ block(16)
+ block(15)
+ block(14)
+ block(13)
+ block(12)
+ block(11)
+ block(10)
+ block(9)
+ block(8)
+ block(7)
+ block(6)
+ block(5)
+ block(4)
+ block(3)
+ block(2)
+ block(1)
+LOCAL_LABEL(div0block):
+ block(0)
+
+ mov r0, r3
+ JMP(lr)
+#endif // __ARM_ARCH_EXT_IDIV__
+
+END_COMPILERRT_FUNCTION(__udivsi3)
+
+NO_EXEC_STACK_DIRECTIVE
+