diff options
Diffstat (limited to 'contrib/compiler-rt/lib/builtins/sparc64/divsi3.S')
-rw-r--r-- | contrib/compiler-rt/lib/builtins/sparc64/divsi3.S | 330 |
1 files changed, 330 insertions, 0 deletions
diff --git a/contrib/compiler-rt/lib/builtins/sparc64/divsi3.S b/contrib/compiler-rt/lib/builtins/sparc64/divsi3.S new file mode 100644 index 000000000000..70fc1f407f48 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/sparc64/divsi3.S @@ -0,0 +1,330 @@ +/* + * This m4 code has been taken from The SPARC Architecture Manual Version 8. + */ +/* + * Division/Remainder + * + * Input is: + * dividend -- the thing being divided + * divisor -- how many ways to divide it + * Important parameters: + * N -- how many bits per iteration we try to get + * as our current guess: + * WORDSIZE -- how many bits altogether we're talking about: + * obviously: + * A derived constant: + * TOPBITS -- how many bits are in the top "decade" of a number: + * + * Important variables are: + * Q -- the partial quotient under development -- initially 0 + * R -- the remainder so far -- initially == the dividend + * ITER -- number of iterations of the main division loop which will + * be required. Equal to CEIL( lg2(quotient)/4 ) + * Note that this is log_base_(2ˆ4) of the quotient. + * V -- the current comparand -- initially divisor*2ˆ(ITER*4-1) + * Cost: + * current estimate for non-large dividend is + * CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C + * a large dividend is one greater than 2ˆ(31-4 ) and takes a + * different path, as the upper bits of the quotient must be developed + * one bit at a time. + * This uses the m4 and cpp macro preprocessors. + */ +/* + * This is the recursive definition of how we develop quotient digits. + * It takes three important parameters: + * $1 -- the current depth, 1<=$1<=4 + * $2 -- the current accumulation of quotient bits + * 4 -- max depth + * We add a new bit to $2 and either recurse or insert the bits in the quotient. + * Dynamic input: + * %o3 -- current remainder + * %o2 -- current quotient + * %o5 -- current comparand + * cc -- set on current value of %o3 + * Dynamic output: + * %o3', %o2', %o5', cc' + */ +#include "../assembly.h" +.text + .align 32 +DEFINE_COMPILERRT_FUNCTION(__udivsi3) + b divide + mov 0,%g3 ! result always nonnegative +.text + .align 32 +DEFINE_COMPILERRT_FUNCTION(__divsi3) + orcc %o1,%o0,%g0 ! are either %o0 or %o1 negative + bge divide ! if not, skip this junk + xor %o1,%o0,%g3 ! record sign of result in sign of %g3 + tst %o1 + bge 2f + tst %o0 + ! %o1 < 0 + bge divide + neg %o1 + 2: + ! %o0 < 0 + neg %o0 + ! FALL THROUGH +divide: + ! Compute size of quotient, scale comparand. + orcc %o1,%g0,%o5 ! movcc %o1,%o5 + te 2 ! if %o1 = 0 + mov %o0,%o3 + mov 0,%o2 + sethi %hi(1<<(32-4 -1)),%g1 + cmp %o3,%g1 + blu not_really_big + mov 0,%o4 + ! + ! Here, the %o0 is >= 2ˆ(31-4) or so. We must be careful here, + ! as our usual 4-at-a-shot divide step will cause overflow and havoc. + ! The total number of bits in the result here is 4*%o4+%g2, where + ! %g2 <= 4. + ! Compute %o4 in an unorthodox manner: know we need to Shift %o5 into +! the top decade: so don't even bother to compare to %o3. +1: + cmp %o5,%g1 + bgeu 3f + mov 1,%g2 + sll %o5,4,%o5 + b 1b + inc %o4 +! Now compute %g2 +2: addcc %o5,%o5,%o5 + bcc not_too_big + add %g2,1,%g2 + ! We're here if the %o1 overflowed when Shifting. + ! This means that %o3 has the high-order bit set. + ! Restore %o5 and subtract from %o3. + sll %g1,4 ,%g1 ! high order bit + srl %o5,1,%o5 ! rest of %o5 + add %o5,%g1,%o5 + b do_single_div + dec %g2 +not_too_big: +3: cmp %o5,%o3 + blu 2b + nop + be do_single_div + nop +! %o5 > %o3: went too far: back up 1 step +! srl %o5,1,%o5 +! dec %g2 +! do single-bit divide steps +! +! We have to be careful here. We know that %o3 >= %o5, so we can do the +! first divide step without thinking. BUT, the others are conditional, +! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- +! order bit set in the first step, just falling into the regular +! division loop will mess up the first time around. +! So we unroll slightly... +do_single_div: + deccc %g2 + bl end_regular_divide + nop + sub %o3,%o5,%o3 + mov 1,%o2 + b,a end_single_divloop + ! EMPTY +single_divloop: + sll %o2,1,%o2 + bl 1f + srl %o5,1,%o5 + ! %o3 >= 0 + sub %o3,%o5,%o3 + b 2f + inc %o2 + 1: ! %o3 < 0 + add %o3,%o5,%o3 + dec %o2 + 2: + end_single_divloop: + deccc %g2 + bge single_divloop + tst %o3 + b,a end_regular_divide + ! EMPTY +not_really_big: +1: + sll %o5,4,%o5 + cmp %o5,%o3 + bleu 1b + inccc %o4 + be got_result + dec %o4 +do_regular_divide: + ! Do the main division iteration + tst %o3 + ! Fall through into divide loop +divloop: + sll %o2,4,%o2 + !depth 1, accumulated bits 0 + bl L.1.16 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 2, accumulated bits 1 + bl L.2.17 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 3, accumulated bits 3 + bl L.3.19 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 4, accumulated bits 7 + bl L.4.23 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (7*2+1), %o2 +L.4.23: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (7*2-1), %o2 +L.3.19: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 4, accumulated bits 5 + bl L.4.21 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (5*2+1), %o2 +L.4.21: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (5*2-1), %o2 +L.2.17: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 3, accumulated bits 1 + bl L.3.17 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 4, accumulated bits 3 + bl L.4.19 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (3*2+1), %o2 +L.4.19: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (3*2-1), %o2 +L.3.17: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 4, accumulated bits 1 + bl L.4.17 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (1*2+1), %o2 +L.4.17: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (1*2-1), %o2 +L.1.16: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 2, accumulated bits -1 + bl L.2.15 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 3, accumulated bits -1 + bl L.3.15 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 4, accumulated bits -1 + bl L.4.15 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2+1), %o2 +L.4.15: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2-1), %o2 +L.3.15: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 4, accumulated bits -3 + bl L.4.13 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2+1), %o2 +L.4.13: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2-1), %o2 +L.2.15: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 3, accumulated bits -3 + bl L.3.13 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 4, accumulated bits -5 + bl L.4.11 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2+1), %o2 +L.4.11: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2-1), %o2 +L.3.13: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 4, accumulated bits -7 + bl L.4.9 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2+1), %o2 +L.4.9: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2-1), %o2 + 9: +end_regular_divide: + deccc %o4 + bge divloop + tst %o3 + bl,a got_result + ! non-restoring fixup if remainder < 0, otherwise annulled + dec %o2 +got_result: + tst %g3 + bl,a 1f + ! negate for answer < 0, otherwise annulled + neg %o2,%o2 ! %o2 <- -%o2 +1: + retl ! leaf-routine return + mov %o2,%o0 ! quotient <- %o2 |