aboutsummaryrefslogtreecommitdiff
path: root/contrib/gcc/config/ia64/lib1funcs.asm
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/gcc/config/ia64/lib1funcs.asm')
-rw-r--r--contrib/gcc/config/ia64/lib1funcs.asm794
1 files changed, 0 insertions, 794 deletions
diff --git a/contrib/gcc/config/ia64/lib1funcs.asm b/contrib/gcc/config/ia64/lib1funcs.asm
deleted file mode 100644
index 245a8bb1595a..000000000000
--- a/contrib/gcc/config/ia64/lib1funcs.asm
+++ /dev/null
@@ -1,794 +0,0 @@
-/* Copyright (C) 2000, 2001, 2003, 2005 Free Software Foundation, Inc.
- Contributed by James E. Wilson <wilson@cygnus.com>.
-
- This file is part of GCC.
-
- GCC is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
-
- GCC is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with GCC; see the file COPYING. If not, write to
- the Free Software Foundation, 51 Franklin Street, Fifth Floor,
- Boston, MA 02110-1301, USA. */
-
-/* As a special exception, if you link this library with other files,
- some of which are compiled with GCC, to produce an executable,
- this library does not by itself cause the resulting executable
- to be covered by the GNU General Public License.
- This exception does not however invalidate any other reasons why
- the executable file might be covered by the GNU General Public License. */
-
-#ifdef L__divxf3
-// __divxf3: compute an 80-bit IEEE double-extended quotient farg0 / farg1.
-// The result is returned in fret0; f10-f14 and p6/p7 are used as scratch.
-// From the Intel IA-64 Optimization Guide, choose the minimum latency
-// alternative.
-//
-// farg0 holds the dividend (a). farg1 holds the divisor (b).
-//
-// __divtf3 is an alternate symbol name for backward compatibility.
-
- .text
- .align 16
- .global __divxf3
- .global __divtf3
- .proc __divxf3
-__divxf3:
-__divtf3:
- cmp.eq p7, p0 = r0, r0                 // p7 = 1 (r0 == r0): default to the "frcpa is final" path
- frcpa.s0 f10, p6 = farg0, farg1        // f10 = y0 ~ 1/b with p6 set, else f10 is the final result
- ;;
-(p6) cmp.ne p7, p0 = r0, r0             // refinement path: clear p7, so exactly one of p6/p7 is set
- .pred.rel.mutex p6, p7
-(p6) fnma.s1 f11 = farg1, f10, f1       // e0 = 1 - b*y0
-(p6) fma.s1 f12 = farg0, f10, f0        // q0 = a*y0
- ;;
-(p6) fma.s1 f13 = f11, f11, f0          // e0^2
-(p6) fma.s1 f14 = f11, f11, f11         // e0^2 + e0
- ;;
-(p6) fma.s1 f11 = f13, f13, f11         // e0^4 + e0
-(p6) fma.s1 f13 = f14, f10, f10         // y1 = (e0^2 + e0)*y0 + y0
- ;;
-(p6) fma.s1 f10 = f13, f11, f10         // y2 = y1*(e0^4 + e0) + y0
-(p6) fnma.s1 f11 = farg1, f12, farg0    // r0 = a - b*q0
- ;;
-(p6) fma.s1 f11 = f11, f10, f12         // q1 = r0*y2 + q0
-(p6) fnma.s1 f12 = farg1, f10, f1       // e1 = 1 - b*y2
- ;;
-(p6) fma.s1 f10 = f12, f10, f10         // y3 = e1*y2 + y2
-(p6) fnma.s1 f12 = farg1, f11, farg0    // r1 = a - b*q1
- ;;
-(p6) fma.s0 fret0 = f12, f10, f11       // result = r1*y3 + q1, computed under status field 0
-(p7) mov fret0 = f10                    // no refinement: return what frcpa produced
- br.ret.sptk rp
- .endp __divxf3
-#endif
-
-#ifdef L__divdf3
-// __divdf3: compute a 64-bit IEEE double quotient farg0 / farg1 in fret0.
-// Scratch: f10-f13 and p6/p7; f8 is also overwritten as a temporary.
-// From the Intel IA-64 Optimization Guide, choose the minimum latency
-// alternative.
-//
-// farg0 holds the dividend (a). farg1 holds the divisor (b).
-
- .text
- .align 16
- .global __divdf3
- .proc __divdf3
-__divdf3:
- cmp.eq p7, p0 = r0, r0                 // p7 = 1: default to the "frcpa is final" path
- frcpa.s0 f10, p6 = farg0, farg1        // f10 = y0 ~ 1/b with p6 set, else f10 is the final result
- ;;
-(p6) cmp.ne p7, p0 = r0, r0             // refinement path: clear p7, so exactly one of p6/p7 is set
- .pred.rel.mutex p6, p7
-(p6) fmpy.s1 f11 = farg0, f10           // q0 = a*y0
-(p6) fnma.s1 f12 = farg1, f10, f1       // e0 = 1 - b*y0
- ;;
-(p6) fma.s1 f11 = f12, f11, f11         // q1 = e0*q0 + q0
-(p6) fmpy.s1 f13 = f12, f12             // e1 = e0^2
- ;;
-(p6) fma.s1 f10 = f12, f10, f10         // y1 = e0*y0 + y0
-(p6) fma.s1 f11 = f13, f11, f11         // q2 = e1*q1 + q1
- ;;
-(p6) fmpy.s1 f12 = f13, f13             // e2 = e1^2
-(p6) fma.s1 f10 = f13, f10, f10         // y2 = e1*y1 + y1
- ;;
-(p6) fma.d.s1 f11 = f12, f11, f11       // q3 = e2*q2 + q2, rounded to double
-(p6) fma.s1 f10 = f12, f10, f10         // y3 = e2*y2 + y2
- ;;
-(p6) fnma.d.s1 f8 = farg1, f11, farg0   // r = a - b*q3 (f8 reused as a temporary)
- ;;
-(p6) fma.d fret0 = f8, f10, f11         // result = r*y3 + q3, rounded to double
-(p7) mov fret0 = f10                    // no refinement: return what frcpa produced
- br.ret.sptk rp
- ;;
- .endp __divdf3
-#endif
-
-#ifdef L__divsf3
-// __divsf3: compute a 32-bit IEEE float quotient farg0 / farg1 in fret0.
-// Scratch: f8, f9 and f10 are overwritten; p6/p7 select the result path.
-// From the Intel IA-64 Optimization Guide, choose the minimum latency
-// alternative.
-//
-// farg0 holds the dividend (a). farg1 holds the divisor (b).
-
- .text
- .align 16
- .global __divsf3
- .proc __divsf3
-__divsf3:
- cmp.eq p7, p0 = r0, r0                 // p7 = 1: default to the "frcpa is final" path
- frcpa.s0 f10, p6 = farg0, farg1        // f10 = y0 ~ 1/b with p6 set, else f10 is the final result
- ;;
-(p6) cmp.ne p7, p0 = r0, r0             // refinement path: clear p7, so exactly one of p6/p7 is set
- .pred.rel.mutex p6, p7
-(p6) fmpy.s1 f8 = farg0, f10            // q0 = a*y0
-(p6) fnma.s1 f9 = farg1, f10, f1        // e = 1 - b*y0
- ;;
-(p6) fma.s1 f8 = f9, f8, f8             // q1 = e*q0 + q0
-(p6) fmpy.s1 f9 = f9, f9                // e^2
- ;;
-(p6) fma.s1 f8 = f9, f8, f8             // q2 = e^2*q1 + q1
-(p6) fmpy.s1 f9 = f9, f9                // e^4
- ;;
-(p6) fma.d.s1 f10 = f9, f8, f8          // q3 = e^4*q2 + q2, rounded to double
- ;;
-(p6) fnorm.s.s0 fret0 = f10             // final rounding of q3 to single precision
-(p7) mov fret0 = f10                    // no refinement: return what frcpa produced
- br.ret.sptk rp
- ;;
- .endp __divsf3
-#endif
-
-#ifdef L__divdi3
-// __divdi3: compute a 64-bit signed integer quotient in0 / in1 in ret0.
-// The division is done in floating point; a zero divisor executes "break 1".
-// From the Intel IA-64 Optimization Guide, choose the minimum latency
-// alternative.
-//
-// in0 holds the dividend (a). in1 holds the divisor (b).
-
- .text
- .align 16
- .global __divdi3
- .proc __divdi3
-__divdi3:
- .regstk 2,0,0,0
- // Transfer inputs to FP registers.
- setf.sig f8 = in0
- setf.sig f9 = in1
- // Check divide by zero.
- cmp.ne.unc p0,p7=0,in1                 // p7 = (in1 == 0)
- ;;
- // Convert the inputs to FP, so that they won't be treated as unsigned.
- fcvt.xf f8 = f8                        // f8 = (FP) a
- fcvt.xf f9 = f9                        // f9 = (FP) b
-(p7) break 1                            // divisor is zero
- ;;
- // Compute the reciprocal approximation.
- frcpa.s1 f10, p6 = f8, f9              // f10 = y0 ~ 1/b; p6 enables the refinement below
- ;;
- // 3 Newton-Raphson iterations.
-(p6) fnma.s1 f11 = f9, f10, f1          // e = 1 - b*y0
-(p6) fmpy.s1 f12 = f8, f10              // q0 = a*y0
- ;;
-(p6) fmpy.s1 f13 = f11, f11             // e2 = e*e
-(p6) fma.s1 f12 = f11, f12, f12         // q1 = e*q0 + q0
- ;;
-(p6) fma.s1 f10 = f11, f10, f10         // y1 = e*y0 + y0
-(p6) fma.s1 f11 = f13, f12, f12         // q2 = e2*q1 + q1
- ;;
-(p6) fma.s1 f10 = f13, f10, f10         // y2 = e2*y1 + y1
-(p6) fnma.s1 f12 = f9, f11, f8          // r = a - b*q2
- ;;
-(p6) fma.s1 f10 = f12, f10, f11         // q3 = r*y2 + q2
- ;;
- // Truncate the quotient to a signed integer.
- fcvt.fx.trunc.s1 f10 = f10
- ;;
- // Transfer result to GP registers.
- getf.sig ret0 = f10
- br.ret.sptk rp
- ;;
- .endp __divdi3
-#endif
-
-#ifdef L__moddi3
-// __moddi3: compute a 64-bit signed integer modulus in0 % in1 in ret0.
-// The quotient q is computed in floating point, then r = a - q*b in integer.
-// From the Intel IA-64 Optimization Guide, choose the minimum latency
-// alternative. A zero divisor executes "break 1".
-//
-// in0 holds the dividend (a). in1 holds the divisor (b).
-
- .text
- .align 16
- .global __moddi3
- .proc __moddi3
-__moddi3:
- .regstk 2,0,0,0
- // Transfer inputs to FP registers.
- setf.sig f14 = in0                     // f14 keeps the integer dividend for the final xma
- setf.sig f9 = in1
- // Check divide by zero.
- cmp.ne.unc p0,p7=0,in1                 // p7 = (in1 == 0)
- ;;
- // Convert the inputs to FP, so that they won't be treated as unsigned.
- fcvt.xf f8 = f14                       // f8 = (FP) a
- fcvt.xf f9 = f9                        // f9 = (FP) b
-(p7) break 1                            // divisor is zero
- ;;
- // Compute the reciprocal approximation.
- frcpa.s1 f10, p6 = f8, f9              // f10 = y0 ~ 1/b; p6 enables the refinement below
- ;;
- // 3 Newton-Raphson iterations.
-(p6) fmpy.s1 f12 = f8, f10              // q0 = a*y0
-(p6) fnma.s1 f11 = f9, f10, f1          // e = 1 - b*y0
- ;;
-(p6) fma.s1 f12 = f11, f12, f12         // q1 = e*q0 + q0
-(p6) fmpy.s1 f13 = f11, f11             // e2 = e*e
- ;;
-(p6) fma.s1 f10 = f11, f10, f10         // y1 = e*y0 + y0
-(p6) fma.s1 f11 = f13, f12, f12         // q2 = e2*q1 + q1
- ;;
- sub in1 = r0, in1                      // in1 = -b (integer), used by the final xma
-(p6) fma.s1 f10 = f13, f10, f10         // y2 = e2*y1 + y1
-(p6) fnma.s1 f12 = f9, f11, f8          // r = a - b*q2 (last use of the FP divisor in f9)
- ;;
- setf.sig f9 = in1                      // f9 = integer -b
-(p6) fma.s1 f10 = f12, f10, f11         // q3 = r*y2 + q2
- ;;
- fcvt.fx.trunc.s1 f10 = f10             // q = trunc(q3) as a signed integer
- ;;
- // r = q * (-b) + a
- xma.l f10 = f10, f9, f14
- ;;
- // Transfer result to GP registers.
- getf.sig ret0 = f10
- br.ret.sptk rp
- ;;
- .endp __moddi3
-#endif
-
-#ifdef L__udivdi3
-// __udivdi3: compute a 64-bit unsigned integer quotient in0 / in1 in ret0.
-// The division is done in floating point; a zero divisor executes "break 1".
-// From the Intel IA-64 Optimization Guide, choose the minimum latency
-// alternative.
-//
-// in0 holds the dividend (a). in1 holds the divisor (b).
-
- .text
- .align 16
- .global __udivdi3
- .proc __udivdi3
-__udivdi3:
- .regstk 2,0,0,0
- // Transfer inputs to FP registers.
- setf.sig f8 = in0
- setf.sig f9 = in1
- // Check divide by zero.
- cmp.ne.unc p0,p7=0,in1                 // p7 = (in1 == 0)
- ;;
- // Convert the inputs to FP, to avoid FP software-assist faults.
- fcvt.xuf.s1 f8 = f8                    // f8 = (FP) a, treated as unsigned
- fcvt.xuf.s1 f9 = f9                    // f9 = (FP) b, treated as unsigned
-(p7) break 1                            // divisor is zero
- ;;
- // Compute the reciprocal approximation.
- frcpa.s1 f10, p6 = f8, f9              // f10 = y0 ~ 1/b; p6 enables the refinement below
- ;;
- // 3 Newton-Raphson iterations.
-(p6) fnma.s1 f11 = f9, f10, f1          // e = 1 - b*y0
-(p6) fmpy.s1 f12 = f8, f10              // q0 = a*y0
- ;;
-(p6) fmpy.s1 f13 = f11, f11             // e2 = e*e
-(p6) fma.s1 f12 = f11, f12, f12         // q1 = e*q0 + q0
- ;;
-(p6) fma.s1 f10 = f11, f10, f10         // y1 = e*y0 + y0
-(p6) fma.s1 f11 = f13, f12, f12         // q2 = e2*q1 + q1
- ;;
-(p6) fma.s1 f10 = f13, f10, f10         // y2 = e2*y1 + y1
-(p6) fnma.s1 f12 = f9, f11, f8          // r = a - b*q2
- ;;
-(p6) fma.s1 f10 = f12, f10, f11         // q3 = r*y2 + q2
- ;;
- // Truncate the quotient to an unsigned integer.
- fcvt.fxu.trunc.s1 f10 = f10
- ;;
- // Transfer result to GP registers.
- getf.sig ret0 = f10
- br.ret.sptk rp
- ;;
- .endp __udivdi3
-#endif
-
-#ifdef L__umoddi3
-// __umoddi3: compute a 64-bit unsigned integer modulus in0 % in1 in ret0.
-// The quotient q is computed in floating point, then r = a - q*b in integer.
-// From the Intel IA-64 Optimization Guide, choose the minimum latency
-// alternative. A zero divisor executes "break 1".
-//
-// in0 holds the dividend (a). in1 holds the divisor (b).
-
- .text
- .align 16
- .global __umoddi3
- .proc __umoddi3
-__umoddi3:
- .regstk 2,0,0,0
- // Transfer inputs to FP registers.
- setf.sig f14 = in0                     // f14 keeps the integer dividend for the final xma
- setf.sig f9 = in1
- // Check divide by zero.
- cmp.ne.unc p0,p7=0,in1                 // p7 = (in1 == 0)
- ;;
- // Convert the inputs to FP, to avoid FP software assist faults.
- fcvt.xuf.s1 f8 = f14                   // f8 = (FP) a, treated as unsigned
- fcvt.xuf.s1 f9 = f9                    // f9 = (FP) b, treated as unsigned
-(p7) break 1;                           // divisor is zero
- ;;
- // Compute the reciprocal approximation.
- frcpa.s1 f10, p6 = f8, f9              // f10 = y0 ~ 1/b; p6 enables the refinement below
- ;;
- // 3 Newton-Raphson iterations.
-(p6) fmpy.s1 f12 = f8, f10              // q0 = a*y0
-(p6) fnma.s1 f11 = f9, f10, f1          // e = 1 - b*y0
- ;;
-(p6) fma.s1 f12 = f11, f12, f12         // q1 = e*q0 + q0
-(p6) fmpy.s1 f13 = f11, f11             // e2 = e*e
- ;;
-(p6) fma.s1 f10 = f11, f10, f10         // y1 = e*y0 + y0
-(p6) fma.s1 f11 = f13, f12, f12         // q2 = e2*q1 + q1
- ;;
- sub in1 = r0, in1                      // in1 = -b (integer), used by the final xma
-(p6) fma.s1 f10 = f13, f10, f10         // y2 = e2*y1 + y1
-(p6) fnma.s1 f12 = f9, f11, f8          // r = a - b*q2 (last use of the FP divisor in f9)
- ;;
- setf.sig f9 = in1                      // f9 = integer -b
-(p6) fma.s1 f10 = f12, f10, f11         // q3 = r*y2 + q2
- ;;
- // Truncate the quotient to an unsigned integer.
- fcvt.fxu.trunc.s1 f10 = f10
- ;;
- // r = q * (-b) + a
- xma.l f10 = f10, f9, f14
- ;;
- // Transfer result to GP registers.
- getf.sig ret0 = f10
- br.ret.sptk rp
- ;;
- .endp __umoddi3
-#endif
-
-#ifdef L__divsi3
-// __divsi3: compute a 32-bit signed integer quotient in0 / in1 in ret0.
-// Both inputs are sign-extended from 32 bits before use; zero divisor => break 1.
-// From the Intel IA-64 Optimization Guide, choose the minimum latency
-// alternative.
-//
-// in0 holds the dividend (a). in1 holds the divisor (b).
-
- .text
- .align 16
- .global __divsi3
- .proc __divsi3
-__divsi3:
- .regstk 2,0,0,0
- mov r2 = 0x0ffdd                       // exponent field for the correction term (see setf.exp)
- sxt4 in0 = in0                         // sign-extend the 32-bit dividend
- sxt4 in1 = in1                         // sign-extend the 32-bit divisor
- ;;
- setf.sig f8 = in0
- setf.sig f9 = in1
- // Check divide by zero (after the sxt4, so only the low 32 bits count).
- cmp.ne.unc p0,p7=0,in1                 // p7 = (in1 == 0)
- ;;
- fcvt.xf f8 = f8                        // f8 = (FP) a
- fcvt.xf f9 = f9                        // f9 = (FP) b
-(p7) break 1                            // divisor is zero
- ;;
- setf.exp f11 = r2                      // f11 = tiny constant; 0xffdd is bias 0xffff - 34, i.e. 2^-34
- frcpa.s1 f10, p6 = f8, f9              // f10 = y0 ~ 1/b; p6 enables the refinement below
- ;;
-(p6) fmpy.s1 f8 = f8, f10               // q0 = a*y0
-(p6) fnma.s1 f9 = f9, f10, f1           // e = 1 - b*y0
- ;;
-(p6) fma.s1 f8 = f9, f8, f8             // q1 = e*q0 + q0
-(p6) fma.s1 f9 = f9, f9, f11            // e*e + f11
- ;;
-(p6) fma.s1 f10 = f9, f8, f8            // q2 = (e*e + f11)*q1 + q1
- ;;
- fcvt.fx.trunc.s1 f10 = f10             // truncate the quotient to a signed integer
- ;;
- getf.sig ret0 = f10
- br.ret.sptk rp
- ;;
- .endp __divsi3
-#endif
-
-#ifdef L__modsi3
-// __modsi3: compute a 32-bit signed integer modulus in0 % in1 in ret0.
-// The quotient q is computed in floating point, then r = a - q*b in integer.
-// From the Intel IA-64 Optimization Guide, choose the minimum latency
-// alternative. Inputs are sign-extended first; zero divisor => break 1.
-//
-// in0 holds the dividend (a). in1 holds the divisor (b).
-
- .text
- .align 16
- .global __modsi3
- .proc __modsi3
-__modsi3:
- .regstk 2,0,0,0
- mov r2 = 0x0ffdd                       // exponent field for the correction term (see setf.exp)
- sxt4 in0 = in0                         // sign-extend the 32-bit dividend
- sxt4 in1 = in1                         // sign-extend the 32-bit divisor
- ;;
- setf.sig f13 = in0                     // f13 keeps the integer dividend for the final xma
- setf.sig f9 = in1
- // Check divide by zero.
- cmp.ne.unc p0,p7=0,in1                 // p7 = (in1 == 0)
- ;;
- sub in1 = r0, in1                      // in1 = -b (integer), used by the final xma
- fcvt.xf f8 = f13                       // f8 = (FP) a
- fcvt.xf f9 = f9                        // f9 = (FP) b
- ;;
- setf.exp f11 = r2                      // f11 = tiny constant; 0xffdd is bias 0xffff - 34, i.e. 2^-34
- frcpa.s1 f10, p6 = f8, f9              // f10 = y0 ~ 1/b; p6 enables the refinement below
-(p7) break 1                            // divisor is zero
- ;;
-(p6) fmpy.s1 f12 = f8, f10              // q0 = a*y0
-(p6) fnma.s1 f10 = f9, f10, f1          // e = 1 - b*y0 (last use of the FP divisor in f9)
- ;;
- setf.sig f9 = in1                      // f9 = integer -b
-(p6) fma.s1 f12 = f10, f12, f12         // q1 = e*q0 + q0
-(p6) fma.s1 f10 = f10, f10, f11         // e*e + f11
- ;;
-(p6) fma.s1 f10 = f10, f12, f12         // q2 = (e*e + f11)*q1 + q1
- ;;
- fcvt.fx.trunc.s1 f10 = f10             // q = trunc(q2) as a signed integer
- ;;
- xma.l f10 = f10, f9, f13               // r = q * (-b) + a
- ;;
- getf.sig ret0 = f10
- br.ret.sptk rp
- ;;
- .endp __modsi3
-#endif
-
-#ifdef L__udivsi3
-// __udivsi3: compute a 32-bit unsigned integer quotient in0 / in1 in ret0.
-// Both inputs are zero-extended from 32 bits before use; zero divisor => break 1.
-// From the Intel IA-64 Optimization Guide, choose the minimum latency
-// alternative.
-//
-// in0 holds the dividend (a). in1 holds the divisor (b).
-
- .text
- .align 16
- .global __udivsi3
- .proc __udivsi3
-__udivsi3:
- .regstk 2,0,0,0
- mov r2 = 0x0ffdd                       // exponent field for the correction term (see setf.exp)
- zxt4 in0 = in0                         // zero-extend the 32-bit dividend
- zxt4 in1 = in1                         // zero-extend the 32-bit divisor
- ;;
- setf.sig f8 = in0
- setf.sig f9 = in1
- // Check divide by zero.
- cmp.ne.unc p0,p7=0,in1                 // p7 = (in1 == 0)
- ;;
- fcvt.xf f8 = f8                        // f8 = (FP) a; signed convert of a zero-extended value
- fcvt.xf f9 = f9                        // f9 = (FP) b; signed convert of a zero-extended value
-(p7) break 1                            // divisor is zero
- ;;
- setf.exp f11 = r2                      // f11 = tiny constant; 0xffdd is bias 0xffff - 34, i.e. 2^-34
- frcpa.s1 f10, p6 = f8, f9              // f10 = y0 ~ 1/b; p6 enables the refinement below
- ;;
-(p6) fmpy.s1 f8 = f8, f10               // q0 = a*y0
-(p6) fnma.s1 f9 = f9, f10, f1           // e = 1 - b*y0
- ;;
-(p6) fma.s1 f8 = f9, f8, f8             // q1 = e*q0 + q0
-(p6) fma.s1 f9 = f9, f9, f11            // e*e + f11
- ;;
-(p6) fma.s1 f10 = f9, f8, f8            // q2 = (e*e + f11)*q1 + q1
- ;;
- fcvt.fxu.trunc.s1 f10 = f10            // truncate the quotient to an unsigned integer
- ;;
- getf.sig ret0 = f10
- br.ret.sptk rp
- ;;
- .endp __udivsi3
-#endif
-
-#ifdef L__umodsi3
-// __umodsi3: compute a 32-bit unsigned integer modulus in0 % in1 in ret0.
-// The quotient q is computed in floating point, then r = a - q*b in integer.
-// From the Intel IA-64 Optimization Guide, choose the minimum latency
-// alternative. Inputs are zero-extended first; zero divisor => break 1.
-//
-// in0 holds the dividend (a). in1 holds the divisor (b).
-
- .text
- .align 16
- .global __umodsi3
- .proc __umodsi3
-__umodsi3:
- .regstk 2,0,0,0
- mov r2 = 0x0ffdd                       // exponent field for the correction term (see setf.exp)
- zxt4 in0 = in0                         // zero-extend the 32-bit dividend
- zxt4 in1 = in1                         // zero-extend the 32-bit divisor
- ;;
- setf.sig f13 = in0                     // f13 keeps the integer dividend for the final xma
- setf.sig f9 = in1
- // Check divide by zero.
- cmp.ne.unc p0,p7=0,in1                 // p7 = (in1 == 0)
- ;;
- sub in1 = r0, in1                      // in1 = -b (integer), used by the final xma
- fcvt.xf f8 = f13                       // f8 = (FP) a; signed convert of a zero-extended value
- fcvt.xf f9 = f9                        // f9 = (FP) b; signed convert of a zero-extended value
- ;;
- setf.exp f11 = r2                      // f11 = tiny constant; 0xffdd is bias 0xffff - 34, i.e. 2^-34
- frcpa.s1 f10, p6 = f8, f9              // f10 = y0 ~ 1/b; p6 enables the refinement below
-(p7) break 1;                           // divisor is zero
- ;;
-(p6) fmpy.s1 f12 = f8, f10              // q0 = a*y0
-(p6) fnma.s1 f10 = f9, f10, f1          // e = 1 - b*y0 (last use of the FP divisor in f9)
- ;;
- setf.sig f9 = in1                      // f9 = integer -b
-(p6) fma.s1 f12 = f10, f12, f12         // q1 = e*q0 + q0
-(p6) fma.s1 f10 = f10, f10, f11         // e*e + f11
- ;;
-(p6) fma.s1 f10 = f10, f12, f12         // q2 = (e*e + f11)*q1 + q1
- ;;
- fcvt.fxu.trunc.s1 f10 = f10            // q = trunc(q2) as an unsigned integer
- ;;
- xma.l f10 = f10, f9, f13               // r = q * (-b) + a
- ;;
- getf.sig ret0 = f10
- br.ret.sptk rp
- ;;
- .endp __umodsi3
-#endif
-
-#ifdef L__save_stack_nonlocal
-// Notes on save/restore stack nonlocal: We read ar.bsp but write
-// ar.bspstore. This is because ar.bsp can be read at all times
-// (independent of the RSE mode) but since it is read-only we need to
-// restore the value via ar.bspstore. This is OK because
-// ar.bsp==ar.bspstore after executing "flushrs".
-
-// void __ia64_save_stack_nonlocal(void *save_area, void *stack_pointer)
-// Save area layout written here: +0 stack_pointer, +8 ar.bsp, +16 ar.rnat, +24 ar.pfs.
- .text
- .align 16
- .global __ia64_save_stack_nonlocal
- .proc __ia64_save_stack_nonlocal
-__ia64_save_stack_nonlocal:
- { .mmf
- alloc r18 = ar.pfs, 2, 0, 0, 0         // r18 = ar.pfs; frame of 2 input registers
- mov r19 = ar.rsc                       // r19 = current RSE configuration
- ;;
- }
- { .mmi
- flushrs                                // flush the register stack (see the note above)
- st8 [in0] = in1, 24                    // save_area[0] = stack_pointer; in0 = save_area + 24
- and r19 = 0x1c, r19                    // keep only bits 2-4 of ar.rsc (low two bits cleared)
- ;;
- }
- { .mmi
- st8 [in0] = r18, -16                   // save_area[24] = ar.pfs; in0 = save_area + 8
- mov ar.rsc = r19                       // install the masked value while bsp/rnat are read
- or r19 = 0x3, r19                      // value with the low two bits set, written back on exit
- ;;
- }
- { .mmi
- mov r16 = ar.bsp
- mov r17 = ar.rnat
- adds r2 = 8, in0                       // r2 = save_area + 16
- ;;
- }
- { .mmi
- st8 [in0] = r16                        // save_area[8] = ar.bsp
- st8 [r2] = r17                         // save_area[16] = ar.rnat
- }
- { .mib
- mov ar.rsc = r19                       // masked bits with the low two bits forced to 1
- br.ret.sptk.few rp
- ;;
- }
- .endp __ia64_save_stack_nonlocal
-#endif
-
-#ifdef L__nonlocal_goto
-// void __ia64_nonlocal_goto(void *target_label, void *save_area,
-// void *static_chain);
-// Reads save_area at +0, +8, +16, +24 into r12, ar.bspstore, ar.rnat, ar.pfs, then returns to target_label.
- .text
- .align 16
- .global __ia64_nonlocal_goto
- .proc __ia64_nonlocal_goto
-__ia64_nonlocal_goto:
- { .mmi
- alloc r20 = ar.pfs, 3, 0, 0, 0         // frame of 3 input registers; r20 is not read afterwards
- ld8 r12 = [in1], 8                     // r12 = save_area[0]; in1 advances to +8
- mov.ret.sptk rp = in0, .L0             // rp = target_label; .L0 marks the bundle with the br.ret
- ;;
- }
- { .mmf
- ld8 r16 = [in1], 8                     // r16 = save_area[8], later written to ar.bspstore
- mov r19 = ar.rsc                       // r19 = current RSE configuration
- ;;
- }
- { .mmi
- flushrs
- ld8 r17 = [in1], 8                     // r17 = save_area[16], later written to ar.rnat
- and r19 = 0x1c, r19                    // keep only bits 2-4 of ar.rsc (low two bits cleared)
- ;;
- }
- { .mmi
- ld8 r18 = [in1]                        // r18 = save_area[24], later written to ar.pfs
- mov ar.rsc = r19                       // install the masked value before touching bspstore/rnat
- or r19 = 0x3, r19                      // value with the low two bits set, written back at .L0
- ;;
- }
- { .mmi
- mov ar.bspstore = r16
- ;;
- mov ar.rnat = r17
- ;;
- }
- { .mmi
- loadrs
- invala
- mov r15 = in2                          // r15 = static_chain
- ;;
- }
-.L0: { .mib
- mov ar.rsc = r19                       // masked bits with the low two bits forced to 1
- mov ar.pfs = r18
- br.ret.sptk.few rp                     // "return" to target_label
- ;;
- }
- .endp __ia64_nonlocal_goto
-#endif
-
-#ifdef L__restore_stack_nonlocal
-// This is mostly the same as nonlocal_goto above.
-// ??? This has not been tested yet.
-// Reads save_area at +0, +8, +16, +24 into r12, ar.bspstore, ar.rnat, ar.pfs, then returns via rp.
-// void __ia64_restore_stack_nonlocal(void *save_area)
-
- .text
- .align 16
- .global __ia64_restore_stack_nonlocal
- .proc __ia64_restore_stack_nonlocal
-__ia64_restore_stack_nonlocal:
- { .mmf
- alloc r20 = ar.pfs, 4, 0, 0, 0         // NOTE(review): 4 inputs declared, prototype has 1 argument; r20 is unused
- ld8 r12 = [in0], 8                     // r12 = save_area[0]; in0 advances to +8
- ;;
- }
- { .mmb
- ld8 r16=[in0], 8                       // r16 = save_area[8], later written to ar.bspstore
- mov r19 = ar.rsc                       // r19 = current RSE configuration
- ;;
- }
- { .mmi
- flushrs
- ld8 r17 = [in0], 8                     // r17 = save_area[16], later written to ar.rnat
- and r19 = 0x1c, r19                    // keep only bits 2-4 of ar.rsc (low two bits cleared)
- ;;
- }
- { .mmf
- ld8 r18 = [in0]                        // r18 = save_area[24], later written to ar.pfs
- mov ar.rsc = r19                       // install the masked value before touching bspstore/rnat
- ;;
- }
- { .mmi
- mov ar.bspstore = r16
- ;;
- mov ar.rnat = r17
- or r19 = 0x3, r19                      // value with the low two bits set, written back at .L0
- ;;
- }
- { .mmf
- loadrs
- invala
- ;;
- }
-.L0: { .mib                             // NOTE(review): .L0 is not referenced within this routine
- mov ar.rsc = r19                       // masked bits with the low two bits forced to 1
- mov ar.pfs = r18
- br.ret.sptk.few rp
- ;;
- }
- .endp __ia64_restore_stack_nonlocal
-#endif
-
-#ifdef L__trampoline
-// Implement the nested function trampoline. This is out of line
-// so that we don't have to bother with flushing the icache, as
-// well as making the on-stack trampoline smaller.
-//
-// The trampoline has the following form:
-//
-// +-------------------+ >
-// TRAMP: | __ia64_trampoline | |
-// +-------------------+ > fake function descriptor
-// | TRAMP+16 | |
-// +-------------------+ >
-// | target descriptor |
-// +-------------------+
-// | static link |
-// +-------------------+
-// On entry r1 is expected to hold TRAMP+16 (second word of the fake descriptor above).
- .text
- .align 16
- .global __ia64_trampoline
- .proc __ia64_trampoline
-__ia64_trampoline:
- { .mmi
- ld8 r2 = [r1], 8                       // r2 = "target descriptor" word; r1 advances to the static link
- ;;
- ld8 r15 = [r1]                         // r15 = static link
- }
- { .mmi
- ld8 r3 = [r2], 8                       // r3 = first word of the target descriptor (branch target)
- ;;
- ld8 r1 = [r2]                          // r1 = second word of the target descriptor
- mov b6 = r3
- }
- { .bbb
- br.sptk.many b6                        // jump to the target; no return to this code
- ;;
- }
- .endp __ia64_trampoline
-#endif
-
-// Thunks for backward compatibility: each old "tf" name just branches to the "xf" routine.
-#ifdef L_fixtfdi
- .text
- .align 16
- .global __fixtfti
- .proc __fixtfti
-__fixtfti:
- { .bbb
- br.sptk.many __fixxfti                 // plain branch (not a call): __fixxfti returns to our caller
- ;;
- }
- .endp __fixtfti
-#endif
-#ifdef L_fixunstfdi
- .align 16
- .global __fixunstfti
- .proc __fixunstfti
-__fixunstfti:
- { .bbb
- br.sptk.many __fixunsxfti              // plain branch (not a call): __fixunsxfti returns to our caller
- ;;
- }
- .endp __fixunstfti
-#endif
-#ifdef L_floatditf
- .align 16
- .global __floattitf
- .proc __floattitf
-__floattitf:
- { .bbb
- br.sptk.many __floattixf               // plain branch (not a call): __floattixf returns to our caller
- ;;
- }
- .endp __floattitf
-#endif