Diffstat (limited to 'contrib/compiler-rt/lib/builtins')
354 files changed, 26095 insertions, 0 deletions
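As a quick orientation to the interface cataloged in the README.txt diff that follows, here is a small usage sketch. It is not part of the imported sources: the typedefs and prototypes are copied from the README's synopsis, the sample values are arbitrary, and it assumes the final link resolves these symbols from compiler-rt's builtins (for example via clang's --rtlib=compiler-rt; __mulodi4 in particular is not provided by every libgcc). Normally the compiler emits such calls itself when lowering wide integer arithmetic.

/* Sketch only: calling two of the routines documented in README.txt by hand. */
#include <stdio.h>

typedef long long di_int;            /* typedefs as given in the README */
typedef unsigned long long du_int;

extern du_int __udivmoddi4(du_int a, du_int b, du_int *rem);      /* a / b, *rem = a % b */
extern di_int __mulodi4(di_int a, di_int b, int *overflow);        /* a * b, flags overflow */

int main(void) {
    du_int rem;
    du_int quot = __udivmoddi4(1000000007ULL, 97ULL, &rem);
    printf("quot=%llu rem=%llu\n", quot, rem);

    int overflow = 0;
    di_int prod = __mulodi4(1LL << 40, 1LL << 40, &overflow);      /* 2^80 does not fit */
    printf("prod=%lld overflow=%d\n", prod, overflow);
    return 0;
}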
diff --git a/contrib/compiler-rt/lib/builtins/README.txt b/contrib/compiler-rt/lib/builtins/README.txt new file mode 100644 index 000000000000..e603dfa05356 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/README.txt @@ -0,0 +1,346 @@ +Compiler-RT +================================ + +This directory and its subdirectories contain source code for the compiler +support routines. + +Compiler-RT is open source software. You may freely distribute it under the +terms of the license agreement found in LICENSE.txt. + +================================ + +This is a replacement library for libgcc. Each function is contained +in its own file. Each function has a corresponding unit test under +test/Unit. + +A rudimentary script to test each file is in the file called +test/Unit/test. + +Here is the specification for this library: + +http://gcc.gnu.org/onlinedocs/gccint/Libgcc.html#Libgcc + +Here is a synopsis of the contents of this library: + +typedef int si_int; +typedef unsigned su_int; + +typedef long long di_int; +typedef unsigned long long du_int; + +// Integral bit manipulation + +di_int __ashldi3(di_int a, si_int b); // a << b +ti_int __ashlti3(ti_int a, si_int b); // a << b + +di_int __ashrdi3(di_int a, si_int b); // a >> b arithmetic (sign fill) +ti_int __ashrti3(ti_int a, si_int b); // a >> b arithmetic (sign fill) +di_int __lshrdi3(di_int a, si_int b); // a >> b logical (zero fill) +ti_int __lshrti3(ti_int a, si_int b); // a >> b logical (zero fill) + +si_int __clzsi2(si_int a); // count leading zeros +si_int __clzdi2(di_int a); // count leading zeros +si_int __clzti2(ti_int a); // count leading zeros +si_int __ctzsi2(si_int a); // count trailing zeros +si_int __ctzdi2(di_int a); // count trailing zeros +si_int __ctzti2(ti_int a); // count trailing zeros + +si_int __ffssi2(si_int a); // find least significant 1 bit +si_int __ffsdi2(di_int a); // find least significant 1 bit +si_int __ffsti2(ti_int a); // find least significant 1 bit + +si_int __paritysi2(si_int a); // bit parity +si_int __paritydi2(di_int a); // bit parity +si_int __parityti2(ti_int a); // bit parity + +si_int __popcountsi2(si_int a); // bit population +si_int __popcountdi2(di_int a); // bit population +si_int __popcountti2(ti_int a); // bit population + +uint32_t __bswapsi2(uint32_t a); // a byteswapped +uint64_t __bswapdi2(uint64_t a); // a byteswapped + +// Integral arithmetic + +di_int __negdi2 (di_int a); // -a +ti_int __negti2 (ti_int a); // -a +di_int __muldi3 (di_int a, di_int b); // a * b +ti_int __multi3 (ti_int a, ti_int b); // a * b +si_int __divsi3 (si_int a, si_int b); // a / b signed +di_int __divdi3 (di_int a, di_int b); // a / b signed +ti_int __divti3 (ti_int a, ti_int b); // a / b signed +su_int __udivsi3 (su_int n, su_int d); // a / b unsigned +du_int __udivdi3 (du_int a, du_int b); // a / b unsigned +tu_int __udivti3 (tu_int a, tu_int b); // a / b unsigned +si_int __modsi3 (si_int a, si_int b); // a % b signed +di_int __moddi3 (di_int a, di_int b); // a % b signed +ti_int __modti3 (ti_int a, ti_int b); // a % b signed +su_int __umodsi3 (su_int a, su_int b); // a % b unsigned +du_int __umoddi3 (du_int a, du_int b); // a % b unsigned +tu_int __umodti3 (tu_int a, tu_int b); // a % b unsigned +du_int __udivmoddi4(du_int a, du_int b, du_int* rem); // a / b, *rem = a % b unsigned +tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem); // a / b, *rem = a % b unsigned +su_int __udivmodsi4(su_int a, su_int b, su_int* rem); // a / b, *rem = a % b unsigned +si_int __divmodsi4(si_int a, si_int b, si_int* rem); // 
a / b, *rem = a % b signed + + + +// Integral arithmetic with trapping overflow + +si_int __absvsi2(si_int a); // abs(a) +di_int __absvdi2(di_int a); // abs(a) +ti_int __absvti2(ti_int a); // abs(a) + +si_int __negvsi2(si_int a); // -a +di_int __negvdi2(di_int a); // -a +ti_int __negvti2(ti_int a); // -a + +si_int __addvsi3(si_int a, si_int b); // a + b +di_int __addvdi3(di_int a, di_int b); // a + b +ti_int __addvti3(ti_int a, ti_int b); // a + b + +si_int __subvsi3(si_int a, si_int b); // a - b +di_int __subvdi3(di_int a, di_int b); // a - b +ti_int __subvti3(ti_int a, ti_int b); // a - b + +si_int __mulvsi3(si_int a, si_int b); // a * b +di_int __mulvdi3(di_int a, di_int b); // a * b +ti_int __mulvti3(ti_int a, ti_int b); // a * b + + +// Integral arithmetic which returns if overflow + +si_int __mulosi4(si_int a, si_int b, int* overflow); // a * b, overflow set to one if result not in signed range +di_int __mulodi4(di_int a, di_int b, int* overflow); // a * b, overflow set to one if result not in signed range +ti_int __muloti4(ti_int a, ti_int b, int* overflow); // a * b, overflow set to + one if result not in signed range + + +// Integral comparison: a < b -> 0 +// a == b -> 1 +// a > b -> 2 + +si_int __cmpdi2 (di_int a, di_int b); +si_int __cmpti2 (ti_int a, ti_int b); +si_int __ucmpdi2(du_int a, du_int b); +si_int __ucmpti2(tu_int a, tu_int b); + +// Integral / floating point conversion + +di_int __fixsfdi( float a); +di_int __fixdfdi( double a); +di_int __fixxfdi(long double a); + +ti_int __fixsfti( float a); +ti_int __fixdfti( double a); +ti_int __fixxfti(long double a); +uint64_t __fixtfdi(long double input); // ppc only, doesn't match documentation + +su_int __fixunssfsi( float a); +su_int __fixunsdfsi( double a); +su_int __fixunsxfsi(long double a); + +du_int __fixunssfdi( float a); +du_int __fixunsdfdi( double a); +du_int __fixunsxfdi(long double a); + +tu_int __fixunssfti( float a); +tu_int __fixunsdfti( double a); +tu_int __fixunsxfti(long double a); +uint64_t __fixunstfdi(long double input); // ppc only + +float __floatdisf(di_int a); +double __floatdidf(di_int a); +long double __floatdixf(di_int a); +long double __floatditf(int64_t a); // ppc only + +float __floattisf(ti_int a); +double __floattidf(ti_int a); +long double __floattixf(ti_int a); + +float __floatundisf(du_int a); +double __floatundidf(du_int a); +long double __floatundixf(du_int a); +long double __floatunditf(uint64_t a); // ppc only + +float __floatuntisf(tu_int a); +double __floatuntidf(tu_int a); +long double __floatuntixf(tu_int a); + +// Floating point raised to integer power + +float __powisf2( float a, si_int b); // a ^ b +double __powidf2( double a, si_int b); // a ^ b +long double __powixf2(long double a, si_int b); // a ^ b +long double __powitf2(long double a, si_int b); // ppc only, a ^ b + +// Complex arithmetic + +// (a + ib) * (c + id) + + float _Complex __mulsc3( float a, float b, float c, float d); + double _Complex __muldc3(double a, double b, double c, double d); +long double _Complex __mulxc3(long double a, long double b, + long double c, long double d); +long double _Complex __multc3(long double a, long double b, + long double c, long double d); // ppc only + +// (a + ib) / (c + id) + + float _Complex __divsc3( float a, float b, float c, float d); + double _Complex __divdc3(double a, double b, double c, double d); +long double _Complex __divxc3(long double a, long double b, + long double c, long double d); +long double _Complex __divtc3(long double a, long double b, + long double c, long 
double d); // ppc only + + +// Runtime support + +// __clear_cache() is used to tell process that new instructions have been +// written to an address range. Necessary on processors that do not have +// a unified instruction and data cache. +void __clear_cache(void* start, void* end); + +// __enable_execute_stack() is used with nested functions when a trampoline +// function is written onto the stack and that page range needs to be made +// executable. +void __enable_execute_stack(void* addr); + +// __gcc_personality_v0() is normally only called by the system unwinder. +// C code (as opposed to C++) normally does not need a personality function +// because there are no catch clauses or destructors to be run. But there +// is a C language extension __attribute__((cleanup(func))) which marks local +// variables as needing the cleanup function "func" to be run when the +// variable goes out of scope. That includes when an exception is thrown, +// so a personality handler is needed. +_Unwind_Reason_Code __gcc_personality_v0(int version, _Unwind_Action actions, + uint64_t exceptionClass, struct _Unwind_Exception* exceptionObject, + _Unwind_Context_t context); + +// for use with some implementations of assert() in <assert.h> +void __eprintf(const char* format, const char* assertion_expression, + const char* line, const char* file); + +// for systems with emulated thread local storage +void* __emutls_get_address(struct __emutls_control*); + + +// Power PC specific functions + +// There is no C interface to the saveFP/restFP functions. They are helper +// functions called by the prolog and epilog of functions that need to save +// a number of non-volatile float point registers. +saveFP +restFP + +// PowerPC has a standard template for trampoline functions. This function +// generates a custom trampoline function with the specific realFunc +// and localsPtr values. +void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated, + const void* realFunc, void* localsPtr); + +// adds two 128-bit double-double precision values ( x + y ) +long double __gcc_qadd(long double x, long double y); + +// subtracts two 128-bit double-double precision values ( x - y ) +long double __gcc_qsub(long double x, long double y); + +// multiples two 128-bit double-double precision values ( x * y ) +long double __gcc_qmul(long double x, long double y); + +// divides two 128-bit double-double precision values ( x / y ) +long double __gcc_qdiv(long double a, long double b); + + +// ARM specific functions + +// There is no C interface to the switch* functions. These helper functions +// are only needed by Thumb1 code for efficient switch table generation. +switch16 +switch32 +switch8 +switchu8 + +// There is no C interface to the *_vfp_d8_d15_regs functions. There are +// called in the prolog and epilog of Thumb1 functions. When the C++ ABI use +// SJLJ for exceptions, each function with a catch clause or destuctors needs +// to save and restore all registers in it prolog and epliog. But there is +// no way to access vector and high float registers from thumb1 code, so the +// compiler must add call outs to these helper functions in the prolog and +// epilog. +restore_vfp_d8_d15_regs +save_vfp_d8_d15_regs + + +// Note: long ago ARM processors did not have floating point hardware support. +// Floating point was done in software and floating point parameters were +// passed in integer registers. 
When hardware support was added for floating +// point, new *vfp functions were added to do the same operations but with +// floating point parameters in floating point registers. + +// Undocumented functions + +float __addsf3vfp(float a, float b); // Appears to return a + b +double __adddf3vfp(double a, double b); // Appears to return a + b +float __divsf3vfp(float a, float b); // Appears to return a / b +double __divdf3vfp(double a, double b); // Appears to return a / b +int __eqsf2vfp(float a, float b); // Appears to return one + // iff a == b and neither is NaN. +int __eqdf2vfp(double a, double b); // Appears to return one + // iff a == b and neither is NaN. +double __extendsfdf2vfp(float a); // Appears to convert from + // float to double. +int __fixdfsivfp(double a); // Appears to convert from + // double to int. +int __fixsfsivfp(float a); // Appears to convert from + // float to int. +unsigned int __fixunssfsivfp(float a); // Appears to convert from + // float to unsigned int. +unsigned int __fixunsdfsivfp(double a); // Appears to convert from + // double to unsigned int. +double __floatsidfvfp(int a); // Appears to convert from + // int to double. +float __floatsisfvfp(int a); // Appears to convert from + // int to float. +double __floatunssidfvfp(unsigned int a); // Appears to convert from + // unisgned int to double. +float __floatunssisfvfp(unsigned int a); // Appears to convert from + // unisgned int to float. +int __gedf2vfp(double a, double b); // Appears to return __gedf2 + // (a >= b) +int __gesf2vfp(float a, float b); // Appears to return __gesf2 + // (a >= b) +int __gtdf2vfp(double a, double b); // Appears to return __gtdf2 + // (a > b) +int __gtsf2vfp(float a, float b); // Appears to return __gtsf2 + // (a > b) +int __ledf2vfp(double a, double b); // Appears to return __ledf2 + // (a <= b) +int __lesf2vfp(float a, float b); // Appears to return __lesf2 + // (a <= b) +int __ltdf2vfp(double a, double b); // Appears to return __ltdf2 + // (a < b) +int __ltsf2vfp(float a, float b); // Appears to return __ltsf2 + // (a < b) +double __muldf3vfp(double a, double b); // Appears to return a * b +float __mulsf3vfp(float a, float b); // Appears to return a * b +int __nedf2vfp(double a, double b); // Appears to return __nedf2 + // (a != b) +double __negdf2vfp(double a); // Appears to return -a +float __negsf2vfp(float a); // Appears to return -a +float __negsf2vfp(float a); // Appears to return -a +double __subdf3vfp(double a, double b); // Appears to return a - b +float __subsf3vfp(float a, float b); // Appears to return a - b +float __truncdfsf2vfp(double a); // Appears to convert from + // double to float. +int __unorddf2vfp(double a, double b); // Appears to return __unorddf2 +int __unordsf2vfp(float a, float b); // Appears to return __unordsf2 + + +Preconditions are listed for each function at the definition when there are any. +Any preconditions reflect the specification at +http://gcc.gnu.org/onlinedocs/gccint/Libgcc.html#Libgcc. + +Assumptions are listed in "int_lib.h", and in individual files. Where possible +assumptions are checked at compile time. diff --git a/contrib/compiler-rt/lib/builtins/aarch64/chkstk.S b/contrib/compiler-rt/lib/builtins/aarch64/chkstk.S new file mode 100644 index 000000000000..89ec90b08a13 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/aarch64/chkstk.S @@ -0,0 +1,34 @@ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+ +#include "../assembly.h" + +// __chkstk routine +// This routine is windows specific. +// http://msdn.microsoft.com/en-us/library/ms648426.aspx + +// This clobbers registers x16 and x17. +// Does not modify any memory or the stack pointer. + +// mov x15, #256 // Number of bytes of stack, in units of 16 byte +// bl __chkstk +// sub sp, sp, x15, lsl #4 + +#ifdef __aarch64__ + +#define PAGE_SIZE 4096 + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__chkstk) + lsl x16, x15, #4 + mov x17, sp +1: + sub x17, x17, #PAGE_SIZE + subs x16, x16, #PAGE_SIZE + ldr xzr, [x17] + b.gt 1b + + ret +END_COMPILERRT_FUNCTION(__chkstk) + +#endif // __aarch64__ diff --git a/contrib/compiler-rt/lib/builtins/absvdi2.c b/contrib/compiler-rt/lib/builtins/absvdi2.c new file mode 100644 index 000000000000..682c2355d2ac --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/absvdi2.c @@ -0,0 +1,29 @@ +/*===-- absvdi2.c - Implement __absvdi2 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------=== + * + * This file implements __absvdi2 for the compiler_rt library. + * + *===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: absolute value */ + +/* Effects: aborts if abs(x) < 0 */ + +COMPILER_RT_ABI di_int +__absvdi2(di_int a) +{ + const int N = (int)(sizeof(di_int) * CHAR_BIT); + if (a == ((di_int)1 << (N-1))) + compilerrt_abort(); + const di_int t = a >> (N - 1); + return (a ^ t) - t; +} diff --git a/contrib/compiler-rt/lib/builtins/absvsi2.c b/contrib/compiler-rt/lib/builtins/absvsi2.c new file mode 100644 index 000000000000..4812af815982 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/absvsi2.c @@ -0,0 +1,29 @@ +/* ===-- absvsi2.c - Implement __absvsi2 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __absvsi2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: absolute value */ + +/* Effects: aborts if abs(x) < 0 */ + +COMPILER_RT_ABI si_int +__absvsi2(si_int a) +{ + const int N = (int)(sizeof(si_int) * CHAR_BIT); + if (a == (1 << (N-1))) + compilerrt_abort(); + const si_int t = a >> (N - 1); + return (a ^ t) - t; +} diff --git a/contrib/compiler-rt/lib/builtins/absvti2.c b/contrib/compiler-rt/lib/builtins/absvti2.c new file mode 100644 index 000000000000..7927770c9ab3 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/absvti2.c @@ -0,0 +1,34 @@ +/* ===-- absvti2.c - Implement __absvdi2 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __absvti2 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: absolute value */ + +/* Effects: aborts if abs(x) < 0 */ + +COMPILER_RT_ABI ti_int +__absvti2(ti_int a) +{ + const int N = (int)(sizeof(ti_int) * CHAR_BIT); + if (a == ((ti_int)1 << (N-1))) + compilerrt_abort(); + const ti_int s = a >> (N - 1); + return (a ^ s) - s; +} + +#endif /* CRT_HAS_128BIT */ + diff --git a/contrib/compiler-rt/lib/builtins/adddf3.c b/contrib/compiler-rt/lib/builtins/adddf3.c new file mode 100644 index 000000000000..9a3901312e51 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/adddf3.c @@ -0,0 +1,30 @@ +//===-- lib/adddf3.c - Double-precision addition ------------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements double-precision soft-float addition with the IEEE-754 +// default rounding (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_add_impl.inc" + +COMPILER_RT_ABI double __adddf3(double a, double b){ + return __addXf3__(a, b); +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI double __aeabi_dadd(double a, double b) { + return __adddf3(a, b); +} +#else +AEABI_RTABI double __aeabi_dadd(double a, double b) COMPILER_RT_ALIAS(__adddf3); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/addsf3.c b/contrib/compiler-rt/lib/builtins/addsf3.c new file mode 100644 index 000000000000..c5c1a41c3611 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/addsf3.c @@ -0,0 +1,30 @@ +//===-- lib/addsf3.c - Single-precision addition ------------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements single-precision soft-float addition with the IEEE-754 +// default rounding (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_add_impl.inc" + +COMPILER_RT_ABI float __addsf3(float a, float b) { + return __addXf3__(a, b); +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI float __aeabi_fadd(float a, float b) { + return __addsf3(a, b); +} +#else +AEABI_RTABI float __aeabi_fadd(float a, float b) COMPILER_RT_ALIAS(__addsf3); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/addtf3.c b/contrib/compiler-rt/lib/builtins/addtf3.c new file mode 100644 index 000000000000..e4bbe0227ae6 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/addtf3.c @@ -0,0 +1,25 @@ +//===-- lib/addtf3.c - Quad-precision addition --------------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements quad-precision soft-float addition with the IEEE-754 +// default rounding (to nearest, ties to even). 
+// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +#include "fp_add_impl.inc" + +COMPILER_RT_ABI long double __addtf3(long double a, long double b){ + return __addXf3__(a, b); +} + +#endif diff --git a/contrib/compiler-rt/lib/builtins/addvdi3.c b/contrib/compiler-rt/lib/builtins/addvdi3.c new file mode 100644 index 000000000000..0da38945679d --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/addvdi3.c @@ -0,0 +1,36 @@ +/* ===-- addvdi3.c - Implement __addvdi3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __addvdi3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a + b */ + +/* Effects: aborts if a + b overflows */ + +COMPILER_RT_ABI di_int +__addvdi3(di_int a, di_int b) +{ + di_int s = (du_int) a + (du_int) b; + if (b >= 0) + { + if (s < a) + compilerrt_abort(); + } + else + { + if (s >= a) + compilerrt_abort(); + } + return s; +} diff --git a/contrib/compiler-rt/lib/builtins/addvsi3.c b/contrib/compiler-rt/lib/builtins/addvsi3.c new file mode 100644 index 000000000000..94ca726f42b9 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/addvsi3.c @@ -0,0 +1,36 @@ +/* ===-- addvsi3.c - Implement __addvsi3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __addvsi3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a + b */ + +/* Effects: aborts if a + b overflows */ + +COMPILER_RT_ABI si_int +__addvsi3(si_int a, si_int b) +{ + si_int s = (su_int) a + (su_int) b; + if (b >= 0) + { + if (s < a) + compilerrt_abort(); + } + else + { + if (s >= a) + compilerrt_abort(); + } + return s; +} diff --git a/contrib/compiler-rt/lib/builtins/addvti3.c b/contrib/compiler-rt/lib/builtins/addvti3.c new file mode 100644 index 000000000000..c224de60aab0 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/addvti3.c @@ -0,0 +1,40 @@ +/* ===-- addvti3.c - Implement __addvti3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __addvti3 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: a + b */ + +/* Effects: aborts if a + b overflows */ + +COMPILER_RT_ABI ti_int +__addvti3(ti_int a, ti_int b) +{ + ti_int s = (tu_int) a + (tu_int) b; + if (b >= 0) + { + if (s < a) + compilerrt_abort(); + } + else + { + if (s >= a) + compilerrt_abort(); + } + return s; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/apple_versioning.c b/contrib/compiler-rt/lib/builtins/apple_versioning.c new file mode 100644 index 000000000000..3797a1ab02da --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/apple_versioning.c @@ -0,0 +1,350 @@ +/* ===-- apple_versioning.c - Adds versioning symbols for ld ---------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + + +#if __APPLE__ + #include <Availability.h> + + #if __IPHONE_OS_VERSION_MIN_REQUIRED + #define NOT_HERE_BEFORE_10_6(sym) + #define NOT_HERE_IN_10_8_AND_EARLIER(sym) \ + extern const char sym##_tmp61 __asm("$ld$hide$os6.1$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp61 = 0; \ + extern const char sym##_tmp60 __asm("$ld$hide$os6.0$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp60 = 0; \ + extern const char sym##_tmp51 __asm("$ld$hide$os5.1$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp51 = 0; \ + extern const char sym##_tmp50 __asm("$ld$hide$os5.0$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp50 = 0; + #else + #define NOT_HERE_BEFORE_10_6(sym) \ + extern const char sym##_tmp4 __asm("$ld$hide$os10.4$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \ + extern const char sym##_tmp5 __asm("$ld$hide$os10.5$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp5 = 0; + #define NOT_HERE_IN_10_8_AND_EARLIER(sym) \ + extern const char sym##_tmp8 __asm("$ld$hide$os10.8$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp8 = 0; \ + extern const char sym##_tmp7 __asm("$ld$hide$os10.7$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp7 = 0; \ + extern const char sym##_tmp6 __asm("$ld$hide$os10.6$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp6 = 0; + #endif + + +/* Symbols in libSystem.dylib in 10.6 and later, + * but are in libgcc_s.dylib in earlier versions + */ + +NOT_HERE_BEFORE_10_6(__absvdi2) +NOT_HERE_BEFORE_10_6(__absvsi2) +NOT_HERE_BEFORE_10_6(__absvti2) +NOT_HERE_BEFORE_10_6(__addvdi3) +NOT_HERE_BEFORE_10_6(__addvsi3) +NOT_HERE_BEFORE_10_6(__addvti3) +NOT_HERE_BEFORE_10_6(__ashldi3) +NOT_HERE_BEFORE_10_6(__ashlti3) +NOT_HERE_BEFORE_10_6(__ashrdi3) +NOT_HERE_BEFORE_10_6(__ashrti3) +NOT_HERE_BEFORE_10_6(__clear_cache) +NOT_HERE_BEFORE_10_6(__clzdi2) +NOT_HERE_BEFORE_10_6(__clzsi2) +NOT_HERE_BEFORE_10_6(__clzti2) +NOT_HERE_BEFORE_10_6(__cmpdi2) +NOT_HERE_BEFORE_10_6(__cmpti2) +NOT_HERE_BEFORE_10_6(__ctzdi2) +NOT_HERE_BEFORE_10_6(__ctzsi2) +NOT_HERE_BEFORE_10_6(__ctzti2) +NOT_HERE_BEFORE_10_6(__divdc3) +NOT_HERE_BEFORE_10_6(__divdi3) +NOT_HERE_BEFORE_10_6(__divsc3) +NOT_HERE_BEFORE_10_6(__divtc3) +NOT_HERE_BEFORE_10_6(__divti3) +NOT_HERE_BEFORE_10_6(__divxc3) +NOT_HERE_BEFORE_10_6(__enable_execute_stack) 
+NOT_HERE_BEFORE_10_6(__ffsdi2) +NOT_HERE_BEFORE_10_6(__ffsti2) +NOT_HERE_BEFORE_10_6(__fixdfdi) +NOT_HERE_BEFORE_10_6(__fixdfti) +NOT_HERE_BEFORE_10_6(__fixsfdi) +NOT_HERE_BEFORE_10_6(__fixsfti) +NOT_HERE_BEFORE_10_6(__fixtfdi) +NOT_HERE_BEFORE_10_6(__fixunsdfdi) +NOT_HERE_BEFORE_10_6(__fixunsdfsi) +NOT_HERE_BEFORE_10_6(__fixunsdfti) +NOT_HERE_BEFORE_10_6(__fixunssfdi) +NOT_HERE_BEFORE_10_6(__fixunssfsi) +NOT_HERE_BEFORE_10_6(__fixunssfti) +NOT_HERE_BEFORE_10_6(__fixunstfdi) +NOT_HERE_BEFORE_10_6(__fixunsxfdi) +NOT_HERE_BEFORE_10_6(__fixunsxfsi) +NOT_HERE_BEFORE_10_6(__fixunsxfti) +NOT_HERE_BEFORE_10_6(__fixxfdi) +NOT_HERE_BEFORE_10_6(__fixxfti) +NOT_HERE_BEFORE_10_6(__floatdidf) +NOT_HERE_BEFORE_10_6(__floatdisf) +NOT_HERE_BEFORE_10_6(__floatditf) +NOT_HERE_BEFORE_10_6(__floatdixf) +NOT_HERE_BEFORE_10_6(__floattidf) +NOT_HERE_BEFORE_10_6(__floattisf) +NOT_HERE_BEFORE_10_6(__floattixf) +NOT_HERE_BEFORE_10_6(__floatundidf) +NOT_HERE_BEFORE_10_6(__floatundisf) +NOT_HERE_BEFORE_10_6(__floatunditf) +NOT_HERE_BEFORE_10_6(__floatundixf) +NOT_HERE_BEFORE_10_6(__floatuntidf) +NOT_HERE_BEFORE_10_6(__floatuntisf) +NOT_HERE_BEFORE_10_6(__floatuntixf) +NOT_HERE_BEFORE_10_6(__gcc_personality_v0) +NOT_HERE_BEFORE_10_6(__lshrdi3) +NOT_HERE_BEFORE_10_6(__lshrti3) +NOT_HERE_BEFORE_10_6(__moddi3) +NOT_HERE_BEFORE_10_6(__modti3) +NOT_HERE_BEFORE_10_6(__muldc3) +NOT_HERE_BEFORE_10_6(__muldi3) +NOT_HERE_BEFORE_10_6(__mulsc3) +NOT_HERE_BEFORE_10_6(__multc3) +NOT_HERE_BEFORE_10_6(__multi3) +NOT_HERE_BEFORE_10_6(__mulvdi3) +NOT_HERE_BEFORE_10_6(__mulvsi3) +NOT_HERE_BEFORE_10_6(__mulvti3) +NOT_HERE_BEFORE_10_6(__mulxc3) +NOT_HERE_BEFORE_10_6(__negdi2) +NOT_HERE_BEFORE_10_6(__negti2) +NOT_HERE_BEFORE_10_6(__negvdi2) +NOT_HERE_BEFORE_10_6(__negvsi2) +NOT_HERE_BEFORE_10_6(__negvti2) +NOT_HERE_BEFORE_10_6(__paritydi2) +NOT_HERE_BEFORE_10_6(__paritysi2) +NOT_HERE_BEFORE_10_6(__parityti2) +NOT_HERE_BEFORE_10_6(__popcountdi2) +NOT_HERE_BEFORE_10_6(__popcountsi2) +NOT_HERE_BEFORE_10_6(__popcountti2) +NOT_HERE_BEFORE_10_6(__powidf2) +NOT_HERE_BEFORE_10_6(__powisf2) +NOT_HERE_BEFORE_10_6(__powitf2) +NOT_HERE_BEFORE_10_6(__powixf2) +NOT_HERE_BEFORE_10_6(__subvdi3) +NOT_HERE_BEFORE_10_6(__subvsi3) +NOT_HERE_BEFORE_10_6(__subvti3) +NOT_HERE_BEFORE_10_6(__ucmpdi2) +NOT_HERE_BEFORE_10_6(__ucmpti2) +NOT_HERE_BEFORE_10_6(__udivdi3) +NOT_HERE_BEFORE_10_6(__udivmoddi4) +NOT_HERE_BEFORE_10_6(__udivmodti4) +NOT_HERE_BEFORE_10_6(__udivti3) +NOT_HERE_BEFORE_10_6(__umoddi3) +NOT_HERE_BEFORE_10_6(__umodti3) + + +#if __ppc__ +NOT_HERE_BEFORE_10_6(__gcc_qadd) +NOT_HERE_BEFORE_10_6(__gcc_qdiv) +NOT_HERE_BEFORE_10_6(__gcc_qmul) +NOT_HERE_BEFORE_10_6(__gcc_qsub) +NOT_HERE_BEFORE_10_6(__trampoline_setup) +#endif /* __ppc__ */ + +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_1) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_2) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_4) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_8) + +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange_1) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange_2) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange_4) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange_8) + +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_add_1) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_add_2) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_add_4) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_add_8) + +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_and_1) 
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_and_2) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_and_4) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_and_8) + +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_or_1) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_or_2) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_or_4) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_or_8) + +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_sub_1) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_sub_2) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_sub_4) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_sub_8) + +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_xor_1) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_xor_2) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_xor_4) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_xor_8) + +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load_1) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load_2) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load_4) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load_8) + +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_1) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_2) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_4) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_8) + + +#if __arm__ && __DYNAMIC__ + #define NOT_HERE_UNTIL_AFTER_4_3(sym) \ + extern const char sym##_tmp1 __asm("$ld$hide$os3.0$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp1 = 0; \ + extern const char sym##_tmp2 __asm("$ld$hide$os3.1$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp2 = 0; \ + extern const char sym##_tmp3 __asm("$ld$hide$os3.2$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp3 = 0; \ + extern const char sym##_tmp4 __asm("$ld$hide$os4.0$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \ + extern const char sym##_tmp5 __asm("$ld$hide$os4.1$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp5 = 0; \ + extern const char sym##_tmp6 __asm("$ld$hide$os4.2$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp6 = 0; \ + extern const char sym##_tmp7 __asm("$ld$hide$os4.3$_" #sym ); \ + __attribute__((visibility("default"))) const char sym##_tmp7 = 0; + +NOT_HERE_UNTIL_AFTER_4_3(__absvdi2) +NOT_HERE_UNTIL_AFTER_4_3(__absvsi2) +NOT_HERE_UNTIL_AFTER_4_3(__adddf3) +NOT_HERE_UNTIL_AFTER_4_3(__adddf3vfp) +NOT_HERE_UNTIL_AFTER_4_3(__addsf3) +NOT_HERE_UNTIL_AFTER_4_3(__addsf3vfp) +NOT_HERE_UNTIL_AFTER_4_3(__addvdi3) +NOT_HERE_UNTIL_AFTER_4_3(__addvsi3) +NOT_HERE_UNTIL_AFTER_4_3(__ashldi3) +NOT_HERE_UNTIL_AFTER_4_3(__ashrdi3) +NOT_HERE_UNTIL_AFTER_4_3(__bswapdi2) +NOT_HERE_UNTIL_AFTER_4_3(__bswapsi2) +NOT_HERE_UNTIL_AFTER_4_3(__clzdi2) +NOT_HERE_UNTIL_AFTER_4_3(__clzsi2) +NOT_HERE_UNTIL_AFTER_4_3(__cmpdi2) +NOT_HERE_UNTIL_AFTER_4_3(__ctzdi2) +NOT_HERE_UNTIL_AFTER_4_3(__ctzsi2) +NOT_HERE_UNTIL_AFTER_4_3(__divdc3) +NOT_HERE_UNTIL_AFTER_4_3(__divdf3) +NOT_HERE_UNTIL_AFTER_4_3(__divdf3vfp) +NOT_HERE_UNTIL_AFTER_4_3(__divdi3) +NOT_HERE_UNTIL_AFTER_4_3(__divsc3) +NOT_HERE_UNTIL_AFTER_4_3(__divsf3) +NOT_HERE_UNTIL_AFTER_4_3(__divsf3vfp) +NOT_HERE_UNTIL_AFTER_4_3(__divsi3) +NOT_HERE_UNTIL_AFTER_4_3(__eqdf2) +NOT_HERE_UNTIL_AFTER_4_3(__eqdf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__eqsf2) +NOT_HERE_UNTIL_AFTER_4_3(__eqsf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__extendsfdf2) +NOT_HERE_UNTIL_AFTER_4_3(__extendsfdf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__ffsdi2) +NOT_HERE_UNTIL_AFTER_4_3(__fixdfdi) +NOT_HERE_UNTIL_AFTER_4_3(__fixdfsi) 
+NOT_HERE_UNTIL_AFTER_4_3(__fixdfsivfp) +NOT_HERE_UNTIL_AFTER_4_3(__fixsfdi) +NOT_HERE_UNTIL_AFTER_4_3(__fixsfsi) +NOT_HERE_UNTIL_AFTER_4_3(__fixsfsivfp) +NOT_HERE_UNTIL_AFTER_4_3(__fixunsdfdi) +NOT_HERE_UNTIL_AFTER_4_3(__fixunsdfsi) +NOT_HERE_UNTIL_AFTER_4_3(__fixunsdfsivfp) +NOT_HERE_UNTIL_AFTER_4_3(__fixunssfdi) +NOT_HERE_UNTIL_AFTER_4_3(__fixunssfsi) +NOT_HERE_UNTIL_AFTER_4_3(__fixunssfsivfp) +NOT_HERE_UNTIL_AFTER_4_3(__floatdidf) +NOT_HERE_UNTIL_AFTER_4_3(__floatdisf) +NOT_HERE_UNTIL_AFTER_4_3(__floatsidf) +NOT_HERE_UNTIL_AFTER_4_3(__floatsidfvfp) +NOT_HERE_UNTIL_AFTER_4_3(__floatsisf) +NOT_HERE_UNTIL_AFTER_4_3(__floatsisfvfp) +NOT_HERE_UNTIL_AFTER_4_3(__floatundidf) +NOT_HERE_UNTIL_AFTER_4_3(__floatundisf) +NOT_HERE_UNTIL_AFTER_4_3(__floatunsidf) +NOT_HERE_UNTIL_AFTER_4_3(__floatunsisf) +NOT_HERE_UNTIL_AFTER_4_3(__floatunssidfvfp) +NOT_HERE_UNTIL_AFTER_4_3(__floatunssisfvfp) +NOT_HERE_UNTIL_AFTER_4_3(__gedf2) +NOT_HERE_UNTIL_AFTER_4_3(__gedf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__gesf2) +NOT_HERE_UNTIL_AFTER_4_3(__gesf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__gtdf2) +NOT_HERE_UNTIL_AFTER_4_3(__gtdf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__gtsf2) +NOT_HERE_UNTIL_AFTER_4_3(__gtsf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__ledf2) +NOT_HERE_UNTIL_AFTER_4_3(__ledf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__lesf2) +NOT_HERE_UNTIL_AFTER_4_3(__lesf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__lshrdi3) +NOT_HERE_UNTIL_AFTER_4_3(__ltdf2) +NOT_HERE_UNTIL_AFTER_4_3(__ltdf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__ltsf2) +NOT_HERE_UNTIL_AFTER_4_3(__ltsf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__moddi3) +NOT_HERE_UNTIL_AFTER_4_3(__modsi3) +NOT_HERE_UNTIL_AFTER_4_3(__muldc3) +NOT_HERE_UNTIL_AFTER_4_3(__muldf3) +NOT_HERE_UNTIL_AFTER_4_3(__muldf3vfp) +NOT_HERE_UNTIL_AFTER_4_3(__muldi3) +NOT_HERE_UNTIL_AFTER_4_3(__mulsc3) +NOT_HERE_UNTIL_AFTER_4_3(__mulsf3) +NOT_HERE_UNTIL_AFTER_4_3(__mulsf3vfp) +NOT_HERE_UNTIL_AFTER_4_3(__mulvdi3) +NOT_HERE_UNTIL_AFTER_4_3(__mulvsi3) +NOT_HERE_UNTIL_AFTER_4_3(__nedf2) +NOT_HERE_UNTIL_AFTER_4_3(__nedf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__negdi2) +NOT_HERE_UNTIL_AFTER_4_3(__negvdi2) +NOT_HERE_UNTIL_AFTER_4_3(__negvsi2) +NOT_HERE_UNTIL_AFTER_4_3(__nesf2) +NOT_HERE_UNTIL_AFTER_4_3(__nesf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__paritydi2) +NOT_HERE_UNTIL_AFTER_4_3(__paritysi2) +NOT_HERE_UNTIL_AFTER_4_3(__popcountdi2) +NOT_HERE_UNTIL_AFTER_4_3(__popcountsi2) +NOT_HERE_UNTIL_AFTER_4_3(__powidf2) +NOT_HERE_UNTIL_AFTER_4_3(__powisf2) +NOT_HERE_UNTIL_AFTER_4_3(__subdf3) +NOT_HERE_UNTIL_AFTER_4_3(__subdf3vfp) +NOT_HERE_UNTIL_AFTER_4_3(__subsf3) +NOT_HERE_UNTIL_AFTER_4_3(__subsf3vfp) +NOT_HERE_UNTIL_AFTER_4_3(__subvdi3) +NOT_HERE_UNTIL_AFTER_4_3(__subvsi3) +NOT_HERE_UNTIL_AFTER_4_3(__truncdfsf2) +NOT_HERE_UNTIL_AFTER_4_3(__truncdfsf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__ucmpdi2) +NOT_HERE_UNTIL_AFTER_4_3(__udivdi3) +NOT_HERE_UNTIL_AFTER_4_3(__udivmoddi4) +NOT_HERE_UNTIL_AFTER_4_3(__udivsi3) +NOT_HERE_UNTIL_AFTER_4_3(__umoddi3) +NOT_HERE_UNTIL_AFTER_4_3(__umodsi3) +NOT_HERE_UNTIL_AFTER_4_3(__unorddf2) +NOT_HERE_UNTIL_AFTER_4_3(__unorddf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__unordsf2) +NOT_HERE_UNTIL_AFTER_4_3(__unordsf2vfp) + +NOT_HERE_UNTIL_AFTER_4_3(__divmodsi4) +NOT_HERE_UNTIL_AFTER_4_3(__udivmodsi4) +#endif // __arm__ && __DYNAMIC__ + + + + + +#else /* !__APPLE__ */ + +extern int avoid_empty_file; + +#endif /* !__APPLE__*/ diff --git a/contrib/compiler-rt/lib/builtins/arm/adddf3vfp.S b/contrib/compiler-rt/lib/builtins/arm/adddf3vfp.S new file mode 100644 index 000000000000..8e476cad1624 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/adddf3vfp.S @@ -0,0 
+1,33 @@ +//===-- adddf3vfp.S - Implement adddf3vfp ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// double __adddf3vfp(double a, double b) { return a + b; } +// +// Adds two double precision floating point numbers using the Darwin +// calling convention where double arguments are passsed in GPR pairs +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__adddf3vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vadd.f64 d0, d0, d1 +#else + vmov d6, r0, r1 // move first param from r0/r1 pair into d6 + vmov d7, r2, r3 // move second param from r2/r3 pair into d7 + vadd.f64 d6, d6, d7 + vmov r0, r1, d6 // move result back to r0/r1 pair +#endif + bx lr +END_COMPILERRT_FUNCTION(__adddf3vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/addsf3.S b/contrib/compiler-rt/lib/builtins/arm/addsf3.S new file mode 100644 index 000000000000..74723cbeff74 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/addsf3.S @@ -0,0 +1,277 @@ +/*===-- addsf3.S - Adds two single precision floating pointer numbers-----===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __addsf3 (single precision floating pointer number + * addition with the IEEE-754 default rounding (to nearest, ties to even) + * function for the ARM Thumb1 ISA. + * + *===----------------------------------------------------------------------===*/ + +#include "../assembly.h" +#define significandBits 23 +#define typeWidth 32 + + .syntax unified + .text + .thumb + .p2align 2 + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fadd, __addsf3) + +DEFINE_COMPILERRT_THUMB_FUNCTION(__addsf3) + push {r4, r5, r6, r7, lr} + // Get the absolute value of a and b. + lsls r2, r0, #1 + lsls r3, r1, #1 + lsrs r2, r2, #1 /* aAbs */ + beq LOCAL_LABEL(a_zero_nan_inf) + lsrs r3, r3, #1 /* bAbs */ + beq LOCAL_LABEL(zero_nan_inf) + + // Detect if a or b is infinity or Nan. + lsrs r6, r2, #(significandBits) + lsrs r7, r3, #(significandBits) + cmp r6, #0xFF + beq LOCAL_LABEL(zero_nan_inf) + cmp r7, #0xFF + beq LOCAL_LABEL(zero_nan_inf) + + // Swap Rep and Abs so that a and aAbs has the larger absolute value. + cmp r2, r3 + bhs LOCAL_LABEL(no_swap) + movs r4, r0 + movs r5, r2 + movs r0, r1 + movs r2, r3 + movs r1, r4 + movs r3, r5 +LOCAL_LABEL(no_swap): + + // Get the significands and shift them to give us round, guard and sticky. + lsls r4, r0, #(typeWidth - significandBits) + lsrs r4, r4, #(typeWidth - significandBits - 3) /* aSignificand << 3 */ + lsls r5, r1, #(typeWidth - significandBits) + lsrs r5, r5, #(typeWidth - significandBits - 3) /* bSignificand << 3 */ + + // Get the implicitBit. + movs r6, #1 + lsls r6, r6, #(significandBits + 3) + + // Get aExponent and set implicit bit if necessary. + lsrs r2, r2, #(significandBits) + beq LOCAL_LABEL(a_done_implicit_bit) + orrs r4, r6 +LOCAL_LABEL(a_done_implicit_bit): + + // Get bExponent and set implicit bit if necessary. 
+ lsrs r3, r3, #(significandBits) + beq LOCAL_LABEL(b_done_implicit_bit) + orrs r5, r6 +LOCAL_LABEL(b_done_implicit_bit): + + // Get the difference in exponents. + subs r6, r2, r3 + beq LOCAL_LABEL(done_align) + + // If b is denormal, then a must be normal as align > 0, and we only need to + // right shift bSignificand by (align - 1) bits. + cmp r3, #0 + bne 1f + subs r6, r6, #1 +1: + + // No longer needs bExponent. r3 is dead here. + // Set sticky bits of b: sticky = bSignificand << (typeWidth - align). + movs r3, #(typeWidth) + subs r3, r3, r6 + movs r7, r5 + lsls r7, r3 + beq 1f + movs r7, #1 +1: + + // bSignificand = bSignificand >> align | sticky; + lsrs r5, r6 + orrs r5, r7 + bne LOCAL_LABEL(done_align) + movs r5, #1 // sticky; b is known to be non-zero. + +LOCAL_LABEL(done_align): + // isSubtraction = (aRep ^ bRep) >> 31; + movs r7, r0 + eors r7, r1 + lsrs r7, #31 + bne LOCAL_LABEL(do_substraction) + + // Same sign, do Addition. + + // aSignificand += bSignificand; + adds r4, r4, r5 + + // Check carry bit. + movs r6, #1 + lsls r6, r6, #(significandBits + 3 + 1) + movs r7, r4 + ands r7, r6 + beq LOCAL_LABEL(form_result) + // If the addition carried up, we need to right-shift the result and + // adjust the exponent. + movs r7, r4 + movs r6, #1 + ands r7, r6 // sticky = aSignificand & 1; + lsrs r4, #1 + orrs r4, r7 // result Significand + adds r2, #1 // result Exponent + // If we have overflowed the type, return +/- infinity. + cmp r2, 0xFF + beq LOCAL_LABEL(ret_inf) + +LOCAL_LABEL(form_result): + // Shift the sign, exponent and significand into place. + lsrs r0, #(typeWidth - 1) + lsls r0, #(typeWidth - 1) // Get Sign. + lsls r2, #(significandBits) + orrs r0, r2 + movs r1, r4 + lsls r4, #(typeWidth - significandBits - 3) + lsrs r4, #(typeWidth - significandBits) + orrs r0, r4 + + // Final rounding. The result may overflow to infinity, but that is the + // correct result in that case. + // roundGuardSticky = aSignificand & 0x7; + movs r2, #0x7 + ands r1, r2 + // if (roundGuardSticky > 0x4) result++; + + cmp r1, #0x4 + blt LOCAL_LABEL(done_round) + beq 1f + adds r0, #1 + pop {r4, r5, r6, r7, pc} +1: + + // if (roundGuardSticky == 0x4) result += result & 1; + movs r1, r0 + lsrs r1, #1 + bcc LOCAL_LABEL(done_round) + adds r0, r0, #1 +LOCAL_LABEL(done_round): + pop {r4, r5, r6, r7, pc} + +LOCAL_LABEL(do_substraction): + subs r4, r4, r5 // aSignificand -= bSignificand; + beq LOCAL_LABEL(ret_zero) + movs r6, r4 + cmp r2, 0 + beq LOCAL_LABEL(form_result) // if a's exp is 0, no need to normalize. + // If partial cancellation occured, we need to left-shift the result + // and adjust the exponent: + lsrs r6, r6, #(significandBits + 3) + bne LOCAL_LABEL(form_result) + + push {r0, r1, r2, r3} + movs r0, r4 + bl SYMBOL_NAME(__clzsi2) + movs r5, r0 + pop {r0, r1, r2, r3} + // shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3); + subs r5, r5, #(typeWidth - significandBits - 3 - 1) + // aSignificand <<= shift; aExponent -= shift; + lsls r4, r5 + subs r2, r2, r5 + bgt LOCAL_LABEL(form_result) + + // Do normalization if aExponent <= 0. + movs r6, #1 + subs r6, r6, r2 // 1 - aExponent; + movs r2, #0 // aExponent = 0; + movs r3, #(typeWidth) // bExponent is dead. 
+ subs r3, r3, r6 + movs r7, r4 + lsls r7, r3 // stickyBit = (bool)(aSignificant << (typeWidth - align)) + beq 1f + movs r7, #1 +1: + lsrs r4, r6 /* aSignificand >> shift */ + orrs r4, r7 + b LOCAL_LABEL(form_result) + +LOCAL_LABEL(ret_zero): + movs r0, #0 + pop {r4, r5, r6, r7, pc} + + +LOCAL_LABEL(a_zero_nan_inf): + lsrs r3, r3, #1 + +LOCAL_LABEL(zero_nan_inf): + // Here r2 has aAbs, r3 has bAbs + movs r4, #0xFF + lsls r4, r4, #(significandBits) // Make +inf. + + cmp r2, r4 + bhi LOCAL_LABEL(a_is_nan) + cmp r3, r4 + bhi LOCAL_LABEL(b_is_nan) + + cmp r2, r4 + bne LOCAL_LABEL(a_is_rational) + // aAbs is INF. + eors r1, r0 // aRep ^ bRep. + movs r6, #1 + lsls r6, r6, #(typeWidth - 1) // get sign mask. + cmp r1, r6 // if they only differ on sign bit, it's -INF + INF + beq LOCAL_LABEL(a_is_nan) + pop {r4, r5, r6, r7, pc} + +LOCAL_LABEL(a_is_rational): + cmp r3, r4 + bne LOCAL_LABEL(b_is_rational) + movs r0, r1 + pop {r4, r5, r6, r7, pc} + +LOCAL_LABEL(b_is_rational): + // either a or b or both are zero. + adds r4, r2, r3 + beq LOCAL_LABEL(both_zero) + cmp r2, #0 // is absA 0 ? + beq LOCAL_LABEL(ret_b) + pop {r4, r5, r6, r7, pc} + +LOCAL_LABEL(both_zero): + ands r0, r1 // +0 + -0 = +0 + pop {r4, r5, r6, r7, pc} + +LOCAL_LABEL(ret_b): + movs r0, r1 + +LOCAL_LABEL(ret): + pop {r4, r5, r6, r7, pc} + +LOCAL_LABEL(b_is_nan): + movs r0, r1 +LOCAL_LABEL(a_is_nan): + movs r1, #1 + lsls r1, r1, #(significandBits -1) // r1 is quiet bit. + orrs r0, r1 + pop {r4, r5, r6, r7, pc} + +LOCAL_LABEL(ret_inf): + movs r4, #0xFF + lsls r4, r4, #(significandBits) + orrs r0, r4 + lsrs r0, r0, #(significandBits) + lsls r0, r0, #(significandBits) + pop {r4, r5, r6, r7, pc} + + +END_COMPILERRT_FUNCTION(__addsf3) + +NO_EXEC_STACK_DIRECTIVE diff --git a/contrib/compiler-rt/lib/builtins/arm/addsf3vfp.S b/contrib/compiler-rt/lib/builtins/arm/addsf3vfp.S new file mode 100644 index 000000000000..8871efdcc5d1 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/addsf3vfp.S @@ -0,0 +1,33 @@ +//===-- addsf3vfp.S - Implement addsf3vfp ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern float __addsf3vfp(float a, float b); +// +// Adds two single precision floating point numbers using the Darwin +// calling convention where single arguments are passsed in GPRs +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__addsf3vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vadd.f32 s0, s0, s1 +#else + vmov s14, r0 // move first param from r0 into float register + vmov s15, r1 // move second param from r1 into float register + vadd.f32 s14, s14, s15 + vmov r0, s14 // move result back to r0 +#endif + bx lr +END_COMPILERRT_FUNCTION(__addsf3vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmp.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmp.S new file mode 100644 index 000000000000..adc2d55d90f5 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmp.S @@ -0,0 +1,145 @@ +//===-- aeabi_cdcmp.S - EABI cdcmp* implementation ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ +#error big endian support not implemented +#endif + +#define APSR_Z (1 << 30) +#define APSR_C (1 << 29) + +// void __aeabi_cdcmpeq(double a, double b) { +// if (isnan(a) || isnan(b)) { +// Z = 0; C = 1; +// } else { +// __aeabi_cdcmple(a, b); +// } +// } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmpeq) + push {r0-r3, lr} + bl __aeabi_cdcmpeq_check_nan + cmp r0, #1 +#if defined(USE_THUMB_1) + beq 1f + // NaN has been ruled out, so __aeabi_cdcmple can't trap + mov r0, sp + ldm r0, {r0-r3} + bl __aeabi_cdcmple + pop {r0-r3, pc} +1: + // Z = 0, C = 1 + movs r0, #0xF + lsls r0, r0, #31 + pop {r0-r3, pc} +#else + pop {r0-r3, lr} + + // NaN has been ruled out, so __aeabi_cdcmple can't trap + // Use "it ne" + unconditional branch to guarantee a supported relocation if + // __aeabi_cdcmple is in a different section for some builds. + IT(ne) + bne __aeabi_cdcmple + +#if defined(USE_THUMB_2) + mov ip, #APSR_C + msr APSR_nzcvq, ip +#else + msr APSR_nzcvq, #APSR_C +#endif + JMP(lr) +#endif +END_COMPILERRT_FUNCTION(__aeabi_cdcmpeq) + + +// void __aeabi_cdcmple(double a, double b) { +// if (__aeabi_dcmplt(a, b)) { +// Z = 0; C = 0; +// } else if (__aeabi_dcmpeq(a, b)) { +// Z = 1; C = 1; +// } else { +// Z = 0; C = 1; +// } +// } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmple) + // Per the RTABI, this function must preserve r0-r11. + // Save lr in the same instruction for compactness + push {r0-r3, lr} + + bl __aeabi_dcmplt + cmp r0, #1 +#if defined(USE_THUMB_1) + bne 1f + // Z = 0, C = 0 + movs r0, #1 + lsls r0, r0, #1 + pop {r0-r3, pc} +1: + mov r0, sp + ldm r0, {r0-r3} + bl __aeabi_dcmpeq + cmp r0, #1 + bne 2f + // Z = 1, C = 1 + movs r0, #2 + lsls r0, r0, #31 + pop {r0-r3, pc} +2: + // Z = 0, C = 1 + movs r0, #0xF + lsls r0, r0, #31 + pop {r0-r3, pc} +#else + ITT(eq) + moveq ip, #0 + beq 1f + + ldm sp, {r0-r3} + bl __aeabi_dcmpeq + cmp r0, #1 + ITE(eq) + moveq ip, #(APSR_C | APSR_Z) + movne ip, #(APSR_C) + +1: + msr APSR_nzcvq, ip + pop {r0-r3} + POP_PC() +#endif +END_COMPILERRT_FUNCTION(__aeabi_cdcmple) + +// int __aeabi_cdrcmple(double a, double b) { +// return __aeabi_cdcmple(b, a); +// } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_cdrcmple) + // Swap r0 and r2 + mov ip, r0 + mov r0, r2 + mov r2, ip + + // Swap r1 and r3 + mov ip, r1 + mov r1, r3 + mov r3, ip + + b __aeabi_cdcmple +END_COMPILERRT_FUNCTION(__aeabi_cdrcmple) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmpeq_check_nan.c b/contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmpeq_check_nan.c new file mode 100644 index 000000000000..7578433a1df7 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmpeq_check_nan.c @@ -0,0 +1,16 @@ +//===-- lib/arm/aeabi_cdcmpeq_helper.c - Helper for cdcmpeq ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include <stdint.h> +#include "../int_lib.h" + +AEABI_RTABI __attribute__((visibility("hidden"))) +int __aeabi_cdcmpeq_check_nan(double a, double b) { + return __builtin_isnan(a) || __builtin_isnan(b); +} diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmp.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmp.S new file mode 100644 index 000000000000..4b1de997687f --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmp.S @@ -0,0 +1,140 @@ +//===-- aeabi_cfcmp.S - EABI cfcmp* implementation ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ +#error big endian support not implemented +#endif + +#define APSR_Z (1 << 30) +#define APSR_C (1 << 29) + +// void __aeabi_cfcmpeq(float a, float b) { +// if (isnan(a) || isnan(b)) { +// Z = 0; C = 1; +// } else { +// __aeabi_cfcmple(a, b); +// } +// } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmpeq) + push {r0-r3, lr} + bl __aeabi_cfcmpeq_check_nan + cmp r0, #1 +#if defined(USE_THUMB_1) + beq 1f + // NaN has been ruled out, so __aeabi_cfcmple can't trap + mov r0, sp + ldm r0, {r0-r3} + bl __aeabi_cfcmple + pop {r0-r3, pc} +1: + // Z = 0, C = 1 + movs r0, #0xF + lsls r0, r0, #31 + pop {r0-r3, pc} +#else + pop {r0-r3, lr} + + // NaN has been ruled out, so __aeabi_cfcmple can't trap + // Use "it ne" + unconditional branch to guarantee a supported relocation if + // __aeabi_cfcmple is in a different section for some builds. + IT(ne) + bne __aeabi_cfcmple + +#if defined(USE_THUMB_2) + mov ip, #APSR_C + msr APSR_nzcvq, ip +#else + msr APSR_nzcvq, #APSR_C +#endif + JMP(lr) +#endif +END_COMPILERRT_FUNCTION(__aeabi_cfcmpeq) + + +// void __aeabi_cfcmple(float a, float b) { +// if (__aeabi_fcmplt(a, b)) { +// Z = 0; C = 0; +// } else if (__aeabi_fcmpeq(a, b)) { +// Z = 1; C = 1; +// } else { +// Z = 0; C = 1; +// } +// } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmple) + // Per the RTABI, this function must preserve r0-r11. 
+ // Save lr in the same instruction for compactness + push {r0-r3, lr} + + bl __aeabi_fcmplt + cmp r0, #1 +#if defined(USE_THUMB_1) + bne 1f + // Z = 0, C = 0 + movs r0, #1 + lsls r0, r0, #1 + pop {r0-r3, pc} +1: + mov r0, sp + ldm r0, {r0-r3} + bl __aeabi_fcmpeq + cmp r0, #1 + bne 2f + // Z = 1, C = 1 + movs r0, #2 + lsls r0, r0, #31 + pop {r0-r3, pc} +2: + // Z = 0, C = 1 + movs r0, #0xF + lsls r0, r0, #31 + pop {r0-r3, pc} +#else + ITT(eq) + moveq ip, #0 + beq 1f + + ldm sp, {r0-r3} + bl __aeabi_fcmpeq + cmp r0, #1 + ITE(eq) + moveq ip, #(APSR_C | APSR_Z) + movne ip, #(APSR_C) + +1: + msr APSR_nzcvq, ip + pop {r0-r3} + POP_PC() +#endif +END_COMPILERRT_FUNCTION(__aeabi_cfcmple) + +// int __aeabi_cfrcmple(float a, float b) { +// return __aeabi_cfcmple(b, a); +// } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_cfrcmple) + // Swap r0 and r1 + mov ip, r0 + mov r0, r1 + mov r1, ip + + b __aeabi_cfcmple +END_COMPILERRT_FUNCTION(__aeabi_cfrcmple) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmpeq_check_nan.c b/contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmpeq_check_nan.c new file mode 100644 index 000000000000..43dde9a49597 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmpeq_check_nan.c @@ -0,0 +1,16 @@ +//===-- lib/arm/aeabi_cfcmpeq_helper.c - Helper for cdcmpeq ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include <stdint.h> +#include "../int_lib.h" + +AEABI_RTABI __attribute__((visibility("hidden"))) +int __aeabi_cfcmpeq_check_nan(float a, float b) { + return __builtin_isnan(a) || __builtin_isnan(b); +} diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_dcmp.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_dcmp.S new file mode 100644 index 000000000000..9fa78b461248 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_dcmp.S @@ -0,0 +1,52 @@ +//===-- aeabi_dcmp.S - EABI dcmp* implementation ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// int __aeabi_dcmp{eq,lt,le,ge,gt}(double a, double b) { +// int result = __{eq,lt,le,ge,gt}df2(a, b); +// if (result {==,<,<=,>=,>} 0) { +// return 1; +// } else { +// return 0; +// } +// } + +#if defined(COMPILER_RT_ARMHF_TARGET) +# define CONVERT_DCMP_ARGS_TO_DF2_ARGS \ + vmov d0, r0, r1 SEPARATOR \ + vmov d1, r2, r3 +#else +# define CONVERT_DCMP_ARGS_TO_DF2_ARGS +#endif + +#define DEFINE_AEABI_DCMP(cond) \ + .syntax unified SEPARATOR \ + .p2align 2 SEPARATOR \ +DEFINE_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond) \ + push { r4, lr } SEPARATOR \ + CONVERT_DCMP_ARGS_TO_DF2_ARGS SEPARATOR \ + bl SYMBOL_NAME(__ ## cond ## df2) SEPARATOR \ + cmp r0, #0 SEPARATOR \ + b ## cond 1f SEPARATOR \ + movs r0, #0 SEPARATOR \ + pop { r4, pc } SEPARATOR \ +1: SEPARATOR \ + movs r0, #1 SEPARATOR \ + pop { r4, pc } SEPARATOR \ +END_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond) + +DEFINE_AEABI_DCMP(eq) +DEFINE_AEABI_DCMP(lt) +DEFINE_AEABI_DCMP(le) +DEFINE_AEABI_DCMP(ge) +DEFINE_AEABI_DCMP(gt) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_div0.c b/contrib/compiler-rt/lib/builtins/arm/aeabi_div0.c new file mode 100644 index 000000000000..dc3031326e37 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_div0.c @@ -0,0 +1,45 @@ +/* ===-- aeabi_div0.c - ARM Runtime ABI support routines for compiler-rt ---=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements the division by zero helper routines as specified by the + * Run-time ABI for the ARM Architecture. + * + * ===----------------------------------------------------------------------=== + */ + +/* + * RTABI 4.3.2 - Division by zero + * + * The *div0 functions: + * - Return the value passed to them as a parameter + * - Or, return a fixed value defined by the execution environment (such as 0) + * - Or, raise a signal (often SIGFPE) or throw an exception, and do not return + * + * An application may provide its own implementations of the *div0 functions to + * for a particular behaviour from the *div and *divmod functions called out of + * line. + */ + +#include "../int_lib.h" + +/* provide an unused declaration to pacify pendantic compilation */ +extern unsigned char declaration; + +#if defined(__ARM_EABI__) +AEABI_RTABI int __attribute__((weak)) __attribute__((visibility("hidden"))) +__aeabi_idiv0(int return_value) { + return return_value; +} + +AEABI_RTABI long long __attribute__((weak)) __attribute__((visibility("hidden"))) +__aeabi_ldiv0(long long return_value) { + return return_value; +} +#endif + diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_drsub.c b/contrib/compiler-rt/lib/builtins/arm/aeabi_drsub.c new file mode 100644 index 000000000000..1254886086fb --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_drsub.c @@ -0,0 +1,19 @@ +//===-- lib/arm/aeabi_drsub.c - Double-precision subtraction --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
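// The RTABI note in aeabi_div0.c above states that an application may provide
// its own *div0 handlers in place of the weak defaults. The following is a
// hypothetical application-side override, not part of the imported sources:
// it raises a fatal error instead of returning a fixed value, and its
// signatures mirror the declarations in aeabi_div0.c.
#include <stdlib.h>

int __aeabi_idiv0(int return_value) {
    (void)return_value;   /* value the default handler would have returned */
    abort();              /* trap on division by zero instead */
}

long long __aeabi_ldiv0(long long return_value) {
    (void)return_value;
    abort();
}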
+// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "../fp_lib.h" + +AEABI_RTABI fp_t +__aeabi_dsub(fp_t, fp_t); + +AEABI_RTABI fp_t +__aeabi_drsub(fp_t a, fp_t b) { + return __aeabi_dsub(b, a); +} diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_fcmp.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_fcmp.S new file mode 100644 index 000000000000..ea5b96c21d57 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_fcmp.S @@ -0,0 +1,52 @@ +//===-- aeabi_fcmp.S - EABI fcmp* implementation ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// int __aeabi_fcmp{eq,lt,le,ge,gt}(float a, float b) { +// int result = __{eq,lt,le,ge,gt}sf2(a, b); +// if (result {==,<,<=,>=,>} 0) { +// return 1; +// } else { +// return 0; +// } +// } + +#if defined(COMPILER_RT_ARMHF_TARGET) +# define CONVERT_FCMP_ARGS_TO_SF2_ARGS \ + vmov s0, r0 SEPARATOR \ + vmov s1, r1 +#else +# define CONVERT_FCMP_ARGS_TO_SF2_ARGS +#endif + +#define DEFINE_AEABI_FCMP(cond) \ + .syntax unified SEPARATOR \ + .p2align 2 SEPARATOR \ +DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond) \ + push { r4, lr } SEPARATOR \ + CONVERT_FCMP_ARGS_TO_SF2_ARGS SEPARATOR \ + bl SYMBOL_NAME(__ ## cond ## sf2) SEPARATOR \ + cmp r0, #0 SEPARATOR \ + b ## cond 1f SEPARATOR \ + movs r0, #0 SEPARATOR \ + pop { r4, pc } SEPARATOR \ +1: SEPARATOR \ + movs r0, #1 SEPARATOR \ + pop { r4, pc } SEPARATOR \ +END_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond) + +DEFINE_AEABI_FCMP(eq) +DEFINE_AEABI_FCMP(lt) +DEFINE_AEABI_FCMP(le) +DEFINE_AEABI_FCMP(ge) +DEFINE_AEABI_FCMP(gt) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_frsub.c b/contrib/compiler-rt/lib/builtins/arm/aeabi_frsub.c new file mode 100644 index 000000000000..34f2303745bc --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_frsub.c @@ -0,0 +1,19 @@ +//===-- lib/arm/aeabi_frsub.c - Single-precision subtraction --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "../fp_lib.h" + +AEABI_RTABI fp_t +__aeabi_fsub(fp_t, fp_t); + +AEABI_RTABI fp_t +__aeabi_frsub(fp_t a, fp_t b) { + return __aeabi_fsub(b, a); +} diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_idivmod.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_idivmod.S new file mode 100644 index 000000000000..9c9c80ab5a7b --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_idivmod.S @@ -0,0 +1,51 @@ +//===-- aeabi_idivmod.S - EABI idivmod implementation ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// struct { int quot, int rem} __aeabi_idivmod(int numerator, int denominator) { +// int rem, quot; +// quot = __divmodsi4(numerator, denominator, &rem); +// return {quot, rem}; +// } + +#if defined(__MINGW32__) +#define __aeabi_idivmod __rt_sdiv +#endif + + .syntax unified + .text + DEFINE_CODE_STATE + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod) +#if defined(USE_THUMB_1) + push {r0, r1, lr} + bl SYMBOL_NAME(__divsi3) + pop {r1, r2, r3} // now r0 = quot, r1 = num, r2 = denom + muls r2, r0, r2 // r2 = quot * denom + subs r1, r1, r2 + JMP (r3) +#else // defined(USE_THUMB_1) + push { lr } + sub sp, sp, #4 + mov r2, sp +#if defined(__MINGW32__) + mov r3, r0 + mov r0, r1 + mov r1, r3 +#endif + bl SYMBOL_NAME(__divmodsi4) + ldr r1, [sp] + add sp, sp, #4 + pop { pc } +#endif // defined(USE_THUMB_1) +END_COMPILERRT_FUNCTION(__aeabi_idivmod) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_ldivmod.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_ldivmod.S new file mode 100644 index 000000000000..038ae5d723a3 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_ldivmod.S @@ -0,0 +1,46 @@ +//===-- aeabi_ldivmod.S - EABI ldivmod implementation ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// struct { int64_t quot, int64_t rem} +// __aeabi_ldivmod(int64_t numerator, int64_t denominator) { +// int64_t rem, quot; +// quot = __divmoddi4(numerator, denominator, &rem); +// return {quot, rem}; +// } + +#if defined(__MINGW32__) +#define __aeabi_ldivmod __rt_sdiv64 +#endif + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_ldivmod) + push {r6, lr} + sub sp, sp, #16 + add r6, sp, #8 + str r6, [sp] +#if defined(__MINGW32__) + movs r6, r0 + movs r0, r2 + movs r2, r6 + movs r6, r1 + movs r1, r3 + movs r3, r6 +#endif + bl SYMBOL_NAME(__divmoddi4) + ldr r2, [sp, #8] + ldr r3, [sp, #12] + add sp, sp, #16 + pop {r6, pc} +END_COMPILERRT_FUNCTION(__aeabi_ldivmod) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_memcmp.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_memcmp.S new file mode 100644 index 000000000000..e86d6113760e --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_memcmp.S @@ -0,0 +1,30 @@ +//===-- aeabi_memcmp.S - EABI memcmp implementation -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// void __aeabi_memcmp(void *dest, void *src, size_t n) { memcmp(dest, src, n); } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_memcmp) +#ifdef USE_THUMB_1 + push {r7, lr} + bl memcmp + pop {r7, pc} +#else + b memcmp +#endif +END_COMPILERRT_FUNCTION(__aeabi_memcmp) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp4, __aeabi_memcmp) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp8, __aeabi_memcmp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_memcpy.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_memcpy.S new file mode 100644 index 000000000000..e83c5fd4dbb3 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_memcpy.S @@ -0,0 +1,30 @@ +//===-- aeabi_memcpy.S - EABI memcpy implementation -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// void __aeabi_memcpy(void *dest, void *src, size_t n) { memcpy(dest, src, n); } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_memcpy) +#ifdef USE_THUMB_1 + push {r7, lr} + bl memcpy + pop {r7, pc} +#else + b memcpy +#endif +END_COMPILERRT_FUNCTION(__aeabi_memcpy) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy4, __aeabi_memcpy) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy8, __aeabi_memcpy) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_memmove.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_memmove.S new file mode 100644 index 000000000000..ee28300e46f2 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_memmove.S @@ -0,0 +1,29 @@ +//===-- aeabi_memmove.S - EABI memmove implementation --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// + +#include "../assembly.h" + +// void __aeabi_memmove(void *dest, void *src, size_t n) { memmove(dest, src, n); } + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_memmove) +#ifdef USE_THUMB_1 + push {r7, lr} + bl memmove + pop {r7, pc} +#else + b memmove +#endif +END_COMPILERRT_FUNCTION(__aeabi_memmove) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove4, __aeabi_memmove) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove8, __aeabi_memmove) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_memset.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_memset.S new file mode 100644 index 000000000000..0a678d7627e7 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_memset.S @@ -0,0 +1,50 @@ +//===-- aeabi_memset.S - EABI memset implementation -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// void __aeabi_memset(void *dest, size_t n, int c) { memset(dest, c, n); } +// void __aeabi_memclr(void *dest, size_t n) { __aeabi_memset(dest, n, 0); } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_memset) + mov r3, r1 + mov r1, r2 + mov r2, r3 +#ifdef USE_THUMB_1 + push {r7, lr} + bl memset + pop {r7, pc} +#else + b memset +#endif +END_COMPILERRT_FUNCTION(__aeabi_memset) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset4, __aeabi_memset) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset8, __aeabi_memset) + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_memclr) + mov r2, r1 + movs r1, #0 +#ifdef USE_THUMB_1 + push {r7, lr} + bl memset + pop {r7, pc} +#else + b memset +#endif +END_COMPILERRT_FUNCTION(__aeabi_memclr) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr4, __aeabi_memclr) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr8, __aeabi_memclr) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_uidivmod.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_uidivmod.S new file mode 100644 index 000000000000..88a4a6d8bc12 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_uidivmod.S @@ -0,0 +1,58 @@ +//===-- aeabi_uidivmod.S - EABI uidivmod implementation -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// struct { unsigned quot, unsigned rem} +// __aeabi_uidivmod(unsigned numerator, unsigned denominator) { +// unsigned rem, quot; +// quot = __udivmodsi4(numerator, denominator, &rem); +// return {quot, rem}; +// } + +#if defined(__MINGW32__) +#define __aeabi_uidivmod __rt_udiv +#endif + + .syntax unified + .text + DEFINE_CODE_STATE + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod) +#if defined(USE_THUMB_1) + cmp r0, r1 + bcc LOCAL_LABEL(case_denom_larger) + push {r0, r1, lr} + bl SYMBOL_NAME(__aeabi_uidiv) + pop {r1, r2, r3} + muls r2, r0, r2 // r2 = quot * denom + subs r1, r1, r2 + JMP (r3) +LOCAL_LABEL(case_denom_larger): + movs r1, r0 + movs r0, #0 + JMP (lr) +#else // defined(USE_THUMB_1) + push { lr } + sub sp, sp, #4 + mov r2, sp +#if defined(__MINGW32__) + mov r3, r0 + mov r0, r1 + mov r1, r3 +#endif + bl SYMBOL_NAME(__udivmodsi4) + ldr r1, [sp] + add sp, sp, #4 + pop { pc } +#endif +END_COMPILERRT_FUNCTION(__aeabi_uidivmod) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_uldivmod.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_uldivmod.S new file mode 100644 index 000000000000..be343b6bc826 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_uldivmod.S @@ -0,0 +1,46 @@ +//===-- aeabi_uldivmod.S - EABI uldivmod implementation -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// struct { uint64_t quot, uint64_t rem} +// __aeabi_uldivmod(uint64_t numerator, uint64_t denominator) { +// uint64_t rem, quot; +// quot = __udivmoddi4(numerator, denominator, &rem); +// return {quot, rem}; +// } + +#if defined(__MINGW32__) +#define __aeabi_uldivmod __rt_udiv64 +#endif + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_uldivmod) + push {r6, lr} + sub sp, sp, #16 + add r6, sp, #8 + str r6, [sp] +#if defined(__MINGW32__) + movs r6, r0 + movs r0, r2 + movs r2, r6 + movs r6, r1 + movs r1, r3 + movs r3, r6 +#endif + bl SYMBOL_NAME(__udivmoddi4) + ldr r2, [sp, #8] + ldr r3, [sp, #12] + add sp, sp, #16 + pop {r6, pc} +END_COMPILERRT_FUNCTION(__aeabi_uldivmod) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/bswapdi2.S b/contrib/compiler-rt/lib/builtins/arm/bswapdi2.S new file mode 100644 index 000000000000..e9db8bac7994 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/bswapdi2.S @@ -0,0 +1,44 @@ +//===------- bswapdi2 - Implement bswapdi2 --------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + + .syntax unified + .text + DEFINE_CODE_STATE + +// +// extern uint64_t __bswapdi2(uint64_t); +// +// Reverse all the bytes in a 64-bit integer. +// + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__bswapdi2) +#if __ARM_ARCH < 6 + // before armv6 does not have "rev" instruction + // r2 = rev(r0) + eor r2, r0, r0, ror #16 + bic r2, r2, #0xff0000 + mov r2, r2, lsr #8 + eor r2, r2, r0, ror #8 + // r0 = rev(r1) + eor r0, r1, r1, ror #16 + bic r0, r0, #0xff0000 + mov r0, r0, lsr #8 + eor r0, r0, r1, ror #8 +#else + rev r2, r0 // r2 = rev(r0) + rev r0, r1 // r0 = rev(r1) +#endif + mov r1, r2 // r1 = r2 = rev(r0) + JMP(lr) +END_COMPILERRT_FUNCTION(__bswapdi2) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/bswapsi2.S b/contrib/compiler-rt/lib/builtins/arm/bswapsi2.S new file mode 100644 index 000000000000..1f6eed5c1bbf --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/bswapsi2.S @@ -0,0 +1,36 @@ +//===------- bswapsi2 - Implement bswapsi2 --------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + + .syntax unified + .text + DEFINE_CODE_STATE + +// +// extern uint32_t __bswapsi2(uint32_t); +// +// Reverse all the bytes in a 32-bit integer. 
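+//
+// As an illustration of the pre-ARMv6 fallback below, the same byte swap can
+// be written in C roughly as follows (sketch only; the helper name and use of
+// uint32_t are not part of this file):
+//
+//   uint32_t bswap32(uint32_t x) {
+//     uint32_t t = x ^ ((x >> 16) | (x << 16)); // x ^ ror(x, 16)
+//     t &= ~0xff0000u;                          // clear bits [23:16]
+//     t >>= 8;
+//     return t ^ ((x >> 8) | (x << 24));        // t ^ ror(x, 8)
+//   }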
+// + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__bswapsi2) +#if __ARM_ARCH < 6 + // before armv6 does not have "rev" instruction + eor r1, r0, r0, ror #16 + bic r1, r1, #0xff0000 + mov r1, r1, lsr #8 + eor r0, r1, r0, ror #8 +#else + rev r0, r0 +#endif + JMP(lr) +END_COMPILERRT_FUNCTION(__bswapsi2) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/chkstk.S b/contrib/compiler-rt/lib/builtins/arm/chkstk.S new file mode 100644 index 000000000000..e3002105897e --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/chkstk.S @@ -0,0 +1,34 @@ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// __chkstk routine +// This routine is windows specific. +// http://msdn.microsoft.com/en-us/library/ms648426.aspx + +// This clobbers the register r12, and the condition codes, and uses r5 and r6 +// as temporaries by backing them up and restoring them afterwards. +// Does not modify any memory or the stack pointer. + +// movw r4, #256 // Number of bytes of stack, in units of 4 byte +// bl __chkstk +// sub.w sp, sp, r4 + +#define PAGE_SIZE 4096 + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__chkstk) + lsl r4, r4, #2 + mov r12, sp + push {r5, r6} + mov r5, r4 +1: + sub r12, r12, #PAGE_SIZE + subs r5, r5, #PAGE_SIZE + ldr r6, [r12] + bgt 1b + + pop {r5, r6} + bx lr +END_COMPILERRT_FUNCTION(__chkstk) diff --git a/contrib/compiler-rt/lib/builtins/arm/clzdi2.S b/contrib/compiler-rt/lib/builtins/arm/clzdi2.S new file mode 100644 index 000000000000..fc03b385cdfa --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/clzdi2.S @@ -0,0 +1,93 @@ +/* ===-- clzdi2.c - Implement __clzdi2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements count leading zeros for 64bit arguments. + * + * ===----------------------------------------------------------------------=== + */ +#include "../assembly.h" + + .syntax unified + .text + DEFINE_CODE_STATE + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__clzdi2) +#ifdef __ARM_FEATURE_CLZ +#ifdef __ARMEB__ + cmp r0, 0 + itee ne + clzne r0, r0 + clzeq r0, r1 + addeq r0, r0, 32 +#else + cmp r1, 0 + itee ne + clzne r0, r1 + clzeq r0, r0 + addeq r0, r0, 32 +#endif + JMP(lr) +#else + /* Assumption: n != 0 */ + + /* + * r0: n + * r1: upper half of n, overwritten after check + * r1: count of leading zeros in n + 1 + * r2: scratch register for shifted r0 + */ +#ifdef __ARMEB__ + cmp r0, 0 + moveq r0, r1 +#else + cmp r1, 0 + movne r0, r1 +#endif + movne r1, 1 + moveq r1, 33 + + /* + * Basic block: + * if ((r0 >> SHIFT) == 0) + * r1 += SHIFT; + * else + * r0 >>= SHIFT; + * for descending powers of two as SHIFT. + */ +#define BLOCK(shift) \ + lsrs r2, r0, shift; \ + movne r0, r2; \ + addeq r1, shift \ + + BLOCK(16) + BLOCK(8) + BLOCK(4) + BLOCK(2) + + /* + * The basic block invariants at this point are (r0 >> 2) == 0 and + * r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1. + * + * r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1) + * ---+----------------+----------------+------------+-------------- + * 1 | 1 | 0 | 0 | 1 + * 2 | 0 | 1 | -1 | 0 + * 3 | 0 | 1 | -1 | 0 + * + * The r1's initial value of 1 compensates for the 1 here. 
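+ *
+ * Worked example: for a 64-bit n == 3, the setup above leaves r0 == 3 and
+ * r1 == 33 (32 zeros counted in the high word, plus the baseline 1); the
+ * four blocks add 16 + 8 + 4 + 2, so r1 == 63, and the final subtraction
+ * computes 63 - (3 >> 1) == 62, the expected count of leading zeros.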
+ */ + sub r0, r1, r0, lsr #1 + + JMP(lr) +#endif // __ARM_FEATURE_CLZ +END_COMPILERRT_FUNCTION(__clzdi2) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/clzsi2.S b/contrib/compiler-rt/lib/builtins/arm/clzsi2.S new file mode 100644 index 000000000000..f2ce59c90119 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/clzsi2.S @@ -0,0 +1,73 @@ +/* ===-- clzsi2.c - Implement __clzsi2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements count leading zeros for 32bit arguments. + * + * ===----------------------------------------------------------------------=== + */ +#include "../assembly.h" + + .syntax unified + .text + DEFINE_CODE_STATE + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__clzsi2) +#ifdef __ARM_FEATURE_CLZ + clz r0, r0 + JMP(lr) +#else + /* Assumption: n != 0 */ + + /* + * r0: n + * r1: count of leading zeros in n + 1 + * r2: scratch register for shifted r0 + */ + mov r1, 1 + + /* + * Basic block: + * if ((r0 >> SHIFT) == 0) + * r1 += SHIFT; + * else + * r0 >>= SHIFT; + * for descending powers of two as SHIFT. + */ + +#define BLOCK(shift) \ + lsrs r2, r0, shift; \ + movne r0, r2; \ + addeq r1, shift \ + + BLOCK(16) + BLOCK(8) + BLOCK(4) + BLOCK(2) + + /* + * The basic block invariants at this point are (r0 >> 2) == 0 and + * r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1. + * + * r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1) + * ---+----------------+----------------+------------+-------------- + * 1 | 1 | 0 | 0 | 1 + * 2 | 0 | 1 | -1 | 0 + * 3 | 0 | 1 | -1 | 0 + * + * The r1's initial value of 1 compensates for the 1 here. + */ + sub r0, r1, r0, lsr #1 + + JMP(lr) +#endif // __ARM_FEATURE_CLZ +END_COMPILERRT_FUNCTION(__clzsi2) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/comparesf2.S b/contrib/compiler-rt/lib/builtins/arm/comparesf2.S new file mode 100644 index 000000000000..c6c4cc067f07 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/comparesf2.S @@ -0,0 +1,296 @@ +//===-- comparesf2.S - Implement single-precision soft-float comparisons --===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the following soft-fp_t comparison routines: +// +// __eqsf2 __gesf2 __unordsf2 +// __lesf2 __gtsf2 +// __ltsf2 +// __nesf2 +// +// The semantics of the routines grouped in each column are identical, so there +// is a single implementation for each, with multiple names. +// +// The routines behave as follows: +// +// __lesf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// 1 if either a or b is NaN +// +// __gesf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// -1 if either a or b is NaN +// +// __unordsf2(a,b) returns 0 if both a and b are numbers +// 1 if either a or b is NaN +// +// Note that __lesf2( ) and __gesf2( ) are identical except in their handling of +// NaN values. 
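+//
+// As a rough C model of the return values described above (illustrative only;
+// the names and the use of isnan() are not part of this file):
+//
+//   int __lesf2(float a, float b) {        // also __eqsf2, __ltsf2, __nesf2
+//     if (isnan(a) || isnan(b)) return 1;
+//     return a < b ? -1 : (a == b ? 0 : 1);
+//   }
+//
+//   int __gesf2(float a, float b) {        // also __gtsf2
+//     if (isnan(a) || isnan(b)) return -1;
+//     return a < b ? -1 : (a == b ? 0 : 1);
+//   }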
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + .syntax unified + .text + DEFINE_CODE_STATE + +@ int __eqsf2(float a, float b) + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__eqsf2) +#if defined(COMPILER_RT_ARMHF_TARGET) + vmov r0, s0 + vmov r1, s1 +#endif + // Make copies of a and b with the sign bit shifted off the top. These will + // be used to detect zeros and NaNs. +#if defined(USE_THUMB_1) + push {r6, lr} + lsls r2, r0, #1 + lsls r3, r1, #1 +#else + mov r2, r0, lsl #1 + mov r3, r1, lsl #1 +#endif + + // We do the comparison in three stages (ignoring NaN values for the time + // being). First, we orr the absolute values of a and b; this sets the Z + // flag if both a and b are zero (of either sign). The shift of r3 doesn't + // effect this at all, but it *does* make sure that the C flag is clear for + // the subsequent operations. +#if defined(USE_THUMB_1) + lsrs r6, r3, #1 + orrs r6, r2 +#else + orrs r12, r2, r3, lsr #1 +#endif + // Next, we check if a and b have the same or different signs. If they have + // opposite signs, this eor will set the N flag. +#if defined(USE_THUMB_1) + beq 1f + movs r6, r0 + eors r6, r1 +1: +#else + it ne + eorsne r12, r0, r1 +#endif + + // If a and b are equal (either both zeros or bit identical; again, we're + // ignoring NaNs for now), this subtract will zero out r0. If they have the + // same sign, the flags are updated as they would be for a comparison of the + // absolute values of a and b. +#if defined(USE_THUMB_1) + bmi 1f + subs r0, r2, r3 +1: +#else + it pl + subspl r0, r2, r3 +#endif + + // If a is smaller in magnitude than b and both have the same sign, place + // the negation of the sign of b in r0. Thus, if both are negative and + // a > b, this sets r0 to 0; if both are positive and a < b, this sets + // r0 to -1. + // + // This is also done if a and b have opposite signs and are not both zero, + // because in that case the subtract was not performed and the C flag is + // still clear from the shift argument in orrs; if a is positive and b + // negative, this places 0 in r0; if a is negative and b positive, -1 is + // placed in r0. +#if defined(USE_THUMB_1) + bhs 1f + // Here if a and b have the same sign and absA < absB, the result is thus + // b < 0 ? 1 : -1. Same if a and b have the opposite sign (ignoring Nan). + movs r0, #1 + lsrs r1, #31 + bne LOCAL_LABEL(CHECK_NAN) + negs r0, r0 + b LOCAL_LABEL(CHECK_NAN) +1: +#else + it lo + mvnlo r0, r1, asr #31 +#endif + + // If a is greater in magnitude than b and both have the same sign, place + // the sign of b in r0. Thus, if both are negative and a < b, -1 is placed + // in r0, which is the desired result. Conversely, if both are positive + // and a > b, zero is placed in r0. +#if defined(USE_THUMB_1) + bls 1f + // Here both have the same sign and absA > absB. + movs r0, #1 + lsrs r1, #31 + beq LOCAL_LABEL(CHECK_NAN) + negs r0, r0 +1: +#else + it hi + movhi r0, r1, asr #31 +#endif + + // If you've been keeping track, at this point r0 contains -1 if a < b and + // 0 if a >= b. All that remains to be done is to set it to 1 if a > b. + // If a == b, then the Z flag is set, so we can get the correct final value + // into r0 by simply or'ing with 1 if Z is clear. + // For Thumb-1, r0 contains -1 if a < b, 0 if a > b and 0 if a == b. +#if !defined(USE_THUMB_1) + it ne + orrne r0, r0, #1 +#endif + + // Finally, we need to deal with NaNs. If either argument is NaN, replace + // the value in r0 with 1. 
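+  // The check relies on r2 and r3 still holding the operands shifted left by
+  // one bit: a single-precision value is NaN exactly when its exponent field
+  // is all ones and its mantissa is non-zero, i.e. when the shifted pattern is
+  // unsigned-greater than 0xFF000000, so comparing against that constant and
+  // testing "higher" flags either operand being NaN.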
+#if defined(USE_THUMB_1) +LOCAL_LABEL(CHECK_NAN): + movs r6, #0xff + lsls r6, #24 + cmp r2, r6 + bhi 1f + cmp r3, r6 +1: + bls 2f + movs r0, #1 +2: + pop {r6, pc} +#else + cmp r2, #0xff000000 + ite ls + cmpls r3, #0xff000000 + movhi r0, #1 + JMP(lr) +#endif +END_COMPILERRT_FUNCTION(__eqsf2) + +DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __eqsf2) +DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __eqsf2) +DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2, __eqsf2) + +@ int __gtsf2(float a, float b) + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__gtsf2) + // Identical to the preceding except in that we return -1 for NaN values. + // Given that the two paths share so much code, one might be tempted to + // unify them; however, the extra code needed to do so makes the code size + // to performance tradeoff very hard to justify for such small functions. +#if defined(COMPILER_RT_ARMHF_TARGET) + vmov r0, s0 + vmov r1, s1 +#endif +#if defined(USE_THUMB_1) + push {r6, lr} + lsls r2, r0, #1 + lsls r3, r1, #1 + lsrs r6, r3, #1 + orrs r6, r2 + beq 1f + movs r6, r0 + eors r6, r1 +1: + bmi 2f + subs r0, r2, r3 +2: + bhs 3f + movs r0, #1 + lsrs r1, #31 + bne LOCAL_LABEL(CHECK_NAN_2) + negs r0, r0 + b LOCAL_LABEL(CHECK_NAN_2) +3: + bls 4f + movs r0, #1 + lsrs r1, #31 + beq LOCAL_LABEL(CHECK_NAN_2) + negs r0, r0 +4: +LOCAL_LABEL(CHECK_NAN_2): + movs r6, #0xff + lsls r6, #24 + cmp r2, r6 + bhi 5f + cmp r3, r6 +5: + bls 6f + movs r0, #1 + negs r0, r0 +6: + pop {r6, pc} +#else + mov r2, r0, lsl #1 + mov r3, r1, lsl #1 + orrs r12, r2, r3, lsr #1 + it ne + eorsne r12, r0, r1 + it pl + subspl r0, r2, r3 + it lo + mvnlo r0, r1, asr #31 + it hi + movhi r0, r1, asr #31 + it ne + orrne r0, r0, #1 + cmp r2, #0xff000000 + ite ls + cmpls r3, #0xff000000 + movhi r0, #-1 + JMP(lr) +#endif +END_COMPILERRT_FUNCTION(__gtsf2) + +DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __gtsf2) + +@ int __unordsf2(float a, float b) + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__unordsf2) + +#if defined(COMPILER_RT_ARMHF_TARGET) + vmov r0, s0 + vmov r1, s1 +#endif + // Return 1 for NaN values, 0 otherwise. + lsls r2, r0, #1 + lsls r3, r1, #1 + movs r0, #0 +#if defined(USE_THUMB_1) + movs r1, #0xff + lsls r1, #24 + cmp r2, r1 + bhi 1f + cmp r3, r1 +1: + bls 2f + movs r0, #1 +2: +#else + cmp r2, #0xff000000 + ite ls + cmpls r3, #0xff000000 + movhi r0, #1 +#endif + JMP(lr) +END_COMPILERRT_FUNCTION(__unordsf2) + +#if defined(COMPILER_RT_ARMHF_TARGET) +DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmpum) + vmov s0, r0 + vmov s1, r1 + b SYMBOL_NAME(__unordsf2) +END_COMPILERRT_FUNCTION(__aeabi_fcmpum) +#else +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fcmpun, __unordsf2) +#endif + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/divdf3vfp.S b/contrib/compiler-rt/lib/builtins/arm/divdf3vfp.S new file mode 100644 index 000000000000..776ba4f24b47 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/divdf3vfp.S @@ -0,0 +1,33 @@ +//===-- divdf3vfp.S - Implement divdf3vfp ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __divdf3vfp(double a, double b); +// +// Divides two double precision floating point numbers using the Darwin +// calling convention where double arguments are passsed in GPR pairs +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__divdf3vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vdiv.f64 d0, d0, d1 +#else + vmov d6, r0, r1 // move first param from r0/r1 pair into d6 + vmov d7, r2, r3 // move second param from r2/r3 pair into d7 + vdiv.f64 d5, d6, d7 + vmov r0, r1, d5 // move result back to r0/r1 pair +#endif + bx lr +END_COMPILERRT_FUNCTION(__divdf3vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/divmodsi4.S b/contrib/compiler-rt/lib/builtins/arm/divmodsi4.S new file mode 100644 index 000000000000..8a027b741efe --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/divmodsi4.S @@ -0,0 +1,71 @@ +/*===-- divmodsi4.S - 32-bit signed integer divide and modulus ------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __divmodsi4 (32-bit signed integer divide and + * modulus) function for the ARM architecture. A naive digit-by-digit + * computation is employed for simplicity. + * + *===----------------------------------------------------------------------===*/ + +#include "../assembly.h" + +#define ESTABLISH_FRAME \ + push {r4-r7, lr} ;\ + add r7, sp, #12 +#define CLEAR_FRAME_AND_RETURN \ + pop {r4-r7, pc} + + .syntax unified + .text + DEFINE_CODE_STATE + +@ int __divmodsi4(int divident, int divisor, int *remainder) +@ Calculate the quotient and remainder of the (signed) division. The return +@ value is the quotient, the remainder is placed in the variable. + + .p2align 3 +DEFINE_COMPILERRT_FUNCTION(__divmodsi4) +#if __ARM_ARCH_EXT_IDIV__ + tst r1, r1 + beq LOCAL_LABEL(divzero) + mov r3, r0 + sdiv r0, r3, r1 + mls r1, r0, r1, r3 + str r1, [r2] + bx lr +LOCAL_LABEL(divzero): + mov r0, #0 + bx lr +#else + ESTABLISH_FRAME +// Set aside the sign of the quotient and modulus, and the address for the +// modulus. + eor r4, r0, r1 + mov r5, r0 + mov r6, r2 +// Take the absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31). + eor ip, r0, r0, asr #31 + eor lr, r1, r1, asr #31 + sub r0, ip, r0, asr #31 + sub r1, lr, r1, asr #31 +// Unsigned divmod: + bl SYMBOL_NAME(__udivmodsi4) +// Apply the sign of quotient and modulus + ldr r1, [r6] + eor r0, r0, r4, asr #31 + eor r1, r1, r5, asr #31 + sub r0, r0, r4, asr #31 + sub r1, r1, r5, asr #31 + str r1, [r6] + CLEAR_FRAME_AND_RETURN +#endif +END_COMPILERRT_FUNCTION(__divmodsi4) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/divsf3vfp.S b/contrib/compiler-rt/lib/builtins/arm/divsf3vfp.S new file mode 100644 index 000000000000..130318f0c37b --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/divsf3vfp.S @@ -0,0 +1,33 @@ +//===-- divsf3vfp.S - Implement divsf3vfp ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern float __divsf3vfp(float a, float b); +// +// Divides two single precision floating point numbers using the Darwin +// calling convention where single arguments are passsed like 32-bit ints. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__divsf3vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vdiv.f32 s0, s0, s1 +#else + vmov s14, r0 // move first param from r0 into float register + vmov s15, r1 // move second param from r1 into float register + vdiv.f32 s13, s14, s15 + vmov r0, s13 // move result back to r0 +#endif + bx lr +END_COMPILERRT_FUNCTION(__divsf3vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/divsi3.S b/contrib/compiler-rt/lib/builtins/arm/divsi3.S new file mode 100644 index 000000000000..19757af177eb --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/divsi3.S @@ -0,0 +1,82 @@ +/*===-- divsi3.S - 32-bit signed integer divide ---------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __divsi3 (32-bit signed integer divide) function + * for the ARM architecture as a wrapper around the unsigned routine. + * + *===----------------------------------------------------------------------===*/ + +#include "../assembly.h" + +#define ESTABLISH_FRAME \ + push {r4, r7, lr} ;\ + add r7, sp, #4 +#define CLEAR_FRAME_AND_RETURN \ + pop {r4, r7, pc} + + .syntax unified + .text + DEFINE_CODE_STATE + + .p2align 3 +// Ok, APCS and AAPCS agree on 32 bit args, so it's safe to use the same routine. +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_idiv, __divsi3) + +@ int __divsi3(int divident, int divisor) +@ Calculate and return the quotient of the (signed) division. + +DEFINE_COMPILERRT_FUNCTION(__divsi3) +#if __ARM_ARCH_EXT_IDIV__ + tst r1,r1 + beq LOCAL_LABEL(divzero) + sdiv r0, r0, r1 + bx lr +LOCAL_LABEL(divzero): + mov r0,#0 + bx lr +#else +ESTABLISH_FRAME +// Set aside the sign of the quotient. +# if defined(USE_THUMB_1) + movs r4, r0 + eors r4, r1 +# else + eor r4, r0, r1 +# endif +// Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31). +# if defined(USE_THUMB_1) + asrs r2, r0, #31 + asrs r3, r1, #31 + eors r0, r2 + eors r1, r3 + subs r0, r0, r2 + subs r1, r1, r3 +# else + eor r2, r0, r0, asr #31 + eor r3, r1, r1, asr #31 + sub r0, r2, r0, asr #31 + sub r1, r3, r1, asr #31 +# endif +// abs(a) / abs(b) + bl SYMBOL_NAME(__udivsi3) +// Apply sign of quotient to result and return. +# if defined(USE_THUMB_1) + asrs r4, #31 + eors r0, r4 + subs r0, r0, r4 +# else + eor r0, r0, r4, asr #31 + sub r0, r0, r4, asr #31 +# endif + CLEAR_FRAME_AND_RETURN +#endif +END_COMPILERRT_FUNCTION(__divsi3) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/eqdf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/eqdf2vfp.S new file mode 100644 index 000000000000..d50706570916 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/eqdf2vfp.S @@ -0,0 +1,37 @@ +//===-- eqdf2vfp.S - Implement eqdf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __eqdf2vfp(double a, double b); +// +// Returns one iff a == b and neither is NaN. +// Uses Darwin calling convention where double precision arguments are passsed +// like in GPR pairs. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__eqdf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f64 d0, d1 +#else + vmov d6, r0, r1 // load r0/r1 pair in double register + vmov d7, r2, r3 // load r2/r3 pair in double register + vcmp.f64 d6, d7 +#endif + vmrs apsr_nzcv, fpscr + ITE(eq) + moveq r0, #1 // set result register to 1 if equal + movne r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__eqdf2vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/eqsf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/eqsf2vfp.S new file mode 100644 index 000000000000..fd72b2fdbdee --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/eqsf2vfp.S @@ -0,0 +1,37 @@ +//===-- eqsf2vfp.S - Implement eqsf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __eqsf2vfp(float a, float b); +// +// Returns one iff a == b and neither is NaN. +// Uses Darwin calling convention where single precision arguments are passsed +// like 32-bit ints +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__eqsf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f32 s0, s1 +#else + vmov s14, r0 // move from GPR 0 to float register + vmov s15, r1 // move from GPR 1 to float register + vcmp.f32 s14, s15 +#endif + vmrs apsr_nzcv, fpscr + ITE(eq) + moveq r0, #1 // set result register to 1 if equal + movne r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__eqsf2vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/extendsfdf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/extendsfdf2vfp.S new file mode 100644 index 000000000000..1079f977bae6 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/extendsfdf2vfp.S @@ -0,0 +1,33 @@ +//===-- extendsfdf2vfp.S - Implement extendsfdf2vfp -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __extendsfdf2vfp(float a); +// +// Converts single precision float to double precision result. +// Uses Darwin calling convention where a single precision parameter is +// passed in a GPR and a double precision result is returned in R0/R1 pair. 
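+//
+// For illustration (not part of this file), a compiler using this convention
+// would in effect lower
+//     double d = (double)f;
+// to a call such as
+//     d = __extendsfdf2vfp(f);   // f arrives in r0, result bits come back in r0/r1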
+// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__extendsfdf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcvt.f64.f32 d0, s0 +#else + vmov s15, r0 // load float register from R0 + vcvt.f64.f32 d7, s15 // convert single to double + vmov r0, r1, d7 // return result in r0/r1 pair +#endif + bx lr +END_COMPILERRT_FUNCTION(__extendsfdf2vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/fixdfsivfp.S b/contrib/compiler-rt/lib/builtins/arm/fixdfsivfp.S new file mode 100644 index 000000000000..5d7b0f856549 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/fixdfsivfp.S @@ -0,0 +1,34 @@ +//===-- fixdfsivfp.S - Implement fixdfsivfp -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __fixdfsivfp(double a); +// +// Converts double precision float to a 32-bit int rounding towards zero. +// Uses Darwin calling convention where a double precision parameter is +// passed in GPR register pair. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__fixdfsivfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcvt.s32.f64 s0, d0 + vmov r0, s0 +#else + vmov d7, r0, r1 // load double register from R0/R1 + vcvt.s32.f64 s15, d7 // convert double to 32-bit int into s15 + vmov r0, s15 // move s15 to result register +#endif + bx lr +END_COMPILERRT_FUNCTION(__fixdfsivfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/fixsfsivfp.S b/contrib/compiler-rt/lib/builtins/arm/fixsfsivfp.S new file mode 100644 index 000000000000..805a277afa34 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/fixsfsivfp.S @@ -0,0 +1,34 @@ +//===-- fixsfsivfp.S - Implement fixsfsivfp -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __fixsfsivfp(float a); +// +// Converts single precision float to a 32-bit int rounding towards zero. +// Uses Darwin calling convention where a single precision parameter is +// passed in a GPR.. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__fixsfsivfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcvt.s32.f32 s0, s0 + vmov r0, s0 +#else + vmov s15, r0 // load float register from R0 + vcvt.s32.f32 s15, s15 // convert single to 32-bit int into s15 + vmov r0, s15 // move s15 to result register +#endif + bx lr +END_COMPILERRT_FUNCTION(__fixsfsivfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/fixunsdfsivfp.S b/contrib/compiler-rt/lib/builtins/arm/fixunsdfsivfp.S new file mode 100644 index 000000000000..4f1b2c8cefdc --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/fixunsdfsivfp.S @@ -0,0 +1,35 @@ +//===-- fixunsdfsivfp.S - Implement fixunsdfsivfp -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern unsigned int __fixunsdfsivfp(double a); +// +// Converts double precision float to a 32-bit unsigned int rounding towards +// zero. All negative values become zero. +// Uses Darwin calling convention where a double precision parameter is +// passed in GPR register pair. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__fixunsdfsivfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcvt.u32.f64 s0, d0 + vmov r0, s0 +#else + vmov d7, r0, r1 // load double register from R0/R1 + vcvt.u32.f64 s15, d7 // convert double to 32-bit int into s15 + vmov r0, s15 // move s15 to result register +#endif + bx lr +END_COMPILERRT_FUNCTION(__fixunsdfsivfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/fixunssfsivfp.S b/contrib/compiler-rt/lib/builtins/arm/fixunssfsivfp.S new file mode 100644 index 000000000000..e5d778236879 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/fixunssfsivfp.S @@ -0,0 +1,35 @@ +//===-- fixunssfsivfp.S - Implement fixunssfsivfp -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern unsigned int __fixunssfsivfp(float a); +// +// Converts single precision float to a 32-bit unsigned int rounding towards +// zero. All negative values become zero. +// Uses Darwin calling convention where a single precision parameter is +// passed in a GPR.. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__fixunssfsivfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcvt.u32.f32 s0, s0 + vmov r0, s0 +#else + vmov s15, r0 // load float register from R0 + vcvt.u32.f32 s15, s15 // convert single to 32-bit unsigned into s15 + vmov r0, s15 // move s15 to result register +#endif + bx lr +END_COMPILERRT_FUNCTION(__fixunssfsivfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/floatsidfvfp.S b/contrib/compiler-rt/lib/builtins/arm/floatsidfvfp.S new file mode 100644 index 000000000000..3297ad44d8cd --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/floatsidfvfp.S @@ -0,0 +1,34 @@ +//===-- floatsidfvfp.S - Implement floatsidfvfp ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __floatsidfvfp(int a); +// +// Converts a 32-bit int to a double precision float. +// Uses Darwin calling convention where a double precision result is +// return in GPR register pair. 
+// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__floatsidfvfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vmov s0, r0 + vcvt.f64.s32 d0, s0 +#else + vmov s15, r0 // move int to float register s15 + vcvt.f64.s32 d7, s15 // convert 32-bit int in s15 to double in d7 + vmov r0, r1, d7 // move d7 to result register pair r0/r1 +#endif + bx lr +END_COMPILERRT_FUNCTION(__floatsidfvfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/floatsisfvfp.S b/contrib/compiler-rt/lib/builtins/arm/floatsisfvfp.S new file mode 100644 index 000000000000..65408b54b8d4 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/floatsisfvfp.S @@ -0,0 +1,34 @@ +//===-- floatsisfvfp.S - Implement floatsisfvfp ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern float __floatsisfvfp(int a); +// +// Converts single precision float to a 32-bit int rounding towards zero. +// Uses Darwin calling convention where a single precision result is +// return in a GPR.. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__floatsisfvfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vmov s0, r0 + vcvt.f32.s32 s0, s0 +#else + vmov s15, r0 // move int to float register s15 + vcvt.f32.s32 s15, s15 // convert 32-bit int in s15 to float in s15 + vmov r0, s15 // move s15 to result register +#endif + bx lr +END_COMPILERRT_FUNCTION(__floatsisfvfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/floatunssidfvfp.S b/contrib/compiler-rt/lib/builtins/arm/floatunssidfvfp.S new file mode 100644 index 000000000000..d7a7024a25b8 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/floatunssidfvfp.S @@ -0,0 +1,34 @@ +//===-- floatunssidfvfp.S - Implement floatunssidfvfp ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __floatunssidfvfp(unsigned int a); +// +// Converts a 32-bit int to a double precision float. +// Uses Darwin calling convention where a double precision result is +// return in GPR register pair. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__floatunssidfvfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vmov s0, r0 + vcvt.f64.u32 d0, s0 +#else + vmov s15, r0 // move int to float register s15 + vcvt.f64.u32 d7, s15 // convert 32-bit int in s15 to double in d7 + vmov r0, r1, d7 // move d7 to result register pair r0/r1 +#endif + bx lr +END_COMPILERRT_FUNCTION(__floatunssidfvfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/floatunssisfvfp.S b/contrib/compiler-rt/lib/builtins/arm/floatunssisfvfp.S new file mode 100644 index 000000000000..1ca856519a92 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/floatunssisfvfp.S @@ -0,0 +1,34 @@ +//===-- floatunssisfvfp.S - Implement floatunssisfvfp ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern float __floatunssisfvfp(unsigned int a); +// +// Converts single precision float to a 32-bit int rounding towards zero. +// Uses Darwin calling convention where a single precision result is +// return in a GPR.. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__floatunssisfvfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vmov s0, r0 + vcvt.f32.u32 s0, s0 +#else + vmov s15, r0 // move int to float register s15 + vcvt.f32.u32 s15, s15 // convert 32-bit int in s15 to float in s15 + vmov r0, s15 // move s15 to result register +#endif + bx lr +END_COMPILERRT_FUNCTION(__floatunssisfvfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/gedf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/gedf2vfp.S new file mode 100644 index 000000000000..364fc5b24cd1 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/gedf2vfp.S @@ -0,0 +1,37 @@ +//===-- gedf2vfp.S - Implement gedf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __gedf2vfp(double a, double b); +// +// Returns one iff a >= b and neither is NaN. +// Uses Darwin calling convention where double precision arguments are passsed +// like in GPR pairs. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__gedf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f64 d0, d1 +#else + vmov d6, r0, r1 // load r0/r1 pair in double register + vmov d7, r2, r3 // load r2/r3 pair in double register + vcmp.f64 d6, d7 +#endif + vmrs apsr_nzcv, fpscr + ITE(ge) + movge r0, #1 // set result register to 1 if greater than or equal + movlt r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__gedf2vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/gesf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/gesf2vfp.S new file mode 100644 index 000000000000..346c3473ae4c --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/gesf2vfp.S @@ -0,0 +1,37 @@ +//===-- gesf2vfp.S - Implement gesf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __gesf2vfp(float a, float b); +// +// Returns one iff a >= b and neither is NaN. 
+// Uses Darwin calling convention where single precision arguments are passsed +// like 32-bit ints +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__gesf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f32 s0, s1 +#else + vmov s14, r0 // move from GPR 0 to float register + vmov s15, r1 // move from GPR 1 to float register + vcmp.f32 s14, s15 +#endif + vmrs apsr_nzcv, fpscr + ITE(ge) + movge r0, #1 // set result register to 1 if greater than or equal + movlt r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__gesf2vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/gtdf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/gtdf2vfp.S new file mode 100644 index 000000000000..3389c3ad9737 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/gtdf2vfp.S @@ -0,0 +1,37 @@ +//===-- gtdf2vfp.S - Implement gtdf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __gtdf2vfp(double a, double b); +// +// Returns one iff a > b and neither is NaN. +// Uses Darwin calling convention where double precision arguments are passsed +// like in GPR pairs. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__gtdf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f64 d0, d1 +#else + vmov d6, r0, r1 // load r0/r1 pair in double register + vmov d7, r2, r3 // load r2/r3 pair in double register + vcmp.f64 d6, d7 +#endif + vmrs apsr_nzcv, fpscr + ITE(gt) + movgt r0, #1 // set result register to 1 if equal + movle r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__gtdf2vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/gtsf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/gtsf2vfp.S new file mode 100644 index 000000000000..afdba8b018e2 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/gtsf2vfp.S @@ -0,0 +1,37 @@ +//===-- gtsf2vfp.S - Implement gtsf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __gtsf2vfp(float a, float b); +// +// Returns one iff a > b and neither is NaN. 
+// Uses Darwin calling convention where single precision arguments are passsed +// like 32-bit ints +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__gtsf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f32 s0, s1 +#else + vmov s14, r0 // move from GPR 0 to float register + vmov s15, r1 // move from GPR 1 to float register + vcmp.f32 s14, s15 +#endif + vmrs apsr_nzcv, fpscr + ITE(gt) + movgt r0, #1 // set result register to 1 if equal + movle r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__gtsf2vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/ledf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/ledf2vfp.S new file mode 100644 index 000000000000..4bbe4c86837c --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/ledf2vfp.S @@ -0,0 +1,37 @@ +//===-- ledf2vfp.S - Implement ledf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __ledf2vfp(double a, double b); +// +// Returns one iff a <= b and neither is NaN. +// Uses Darwin calling convention where double precision arguments are passsed +// like in GPR pairs. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__ledf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f64 d0, d1 +#else + vmov d6, r0, r1 // load r0/r1 pair in double register + vmov d7, r2, r3 // load r2/r3 pair in double register + vcmp.f64 d6, d7 +#endif + vmrs apsr_nzcv, fpscr + ITE(ls) + movls r0, #1 // set result register to 1 if equal + movhi r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__ledf2vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/lesf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/lesf2vfp.S new file mode 100644 index 000000000000..51232bd8cedc --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/lesf2vfp.S @@ -0,0 +1,37 @@ +//===-- lesf2vfp.S - Implement lesf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __lesf2vfp(float a, float b); +// +// Returns one iff a <= b and neither is NaN. 
+// Uses Darwin calling convention where single precision arguments are passsed +// like 32-bit ints +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__lesf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f32 s0, s1 +#else + vmov s14, r0 // move from GPR 0 to float register + vmov s15, r1 // move from GPR 1 to float register + vcmp.f32 s14, s15 +#endif + vmrs apsr_nzcv, fpscr + ITE(ls) + movls r0, #1 // set result register to 1 if equal + movhi r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__lesf2vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/ltdf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/ltdf2vfp.S new file mode 100644 index 000000000000..8e2928c813d2 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/ltdf2vfp.S @@ -0,0 +1,37 @@ +//===-- ltdf2vfp.S - Implement ltdf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __ltdf2vfp(double a, double b); +// +// Returns one iff a < b and neither is NaN. +// Uses Darwin calling convention where double precision arguments are passsed +// like in GPR pairs. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__ltdf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f64 d0, d1 +#else + vmov d6, r0, r1 // load r0/r1 pair in double register + vmov d7, r2, r3 // load r2/r3 pair in double register + vcmp.f64 d6, d7 +#endif + vmrs apsr_nzcv, fpscr + ITE(mi) + movmi r0, #1 // set result register to 1 if equal + movpl r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__ltdf2vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/ltsf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/ltsf2vfp.S new file mode 100644 index 000000000000..59c00c6bab67 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/ltsf2vfp.S @@ -0,0 +1,37 @@ +//===-- ltsf2vfp.S - Implement ltsf2vfp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __ltsf2vfp(float a, float b); +// +// Returns one iff a < b and neither is NaN. 
+// Uses Darwin calling convention where single precision arguments are passed
+// like 32-bit ints
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__ltsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f32 s0, s1
+#else
+	vmov	s14, r0     // move from GPR 0 to float register
+	vmov	s15, r1     // move from GPR 1 to float register
+	vcmp.f32 s14, s15
+#endif
+	vmrs	apsr_nzcv, fpscr
+	ITE(mi)
+	movmi	r0, #1      // set result register to 1 if less than
+	movpl	r0, #0
+	bx	lr
+END_COMPILERRT_FUNCTION(__ltsf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/modsi3.S b/contrib/compiler-rt/lib/builtins/arm/modsi3.S
new file mode 100644
index 000000000000..be263834d7f1
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/modsi3.S
@@ -0,0 +1,60 @@
+/*===-- modsi3.S - 32-bit signed integer modulus --------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __modsi3 (32-bit signed integer modulus) function
+ * for the ARM architecture as a wrapper around the unsigned routine.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+#define ESTABLISH_FRAME    \
+    push   {r4, r7, lr}   ;\
+    add     r7,     sp, #4
+#define CLEAR_FRAME_AND_RETURN \
+    pop    {r4, r7, pc}
+
+	.syntax unified
+	.text
+	DEFINE_CODE_STATE
+
+@ int __modsi3(int dividend, int divisor)
+@   Calculate and return the remainder of the (signed) division.
+
+	.p2align 3
+DEFINE_COMPILERRT_FUNCTION(__modsi3)
+#if __ARM_ARCH_EXT_IDIV__
+	tst     r1, r1
+	beq     LOCAL_LABEL(divzero)
+	sdiv	r2, r0, r1
+	mls 	r0, r2, r1, r0
+	bx  	lr
+LOCAL_LABEL(divzero):
+	mov     r0, #0
+	bx      lr
+#else
+	ESTABLISH_FRAME
+	//  Set aside the sign of the dividend.
+	mov     r4,     r0
+	//  Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31).
+	eor     r2,     r0, r0, asr #31
+	eor     r3,     r1, r1, asr #31
+	sub     r0,     r2, r0, asr #31
+	sub     r1,     r3, r1, asr #31
+	//  abs(a) % abs(b)
+	bl      SYMBOL_NAME(__umodsi3)
+	//  Apply sign of dividend to result and return.
+	eor     r0,     r0, r4, asr #31
+	sub     r0,     r0, r4, asr #31
+	CLEAR_FRAME_AND_RETURN
+#endif
+END_COMPILERRT_FUNCTION(__modsi3)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/muldf3vfp.S b/contrib/compiler-rt/lib/builtins/arm/muldf3vfp.S
new file mode 100644
index 000000000000..aa7b23495034
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/muldf3vfp.S
@@ -0,0 +1,33 @@
+//===-- muldf3vfp.S - Implement muldf3vfp ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __muldf3vfp(double a, double b);
+//
+// Multiplies two double precision floating point numbers using the Darwin
+// calling convention where double arguments are passed in GPR pairs
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__muldf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vmul.f64 d0, d0, d1
+#else
+	vmov 	d6, r0, r1        // move first param from r0/r1 pair into d6
+	vmov 	d7, r2, r3        // move second param from r2/r3 pair into d7
+	vmul.f64 d6, d6, d7
+	vmov 	r0, r1, d6        // move result back to r0/r1 pair
+#endif
+	bx	lr
+END_COMPILERRT_FUNCTION(__muldf3vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/mulsf3vfp.S b/contrib/compiler-rt/lib/builtins/arm/mulsf3vfp.S
new file mode 100644
index 000000000000..a1da789dcade
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/mulsf3vfp.S
@@ -0,0 +1,33 @@
+//===-- mulsf3vfp.S - Implement mulsf3vfp ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern float __mulsf3vfp(float a, float b);
+//
+// Multiplies two single precision floating point numbers using the Darwin
+// calling convention where single arguments are passed like 32-bit ints.
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__mulsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vmul.f32 s0, s0, s1
+#else
+	vmov	s14, r0     // move first param from r0 into float register
+	vmov	s15, r1     // move second param from r1 into float register
+	vmul.f32 s13, s14, s15
+#endif
+	vmov	r0, s13     // move result back to r0
+	bx	lr
+END_COMPILERRT_FUNCTION(__mulsf3vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/nedf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/nedf2vfp.S
new file mode 100644
index 000000000000..aef72eb00974
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/nedf2vfp.S
@@ -0,0 +1,37 @@
+//===-- nedf2vfp.S - Implement nedf2vfp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __nedf2vfp(double a, double b);
+//
+// Returns one iff a != b and neither is NaN.
+// Uses Darwin calling convention where double precision arguments are passed
+// in GPR pairs.
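+//
+// Reference-only sketch of the intended semantics (the _ref name is purely
+// illustrative and not part of the library):
+//
+//   int nedf2vfp_ref(double a, double b) { return (a != b) ? 1 : 0; }
+//
+// As with the other __{gt,ge,lt,le,eq,ne}{s,d}f2vfp wrappers, the result is
+// 1 when the comparison holds and 0 otherwise; a NaN operand yields 0 from
+// all of them except the != wrappers, which yield 1.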
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__nedf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f64 d0, d1
+#else
+	vmov	d6, r0, r1  // load r0/r1 pair in double register
+	vmov	d7, r2, r3  // load r2/r3 pair in double register
+	vcmp.f64 d6, d7
+#endif
+	vmrs	apsr_nzcv, fpscr
+	ITE(ne)
+	movne	r0, #1      // set result register to 1 if unequal
+	moveq	r0, #0
+	bx	lr
+END_COMPILERRT_FUNCTION(__nedf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/negdf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/negdf2vfp.S
new file mode 100644
index 000000000000..81f0ab8eec1d
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/negdf2vfp.S
@@ -0,0 +1,30 @@
+//===-- negdf2vfp.S - Implement negdf2vfp ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __negdf2vfp(double a);
+//
+// Returns the negation of a double precision floating point number using the
+// Darwin calling convention where double arguments are passed in GPR pairs.
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__negdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vneg.f64 d0, d0
+#else
+	eor	r1, r1, #-2147483648	// flip sign bit on double in r0/r1 pair
+#endif
+	bx	lr
+END_COMPILERRT_FUNCTION(__negdf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/negsf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/negsf2vfp.S
new file mode 100644
index 000000000000..46ab4a9cf164
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/negsf2vfp.S
@@ -0,0 +1,30 @@
+//===-- negsf2vfp.S - Implement negsf2vfp ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern float __negsf2vfp(float a);
+//
+// Returns the negation of a single precision floating point number using the
+// Darwin calling convention where single arguments are passed like 32-bit ints
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__negsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vneg.f32 s0, s0
+#else
+	eor	r0, r0, #-2147483648	// flip sign bit on float in r0
+#endif
+	bx	lr
+END_COMPILERRT_FUNCTION(__negsf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/nesf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/nesf2vfp.S
new file mode 100644
index 000000000000..50d60f493005
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/nesf2vfp.S
@@ -0,0 +1,37 @@
+//===-- nesf2vfp.S - Implement nesf2vfp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __nesf2vfp(float a, float b);
+//
+// Returns one iff a != b and neither is NaN.
+// Uses Darwin calling convention where single precision arguments are passsed +// like 32-bit ints +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__nesf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f32 s0, s1 +#else + vmov s14, r0 // move from GPR 0 to float register + vmov s15, r1 // move from GPR 1 to float register + vcmp.f32 s14, s15 +#endif + vmrs apsr_nzcv, fpscr + ITE(ne) + movne r0, #1 // set result register to 1 if unequal + moveq r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__nesf2vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/restore_vfp_d8_d15_regs.S b/contrib/compiler-rt/lib/builtins/arm/restore_vfp_d8_d15_regs.S new file mode 100644 index 000000000000..0692cf3e1b77 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/restore_vfp_d8_d15_regs.S @@ -0,0 +1,35 @@ +//===-- save_restore_regs.S - Implement save/restore* ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// When compiling C++ functions that need to handle thrown exceptions the +// compiler is required to save all registers and call __Unwind_SjLj_Register +// in the function prolog. But when compiling for thumb1, there are +// no instructions to access the floating point registers, so the +// compiler needs to add a call to the helper function _save_vfp_d8_d15_regs +// written in ARM to save the float registers. In the epilog, the compiler +// must also add a call to __restore_vfp_d8_d15_regs to restore those registers. +// + + .text + .syntax unified + +// +// Restore registers d8-d15 from stack +// + .p2align 2 +DEFINE_COMPILERRT_PRIVATE_FUNCTION(__restore_vfp_d8_d15_regs) + vldmia sp!, {d8-d15} // pop registers d8-d15 off stack + bx lr // return to prolog +END_COMPILERRT_FUNCTION(__restore_vfp_d8_d15_regs) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/save_vfp_d8_d15_regs.S b/contrib/compiler-rt/lib/builtins/arm/save_vfp_d8_d15_regs.S new file mode 100644 index 000000000000..544dd5467a4d --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/save_vfp_d8_d15_regs.S @@ -0,0 +1,35 @@ +//===-- save_restore_regs.S - Implement save/restore* ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// When compiling C++ functions that need to handle thrown exceptions the +// compiler is required to save all registers and call __Unwind_SjLj_Register +// in the function prolog. But when compiling for thumb1, there are +// no instructions to access the floating point registers, so the +// compiler needs to add a call to the helper function _save_vfp_d8_d15_regs +// written in ARM to save the float registers. In the epilog, the compiler +// must also add a call to __restore_vfp_d8_d15_regs to restore those registers. 
+// + + .text + .syntax unified + +// +// Save registers d8-d15 onto stack +// + .p2align 2 +DEFINE_COMPILERRT_PRIVATE_FUNCTION(__save_vfp_d8_d15_regs) + vstmdb sp!, {d8-d15} // push registers d8-d15 onto stack + bx lr // return to prolog +END_COMPILERRT_FUNCTION(__save_vfp_d8_d15_regs) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/softfloat-alias.list b/contrib/compiler-rt/lib/builtins/arm/softfloat-alias.list new file mode 100644 index 000000000000..cc6a4b3cdd2e --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/softfloat-alias.list @@ -0,0 +1,21 @@ +# +# These are soft float functions which can be +# aliased to the *vfp functions on arm processors +# that support floating point instructions. +# +___adddf3vfp ___adddf3 +___addsf3vfp ___addsf3 +___divdf3vfp ___divdf3 +___divsf3vfp ___divsf3 +___extendsfdf2vfp ___extendsfdf2 +___fixdfsivfp ___fixdfsi +___fixsfsivfp ___fixsfsi +___floatsidfvfp ___floatsidf +___floatsisfvfp ___floatsisf +___muldf3vfp ___muldf3 +___mulsf3vfp ___mulsf3 +___subdf3vfp ___subdf3 +___subsf3vfp ___subsf3 +___truncdfsf2vfp ___truncdfsf2 +___floatunssidfvfp ___floatunsidf +___floatunssisfvfp ___floatunsisf diff --git a/contrib/compiler-rt/lib/builtins/arm/subdf3vfp.S b/contrib/compiler-rt/lib/builtins/arm/subdf3vfp.S new file mode 100644 index 000000000000..2b6f2bdbfdd5 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/subdf3vfp.S @@ -0,0 +1,33 @@ +//===-- subdf3vfp.S - Implement subdf3vfp ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __subdf3vfp(double a, double b); +// +// Returns difference between two double precision floating point numbers using +// the Darwin calling convention where double arguments are passsed in GPR pairs +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__subdf3vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vsub.f64 d0, d0, d1 +#else + vmov d6, r0, r1 // move first param from r0/r1 pair into d6 + vmov d7, r2, r3 // move second param from r2/r3 pair into d7 + vsub.f64 d6, d6, d7 + vmov r0, r1, d6 // move result back to r0/r1 pair +#endif + bx lr +END_COMPILERRT_FUNCTION(__subdf3vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/subsf3vfp.S b/contrib/compiler-rt/lib/builtins/arm/subsf3vfp.S new file mode 100644 index 000000000000..3e83ea26507d --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/subsf3vfp.S @@ -0,0 +1,34 @@ +//===-- subsf3vfp.S - Implement subsf3vfp ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern float __subsf3vfp(float a, float b); +// +// Returns the difference between two single precision floating point numbers +// using the Darwin calling convention where single arguments are passsed +// like 32-bit ints. 
+// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__subsf3vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vsub.f32 s0, s0, s1 +#else + vmov s14, r0 // move first param from r0 into float register + vmov s15, r1 // move second param from r1 into float register + vsub.f32 s14, s14, s15 + vmov r0, s14 // move result back to r0 +#endif + bx lr +END_COMPILERRT_FUNCTION(__subsf3vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/switch16.S b/contrib/compiler-rt/lib/builtins/arm/switch16.S new file mode 100644 index 000000000000..df9e38e176ce --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/switch16.S @@ -0,0 +1,46 @@ +//===-- switch.S - Implement switch* --------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// When compiling switch statements in thumb mode, the compiler +// can use these __switch* helper functions The compiler emits a blx to +// the __switch* function followed by a table of displacements for each +// case statement. On entry, R0 is the index into the table. The __switch* +// function uses the return address in lr to find the start of the table. +// The first entry in the table is the count of the entries in the table. +// It then uses R0 to index into the table and get the displacement of the +// address to jump to. If R0 is greater than the size of the table, it jumps +// to the last entry in the table. Each displacement in the table is actually +// the distance from lr to the label, thus making the tables PIC. + + + .text + .syntax unified + +// +// The table contains signed 2-byte sized elements which are 1/2 the distance +// from lr to the target label. +// + .p2align 2 +DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch16) + ldrh ip, [lr, #-1] // get first 16-bit word in table + cmp r0, ip // compare with index + add r0, lr, r0, lsl #1 // compute address of element in table + add ip, lr, ip, lsl #1 // compute address of last element in table + ite lo + ldrshlo r0, [r0, #1] // load 16-bit element if r0 is in range + ldrshhs r0, [ip, #1] // load 16-bit element if r0 out of range + add ip, lr, r0, lsl #1 // compute label = lr + element*2 + bx ip // jump to computed label +END_COMPILERRT_FUNCTION(__switch16) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/switch32.S b/contrib/compiler-rt/lib/builtins/arm/switch32.S new file mode 100644 index 000000000000..d97b5361436d --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/switch32.S @@ -0,0 +1,46 @@ +//===-- switch.S - Implement switch* --------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// When compiling switch statements in thumb mode, the compiler +// can use these __switch* helper functions The compiler emits a blx to +// the __switch* function followed by a table of displacements for each +// case statement. On entry, R0 is the index into the table. The __switch* +// function uses the return address in lr to find the start of the table. 
+// The first entry in the table is the count of the entries in the table. +// It then uses R0 to index into the table and get the displacement of the +// address to jump to. If R0 is greater than the size of the table, it jumps +// to the last entry in the table. Each displacement in the table is actually +// the distance from lr to the label, thus making the tables PIC. + + + .text + .syntax unified + +// +// The table contains signed 4-byte sized elements which are the distance +// from lr to the target label. +// + .p2align 2 +DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch32) + ldr ip, [lr, #-1] // get first 32-bit word in table + cmp r0, ip // compare with index + add r0, lr, r0, lsl #2 // compute address of element in table + add ip, lr, ip, lsl #2 // compute address of last element in table + ite lo + ldrlo r0, [r0, #3] // load 32-bit element if r0 is in range + ldrhs r0, [ip, #3] // load 32-bit element if r0 out of range + add ip, lr, r0 // compute label = lr + element + bx ip // jump to computed label +END_COMPILERRT_FUNCTION(__switch32) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/switch8.S b/contrib/compiler-rt/lib/builtins/arm/switch8.S new file mode 100644 index 000000000000..4d9e0eaff845 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/switch8.S @@ -0,0 +1,44 @@ +//===-- switch.S - Implement switch* --------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// When compiling switch statements in thumb mode, the compiler +// can use these __switch* helper functions The compiler emits a blx to +// the __switch* function followed by a table of displacements for each +// case statement. On entry, R0 is the index into the table. The __switch* +// function uses the return address in lr to find the start of the table. +// The first entry in the table is the count of the entries in the table. +// It then uses R0 to index into the table and get the displacement of the +// address to jump to. If R0 is greater than the size of the table, it jumps +// to the last entry in the table. Each displacement in the table is actually +// the distance from lr to the label, thus making the tables PIC. + + + .text + .syntax unified + +// +// The table contains signed byte sized elements which are 1/2 the distance +// from lr to the target label. +// + .p2align 2 +DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch8) + ldrb ip, [lr, #-1] // get first byte in table + cmp r0, ip // signed compare with index + ite lo + ldrsblo r0, [lr, r0] // get indexed byte out of table + ldrsbhs r0, [lr, ip] // if out of range, use last entry in table + add ip, lr, r0, lsl #1 // compute label = lr + element*2 + bx ip // jump to computed label +END_COMPILERRT_FUNCTION(__switch8) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/switchu8.S b/contrib/compiler-rt/lib/builtins/arm/switchu8.S new file mode 100644 index 000000000000..4ffe35f0549b --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/switchu8.S @@ -0,0 +1,44 @@ +//===-- switch.S - Implement switch* --------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// When compiling switch statements in thumb mode, the compiler +// can use these __switch* helper functions The compiler emits a blx to +// the __switch* function followed by a table of displacements for each +// case statement. On entry, R0 is the index into the table. The __switch* +// function uses the return address in lr to find the start of the table. +// The first entry in the table is the count of the entries in the table. +// It then uses R0 to index into the table and get the displacement of the +// address to jump to. If R0 is greater than the size of the table, it jumps +// to the last entry in the table. Each displacement in the table is actually +// the distance from lr to the label, thus making the tables PIC. + + + .text + .syntax unified + +// +// The table contains unsigned byte sized elements which are 1/2 the distance +// from lr to the target label. +// + .p2align 2 +DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switchu8) + ldrb ip, [lr, #-1] // get first byte in table + cmp r0, ip // compare with index + ite lo + ldrblo r0, [lr, r0] // get indexed byte out of table + ldrbhs r0, [lr, ip] // if out of range, use last entry in table + add ip, lr, r0, lsl #1 // compute label = lr + element*2 + bx ip // jump to computed label +END_COMPILERRT_FUNCTION(__switchu8) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/sync-ops.h b/contrib/compiler-rt/lib/builtins/arm/sync-ops.h new file mode 100644 index 000000000000..ee02c30c6eaa --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/sync-ops.h @@ -0,0 +1,64 @@ +/*===-- sync-ops.h - --===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements outline macros for the __sync_fetch_and_* + * operations. Different instantiations will generate appropriate assembly for + * ARM and Thumb-2 versions of the functions. 
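 *
 * Reference-only sketch of the shape the 4-byte instantiations expand to
 * (C, types from <stdint.h>, assuming Clang's exclusive-access builtins are
 * available; the _sketch name is illustrative and not part of the library):
 *
 *   uint32_t fetch_and_op_4_sketch(uint32_t *p, uint32_t val) {
 *     uint32_t old, upd;                            // dmb on entry
 *     do {
 *       old = __builtin_arm_ldrex(p);               // ldrex  r0, [r12]
 *       upd = old + val;                            // op(r2, r0, r1)
 *     } while (__builtin_arm_strex(upd, p) != 0);   // strex/cmp/bne retry
 *     return old;                                   // dmb; old value in r0
 *   }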
+ * + *===----------------------------------------------------------------------===*/ + +#include "../assembly.h" + +#define SYNC_OP_4(op) \ + .p2align 2 ; \ + .thumb ; \ + .syntax unified ; \ + DEFINE_COMPILERRT_THUMB_FUNCTION(__sync_fetch_and_ ## op) \ + dmb ; \ + mov r12, r0 ; \ + LOCAL_LABEL(tryatomic_ ## op): \ + ldrex r0, [r12] ; \ + op(r2, r0, r1) ; \ + strex r3, r2, [r12] ; \ + cmp r3, #0 ; \ + bne LOCAL_LABEL(tryatomic_ ## op) ; \ + dmb ; \ + bx lr + +#define SYNC_OP_8(op) \ + .p2align 2 ; \ + .thumb ; \ + .syntax unified ; \ + DEFINE_COMPILERRT_THUMB_FUNCTION(__sync_fetch_and_ ## op) \ + push {r4, r5, r6, lr} ; \ + dmb ; \ + mov r12, r0 ; \ + LOCAL_LABEL(tryatomic_ ## op): \ + ldrexd r0, r1, [r12] ; \ + op(r4, r5, r0, r1, r2, r3) ; \ + strexd r6, r4, r5, [r12] ; \ + cmp r6, #0 ; \ + bne LOCAL_LABEL(tryatomic_ ## op) ; \ + dmb ; \ + pop {r4, r5, r6, pc} + +#define MINMAX_4(rD, rN, rM, cmp_kind) \ + cmp rN, rM ; \ + mov rD, rM ; \ + it cmp_kind ; \ + mov##cmp_kind rD, rN + +#define MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, cmp_kind) \ + cmp rN_LO, rM_LO ; \ + sbcs rN_HI, rM_HI ; \ + mov rD_LO, rM_LO ; \ + mov rD_HI, rM_HI ; \ + itt cmp_kind ; \ + mov##cmp_kind rD_LO, rN_LO ; \ + mov##cmp_kind rD_HI, rN_HI diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_add_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_add_4.S new file mode 100644 index 000000000000..7877d6c46c11 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_add_4.S @@ -0,0 +1,23 @@ +/*===-- sync_fetch_and_add_4.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_add_4 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +/* "adds" is 2 bytes shorter than "add". */ +#define add_4(rD, rN, rM) add rD, rN, rM + +SYNC_OP_4(add_4) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_add_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_add_8.S new file mode 100644 index 000000000000..1df07a342a1b --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_add_8.S @@ -0,0 +1,26 @@ +/*===-- sync_fetch_and_add_8.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_add_8 function for the ARM + * architecture. 
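 *
 * The add_8 operation below pairs a low-word add that sets the carry flag
 * with an add-with-carry on the high words; in C terms (sketch only):
 *
 *   lo = a_lo + b_lo;                 // adds rD_LO, rN_LO, rM_LO
 *   hi = a_hi + b_hi + (lo < a_lo);   // adc  rD_HI, rN_HI, rM_HI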
+ * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define add_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ + adds rD_LO, rN_LO, rM_LO ; \ + adc rD_HI, rN_HI, rM_HI + +SYNC_OP_8(add_8) +#endif + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_and_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_and_4.S new file mode 100644 index 000000000000..720ff02279cd --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_and_4.S @@ -0,0 +1,22 @@ +/*===-- sync_fetch_and_and_4.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_and_4 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#define and_4(rD, rN, rM) and rD, rN, rM + +SYNC_OP_4(and_4) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_and_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_and_8.S new file mode 100644 index 000000000000..4f7b5ca7ab29 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_and_8.S @@ -0,0 +1,26 @@ +/*===-- sync_fetch_and_and_8.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_and_8 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define and_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ + and rD_LO, rN_LO, rM_LO ; \ + and rD_HI, rN_HI, rM_HI + +SYNC_OP_8(and_8) +#endif + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_max_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_max_4.S new file mode 100644 index 000000000000..43da9c7d4067 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_max_4.S @@ -0,0 +1,22 @@ +/*===-- sync_fetch_and_max_4.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_max_4 function for the ARM + * architecture. 
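 *
 * max_4 below instantiates MINMAX_4 with the "gt" condition, i.e. a
 * conditional select (sketch only):
 *
 *   result = (old > val) ? old : val;   // cmp; mov rD, rM; it gt; movgt rD, rN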
+ * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#define max_4(rD, rN, rM) MINMAX_4(rD, rN, rM, gt) + +SYNC_OP_4(max_4) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_max_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_max_8.S new file mode 100644 index 000000000000..898fc6202ac8 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_max_8.S @@ -0,0 +1,24 @@ +/*===-- sync_fetch_and_max_8.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_max_8 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define max_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, gt) + +SYNC_OP_8(max_8) +#endif + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_min_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_min_4.S new file mode 100644 index 000000000000..bba31a03aace --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_min_4.S @@ -0,0 +1,22 @@ +/*===-- sync_fetch_and_min_4.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_min_4 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#define min_4(rD, rN, rM) MINMAX_4(rD, rN, rM, lt) + +SYNC_OP_4(min_4) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_min_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_min_8.S new file mode 100644 index 000000000000..e7ccf9fb60ef --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_min_8.S @@ -0,0 +1,24 @@ +/*===-- sync_fetch_and_min_8.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_min_8 function for the ARM + * architecture. 
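 *
 * min_8 below instantiates MINMAX_8 with the "lt" condition. The cmp/sbcs
 * pair subtracts the full 64-bit values only to set the flags, so the
 * conditional moves that follow select on the signed 64-bit comparison
 * (sketch only):
 *
 *   result = (old < val) ? old : val;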
+ * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define min_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, lt) + +SYNC_OP_8(min_8) +#endif + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_nand_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_nand_4.S new file mode 100644 index 000000000000..c13dd394588f --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_nand_4.S @@ -0,0 +1,22 @@ +/*===-- sync_fetch_and_nand_4.S - -----------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_nand_4 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#define nand_4(rD, rN, rM) bic rD, rN, rM + +SYNC_OP_4(nand_4) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_nand_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_nand_8.S new file mode 100644 index 000000000000..e8107ab3a33c --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_nand_8.S @@ -0,0 +1,26 @@ +/*===-- sync_fetch_and_nand_8.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_nand_8 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define nand_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ + bic rD_LO, rN_LO, rM_LO ; \ + bic rD_HI, rN_HI, rM_HI + +SYNC_OP_8(nand_8) +#endif + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_or_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_or_4.S new file mode 100644 index 000000000000..6726571a944f --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_or_4.S @@ -0,0 +1,22 @@ +/*===-- sync_fetch_and_or_4.S - -------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_or_4 function for the ARM + * architecture. 
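 *
 * Note on the nand_4/nand_8 instantiations above: bic computes rN & ~rM,
 * so those routines update memory with (sketch)
 *
 *   old & ~val
 *
 * whereas __sync_fetch_and_nand is documented by GCC as storing ~(old & val).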
+ * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#define or_4(rD, rN, rM) orr rD, rN, rM + +SYNC_OP_4(or_4) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_or_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_or_8.S new file mode 100644 index 000000000000..f7f162c7c3b3 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_or_8.S @@ -0,0 +1,26 @@ +/*===-- sync_fetch_and_or_8.S - -------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_or_8 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define or_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ + orr rD_LO, rN_LO, rM_LO ; \ + orr rD_HI, rN_HI, rM_HI + +SYNC_OP_8(or_8) +#endif + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_sub_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_sub_4.S new file mode 100644 index 000000000000..b9326b14cdd5 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_sub_4.S @@ -0,0 +1,23 @@ +/*===-- sync_fetch_and_sub_4.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_sub_4 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +/* "subs" is 2 bytes shorter than "sub". */ +#define sub_4(rD, rN, rM) sub rD, rN, rM + +SYNC_OP_4(sub_4) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_sub_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_sub_8.S new file mode 100644 index 000000000000..6ce743e5ee9f --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_sub_8.S @@ -0,0 +1,26 @@ +/*===-- sync_fetch_and_sub_8.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_sub_8 function for the ARM + * architecture. 
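 *
 * sub_8 below mirrors add_8: a low-word subtract that sets the borrow flag,
 * then a subtract-with-carry on the high words (sketch only):
 *
 *   lo = a_lo - b_lo;                  // subs rD_LO, rN_LO, rM_LO
 *   hi = a_hi - b_hi - (a_lo < b_lo);  // sbc  rD_HI, rN_HI, rM_HI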
+ * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define sub_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ + subs rD_LO, rN_LO, rM_LO ; \ + sbc rD_HI, rN_HI, rM_HI + +SYNC_OP_8(sub_8) +#endif + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umax_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umax_4.S new file mode 100644 index 000000000000..b8d19ff35057 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umax_4.S @@ -0,0 +1,22 @@ +/*===-- sync_fetch_and_umax_4.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_umax_4 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#define umax_4(rD, rN, rM) MINMAX_4(rD, rN, rM, hi) + +SYNC_OP_4(umax_4) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umax_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umax_8.S new file mode 100644 index 000000000000..34442fd77454 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umax_8.S @@ -0,0 +1,24 @@ +/*===-- sync_fetch_and_umax_8.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_umax_8 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define umax_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, hi) + +SYNC_OP_8(umax_8) +#endif + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umin_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umin_4.S new file mode 100644 index 000000000000..0998e3e10f58 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umin_4.S @@ -0,0 +1,22 @@ +/*===-- sync_fetch_and_umin_4.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_umin_4 function for the ARM + * architecture. 
+ * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#define umin_4(rD, rN, rM) MINMAX_4(rD, rN, rM, lo) + +SYNC_OP_4(umin_4) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umin_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umin_8.S new file mode 100644 index 000000000000..558f91390512 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umin_8.S @@ -0,0 +1,24 @@ +/*===-- sync_fetch_and_umin_8.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_umin_8 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define umin_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, lo) + +SYNC_OP_8(umin_8) +#endif + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_xor_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_xor_4.S new file mode 100644 index 000000000000..824f49146880 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_xor_4.S @@ -0,0 +1,22 @@ +/*===-- sync_fetch_and_xor_4.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_xor_4 function for the ARM + * architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#define xor_4(rD, rN, rM) eor rD, rN, rM + +SYNC_OP_4(xor_4) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_xor_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_xor_8.S new file mode 100644 index 000000000000..073fb9c20f25 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_xor_8.S @@ -0,0 +1,26 @@ +/*===-- sync_fetch_and_xor_8.S - ------------------------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __sync_fetch_and_xor_8 function for the ARM + * architecture. 
+ * + *===----------------------------------------------------------------------===*/ + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define xor_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ + eor rD_LO, rN_LO, rM_LO ; \ + eor rD_HI, rN_HI, rM_HI + +SYNC_OP_8(xor_8) +#endif + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_synchronize.S b/contrib/compiler-rt/lib/builtins/arm/sync_synchronize.S new file mode 100644 index 000000000000..61d1db910f0d --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/sync_synchronize.S @@ -0,0 +1,38 @@ +//===-- sync_synchronize - Implement memory barrier * ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// When compiling a use of the gcc built-in __sync_synchronize() in thumb1 mode +// the compiler may emit a call to __sync_synchronize. +// On Darwin the implementation jumps to an OS supplied function named +// OSMemoryBarrier +// + + .text + .syntax unified + +#if __APPLE__ + + .p2align 2 +DEFINE_COMPILERRT_PRIVATE_FUNCTION(__sync_synchronize) + stmfd sp!, {r7, lr} + add r7, sp, #0 + bl _OSMemoryBarrier + ldmfd sp!, {r7, pc} +END_COMPILERRT_FUNCTION(__sync_synchronize) + + // tell linker it can break up file at label boundaries + .subsections_via_symbols + +#endif + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/truncdfsf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/truncdfsf2vfp.S new file mode 100644 index 000000000000..682e54d3d294 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/truncdfsf2vfp.S @@ -0,0 +1,33 @@ +//===-- truncdfsf2vfp.S - Implement truncdfsf2vfp -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern float __truncdfsf2vfp(double a); +// +// Converts double precision float to signle precision result. +// Uses Darwin calling convention where a double precision parameter is +// passed in a R0/R1 pair and a signle precision result is returned in R0. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__truncdfsf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcvt.f32.f64 s0, d0 +#else + vmov d7, r0, r1 // load double from r0/r1 pair + vcvt.f32.f64 s15, d7 // convert double to single (trucate precision) + vmov r0, s15 // return result in r0 +#endif + bx lr +END_COMPILERRT_FUNCTION(__truncdfsf2vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/udivmodsi4.S b/contrib/compiler-rt/lib/builtins/arm/udivmodsi4.S new file mode 100644 index 000000000000..ee3950c9b0eb --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/udivmodsi4.S @@ -0,0 +1,180 @@ +/*===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + *===----------------------------------------------------------------------===// + * + * This file implements the __udivmodsi4 (32-bit unsigned integer divide and + * modulus) function for the ARM 32-bit architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "../assembly.h" + + .syntax unified + .text + DEFINE_CODE_STATE + +@ unsigned int __udivmodsi4(unsigned int divident, unsigned int divisor, +@ unsigned int *remainder) +@ Calculate the quotient and remainder of the (unsigned) division. The return +@ value is the quotient, the remainder is placed in the variable. + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__udivmodsi4) +#if __ARM_ARCH_EXT_IDIV__ + tst r1, r1 + beq LOCAL_LABEL(divby0) + mov r3, r0 + udiv r0, r3, r1 + mls r1, r0, r1, r3 + str r1, [r2] + bx lr +#else + cmp r1, #1 + bcc LOCAL_LABEL(divby0) + beq LOCAL_LABEL(divby1) + cmp r0, r1 + bcc LOCAL_LABEL(quotient0) + /* + * Implement division using binary long division algorithm. + * + * r0 is the numerator, r1 the denominator. + * + * The code before JMP computes the correct shift I, so that + * r0 and (r1 << I) have the highest bit set in the same position. + * At the time of JMP, ip := .Ldiv0block - 12 * I. + * This depends on the fixed instruction size of block. + * For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes. + * + * block(shift) implements the test-and-update-quotient core. + * It assumes (r0 << shift) can be computed without overflow and + * that (r0 << shift) < 2 * r1. The quotient is stored in r3. + */ + +# ifdef __ARM_FEATURE_CLZ + clz ip, r0 + clz r3, r1 + /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */ + sub r3, r3, ip +# if defined(USE_THUMB_2) + adr ip, LOCAL_LABEL(div0block) + 1 + sub ip, ip, r3, lsl #1 +# else + adr ip, LOCAL_LABEL(div0block) +# endif + sub ip, ip, r3, lsl #2 + sub ip, ip, r3, lsl #3 + mov r3, #0 + bx ip +# else +# if defined(USE_THUMB_2) +# error THUMB mode requires CLZ or UDIV +# endif + str r4, [sp, #-8]! + + mov r4, r0 + adr ip, LOCAL_LABEL(div0block) + + lsr r3, r4, #16 + cmp r3, r1 + movhs r4, r3 + subhs ip, ip, #(16 * 12) + + lsr r3, r4, #8 + cmp r3, r1 + movhs r4, r3 + subhs ip, ip, #(8 * 12) + + lsr r3, r4, #4 + cmp r3, r1 + movhs r4, r3 + subhs ip, #(4 * 12) + + lsr r3, r4, #2 + cmp r3, r1 + movhs r4, r3 + subhs ip, ip, #(2 * 12) + + /* Last block, no need to update r3 or r4. */ + cmp r1, r4, lsr #1 + subls ip, ip, #(1 * 12) + + ldr r4, [sp], #8 /* restore r4, we are done with it. 
*/ + mov r3, #0 + + JMP(ip) +# endif + +#define IMM # + +#define block(shift) \ + cmp r0, r1, lsl IMM shift; \ + ITT(hs); \ + WIDE(addhs) r3, r3, IMM (1 << shift); \ + WIDE(subhs) r0, r0, r1, lsl IMM shift + + block(31) + block(30) + block(29) + block(28) + block(27) + block(26) + block(25) + block(24) + block(23) + block(22) + block(21) + block(20) + block(19) + block(18) + block(17) + block(16) + block(15) + block(14) + block(13) + block(12) + block(11) + block(10) + block(9) + block(8) + block(7) + block(6) + block(5) + block(4) + block(3) + block(2) + block(1) +LOCAL_LABEL(div0block): + block(0) + + str r0, [r2] + mov r0, r3 + JMP(lr) + +LOCAL_LABEL(quotient0): + str r0, [r2] + mov r0, #0 + JMP(lr) + +LOCAL_LABEL(divby1): + mov r3, #0 + str r3, [r2] + JMP(lr) +#endif /* __ARM_ARCH_EXT_IDIV__ */ + +LOCAL_LABEL(divby0): + mov r0, #0 +#ifdef __ARM_EABI__ + b __aeabi_idiv0 +#else + JMP(lr) +#endif + +END_COMPILERRT_FUNCTION(__udivmodsi4) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/udivsi3.S b/contrib/compiler-rt/lib/builtins/arm/udivsi3.S new file mode 100644 index 000000000000..6dea27d404ff --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/udivsi3.S @@ -0,0 +1,264 @@ +/*===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __udivsi3 (32-bit unsigned integer divide) + * function for the ARM 32-bit architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "../assembly.h" + + .syntax unified + .text + +DEFINE_CODE_STATE + + .p2align 2 +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3) + +@ unsigned int __udivsi3(unsigned int divident, unsigned int divisor) +@ Calculate and return the quotient of the (unsigned) division. + +DEFINE_COMPILERRT_FUNCTION(__udivsi3) +#if __ARM_ARCH_EXT_IDIV__ + tst r1, r1 + beq LOCAL_LABEL(divby0) + udiv r0, r0, r1 + bx lr + +LOCAL_LABEL(divby0): + mov r0, #0 +# ifdef __ARM_EABI__ + b __aeabi_idiv0 +# else + JMP(lr) +# endif + +#else /* ! __ARM_ARCH_EXT_IDIV__ */ + cmp r1, #1 + bcc LOCAL_LABEL(divby0) +#if defined(USE_THUMB_1) + bne LOCAL_LABEL(num_neq_denom) + JMP(lr) +LOCAL_LABEL(num_neq_denom): +#else + IT(eq) + JMPc(lr, eq) +#endif + cmp r0, r1 +#if defined(USE_THUMB_1) + bhs LOCAL_LABEL(num_ge_denom) + movs r0, #0 + JMP(lr) +LOCAL_LABEL(num_ge_denom): +#else + ITT(cc) + movcc r0, #0 + JMPc(lr, cc) +#endif + + /* + * Implement division using binary long division algorithm. + * + * r0 is the numerator, r1 the denominator. + * + * The code before JMP computes the correct shift I, so that + * r0 and (r1 << I) have the highest bit set in the same position. + * At the time of JMP, ip := .Ldiv0block - 12 * I. + * This depends on the fixed instruction size of block. + * For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes. + * + * block(shift) implements the test-and-update-quotient core. + * It assumes (r0 << shift) can be computed without overflow and + * that (r0 << shift) < 2 * r1. The quotient is stored in r3. + */ + +# if defined(__ARM_FEATURE_CLZ) + clz ip, r0 + clz r3, r1 + /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. 
*/ + sub r3, r3, ip +# if defined(USE_THUMB_2) + adr ip, LOCAL_LABEL(div0block) + 1 + sub ip, ip, r3, lsl #1 +# else + adr ip, LOCAL_LABEL(div0block) +# endif + sub ip, ip, r3, lsl #2 + sub ip, ip, r3, lsl #3 + mov r3, #0 + bx ip +# else /* No CLZ Feature */ +# if defined(USE_THUMB_2) +# error THUMB mode requires CLZ or UDIV +# endif +# if defined(USE_THUMB_1) +# define BLOCK_SIZE 10 +# else +# define BLOCK_SIZE 12 +# endif + + mov r2, r0 +# if defined(USE_THUMB_1) + mov ip, r0 + adr r0, LOCAL_LABEL(div0block) + adds r0, #1 +# else + adr ip, LOCAL_LABEL(div0block) +# endif + lsrs r3, r2, #16 + cmp r3, r1 +# if defined(USE_THUMB_1) + blo LOCAL_LABEL(skip_16) + movs r2, r3 + subs r0, r0, #(16 * BLOCK_SIZE) +LOCAL_LABEL(skip_16): +# else + movhs r2, r3 + subhs ip, ip, #(16 * BLOCK_SIZE) +# endif + + lsrs r3, r2, #8 + cmp r3, r1 +# if defined(USE_THUMB_1) + blo LOCAL_LABEL(skip_8) + movs r2, r3 + subs r0, r0, #(8 * BLOCK_SIZE) +LOCAL_LABEL(skip_8): +# else + movhs r2, r3 + subhs ip, ip, #(8 * BLOCK_SIZE) +# endif + + lsrs r3, r2, #4 + cmp r3, r1 +# if defined(USE_THUMB_1) + blo LOCAL_LABEL(skip_4) + movs r2, r3 + subs r0, r0, #(4 * BLOCK_SIZE) +LOCAL_LABEL(skip_4): +# else + movhs r2, r3 + subhs ip, #(4 * BLOCK_SIZE) +# endif + + lsrs r3, r2, #2 + cmp r3, r1 +# if defined(USE_THUMB_1) + blo LOCAL_LABEL(skip_2) + movs r2, r3 + subs r0, r0, #(2 * BLOCK_SIZE) +LOCAL_LABEL(skip_2): +# else + movhs r2, r3 + subhs ip, ip, #(2 * BLOCK_SIZE) +# endif + + /* Last block, no need to update r2 or r3. */ +# if defined(USE_THUMB_1) + lsrs r3, r2, #1 + cmp r3, r1 + blo LOCAL_LABEL(skip_1) + subs r0, r0, #(1 * BLOCK_SIZE) +LOCAL_LABEL(skip_1): + movs r2, r0 + mov r0, ip + movs r3, #0 + JMP (r2) + +# else + cmp r1, r2, lsr #1 + subls ip, ip, #(1 * BLOCK_SIZE) + + movs r3, #0 + + JMP(ip) +# endif +# endif /* __ARM_FEATURE_CLZ */ + + +#define IMM # + /* due to the range limit of branch in Thumb1, we have to place the + block closer */ +LOCAL_LABEL(divby0): + movs r0, #0 +# if defined(__ARM_EABI__) + push {r7, lr} + bl __aeabi_idiv0 // due to relocation limit, can't use b. + pop {r7, pc} +# else + JMP(lr) +# endif + + +#if defined(USE_THUMB_1) +#define block(shift) \ + lsls r2, r1, IMM shift; \ + cmp r0, r2; \ + blo LOCAL_LABEL(block_skip_##shift); \ + subs r0, r0, r2; \ + LOCAL_LABEL(block_skip_##shift) :; \ + adcs r3, r3 /* same as ((r3 << 1) | Carry). Carry is set if r0 >= r2. */ + + /* TODO: if current location counter is not not word aligned, we don't + need the .p2align and nop */ + /* Label div0block must be word-aligned. 
First align block 31 */ + .p2align 2 + nop /* Padding to align div0block as 31 blocks = 310 bytes */ + +#else +#define block(shift) \ + cmp r0, r1, lsl IMM shift; \ + ITT(hs); \ + WIDE(addhs) r3, r3, IMM (1 << shift); \ + WIDE(subhs) r0, r0, r1, lsl IMM shift +#endif + + block(31) + block(30) + block(29) + block(28) + block(27) + block(26) + block(25) + block(24) + block(23) + block(22) + block(21) + block(20) + block(19) + block(18) + block(17) + block(16) + block(15) + block(14) + block(13) + block(12) + block(11) + block(10) + block(9) + block(8) + block(7) + block(6) + block(5) + block(4) + block(3) + block(2) + block(1) +LOCAL_LABEL(div0block): + block(0) + + mov r0, r3 + JMP(lr) +#endif /* __ARM_ARCH_EXT_IDIV__ */ + +END_COMPILERRT_FUNCTION(__udivsi3) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/umodsi3.S b/contrib/compiler-rt/lib/builtins/arm/umodsi3.S new file mode 100644 index 000000000000..069fad34cb9c --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/umodsi3.S @@ -0,0 +1,158 @@ +/*===-- umodsi3.S - 32-bit unsigned integer modulus -----------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __umodsi3 (32-bit unsigned integer modulus) + * function for the ARM 32-bit architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "../assembly.h" + + .syntax unified + .text + DEFINE_CODE_STATE + +@ unsigned int __umodsi3(unsigned int divident, unsigned int divisor) +@ Calculate and return the remainder of the (unsigned) division. + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__umodsi3) +#if __ARM_ARCH_EXT_IDIV__ + tst r1, r1 + beq LOCAL_LABEL(divby0) + udiv r2, r0, r1 + mls r0, r2, r1, r0 + bx lr +#else + cmp r1, #1 + bcc LOCAL_LABEL(divby0) + ITT(eq) + moveq r0, #0 + JMPc(lr, eq) + cmp r0, r1 + IT(cc) + JMPc(lr, cc) + /* + * Implement division using binary long division algorithm. + * + * r0 is the numerator, r1 the denominator. + * + * The code before JMP computes the correct shift I, so that + * r0 and (r1 << I) have the highest bit set in the same position. + * At the time of JMP, ip := .Ldiv0block - 8 * I. + * This depends on the fixed instruction size of block. + * For ARM mode, this is 8 Bytes, for THUMB mode 10 Bytes. + * + * block(shift) implements the test-and-update-quotient core. + * It assumes (r0 << shift) can be computed without overflow and + * that (r0 << shift) < 2 * r1. The quotient is stored in r3. + */ + +# ifdef __ARM_FEATURE_CLZ + clz ip, r0 + clz r3, r1 + /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */ + sub r3, r3, ip +# if defined(USE_THUMB_2) + adr ip, LOCAL_LABEL(div0block) + 1 + sub ip, ip, r3, lsl #1 +# else + adr ip, LOCAL_LABEL(div0block) +# endif + sub ip, ip, r3, lsl #3 + bx ip +# else +# if defined(USE_THUMB_2) +# error THUMB mode requires CLZ or UDIV +# endif + mov r2, r0 + adr ip, LOCAL_LABEL(div0block) + + lsr r3, r2, #16 + cmp r3, r1 + movhs r2, r3 + subhs ip, ip, #(16 * 8) + + lsr r3, r2, #8 + cmp r3, r1 + movhs r2, r3 + subhs ip, ip, #(8 * 8) + + lsr r3, r2, #4 + cmp r3, r1 + movhs r2, r3 + subhs ip, #(4 * 8) + + lsr r3, r2, #2 + cmp r3, r1 + movhs r2, r3 + subhs ip, ip, #(2 * 8) + + /* Last block, no need to update r2 or r3. 
*/ + cmp r1, r2, lsr #1 + subls ip, ip, #(1 * 8) + + JMP(ip) +# endif + +#define IMM # + +#define block(shift) \ + cmp r0, r1, lsl IMM shift; \ + IT(hs); \ + WIDE(subhs) r0, r0, r1, lsl IMM shift + + block(31) + block(30) + block(29) + block(28) + block(27) + block(26) + block(25) + block(24) + block(23) + block(22) + block(21) + block(20) + block(19) + block(18) + block(17) + block(16) + block(15) + block(14) + block(13) + block(12) + block(11) + block(10) + block(9) + block(8) + block(7) + block(6) + block(5) + block(4) + block(3) + block(2) + block(1) +LOCAL_LABEL(div0block): + block(0) + JMP(lr) +#endif /* __ARM_ARCH_EXT_IDIV__ */ + +LOCAL_LABEL(divby0): + mov r0, #0 +#ifdef __ARM_EABI__ + b __aeabi_idiv0 +#else + JMP(lr) +#endif + +END_COMPILERRT_FUNCTION(__umodsi3) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/unorddf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/unorddf2vfp.S new file mode 100644 index 000000000000..6625fa8a3119 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/unorddf2vfp.S @@ -0,0 +1,37 @@ +//===-- unorddf2vfp.S - Implement unorddf2vfp ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __unorddf2vfp(double a, double b); +// +// Returns one iff a or b is NaN +// Uses Darwin calling convention where double precision arguments are passsed +// like in GPR pairs. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__unorddf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f64 d0, d1 +#else + vmov d6, r0, r1 // load r0/r1 pair in double register + vmov d7, r2, r3 // load r2/r3 pair in double register + vcmp.f64 d6, d7 +#endif + vmrs apsr_nzcv, fpscr + ITE(vs) + movvs r0, #1 // set result register to 1 if "overflow" (any NaNs) + movvc r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__unorddf2vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/arm/unordsf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/unordsf2vfp.S new file mode 100644 index 000000000000..0b5da2ba3e17 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/arm/unordsf2vfp.S @@ -0,0 +1,37 @@ +//===-- unordsf2vfp.S - Implement unordsf2vfp -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __unordsf2vfp(float a, float b); +// +// Returns one iff a or b is NaN +// Uses Darwin calling convention where single precision arguments are passsed +// like 32-bit ints +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__unordsf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f32 s0, s1 +#else + vmov s14, r0 // move from GPR 0 to float register + vmov s15, r1 // move from GPR 1 to float register + vcmp.f32 s14, s15 +#endif + vmrs apsr_nzcv, fpscr + ITE(vs) + movvs r0, #1 // set result register to 1 if "overflow" (any NaNs) + movvc r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__unordsf2vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/ashldi3.c b/contrib/compiler-rt/lib/builtins/ashldi3.c new file mode 100644 index 000000000000..a5c1836006b9 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/ashldi3.c @@ -0,0 +1,45 @@ +/* ====-- ashldi3.c - Implement __ashldi3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __ashldi3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a << b */ + +/* Precondition: 0 <= b < bits_in_dword */ + +COMPILER_RT_ABI di_int +__ashldi3(di_int a, si_int b) +{ + const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT); + dwords input; + dwords result; + input.all = a; + if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */ + { + result.s.low = 0; + result.s.high = input.s.low << (b - bits_in_word); + } + else /* 0 <= b < bits_in_word */ + { + if (b == 0) + return a; + result.s.low = input.s.low << b; + result.s.high = (input.s.high << b) | (input.s.low >> (bits_in_word - b)); + } + return result.all; +} + +#if defined(__ARM_EABI__) +AEABI_RTABI di_int __aeabi_llsl(di_int a, si_int b) COMPILER_RT_ALIAS(__ashldi3); +#endif diff --git a/contrib/compiler-rt/lib/builtins/ashlti3.c b/contrib/compiler-rt/lib/builtins/ashlti3.c new file mode 100644 index 000000000000..638ae845ff04 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/ashlti3.c @@ -0,0 +1,45 @@ +/* ===-- ashlti3.c - Implement __ashlti3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __ashlti3 for the compiler_rt library. 
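+ *
+ * As in __ashldi3, the shift is split across the two halves of the value;
+ * for illustration, __ashlti3(1, 64) yields a result whose low 64 bits are
+ * 0 and whose high 64 bits are 1.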
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: a << b */ + +/* Precondition: 0 <= b < bits_in_tword */ + +COMPILER_RT_ABI ti_int +__ashlti3(ti_int a, si_int b) +{ + const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT); + twords input; + twords result; + input.all = a; + if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ + { + result.s.low = 0; + result.s.high = input.s.low << (b - bits_in_dword); + } + else /* 0 <= b < bits_in_dword */ + { + if (b == 0) + return a; + result.s.low = input.s.low << b; + result.s.high = (input.s.high << b) | (input.s.low >> (bits_in_dword - b)); + } + return result.all; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/ashrdi3.c b/contrib/compiler-rt/lib/builtins/ashrdi3.c new file mode 100644 index 000000000000..84619965eca0 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/ashrdi3.c @@ -0,0 +1,46 @@ +/*===-- ashrdi3.c - Implement __ashrdi3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __ashrdi3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: arithmetic a >> b */ + +/* Precondition: 0 <= b < bits_in_dword */ + +COMPILER_RT_ABI di_int +__ashrdi3(di_int a, si_int b) +{ + const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT); + dwords input; + dwords result; + input.all = a; + if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */ + { + /* result.s.high = input.s.high < 0 ? -1 : 0 */ + result.s.high = input.s.high >> (bits_in_word - 1); + result.s.low = input.s.high >> (b - bits_in_word); + } + else /* 0 <= b < bits_in_word */ + { + if (b == 0) + return a; + result.s.high = input.s.high >> b; + result.s.low = (input.s.high << (bits_in_word - b)) | (input.s.low >> b); + } + return result.all; +} + +#if defined(__ARM_EABI__) +AEABI_RTABI di_int __aeabi_lasr(di_int a, si_int b) COMPILER_RT_ALIAS(__ashrdi3); +#endif diff --git a/contrib/compiler-rt/lib/builtins/ashrti3.c b/contrib/compiler-rt/lib/builtins/ashrti3.c new file mode 100644 index 000000000000..f78205d961e4 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/ashrti3.c @@ -0,0 +1,46 @@ +/* ===-- ashrti3.c - Implement __ashrti3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __ashrti3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: arithmetic a >> b */ + +/* Precondition: 0 <= b < bits_in_tword */ + +COMPILER_RT_ABI ti_int +__ashrti3(ti_int a, si_int b) +{ + const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT); + twords input; + twords result; + input.all = a; + if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ + { + /* result.s.high = input.s.high < 0 ? 
-1 : 0 */ + result.s.high = input.s.high >> (bits_in_dword - 1); + result.s.low = input.s.high >> (b - bits_in_dword); + } + else /* 0 <= b < bits_in_dword */ + { + if (b == 0) + return a; + result.s.high = input.s.high >> b; + result.s.low = (input.s.high << (bits_in_dword - b)) | (input.s.low >> b); + } + return result.all; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/assembly.h b/contrib/compiler-rt/lib/builtins/assembly.h new file mode 100644 index 000000000000..3f5e59b25442 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/assembly.h @@ -0,0 +1,204 @@ +/* ===-- assembly.h - compiler-rt assembler support macros -----------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file defines macros for use in compiler-rt assembler source. + * This file is not part of the interface of this library. + * + * ===----------------------------------------------------------------------=== + */ + +#ifndef COMPILERRT_ASSEMBLY_H +#define COMPILERRT_ASSEMBLY_H + +#if defined(__POWERPC__) || defined(__powerpc__) || defined(__ppc__) +#define SEPARATOR @ +#else +#define SEPARATOR ; +#endif + +#if defined(__APPLE__) +#define HIDDEN(name) .private_extern name +#define LOCAL_LABEL(name) L_##name +// tell linker it can break up file at label boundaries +#define FILE_LEVEL_DIRECTIVE .subsections_via_symbols +#define SYMBOL_IS_FUNC(name) +#define CONST_SECTION .const + +#define NO_EXEC_STACK_DIRECTIVE + +#elif defined(__ELF__) + +#define HIDDEN(name) .hidden name +#define LOCAL_LABEL(name) .L_##name +#define FILE_LEVEL_DIRECTIVE +#if defined(__arm__) +#define SYMBOL_IS_FUNC(name) .type name,%function +#else +#define SYMBOL_IS_FUNC(name) .type name,@function +#endif +#define CONST_SECTION .section .rodata + +#if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \ + defined(__linux__) +#define NO_EXEC_STACK_DIRECTIVE .section .note.GNU-stack,"",%progbits +#else +#define NO_EXEC_STACK_DIRECTIVE +#endif + +#else // !__APPLE__ && !__ELF__ + +#define HIDDEN(name) +#define LOCAL_LABEL(name) .L ## name +#define FILE_LEVEL_DIRECTIVE +#define SYMBOL_IS_FUNC(name) \ + .def name SEPARATOR \ + .scl 2 SEPARATOR \ + .type 32 SEPARATOR \ + .endef +#define CONST_SECTION .section .rdata,"rd" + +#define NO_EXEC_STACK_DIRECTIVE + +#endif + +#if defined(__arm__) + +/* + * Determine actual [ARM][THUMB[1][2]] ISA using compiler predefined macros: + * - for '-mthumb -march=armv6' compiler defines '__thumb__' + * - for '-mthumb -march=armv7' compiler defines '__thumb__' and '__thumb2__' + */ +#if defined(__thumb2__) || defined(__thumb__) +#define DEFINE_CODE_STATE .thumb SEPARATOR +#define DECLARE_FUNC_ENCODING .thumb_func SEPARATOR +#if defined(__thumb2__) +#define USE_THUMB_2 +#define IT(cond) it cond +#define ITT(cond) itt cond +#define ITE(cond) ite cond +#else +#define USE_THUMB_1 +#define IT(cond) +#define ITT(cond) +#define ITE(cond) +#endif // defined(__thumb__2) +#else // !defined(__thumb2__) && !defined(__thumb__) +#define DEFINE_CODE_STATE .arm SEPARATOR +#define DECLARE_FUNC_ENCODING +#define IT(cond) +#define ITT(cond) +#define ITE(cond) +#endif + +#if defined(USE_THUMB_1) && defined(USE_THUMB_2) +#error "USE_THUMB_1 and USE_THUMB_2 can't be defined together." 
+#endif + +#if defined(__ARM_ARCH_4T__) || __ARM_ARCH >= 5 +#define ARM_HAS_BX +#endif +#if !defined(__ARM_FEATURE_CLZ) && !defined(USE_THUMB_1) && \ + (__ARM_ARCH >= 6 || (__ARM_ARCH == 5 && !defined(__ARM_ARCH_5__))) +#define __ARM_FEATURE_CLZ +#endif + +#ifdef ARM_HAS_BX +#define JMP(r) bx r +#define JMPc(r, c) bx##c r +#else +#define JMP(r) mov pc, r +#define JMPc(r, c) mov##c pc, r +#endif + +// pop {pc} can't switch Thumb mode on ARMv4T +#if __ARM_ARCH >= 5 +#define POP_PC() pop {pc} +#else +#define POP_PC() \ + pop {ip}; \ + JMP(ip) +#endif + +#if defined(USE_THUMB_2) +#define WIDE(op) op.w +#else +#define WIDE(op) op +#endif +#else // !defined(__arm) +#define DECLARE_FUNC_ENCODING +#define DEFINE_CODE_STATE +#endif + +#define GLUE2(a, b) a##b +#define GLUE(a, b) GLUE2(a, b) +#define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name) + +#ifdef VISIBILITY_HIDDEN +#define DECLARE_SYMBOL_VISIBILITY(name) \ + HIDDEN(SYMBOL_NAME(name)) SEPARATOR +#else +#define DECLARE_SYMBOL_VISIBILITY(name) +#endif + +#define DEFINE_COMPILERRT_FUNCTION(name) \ + DEFINE_CODE_STATE \ + FILE_LEVEL_DIRECTIVE SEPARATOR \ + .globl SYMBOL_NAME(name) SEPARATOR \ + SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ + DECLARE_SYMBOL_VISIBILITY(name) \ + DECLARE_FUNC_ENCODING \ + SYMBOL_NAME(name): + +#define DEFINE_COMPILERRT_THUMB_FUNCTION(name) \ + DEFINE_CODE_STATE \ + FILE_LEVEL_DIRECTIVE SEPARATOR \ + .globl SYMBOL_NAME(name) SEPARATOR \ + SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ + DECLARE_SYMBOL_VISIBILITY(name) SEPARATOR \ + .thumb_func SEPARATOR \ + SYMBOL_NAME(name): + +#define DEFINE_COMPILERRT_PRIVATE_FUNCTION(name) \ + DEFINE_CODE_STATE \ + FILE_LEVEL_DIRECTIVE SEPARATOR \ + .globl SYMBOL_NAME(name) SEPARATOR \ + SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ + HIDDEN(SYMBOL_NAME(name)) SEPARATOR \ + DECLARE_FUNC_ENCODING \ + SYMBOL_NAME(name): + +#define DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(name) \ + DEFINE_CODE_STATE \ + .globl name SEPARATOR \ + SYMBOL_IS_FUNC(name) SEPARATOR \ + HIDDEN(name) SEPARATOR \ + DECLARE_FUNC_ENCODING \ + name: + +#define DEFINE_COMPILERRT_FUNCTION_ALIAS(name, target) \ + .globl SYMBOL_NAME(name) SEPARATOR \ + SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ + DECLARE_SYMBOL_VISIBILITY(SYMBOL_NAME(name)) SEPARATOR \ + .set SYMBOL_NAME(name), SYMBOL_NAME(target) SEPARATOR + +#if defined(__ARM_EABI__) +#define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name) \ + DEFINE_COMPILERRT_FUNCTION_ALIAS(aeabi_name, name) +#else +#define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name) +#endif + +#ifdef __ELF__ +#define END_COMPILERRT_FUNCTION(name) \ + .size SYMBOL_NAME(name), . - SYMBOL_NAME(name) +#else +#define END_COMPILERRT_FUNCTION(name) +#endif + +#endif /* COMPILERRT_ASSEMBLY_H */ diff --git a/contrib/compiler-rt/lib/builtins/atomic.c b/contrib/compiler-rt/lib/builtins/atomic.c new file mode 100644 index 000000000000..ee35e342eda5 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/atomic.c @@ -0,0 +1,338 @@ +/*===-- atomic.c - Implement support functions for atomic operations.------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------=== + * + * atomic.c defines a set of functions for performing atomic accesses on + * arbitrary-sized memory locations. 
This design uses locks that should + * be fast in the uncontended case, for two reasons: + * + * 1) This code must work with C programs that do not link to anything + * (including pthreads) and so it should not depend on any pthread + * functions. + * 2) Atomic operations, rather than explicit mutexes, are most commonly used + * on code where contended operations are rate. + * + * To avoid needing a per-object lock, this code allocates an array of + * locks and hashes the object pointers to find the one that it should use. + * For operations that must be atomic on two locations, the lower lock is + * always acquired first, to avoid deadlock. + * + *===----------------------------------------------------------------------=== + */ + +#include <stdint.h> +#include <string.h> + +#include "assembly.h" + +// Clang objects if you redefine a builtin. This little hack allows us to +// define a function with the same name as an intrinsic. +#pragma redefine_extname __atomic_load_c SYMBOL_NAME(__atomic_load) +#pragma redefine_extname __atomic_store_c SYMBOL_NAME(__atomic_store) +#pragma redefine_extname __atomic_exchange_c SYMBOL_NAME(__atomic_exchange) +#pragma redefine_extname __atomic_compare_exchange_c SYMBOL_NAME(__atomic_compare_exchange) + +/// Number of locks. This allocates one page on 32-bit platforms, two on +/// 64-bit. This can be specified externally if a different trade between +/// memory usage and contention probability is required for a given platform. +#ifndef SPINLOCK_COUNT +#define SPINLOCK_COUNT (1<<10) +#endif +static const long SPINLOCK_MASK = SPINLOCK_COUNT - 1; + +//////////////////////////////////////////////////////////////////////////////// +// Platform-specific lock implementation. Falls back to spinlocks if none is +// defined. Each platform should define the Lock type, and corresponding +// lock() and unlock() functions. +//////////////////////////////////////////////////////////////////////////////// +#ifdef __FreeBSD__ +#include <errno.h> +#include <sys/types.h> +#include <machine/atomic.h> +#include <sys/umtx.h> +typedef struct _usem Lock; +__inline static void unlock(Lock *l) { + __c11_atomic_store((_Atomic(uint32_t)*)&l->_count, 1, __ATOMIC_RELEASE); + __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); + if (l->_has_waiters) + _umtx_op(l, UMTX_OP_SEM_WAKE, 1, 0, 0); +} +__inline static void lock(Lock *l) { + uint32_t old = 1; + while (!__c11_atomic_compare_exchange_weak((_Atomic(uint32_t)*)&l->_count, &old, + 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) { + _umtx_op(l, UMTX_OP_SEM_WAIT, 0, 0, 0); + old = 1; + } +} +/// locks for atomic operations +static Lock locks[SPINLOCK_COUNT] = { [0 ... SPINLOCK_COUNT-1] = {0,1,0} }; + +#elif defined(__APPLE__) +#include <libkern/OSAtomic.h> +typedef OSSpinLock Lock; +__inline static void unlock(Lock *l) { + OSSpinLockUnlock(l); +} +/// Locks a lock. In the current implementation, this is potentially +/// unbounded in the contended case. +__inline static void lock(Lock *l) { + OSSpinLockLock(l); +} +static Lock locks[SPINLOCK_COUNT]; // initialized to OS_SPINLOCK_INIT which is 0 + +#else +typedef _Atomic(uintptr_t) Lock; +/// Unlock a lock. This is a release operation. +__inline static void unlock(Lock *l) { + __c11_atomic_store(l, 0, __ATOMIC_RELEASE); +} +/// Locks a lock. In the current implementation, this is potentially +/// unbounded in the contended case. 
+__inline static void lock(Lock *l) { + uintptr_t old = 0; + while (!__c11_atomic_compare_exchange_weak(l, &old, 1, __ATOMIC_ACQUIRE, + __ATOMIC_RELAXED)) + old = 0; +} +/// locks for atomic operations +static Lock locks[SPINLOCK_COUNT]; +#endif + + +/// Returns a lock to use for a given pointer. +static __inline Lock *lock_for_pointer(void *ptr) { + intptr_t hash = (intptr_t)ptr; + // Disregard the lowest 4 bits. We want all values that may be part of the + // same memory operation to hash to the same value and therefore use the same + // lock. + hash >>= 4; + // Use the next bits as the basis for the hash + intptr_t low = hash & SPINLOCK_MASK; + // Now use the high(er) set of bits to perturb the hash, so that we don't + // get collisions from atomic fields in a single object + hash >>= 16; + hash ^= low; + // Return a pointer to the word to use + return locks + (hash & SPINLOCK_MASK); +} + +/// Macros for determining whether a size is lock free. Clang can not yet +/// codegen __atomic_is_lock_free(16), so for now we assume 16-byte values are +/// not lock free. +#define IS_LOCK_FREE_1 __c11_atomic_is_lock_free(1) +#define IS_LOCK_FREE_2 __c11_atomic_is_lock_free(2) +#define IS_LOCK_FREE_4 __c11_atomic_is_lock_free(4) +#define IS_LOCK_FREE_8 __c11_atomic_is_lock_free(8) +#define IS_LOCK_FREE_16 0 + +/// Macro that calls the compiler-generated lock-free versions of functions +/// when they exist. +#define LOCK_FREE_CASES() \ + do {\ + switch (size) {\ + case 2:\ + if (IS_LOCK_FREE_2) {\ + LOCK_FREE_ACTION(uint16_t);\ + }\ + case 4:\ + if (IS_LOCK_FREE_4) {\ + LOCK_FREE_ACTION(uint32_t);\ + }\ + case 8:\ + if (IS_LOCK_FREE_8) {\ + LOCK_FREE_ACTION(uint64_t);\ + }\ + case 16:\ + if (IS_LOCK_FREE_16) {\ + /* FIXME: __uint128_t isn't available on 32 bit platforms. + LOCK_FREE_ACTION(__uint128_t);*/\ + }\ + }\ + } while (0) + + +/// An atomic load operation. This is atomic with respect to the source +/// pointer only. +void __atomic_load_c(int size, void *src, void *dest, int model) { +#define LOCK_FREE_ACTION(type) \ + *((type*)dest) = __c11_atomic_load((_Atomic(type)*)src, model);\ + return; + LOCK_FREE_CASES(); +#undef LOCK_FREE_ACTION + Lock *l = lock_for_pointer(src); + lock(l); + memcpy(dest, src, size); + unlock(l); +} + +/// An atomic store operation. This is atomic with respect to the destination +/// pointer only. +void __atomic_store_c(int size, void *dest, void *src, int model) { +#define LOCK_FREE_ACTION(type) \ + __c11_atomic_store((_Atomic(type)*)dest, *(type*)dest, model);\ + return; + LOCK_FREE_CASES(); +#undef LOCK_FREE_ACTION + Lock *l = lock_for_pointer(dest); + lock(l); + memcpy(dest, src, size); + unlock(l); +} + +/// Atomic compare and exchange operation. If the value at *ptr is identical +/// to the value at *expected, then this copies value at *desired to *ptr. If +/// they are not, then this stores the current value from *ptr in *expected. +/// +/// This function returns 1 if the exchange takes place or 0 if it fails. 
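+///
+/// Illustrative sketch (not a declaration from this file): for an object too
+/// large to be lock free, a front end may emit a call along the lines of
+///   struct blob { char b[32]; } cur, want, next;
+///   int ok = __atomic_compare_exchange_c(sizeof cur, &cur, &want, &next,
+///                                        __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+/// which ends up in the locked memcmp/memcpy path below.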
+int __atomic_compare_exchange_c(int size, void *ptr, void *expected, + void *desired, int success, int failure) { +#define LOCK_FREE_ACTION(type) \ + return __c11_atomic_compare_exchange_strong((_Atomic(type)*)ptr, (type*)expected,\ + *(type*)desired, success, failure) + LOCK_FREE_CASES(); +#undef LOCK_FREE_ACTION + Lock *l = lock_for_pointer(ptr); + lock(l); + if (memcmp(ptr, expected, size) == 0) { + memcpy(ptr, desired, size); + unlock(l); + return 1; + } + memcpy(expected, ptr, size); + unlock(l); + return 0; +} + +/// Performs an atomic exchange operation between two pointers. This is atomic +/// with respect to the target address. +void __atomic_exchange_c(int size, void *ptr, void *val, void *old, int model) { +#define LOCK_FREE_ACTION(type) \ + *(type*)old = __c11_atomic_exchange((_Atomic(type)*)ptr, *(type*)val,\ + model);\ + return; + LOCK_FREE_CASES(); +#undef LOCK_FREE_ACTION + Lock *l = lock_for_pointer(ptr); + lock(l); + memcpy(old, ptr, size); + memcpy(ptr, val, size); + unlock(l); +} + +//////////////////////////////////////////////////////////////////////////////// +// Where the size is known at compile time, the compiler may emit calls to +// specialised versions of the above functions. +//////////////////////////////////////////////////////////////////////////////// +#ifdef __SIZEOF_INT128__ +#define OPTIMISED_CASES\ + OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t)\ + OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t)\ + OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t)\ + OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t)\ + OPTIMISED_CASE(16, IS_LOCK_FREE_16, __uint128_t) +#else +#define OPTIMISED_CASES\ + OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t)\ + OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t)\ + OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t)\ + OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t) +#endif + +#define OPTIMISED_CASE(n, lockfree, type)\ +type __atomic_load_##n(type *src, int model) {\ + if (lockfree)\ + return __c11_atomic_load((_Atomic(type)*)src, model);\ + Lock *l = lock_for_pointer(src);\ + lock(l);\ + type val = *src;\ + unlock(l);\ + return val;\ +} +OPTIMISED_CASES +#undef OPTIMISED_CASE + +#define OPTIMISED_CASE(n, lockfree, type)\ +void __atomic_store_##n(type *dest, type val, int model) {\ + if (lockfree) {\ + __c11_atomic_store((_Atomic(type)*)dest, val, model);\ + return;\ + }\ + Lock *l = lock_for_pointer(dest);\ + lock(l);\ + *dest = val;\ + unlock(l);\ + return;\ +} +OPTIMISED_CASES +#undef OPTIMISED_CASE + +#define OPTIMISED_CASE(n, lockfree, type)\ +type __atomic_exchange_##n(type *dest, type val, int model) {\ + if (lockfree)\ + return __c11_atomic_exchange((_Atomic(type)*)dest, val, model);\ + Lock *l = lock_for_pointer(dest);\ + lock(l);\ + type tmp = *dest;\ + *dest = val;\ + unlock(l);\ + return tmp;\ +} +OPTIMISED_CASES +#undef OPTIMISED_CASE + +#define OPTIMISED_CASE(n, lockfree, type)\ +int __atomic_compare_exchange_##n(type *ptr, type *expected, type desired,\ + int success, int failure) {\ + if (lockfree)\ + return __c11_atomic_compare_exchange_strong((_Atomic(type)*)ptr, expected, desired,\ + success, failure);\ + Lock *l = lock_for_pointer(ptr);\ + lock(l);\ + if (*ptr == *expected) {\ + *ptr = desired;\ + unlock(l);\ + return 1;\ + }\ + *expected = *ptr;\ + unlock(l);\ + return 0;\ +} +OPTIMISED_CASES +#undef OPTIMISED_CASE + +//////////////////////////////////////////////////////////////////////////////// +// Atomic read-modify-write operations for integers of various sizes. 
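+//
+// For example, instantiating OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t) with
+// the `add` ATOMIC_RMW below defines
+//   uint32_t __atomic_fetch_add_4(uint32_t *ptr, uint32_t val, int model);
+// which uses __c11_atomic_fetch_add when 4-byte accesses are lock free and
+// the spinlock path otherwise.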
+//////////////////////////////////////////////////////////////////////////////// +#define ATOMIC_RMW(n, lockfree, type, opname, op) \ +type __atomic_fetch_##opname##_##n(type *ptr, type val, int model) {\ + if (lockfree) \ + return __c11_atomic_fetch_##opname((_Atomic(type)*)ptr, val, model);\ + Lock *l = lock_for_pointer(ptr);\ + lock(l);\ + type tmp = *ptr;\ + *ptr = tmp op val;\ + unlock(l);\ + return tmp;\ +} + +#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, add, +) +OPTIMISED_CASES +#undef OPTIMISED_CASE +#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, sub, -) +OPTIMISED_CASES +#undef OPTIMISED_CASE +#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, and, &) +OPTIMISED_CASES +#undef OPTIMISED_CASE +#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, or, |) +OPTIMISED_CASES +#undef OPTIMISED_CASE +#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, xor, ^) +OPTIMISED_CASES +#undef OPTIMISED_CASE diff --git a/contrib/compiler-rt/lib/builtins/atomic_flag_clear.c b/contrib/compiler-rt/lib/builtins/atomic_flag_clear.c new file mode 100644 index 000000000000..da912af64312 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/atomic_flag_clear.c @@ -0,0 +1,27 @@ +/*===-- atomic_flag_clear.c -------------------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===------------------------------------------------------------------------=== + * + * This file implements atomic_flag_clear from C11's stdatomic.h. + * + *===------------------------------------------------------------------------=== + */ + +#ifndef __has_include +#define __has_include(inc) 0 +#endif + +#if __has_include(<stdatomic.h>) + +#include <stdatomic.h> +#undef atomic_flag_clear +void atomic_flag_clear(volatile atomic_flag *object) { + __c11_atomic_store(&(object)->_Value, 0, __ATOMIC_SEQ_CST); +} + +#endif diff --git a/contrib/compiler-rt/lib/builtins/atomic_flag_clear_explicit.c b/contrib/compiler-rt/lib/builtins/atomic_flag_clear_explicit.c new file mode 100644 index 000000000000..1059b787f169 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/atomic_flag_clear_explicit.c @@ -0,0 +1,28 @@ +/*===-- atomic_flag_clear_explicit.c ----------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===------------------------------------------------------------------------=== + * + * This file implements atomic_flag_clear_explicit from C11's stdatomic.h. 
+ * + *===------------------------------------------------------------------------=== + */ + +#ifndef __has_include +#define __has_include(inc) 0 +#endif + +#if __has_include(<stdatomic.h>) + +#include <stdatomic.h> +#undef atomic_flag_clear_explicit +void atomic_flag_clear_explicit(volatile atomic_flag *object, + memory_order order) { + __c11_atomic_store(&(object)->_Value, 0, order); +} + +#endif diff --git a/contrib/compiler-rt/lib/builtins/atomic_flag_test_and_set.c b/contrib/compiler-rt/lib/builtins/atomic_flag_test_and_set.c new file mode 100644 index 000000000000..e8811d39ef25 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/atomic_flag_test_and_set.c @@ -0,0 +1,27 @@ +/*===-- atomic_flag_test_and_set.c ------------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===------------------------------------------------------------------------=== + * + * This file implements atomic_flag_test_and_set from C11's stdatomic.h. + * + *===------------------------------------------------------------------------=== + */ + +#ifndef __has_include +#define __has_include(inc) 0 +#endif + +#if __has_include(<stdatomic.h>) + +#include <stdatomic.h> +#undef atomic_flag_test_and_set +_Bool atomic_flag_test_and_set(volatile atomic_flag *object) { + return __c11_atomic_exchange(&(object)->_Value, 1, __ATOMIC_SEQ_CST); +} + +#endif diff --git a/contrib/compiler-rt/lib/builtins/atomic_flag_test_and_set_explicit.c b/contrib/compiler-rt/lib/builtins/atomic_flag_test_and_set_explicit.c new file mode 100644 index 000000000000..5c8c2df90543 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/atomic_flag_test_and_set_explicit.c @@ -0,0 +1,28 @@ +/*===-- atomic_flag_test_and_set_explicit.c ---------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===------------------------------------------------------------------------=== + * + * This file implements atomic_flag_test_and_set_explicit from C11's stdatomic.h + * + *===------------------------------------------------------------------------=== + */ + +#ifndef __has_include +#define __has_include(inc) 0 +#endif + +#if __has_include(<stdatomic.h>) + +#include <stdatomic.h> +#undef atomic_flag_test_and_set_explicit +_Bool atomic_flag_test_and_set_explicit(volatile atomic_flag *object, + memory_order order) { + return __c11_atomic_exchange(&(object)->_Value, 1, order); +} + +#endif diff --git a/contrib/compiler-rt/lib/builtins/atomic_signal_fence.c b/contrib/compiler-rt/lib/builtins/atomic_signal_fence.c new file mode 100644 index 000000000000..9ccc2ae60ad8 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/atomic_signal_fence.c @@ -0,0 +1,27 @@ +/*===-- atomic_signal_fence.c -----------------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===------------------------------------------------------------------------=== + * + * This file implements atomic_signal_fence from C11's stdatomic.h. 
+ * + *===------------------------------------------------------------------------=== + */ + +#ifndef __has_include +#define __has_include(inc) 0 +#endif + +#if __has_include(<stdatomic.h>) + +#include <stdatomic.h> +#undef atomic_signal_fence +void atomic_signal_fence(memory_order order) { + __c11_atomic_signal_fence(order); +} + +#endif diff --git a/contrib/compiler-rt/lib/builtins/atomic_thread_fence.c b/contrib/compiler-rt/lib/builtins/atomic_thread_fence.c new file mode 100644 index 000000000000..d22560151bc8 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/atomic_thread_fence.c @@ -0,0 +1,27 @@ +/*===-- atomic_thread_fence.c -----------------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===------------------------------------------------------------------------=== + * + * This file implements atomic_thread_fence from C11's stdatomic.h. + * + *===------------------------------------------------------------------------=== + */ + +#ifndef __has_include +#define __has_include(inc) 0 +#endif + +#if __has_include(<stdatomic.h>) + +#include <stdatomic.h> +#undef atomic_thread_fence +void atomic_thread_fence(memory_order order) { + __c11_atomic_thread_fence(order); +} + +#endif diff --git a/contrib/compiler-rt/lib/builtins/bswapdi2.c b/contrib/compiler-rt/lib/builtins/bswapdi2.c new file mode 100644 index 000000000000..eb220007bb10 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/bswapdi2.c @@ -0,0 +1,27 @@ +/* ===-- bswapdi2.c - Implement __bswapdi2 ---------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __bswapdi2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +COMPILER_RT_ABI uint64_t __bswapdi2(uint64_t u) { + return ( + (((u)&0xff00000000000000ULL) >> 56) | + (((u)&0x00ff000000000000ULL) >> 40) | + (((u)&0x0000ff0000000000ULL) >> 24) | + (((u)&0x000000ff00000000ULL) >> 8) | + (((u)&0x00000000ff000000ULL) << 8) | + (((u)&0x0000000000ff0000ULL) << 24) | + (((u)&0x000000000000ff00ULL) << 40) | + (((u)&0x00000000000000ffULL) << 56)); +} diff --git a/contrib/compiler-rt/lib/builtins/bswapsi2.c b/contrib/compiler-rt/lib/builtins/bswapsi2.c new file mode 100644 index 000000000000..5d941e69f7c4 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/bswapsi2.c @@ -0,0 +1,23 @@ +/* ===-- bswapsi2.c - Implement __bswapsi2 ---------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __bswapsi2 for the compiler_rt library. 
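+ *
+ * For example, __bswapsi2(0x12345678) returns 0x78563412.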
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +COMPILER_RT_ABI uint32_t __bswapsi2(uint32_t u) { + return ( + (((u)&0xff000000) >> 24) | + (((u)&0x00ff0000) >> 8) | + (((u)&0x0000ff00) << 8) | + (((u)&0x000000ff) << 24)); +} diff --git a/contrib/compiler-rt/lib/builtins/clear_cache.c b/contrib/compiler-rt/lib/builtins/clear_cache.c new file mode 100644 index 000000000000..9dcab344ad1b --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/clear_cache.c @@ -0,0 +1,199 @@ +/* ===-- clear_cache.c - Implement __clear_cache ---------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" +#include <assert.h> +#include <stddef.h> + +#if __APPLE__ + #include <libkern/OSCacheControl.h> +#endif + +#if defined(_WIN32) +/* Forward declare Win32 APIs since the GCC mode driver does not handle the + newer SDKs as well as needed. */ +uint32_t FlushInstructionCache(uintptr_t hProcess, void *lpBaseAddress, + uintptr_t dwSize); +uintptr_t GetCurrentProcess(void); +#endif + +#if defined(__FreeBSD__) && defined(__arm__) + #include <sys/types.h> + #include <machine/sysarch.h> +#endif + +#if defined(__NetBSD__) && defined(__arm__) + #include <machine/sysarch.h> +#endif + +#if defined(__OpenBSD__) && defined(__mips__) + #include <sys/types.h> + #include <machine/sysarch.h> +#endif + +#if defined(__linux__) && defined(__mips__) + #include <sys/cachectl.h> + #include <sys/syscall.h> + #include <unistd.h> + #if defined(__ANDROID__) && defined(__LP64__) + /* + * clear_mips_cache - Invalidates instruction cache for Mips. + */ + static void clear_mips_cache(const void* Addr, size_t Size) { + __asm__ volatile ( + ".set push\n" + ".set noreorder\n" + ".set noat\n" + "beq %[Size], $zero, 20f\n" /* If size == 0, branch around. */ + "nop\n" + "daddu %[Size], %[Addr], %[Size]\n" /* Calculate end address + 1 */ + "rdhwr $v0, $1\n" /* Get step size for SYNCI. + $1 is $HW_SYNCI_Step */ + "beq $v0, $zero, 20f\n" /* If no caches require + synchronization, branch + around. */ + "nop\n" + "10:\n" + "synci 0(%[Addr])\n" /* Synchronize all caches around + address. */ + "daddu %[Addr], %[Addr], $v0\n" /* Add step size. */ + "sltu $at, %[Addr], %[Size]\n" /* Compare current with end + address. */ + "bne $at, $zero, 10b\n" /* Branch if more to do. */ + "nop\n" + "sync\n" /* Clear memory hazards. */ + "20:\n" + "bal 30f\n" + "nop\n" + "30:\n" + "daddiu $ra, $ra, 12\n" /* $ra has a value of $pc here. + Add offset of 12 to point to the + instruction after the last nop. + */ + "jr.hb $ra\n" /* Return, clearing instruction + hazards. */ + "nop\n" + ".set pop\n" + : [Addr] "+r"(Addr), [Size] "+r"(Size) + :: "at", "ra", "v0", "memory" + ); + } + #endif +#endif + +/* + * The compiler generates calls to __clear_cache() when creating + * trampoline functions on the stack for use with nested functions. + * It is expected to invalidate the instruction cache for the + * specified range. 
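+ *
+ * For illustration, a typical caller (e.g. JIT-style code emission) writes
+ * instructions into a buffer and then does
+ *     __clear_cache(buf, (char *)buf + len);
+ * before jumping to the newly written code.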
+ */ + +void __clear_cache(void *start, void *end) { +#if __i386__ || __x86_64__ || defined(_M_IX86) || defined(_M_X64) +/* + * Intel processors have a unified instruction and data cache + * so there is nothing to do + */ +#elif defined(_WIN32) && (defined(__arm__) || defined(__aarch64__)) + FlushInstructionCache(GetCurrentProcess(), start, end - start); +#elif defined(__arm__) && !defined(__APPLE__) + #if defined(__FreeBSD__) || defined(__NetBSD__) + struct arm_sync_icache_args arg; + + arg.addr = (uintptr_t)start; + arg.len = (uintptr_t)end - (uintptr_t)start; + + sysarch(ARM_SYNC_ICACHE, &arg); + #elif defined(__linux__) + /* + * We used to include asm/unistd.h for the __ARM_NR_cacheflush define, but + * it also brought many other unused defines, as well as a dependency on + * kernel headers to be installed. + * + * This value is stable at least since Linux 3.13 and should remain so for + * compatibility reasons, warranting it's re-definition here. + */ + #define __ARM_NR_cacheflush 0x0f0002 + register int start_reg __asm("r0") = (int) (intptr_t) start; + const register int end_reg __asm("r1") = (int) (intptr_t) end; + const register int flags __asm("r2") = 0; + const register int syscall_nr __asm("r7") = __ARM_NR_cacheflush; + __asm __volatile("svc 0x0" + : "=r"(start_reg) + : "r"(syscall_nr), "r"(start_reg), "r"(end_reg), + "r"(flags)); + assert(start_reg == 0 && "Cache flush syscall failed."); + #else + compilerrt_abort(); + #endif +#elif defined(__linux__) && defined(__mips__) + const uintptr_t start_int = (uintptr_t) start; + const uintptr_t end_int = (uintptr_t) end; + #if defined(__ANDROID__) && defined(__LP64__) + // Call synci implementation for short address range. + const uintptr_t address_range_limit = 256; + if ((end_int - start_int) <= address_range_limit) { + clear_mips_cache(start, (end_int - start_int)); + } else { + syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE); + } + #else + syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE); + #endif +#elif defined(__mips__) && defined(__OpenBSD__) + cacheflush(start, (uintptr_t)end - (uintptr_t)start, BCACHE); +#elif defined(__aarch64__) && !defined(__APPLE__) + uint64_t xstart = (uint64_t)(uintptr_t) start; + uint64_t xend = (uint64_t)(uintptr_t) end; + uint64_t addr; + + // Get Cache Type Info + uint64_t ctr_el0; + __asm __volatile("mrs %0, ctr_el0" : "=r"(ctr_el0)); + + /* + * dc & ic instructions must use 64bit registers so we don't use + * uintptr_t in case this runs in an IPL32 environment. 
+ */ + const size_t dcache_line_size = 4 << ((ctr_el0 >> 16) & 15); + for (addr = xstart & ~(dcache_line_size - 1); addr < xend; + addr += dcache_line_size) + __asm __volatile("dc cvau, %0" :: "r"(addr)); + __asm __volatile("dsb ish"); + + const size_t icache_line_size = 4 << ((ctr_el0 >> 0) & 15); + for (addr = xstart & ~(icache_line_size - 1); addr < xend; + addr += icache_line_size) + __asm __volatile("ic ivau, %0" :: "r"(addr)); + __asm __volatile("isb sy"); +#elif defined (__powerpc64__) + const size_t line_size = 32; + const size_t len = (uintptr_t)end - (uintptr_t)start; + + const uintptr_t mask = ~(line_size - 1); + const uintptr_t start_line = ((uintptr_t)start) & mask; + const uintptr_t end_line = ((uintptr_t)start + len + line_size - 1) & mask; + + for (uintptr_t line = start_line; line < end_line; line += line_size) + __asm__ volatile("dcbf 0, %0" : : "r"(line)); + __asm__ volatile("sync"); + + for (uintptr_t line = start_line; line < end_line; line += line_size) + __asm__ volatile("icbi 0, %0" : : "r"(line)); + __asm__ volatile("isync"); +#else + #if __APPLE__ + /* On Darwin, sys_icache_invalidate() provides this functionality */ + sys_icache_invalidate(start, end-start); + #else + compilerrt_abort(); + #endif +#endif +} diff --git a/contrib/compiler-rt/lib/builtins/clzdi2.c b/contrib/compiler-rt/lib/builtins/clzdi2.c new file mode 100644 index 000000000000..1819e6be436b --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/clzdi2.c @@ -0,0 +1,40 @@ +/* ===-- clzdi2.c - Implement __clzdi2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __clzdi2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: the number of leading 0-bits */ + +#if !defined(__clang__) && \ + ((defined(__sparc__) && defined(__arch64__)) || \ + defined(__mips64) || \ + (defined(__riscv) && __SIZEOF_POINTER__ >= 8)) +/* On 64-bit architectures with neither a native clz instruction nor a native + * ctz instruction, gcc resolves __builtin_clz to __clzdi2 rather than + * __clzsi2, leading to infinite recursion. */ +#define __builtin_clz(a) __clzsi2(a) +extern si_int __clzsi2(si_int); +#endif + +/* Precondition: a != 0 */ + +COMPILER_RT_ABI si_int +__clzdi2(di_int a) +{ + dwords x; + x.all = a; + const si_int f = -(x.s.high == 0); + return __builtin_clz((x.s.high & ~f) | (x.s.low & f)) + + (f & ((si_int)(sizeof(si_int) * CHAR_BIT))); +} diff --git a/contrib/compiler-rt/lib/builtins/clzsi2.c b/contrib/compiler-rt/lib/builtins/clzsi2.c new file mode 100644 index 000000000000..25b8ed2c4c24 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/clzsi2.c @@ -0,0 +1,53 @@ +/* ===-- clzsi2.c - Implement __clzsi2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __clzsi2 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: the number of leading 0-bits */ + +/* Precondition: a != 0 */ + +COMPILER_RT_ABI si_int +__clzsi2(si_int a) +{ + su_int x = (su_int)a; + si_int t = ((x & 0xFFFF0000) == 0) << 4; /* if (x is small) t = 16 else 0 */ + x >>= 16 - t; /* x = [0 - 0xFFFF] */ + su_int r = t; /* r = [0, 16] */ + /* return r + clz(x) */ + t = ((x & 0xFF00) == 0) << 3; + x >>= 8 - t; /* x = [0 - 0xFF] */ + r += t; /* r = [0, 8, 16, 24] */ + /* return r + clz(x) */ + t = ((x & 0xF0) == 0) << 2; + x >>= 4 - t; /* x = [0 - 0xF] */ + r += t; /* r = [0, 4, 8, 12, 16, 20, 24, 28] */ + /* return r + clz(x) */ + t = ((x & 0xC) == 0) << 1; + x >>= 2 - t; /* x = [0 - 3] */ + r += t; /* r = [0 - 30] and is even */ + /* return r + clz(x) */ +/* switch (x) + * { + * case 0: + * return r + 2; + * case 1: + * return r + 1; + * case 2: + * case 3: + * return r; + * } + */ + return r + ((2 - x) & -((x & 2) == 0)); +} diff --git a/contrib/compiler-rt/lib/builtins/clzti2.c b/contrib/compiler-rt/lib/builtins/clzti2.c new file mode 100644 index 000000000000..15a7b3c90004 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/clzti2.c @@ -0,0 +1,33 @@ +/* ===-- clzti2.c - Implement __clzti2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __clzti2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: the number of leading 0-bits */ + +/* Precondition: a != 0 */ + +COMPILER_RT_ABI si_int +__clzti2(ti_int a) +{ + twords x; + x.all = a; + const di_int f = -(x.s.high == 0); + return __builtin_clzll((x.s.high & ~f) | (x.s.low & f)) + + ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT))); +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/cmpdi2.c b/contrib/compiler-rt/lib/builtins/cmpdi2.c new file mode 100644 index 000000000000..52634d9c3362 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/cmpdi2.c @@ -0,0 +1,51 @@ +/* ===-- cmpdi2.c - Implement __cmpdi2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __cmpdi2 for the compiler_rt library. 
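+ *
+ * For example, __cmpdi2(-1, 0) returns 0 and __cmpdi2(7, 7) returns 1.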
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: if (a < b) returns 0 +* if (a == b) returns 1 +* if (a > b) returns 2 +*/ + +COMPILER_RT_ABI si_int +__cmpdi2(di_int a, di_int b) +{ + dwords x; + x.all = a; + dwords y; + y.all = b; + if (x.s.high < y.s.high) + return 0; + if (x.s.high > y.s.high) + return 2; + if (x.s.low < y.s.low) + return 0; + if (x.s.low > y.s.low) + return 2; + return 1; +} + +#ifdef __ARM_EABI__ +/* Returns: if (a < b) returns -1 +* if (a == b) returns 0 +* if (a > b) returns 1 +*/ +COMPILER_RT_ABI si_int +__aeabi_lcmp(di_int a, di_int b) +{ + return __cmpdi2(a, b) - 1; +} +#endif + diff --git a/contrib/compiler-rt/lib/builtins/cmpti2.c b/contrib/compiler-rt/lib/builtins/cmpti2.c new file mode 100644 index 000000000000..2c8b56e29a06 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/cmpti2.c @@ -0,0 +1,42 @@ +/* ===-- cmpti2.c - Implement __cmpti2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __cmpti2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: if (a < b) returns 0 + * if (a == b) returns 1 + * if (a > b) returns 2 + */ + +COMPILER_RT_ABI si_int +__cmpti2(ti_int a, ti_int b) +{ + twords x; + x.all = a; + twords y; + y.all = b; + if (x.s.high < y.s.high) + return 0; + if (x.s.high > y.s.high) + return 2; + if (x.s.low < y.s.low) + return 0; + if (x.s.low > y.s.low) + return 2; + return 1; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/comparedf2.c b/contrib/compiler-rt/lib/builtins/comparedf2.c new file mode 100644 index 000000000000..44e5d2b288a6 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/comparedf2.c @@ -0,0 +1,153 @@ +//===-- lib/comparedf2.c - Double-precision comparisons -----------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// // This file implements the following soft-float comparison routines: +// +// __eqdf2 __gedf2 __unorddf2 +// __ledf2 __gtdf2 +// __ltdf2 +// __nedf2 +// +// The semantics of the routines grouped in each column are identical, so there +// is a single implementation for each, and wrappers to provide the other names. +// +// The main routines behave as follows: +// +// __ledf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// 1 if either a or b is NaN +// +// __gedf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// -1 if either a or b is NaN +// +// __unorddf2(a,b) returns 0 if both a and b are numbers +// 1 if either a or b is NaN +// +// Note that __ledf2( ) and __gedf2( ) are identical except in their handling of +// NaN values. 
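+//
+// For example, __ledf2(1.0, 2.0) returns -1 and __ledf2(NaN, 2.0) returns 1,
+// whereas __gedf2(NaN, 2.0) returns -1; either way the unordered result
+// lands on the "false" side of the predicate the caller tests.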
+// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_lib.h" + +enum LE_RESULT { + LE_LESS = -1, + LE_EQUAL = 0, + LE_GREATER = 1, + LE_UNORDERED = 1 +}; + +COMPILER_RT_ABI enum LE_RESULT +__ledf2(fp_t a, fp_t b) { + + const srep_t aInt = toRep(a); + const srep_t bInt = toRep(b); + const rep_t aAbs = aInt & absMask; + const rep_t bAbs = bInt & absMask; + + // If either a or b is NaN, they are unordered. + if (aAbs > infRep || bAbs > infRep) return LE_UNORDERED; + + // If a and b are both zeros, they are equal. + if ((aAbs | bAbs) == 0) return LE_EQUAL; + + // If at least one of a and b is positive, we get the same result comparing + // a and b as signed integers as we would with a floating-point compare. + if ((aInt & bInt) >= 0) { + if (aInt < bInt) return LE_LESS; + else if (aInt == bInt) return LE_EQUAL; + else return LE_GREATER; + } + + // Otherwise, both are negative, so we need to flip the sense of the + // comparison to get the correct result. (This assumes a twos- or ones- + // complement integer representation; if integers are represented in a + // sign-magnitude representation, then this flip is incorrect). + else { + if (aInt > bInt) return LE_LESS; + else if (aInt == bInt) return LE_EQUAL; + else return LE_GREATER; + } +} + +#if defined(__ELF__) +// Alias for libgcc compatibility +FNALIAS(__cmpdf2, __ledf2); +#endif + +enum GE_RESULT { + GE_LESS = -1, + GE_EQUAL = 0, + GE_GREATER = 1, + GE_UNORDERED = -1 // Note: different from LE_UNORDERED +}; + +COMPILER_RT_ABI enum GE_RESULT +__gedf2(fp_t a, fp_t b) { + + const srep_t aInt = toRep(a); + const srep_t bInt = toRep(b); + const rep_t aAbs = aInt & absMask; + const rep_t bAbs = bInt & absMask; + + if (aAbs > infRep || bAbs > infRep) return GE_UNORDERED; + if ((aAbs | bAbs) == 0) return GE_EQUAL; + if ((aInt & bInt) >= 0) { + if (aInt < bInt) return GE_LESS; + else if (aInt == bInt) return GE_EQUAL; + else return GE_GREATER; + } else { + if (aInt > bInt) return GE_LESS; + else if (aInt == bInt) return GE_EQUAL; + else return GE_GREATER; + } +} + +COMPILER_RT_ABI int +__unorddf2(fp_t a, fp_t b) { + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + return aAbs > infRep || bAbs > infRep; +} + +// The following are alternative names for the preceding routines. + +COMPILER_RT_ABI enum LE_RESULT +__eqdf2(fp_t a, fp_t b) { + return __ledf2(a, b); +} + +COMPILER_RT_ABI enum LE_RESULT +__ltdf2(fp_t a, fp_t b) { + return __ledf2(a, b); +} + +COMPILER_RT_ABI enum LE_RESULT +__nedf2(fp_t a, fp_t b) { + return __ledf2(a, b); +} + +COMPILER_RT_ABI enum GE_RESULT +__gtdf2(fp_t a, fp_t b) { + return __gedf2(a, b); +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI int __aeabi_dcmpun(fp_t a, fp_t b) { + return __unorddf2(a, b); +} +#else +AEABI_RTABI int __aeabi_dcmpun(fp_t a, fp_t b) COMPILER_RT_ALIAS(__unorddf2); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/comparesf2.c b/contrib/compiler-rt/lib/builtins/comparesf2.c new file mode 100644 index 000000000000..43cd6a6a7003 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/comparesf2.c @@ -0,0 +1,153 @@ +//===-- lib/comparesf2.c - Single-precision comparisons -----------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the following soft-fp_t comparison routines: +// +// __eqsf2 __gesf2 __unordsf2 +// __lesf2 __gtsf2 +// __ltsf2 +// __nesf2 +// +// The semantics of the routines grouped in each column are identical, so there +// is a single implementation for each, and wrappers to provide the other names. +// +// The main routines behave as follows: +// +// __lesf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// 1 if either a or b is NaN +// +// __gesf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// -1 if either a or b is NaN +// +// __unordsf2(a,b) returns 0 if both a and b are numbers +// 1 if either a or b is NaN +// +// Note that __lesf2( ) and __gesf2( ) are identical except in their handling of +// NaN values. +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_lib.h" + +enum LE_RESULT { + LE_LESS = -1, + LE_EQUAL = 0, + LE_GREATER = 1, + LE_UNORDERED = 1 +}; + +COMPILER_RT_ABI enum LE_RESULT +__lesf2(fp_t a, fp_t b) { + + const srep_t aInt = toRep(a); + const srep_t bInt = toRep(b); + const rep_t aAbs = aInt & absMask; + const rep_t bAbs = bInt & absMask; + + // If either a or b is NaN, they are unordered. + if (aAbs > infRep || bAbs > infRep) return LE_UNORDERED; + + // If a and b are both zeros, they are equal. + if ((aAbs | bAbs) == 0) return LE_EQUAL; + + // If at least one of a and b is positive, we get the same result comparing + // a and b as signed integers as we would with a fp_ting-point compare. + if ((aInt & bInt) >= 0) { + if (aInt < bInt) return LE_LESS; + else if (aInt == bInt) return LE_EQUAL; + else return LE_GREATER; + } + + // Otherwise, both are negative, so we need to flip the sense of the + // comparison to get the correct result. (This assumes a twos- or ones- + // complement integer representation; if integers are represented in a + // sign-magnitude representation, then this flip is incorrect). + else { + if (aInt > bInt) return LE_LESS; + else if (aInt == bInt) return LE_EQUAL; + else return LE_GREATER; + } +} + +#if defined(__ELF__) +// Alias for libgcc compatibility +FNALIAS(__cmpsf2, __lesf2); +#endif + +enum GE_RESULT { + GE_LESS = -1, + GE_EQUAL = 0, + GE_GREATER = 1, + GE_UNORDERED = -1 // Note: different from LE_UNORDERED +}; + +COMPILER_RT_ABI enum GE_RESULT +__gesf2(fp_t a, fp_t b) { + + const srep_t aInt = toRep(a); + const srep_t bInt = toRep(b); + const rep_t aAbs = aInt & absMask; + const rep_t bAbs = bInt & absMask; + + if (aAbs > infRep || bAbs > infRep) return GE_UNORDERED; + if ((aAbs | bAbs) == 0) return GE_EQUAL; + if ((aInt & bInt) >= 0) { + if (aInt < bInt) return GE_LESS; + else if (aInt == bInt) return GE_EQUAL; + else return GE_GREATER; + } else { + if (aInt > bInt) return GE_LESS; + else if (aInt == bInt) return GE_EQUAL; + else return GE_GREATER; + } +} + +COMPILER_RT_ABI int +__unordsf2(fp_t a, fp_t b) { + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + return aAbs > infRep || bAbs > infRep; +} + +// The following are alternative names for the preceding routines. 
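/*
 * [Editorial sketch -- not part of the imported source; the alias definitions
 *  referred to by the comment above continue immediately below it. The sketch
 *  shows the NaN test used by __unordsf2 above: a single-precision value is
 *  NaN exactly when its sign-cleared bit pattern is greater than that of
 *  +infinity (0x7f800000). Standalone, illustrative only.]
 */
#include <assert.h>
#include <math.h>
#include <stdint.h>
#include <string.h>

int main(void) {
    const uint32_t infRep = 0x7f800000u;     /* bit pattern of +inf */
    float values[] = { 0.0f, 1.0f, INFINITY, nanf("") };
    for (int i = 0; i < 4; ++i) {
        uint32_t bits;
        memcpy(&bits, &values[i], sizeof bits);
        /* only the NaN exceeds infRep once the sign bit is cleared */
        assert(((bits & 0x7fffffffu) > infRep) == (i == 3));
    }
    return 0;
}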
+ +COMPILER_RT_ABI enum LE_RESULT +__eqsf2(fp_t a, fp_t b) { + return __lesf2(a, b); +} + +COMPILER_RT_ABI enum LE_RESULT +__ltsf2(fp_t a, fp_t b) { + return __lesf2(a, b); +} + +COMPILER_RT_ABI enum LE_RESULT +__nesf2(fp_t a, fp_t b) { + return __lesf2(a, b); +} + +COMPILER_RT_ABI enum GE_RESULT +__gtsf2(fp_t a, fp_t b) { + return __gesf2(a, b); +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI int __aeabi_fcmpun(fp_t a, fp_t b) { + return __unordsf2(a, b); +} +#else +AEABI_RTABI int __aeabi_fcmpun(fp_t a, fp_t b) COMPILER_RT_ALIAS(__unordsf2); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/comparetf2.c b/contrib/compiler-rt/lib/builtins/comparetf2.c new file mode 100644 index 000000000000..c0ad8ed0aecd --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/comparetf2.c @@ -0,0 +1,138 @@ +//===-- lib/comparetf2.c - Quad-precision comparisons -------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// // This file implements the following soft-float comparison routines: +// +// __eqtf2 __getf2 __unordtf2 +// __letf2 __gttf2 +// __lttf2 +// __netf2 +// +// The semantics of the routines grouped in each column are identical, so there +// is a single implementation for each, and wrappers to provide the other names. +// +// The main routines behave as follows: +// +// __letf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// 1 if either a or b is NaN +// +// __getf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// -1 if either a or b is NaN +// +// __unordtf2(a,b) returns 0 if both a and b are numbers +// 1 if either a or b is NaN +// +// Note that __letf2( ) and __getf2( ) are identical except in their handling of +// NaN values. +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +enum LE_RESULT { + LE_LESS = -1, + LE_EQUAL = 0, + LE_GREATER = 1, + LE_UNORDERED = 1 +}; + +COMPILER_RT_ABI enum LE_RESULT __letf2(fp_t a, fp_t b) { + + const srep_t aInt = toRep(a); + const srep_t bInt = toRep(b); + const rep_t aAbs = aInt & absMask; + const rep_t bAbs = bInt & absMask; + + // If either a or b is NaN, they are unordered. + if (aAbs > infRep || bAbs > infRep) return LE_UNORDERED; + + // If a and b are both zeros, they are equal. + if ((aAbs | bAbs) == 0) return LE_EQUAL; + + // If at least one of a and b is positive, we get the same result comparing + // a and b as signed integers as we would with a floating-point compare. + if ((aInt & bInt) >= 0) { + if (aInt < bInt) return LE_LESS; + else if (aInt == bInt) return LE_EQUAL; + else return LE_GREATER; + } + else { + // Otherwise, both are negative, so we need to flip the sense of the + // comparison to get the correct result. (This assumes a twos- or ones- + // complement integer representation; if integers are represented in a + // sign-magnitude representation, then this flip is incorrect). 
+ if (aInt > bInt) return LE_LESS; + else if (aInt == bInt) return LE_EQUAL; + else return LE_GREATER; + } +} + +#if defined(__ELF__) +// Alias for libgcc compatibility +FNALIAS(__cmptf2, __letf2); +#endif + +enum GE_RESULT { + GE_LESS = -1, + GE_EQUAL = 0, + GE_GREATER = 1, + GE_UNORDERED = -1 // Note: different from LE_UNORDERED +}; + +COMPILER_RT_ABI enum GE_RESULT __getf2(fp_t a, fp_t b) { + + const srep_t aInt = toRep(a); + const srep_t bInt = toRep(b); + const rep_t aAbs = aInt & absMask; + const rep_t bAbs = bInt & absMask; + + if (aAbs > infRep || bAbs > infRep) return GE_UNORDERED; + if ((aAbs | bAbs) == 0) return GE_EQUAL; + if ((aInt & bInt) >= 0) { + if (aInt < bInt) return GE_LESS; + else if (aInt == bInt) return GE_EQUAL; + else return GE_GREATER; + } else { + if (aInt > bInt) return GE_LESS; + else if (aInt == bInt) return GE_EQUAL; + else return GE_GREATER; + } +} + +COMPILER_RT_ABI int __unordtf2(fp_t a, fp_t b) { + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + return aAbs > infRep || bAbs > infRep; +} + +// The following are alternative names for the preceding routines. + +COMPILER_RT_ABI enum LE_RESULT __eqtf2(fp_t a, fp_t b) { + return __letf2(a, b); +} + +COMPILER_RT_ABI enum LE_RESULT __lttf2(fp_t a, fp_t b) { + return __letf2(a, b); +} + +COMPILER_RT_ABI enum LE_RESULT __netf2(fp_t a, fp_t b) { + return __letf2(a, b); +} + +COMPILER_RT_ABI enum GE_RESULT __gttf2(fp_t a, fp_t b) { + return __getf2(a, b); +} + +#endif diff --git a/contrib/compiler-rt/lib/builtins/cpu_model.c b/contrib/compiler-rt/lib/builtins/cpu_model.c new file mode 100644 index 000000000000..fb2b899fc70a --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/cpu_model.c @@ -0,0 +1,651 @@ +//===-- cpu_model.c - Support for __cpu_model builtin ------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is based on LLVM's lib/Support/Host.cpp. +// It implements the operating system Host concept and builtin +// __cpu_model for the compiler_rt library, for x86 only. 
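/*
 * [Editorial sketch -- not part of the imported source. The __cpu_model data
 *  initialized by this file is what backs the GCC/Clang x86 builtins
 *  __builtin_cpu_init(), __builtin_cpu_is() and __builtin_cpu_supports(),
 *  which is how user code normally consumes it; the exact string arguments
 *  accepted depend on the compiler version. Illustrative only.]
 */
#include <stdio.h>

int main(void) {
    __builtin_cpu_init();                 /* ensures the data below is initialized */
    if (__builtin_cpu_is("intel"))
        puts("GenuineIntel");
    if (__builtin_cpu_supports("avx2"))   /* tests a feature bit filled in by this file */
        puts("AVX2 available");
    return 0;
}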
+// +//===----------------------------------------------------------------------===// + +#if (defined(__i386__) || defined(_M_IX86) || \ + defined(__x86_64__) || defined(_M_X64)) && \ + (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)) + +#include <assert.h> + +#define bool int +#define true 1 +#define false 0 + +#ifdef _MSC_VER +#include <intrin.h> +#endif + +#ifndef __has_attribute +#define __has_attribute(attr) 0 +#endif + +enum VendorSignatures { + SIG_INTEL = 0x756e6547 /* Genu */, + SIG_AMD = 0x68747541 /* Auth */ +}; + +enum ProcessorVendors { + VENDOR_INTEL = 1, + VENDOR_AMD, + VENDOR_OTHER, + VENDOR_MAX +}; + +enum ProcessorTypes { + INTEL_BONNELL = 1, + INTEL_CORE2, + INTEL_COREI7, + AMDFAM10H, + AMDFAM15H, + INTEL_SILVERMONT, + INTEL_KNL, + AMD_BTVER1, + AMD_BTVER2, + AMDFAM17H, + INTEL_KNM, + INTEL_GOLDMONT, + INTEL_GOLDMONT_PLUS, + INTEL_TREMONT, + CPU_TYPE_MAX +}; + +enum ProcessorSubtypes { + INTEL_COREI7_NEHALEM = 1, + INTEL_COREI7_WESTMERE, + INTEL_COREI7_SANDYBRIDGE, + AMDFAM10H_BARCELONA, + AMDFAM10H_SHANGHAI, + AMDFAM10H_ISTANBUL, + AMDFAM15H_BDVER1, + AMDFAM15H_BDVER2, + AMDFAM15H_BDVER3, + AMDFAM15H_BDVER4, + AMDFAM17H_ZNVER1, + INTEL_COREI7_IVYBRIDGE, + INTEL_COREI7_HASWELL, + INTEL_COREI7_BROADWELL, + INTEL_COREI7_SKYLAKE, + INTEL_COREI7_SKYLAKE_AVX512, + INTEL_COREI7_CANNONLAKE, + INTEL_COREI7_ICELAKE_CLIENT, + INTEL_COREI7_ICELAKE_SERVER, + CPU_SUBTYPE_MAX +}; + +enum ProcessorFeatures { + FEATURE_CMOV = 0, + FEATURE_MMX, + FEATURE_POPCNT, + FEATURE_SSE, + FEATURE_SSE2, + FEATURE_SSE3, + FEATURE_SSSE3, + FEATURE_SSE4_1, + FEATURE_SSE4_2, + FEATURE_AVX, + FEATURE_AVX2, + FEATURE_SSE4_A, + FEATURE_FMA4, + FEATURE_XOP, + FEATURE_FMA, + FEATURE_AVX512F, + FEATURE_BMI, + FEATURE_BMI2, + FEATURE_AES, + FEATURE_PCLMUL, + FEATURE_AVX512VL, + FEATURE_AVX512BW, + FEATURE_AVX512DQ, + FEATURE_AVX512CD, + FEATURE_AVX512ER, + FEATURE_AVX512PF, + FEATURE_AVX512VBMI, + FEATURE_AVX512IFMA, + FEATURE_AVX5124VNNIW, + FEATURE_AVX5124FMAPS, + FEATURE_AVX512VPOPCNTDQ, + FEATURE_AVX512VBMI2, + FEATURE_GFNI, + FEATURE_VPCLMULQDQ, + FEATURE_AVX512VNNI, + FEATURE_AVX512BITALG +}; + +// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max). +// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID +// support. Consequently, for i386, the presence of CPUID is checked first +// via the corresponding eflags bit. +static bool isCpuIdSupported() { +#if defined(__GNUC__) || defined(__clang__) +#if defined(__i386__) + int __cpuid_supported; + __asm__(" pushfl\n" + " popl %%eax\n" + " movl %%eax,%%ecx\n" + " xorl $0x00200000,%%eax\n" + " pushl %%eax\n" + " popfl\n" + " pushfl\n" + " popl %%eax\n" + " movl $0,%0\n" + " cmpl %%eax,%%ecx\n" + " je 1f\n" + " movl $1,%0\n" + "1:" + : "=r"(__cpuid_supported) + : + : "eax", "ecx"); + if (!__cpuid_supported) + return false; +#endif + return true; +#endif + return true; +} + +// This code is copied from lib/Support/Host.cpp. +// Changes to either file should be mirrored in the other. + +/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in +/// the specified arguments. If we can't run cpuid on the host, return true. +static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, + unsigned *rECX, unsigned *rEDX) { +#if defined(__GNUC__) || defined(__clang__) +#if defined(__x86_64__) + // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. + // FIXME: should we save this for Clang? 
+ __asm__("movq\t%%rbx, %%rsi\n\t" + "cpuid\n\t" + "xchgq\t%%rbx, %%rsi\n\t" + : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) + : "a"(value)); + return false; +#elif defined(__i386__) + __asm__("movl\t%%ebx, %%esi\n\t" + "cpuid\n\t" + "xchgl\t%%ebx, %%esi\n\t" + : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) + : "a"(value)); + return false; +#else + return true; +#endif +#elif defined(_MSC_VER) + // The MSVC intrinsic is portable across x86 and x64. + int registers[4]; + __cpuid(registers, value); + *rEAX = registers[0]; + *rEBX = registers[1]; + *rECX = registers[2]; + *rEDX = registers[3]; + return false; +#else + return true; +#endif +} + +/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return +/// the 4 values in the specified arguments. If we can't run cpuid on the host, +/// return true. +static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, + unsigned *rEAX, unsigned *rEBX, unsigned *rECX, + unsigned *rEDX) { +#if defined(__GNUC__) || defined(__clang__) +#if defined(__x86_64__) + // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. + // FIXME: should we save this for Clang? + __asm__("movq\t%%rbx, %%rsi\n\t" + "cpuid\n\t" + "xchgq\t%%rbx, %%rsi\n\t" + : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) + : "a"(value), "c"(subleaf)); + return false; +#elif defined(__i386__) + __asm__("movl\t%%ebx, %%esi\n\t" + "cpuid\n\t" + "xchgl\t%%ebx, %%esi\n\t" + : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) + : "a"(value), "c"(subleaf)); + return false; +#else + return true; +#endif +#elif defined(_MSC_VER) + int registers[4]; + __cpuidex(registers, value, subleaf); + *rEAX = registers[0]; + *rEBX = registers[1]; + *rECX = registers[2]; + *rEDX = registers[3]; + return false; +#else + return true; +#endif +} + +// Read control register 0 (XCR0). Used to detect features such as AVX. +static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) { +#if defined(__GNUC__) || defined(__clang__) + // Check xgetbv; this uses a .byte sequence instead of the instruction + // directly because older assemblers do not include support for xgetbv and + // there is no easy way to conditionally compile based on the assembler used. + __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0)); + return false; +#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) + unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); + *rEAX = Result; + *rEDX = Result >> 32; + return false; +#else + return true; +#endif +} + +static void detectX86FamilyModel(unsigned EAX, unsigned *Family, + unsigned *Model) { + *Family = (EAX >> 8) & 0xf; // Bits 8 - 11 + *Model = (EAX >> 4) & 0xf; // Bits 4 - 7 + if (*Family == 6 || *Family == 0xf) { + if (*Family == 0xf) + // Examine extended family ID if family ID is F. + *Family += (EAX >> 20) & 0xff; // Bits 20 - 27 + // Examine extended model ID if family ID is 6 or F. + *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 + } +} + +static void +getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, + unsigned Brand_id, unsigned Features, + unsigned *Type, unsigned *Subtype) { + if (Brand_id != 0) + return; + switch (Family) { + case 6: + switch (Model) { + case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile + // processor, Intel Core 2 Quad processor, Intel Core 2 Quad + // mobile processor, Intel Core 2 Extreme processor, Intel + // Pentium Dual-Core processor, Intel Xeon processor, model + // 0Fh. All processors are manufactured using the 65 nm process. 
+ case 0x16: // Intel Celeron processor model 16h. All processors are + // manufactured using the 65 nm process + case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model + // 17h. All processors are manufactured using the 45 nm process. + // + // 45nm: Penryn , Wolfdale, Yorkfield (XE) + case 0x1d: // Intel Xeon processor MP. All processors are manufactured using + // the 45 nm process. + *Type = INTEL_CORE2; // "penryn" + break; + case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All + // processors are manufactured using the 45 nm process. + case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. + // As found in a Summer 2010 model iMac. + case 0x1f: + case 0x2e: // Nehalem EX + *Type = INTEL_COREI7; // "nehalem" + *Subtype = INTEL_COREI7_NEHALEM; + break; + case 0x25: // Intel Core i7, laptop version. + case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All + // processors are manufactured using the 32 nm process. + case 0x2f: // Westmere EX + *Type = INTEL_COREI7; // "westmere" + *Subtype = INTEL_COREI7_WESTMERE; + break; + case 0x2a: // Intel Core i7 processor. All processors are manufactured + // using the 32 nm process. + case 0x2d: + *Type = INTEL_COREI7; //"sandybridge" + *Subtype = INTEL_COREI7_SANDYBRIDGE; + break; + case 0x3a: + case 0x3e: // Ivy Bridge EP + *Type = INTEL_COREI7; // "ivybridge" + *Subtype = INTEL_COREI7_IVYBRIDGE; + break; + + // Haswell: + case 0x3c: + case 0x3f: + case 0x45: + case 0x46: + *Type = INTEL_COREI7; // "haswell" + *Subtype = INTEL_COREI7_HASWELL; + break; + + // Broadwell: + case 0x3d: + case 0x47: + case 0x4f: + case 0x56: + *Type = INTEL_COREI7; // "broadwell" + *Subtype = INTEL_COREI7_BROADWELL; + break; + + // Skylake: + case 0x4e: // Skylake mobile + case 0x5e: // Skylake desktop + case 0x8e: // Kaby Lake mobile + case 0x9e: // Kaby Lake desktop + *Type = INTEL_COREI7; // "skylake" + *Subtype = INTEL_COREI7_SKYLAKE; + break; + + // Skylake Xeon: + case 0x55: + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512" + break; + + // Cannonlake: + case 0x66: + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_CANNONLAKE; // "cannonlake" + break; + + case 0x1c: // Most 45 nm Intel Atom processors + case 0x26: // 45 nm Atom Lincroft + case 0x27: // 32 nm Atom Medfield + case 0x35: // 32 nm Atom Midview + case 0x36: // 32 nm Atom Midview + *Type = INTEL_BONNELL; + break; // "bonnell" + + // Atom Silvermont codes from the Intel software optimization guide. + case 0x37: + case 0x4a: + case 0x4d: + case 0x5a: + case 0x5d: + case 0x4c: // really airmont + *Type = INTEL_SILVERMONT; + break; // "silvermont" + // Goldmont: + case 0x5c: // Apollo Lake + case 0x5f: // Denverton + *Type = INTEL_GOLDMONT; + break; // "goldmont" + case 0x7a: + *Type = INTEL_GOLDMONT_PLUS; + break; + + case 0x57: + *Type = INTEL_KNL; // knl + break; + + case 0x85: + *Type = INTEL_KNM; // knm + break; + + default: // Unknown family 6 CPU. + break; + break; + } + default: + break; // Unknown. + } +} + +static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, + unsigned Features, unsigned *Type, + unsigned *Subtype) { + // FIXME: this poorly matches the generated SubtargetFeatureKV table. There + // appears to be no way to generate the wide variety of AMD-specific targets + // from the information returned from CPUID. 
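/*
 * [Editorial sketch -- not part of the imported source; the AMD decoding that
 *  the comment above introduces follows immediately below. This sketch works
 *  through detectX86FamilyModel() above for a sample CPUID leaf-1 EAX value:
 *  0x000906ea (a Kaby Lake desktop part) decodes to family 6 and model 0x9e,
 *  which the Intel switch above maps to INTEL_COREI7 / INTEL_COREI7_SKYLAKE.]
 */
#include <assert.h>

int main(void) {
    unsigned EAX = 0x000906ea;
    unsigned Family = (EAX >> 8) & 0xf;          /* 0x6 */
    unsigned Model  = (EAX >> 4) & 0xf;          /* 0xe */
    if (Family == 6 || Family == 0xf) {
        if (Family == 0xf)
            Family += (EAX >> 20) & 0xff;        /* extended family */
        Model += ((EAX >> 16) & 0xf) << 4;       /* extended model: 0x9 << 4 */
    }
    assert(Family == 6 && Model == 0x9e);        /* case 0x9e: Kaby Lake desktop */
    return 0;
}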
+ switch (Family) { + case 16: + *Type = AMDFAM10H; // "amdfam10" + switch (Model) { + case 2: + *Subtype = AMDFAM10H_BARCELONA; + break; + case 4: + *Subtype = AMDFAM10H_SHANGHAI; + break; + case 8: + *Subtype = AMDFAM10H_ISTANBUL; + break; + } + break; + case 20: + *Type = AMD_BTVER1; + break; // "btver1"; + case 21: + *Type = AMDFAM15H; + if (Model >= 0x60 && Model <= 0x7f) { + *Subtype = AMDFAM15H_BDVER4; + break; // "bdver4"; 60h-7Fh: Excavator + } + if (Model >= 0x30 && Model <= 0x3f) { + *Subtype = AMDFAM15H_BDVER3; + break; // "bdver3"; 30h-3Fh: Steamroller + } + if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) { + *Subtype = AMDFAM15H_BDVER2; + break; // "bdver2"; 02h, 10h-1Fh: Piledriver + } + if (Model <= 0x0f) { + *Subtype = AMDFAM15H_BDVER1; + break; // "bdver1"; 00h-0Fh: Bulldozer + } + break; + case 22: + *Type = AMD_BTVER2; + break; // "btver2" + case 23: + *Type = AMDFAM17H; + *Subtype = AMDFAM17H_ZNVER1; + break; + default: + break; // "generic" + } +} + +static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, + unsigned *FeaturesOut, + unsigned *Features2Out) { + unsigned Features = 0; + unsigned Features2 = 0; + unsigned EAX, EBX; + +#define setFeature(F) \ + do { \ + if (F < 32) \ + Features |= 1U << (F & 0x1f); \ + else if (F < 64) \ + Features2 |= 1U << ((F - 32) & 0x1f); \ + } while (0) + + if ((EDX >> 15) & 1) + setFeature(FEATURE_CMOV); + if ((EDX >> 23) & 1) + setFeature(FEATURE_MMX); + if ((EDX >> 25) & 1) + setFeature(FEATURE_SSE); + if ((EDX >> 26) & 1) + setFeature(FEATURE_SSE2); + + if ((ECX >> 0) & 1) + setFeature(FEATURE_SSE3); + if ((ECX >> 1) & 1) + setFeature(FEATURE_PCLMUL); + if ((ECX >> 9) & 1) + setFeature(FEATURE_SSSE3); + if ((ECX >> 12) & 1) + setFeature(FEATURE_FMA); + if ((ECX >> 19) & 1) + setFeature(FEATURE_SSE4_1); + if ((ECX >> 20) & 1) + setFeature(FEATURE_SSE4_2); + if ((ECX >> 23) & 1) + setFeature(FEATURE_POPCNT); + if ((ECX >> 25) & 1) + setFeature(FEATURE_AES); + + // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV + // indicates that the AVX registers will be saved and restored on context + // switch, then we have full AVX support. 
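/*
 * [Editorial sketch -- not part of the imported source; the real check follows
 *  immediately below. Given the leaf-1 ECX value and the low word of XCR0
 *  (as returned by getX86XCR0() above), "full AVX support" means: the OSXSAVE
 *  and AVX CPUID bits are both set *and* the OS has enabled saving of the XMM
 *  and YMM state (XCR0 bits 1 and 2). Helper name and values are illustrative.]
 */
#include <stdbool.h>

static bool has_usable_avx(unsigned leaf1_ecx, unsigned xcr0_lo) {
    const unsigned AVXBits = (1u << 27) | (1u << 28); /* OSXSAVE | AVX */
    return ((leaf1_ecx & AVXBits) == AVXBits)         /* CPU + OS expose XSAVE/AVX */
        && ((xcr0_lo & 0x6) == 0x6);                  /* XMM and YMM state enabled */
}

int main(void) {
    /* sample values: both CPUID bits and both XCR0 bits set */
    return has_usable_avx(0x18000000u, 0x7u) ? 0 : 1;
}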
+ const unsigned AVXBits = (1 << 27) | (1 << 28); + bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && + ((EAX & 0x6) == 0x6); + bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); + + if (HasAVX) + setFeature(FEATURE_AVX); + + bool HasLeaf7 = + MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); + + if (HasLeaf7 && ((EBX >> 3) & 1)) + setFeature(FEATURE_BMI); + if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) + setFeature(FEATURE_AVX2); + if (HasLeaf7 && ((EBX >> 9) & 1)) + setFeature(FEATURE_BMI2); + if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512F); + if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512DQ); + if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512IFMA); + if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512PF); + if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512ER); + if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512CD); + if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512BW); + if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512VL); + + if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512VBMI); + if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512VBMI2); + if (HasLeaf7 && ((ECX >> 8) & 1)) + setFeature(FEATURE_GFNI); + if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX) + setFeature(FEATURE_VPCLMULQDQ); + if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512VNNI); + if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512BITALG); + if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512VPOPCNTDQ); + + if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX5124VNNIW); + if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX5124FMAPS); + + unsigned MaxExtLevel; + getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); + + bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && + !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); + if (HasExtLeaf1 && ((ECX >> 6) & 1)) + setFeature(FEATURE_SSE4_A); + if (HasExtLeaf1 && ((ECX >> 11) & 1)) + setFeature(FEATURE_XOP); + if (HasExtLeaf1 && ((ECX >> 16) & 1)) + setFeature(FEATURE_FMA4); + + *FeaturesOut = Features; + *Features2Out = Features2; +#undef setFeature +} + +#if defined(HAVE_INIT_PRIORITY) +#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__ 101)) +#elif __has_attribute(__constructor__) +#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__)) +#else +// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that +// this runs during initialization. +#define CONSTRUCTOR_ATTRIBUTE +#endif + +int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE; + +struct __processor_model { + unsigned int __cpu_vendor; + unsigned int __cpu_type; + unsigned int __cpu_subtype; + unsigned int __cpu_features[1]; +} __cpu_model = {0, 0, 0, {0}}; +unsigned int __cpu_features2; + +/* A constructor function that is sets __cpu_model and __cpu_features2 with + the right values. This needs to run only once. This constructor is + given the highest priority and it should run before constructors without + the priority set. However, it still runs after ifunc initializers and + needs to be called explicitly there. 
*/ + +int CONSTRUCTOR_ATTRIBUTE +__cpu_indicator_init(void) { + unsigned EAX, EBX, ECX, EDX; + unsigned MaxLeaf = 5; + unsigned Vendor; + unsigned Model, Family, Brand_id; + unsigned Features = 0; + unsigned Features2 = 0; + + /* This function needs to run just once. */ + if (__cpu_model.__cpu_vendor) + return 0; + + if (!isCpuIdSupported()) + return -1; + + /* Assume cpuid insn present. Run in level 0 to get vendor id. */ + if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) { + __cpu_model.__cpu_vendor = VENDOR_OTHER; + return -1; + } + getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); + detectX86FamilyModel(EAX, &Family, &Model); + Brand_id = EBX & 0xff; + + /* Find available features. */ + getAvailableFeatures(ECX, EDX, MaxLeaf, &Features, &Features2); + __cpu_model.__cpu_features[0] = Features; + __cpu_features2 = Features2; + + if (Vendor == SIG_INTEL) { + /* Get CPU type. */ + getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features, + &(__cpu_model.__cpu_type), + &(__cpu_model.__cpu_subtype)); + __cpu_model.__cpu_vendor = VENDOR_INTEL; + } else if (Vendor == SIG_AMD) { + /* Get CPU type. */ + getAMDProcessorTypeAndSubtype(Family, Model, Features, + &(__cpu_model.__cpu_type), + &(__cpu_model.__cpu_subtype)); + __cpu_model.__cpu_vendor = VENDOR_AMD; + } else + __cpu_model.__cpu_vendor = VENDOR_OTHER; + + assert(__cpu_model.__cpu_vendor < VENDOR_MAX); + assert(__cpu_model.__cpu_type < CPU_TYPE_MAX); + assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX); + + return 0; +} + +#endif diff --git a/contrib/compiler-rt/lib/builtins/ctzdi2.c b/contrib/compiler-rt/lib/builtins/ctzdi2.c new file mode 100644 index 000000000000..ef6d7fea1365 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/ctzdi2.c @@ -0,0 +1,40 @@ +/* ===-- ctzdi2.c - Implement __ctzdi2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __ctzdi2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: the number of trailing 0-bits */ + +#if !defined(__clang__) && \ + ((defined(__sparc__) && defined(__arch64__)) || \ + defined(__mips64) || \ + (defined(__riscv) && __SIZEOF_POINTER__ >= 8)) +/* On 64-bit architectures with neither a native clz instruction nor a native + * ctz instruction, gcc resolves __builtin_ctz to __ctzdi2 rather than + * __ctzsi2, leading to infinite recursion. */ +#define __builtin_ctz(a) __ctzsi2(a) +extern si_int __ctzsi2(si_int); +#endif + +/* Precondition: a != 0 */ + +COMPILER_RT_ABI si_int +__ctzdi2(di_int a) +{ + dwords x; + x.all = a; + const si_int f = -(x.s.low == 0); + return __builtin_ctz((x.s.high & f) | (x.s.low & ~f)) + + (f & ((si_int)(sizeof(si_int) * CHAR_BIT))); +} diff --git a/contrib/compiler-rt/lib/builtins/ctzsi2.c b/contrib/compiler-rt/lib/builtins/ctzsi2.c new file mode 100644 index 000000000000..c69486ea445f --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/ctzsi2.c @@ -0,0 +1,57 @@ +/* ===-- ctzsi2.c - Implement __ctzsi2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + * + * This file implements __ctzsi2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: the number of trailing 0-bits */ + +/* Precondition: a != 0 */ + +COMPILER_RT_ABI si_int +__ctzsi2(si_int a) +{ + su_int x = (su_int)a; + si_int t = ((x & 0x0000FFFF) == 0) << 4; /* if (x has no small bits) t = 16 else 0 */ + x >>= t; /* x = [0 - 0xFFFF] + higher garbage bits */ + su_int r = t; /* r = [0, 16] */ + /* return r + ctz(x) */ + t = ((x & 0x00FF) == 0) << 3; + x >>= t; /* x = [0 - 0xFF] + higher garbage bits */ + r += t; /* r = [0, 8, 16, 24] */ + /* return r + ctz(x) */ + t = ((x & 0x0F) == 0) << 2; + x >>= t; /* x = [0 - 0xF] + higher garbage bits */ + r += t; /* r = [0, 4, 8, 12, 16, 20, 24, 28] */ + /* return r + ctz(x) */ + t = ((x & 0x3) == 0) << 1; + x >>= t; + x &= 3; /* x = [0 - 3] */ + r += t; /* r = [0 - 30] and is even */ + /* return r + ctz(x) */ + +/* The branch-less return statement below is equivalent + * to the following switch statement: + * switch (x) + * { + * case 0: + * return r + 2; + * case 2: + * return r + 1; + * case 1: + * case 3: + * return r; + * } + */ + return r + ((2 - (x >> 1)) & -((x & 1) == 0)); +} diff --git a/contrib/compiler-rt/lib/builtins/ctzti2.c b/contrib/compiler-rt/lib/builtins/ctzti2.c new file mode 100644 index 000000000000..45de682700cc --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/ctzti2.c @@ -0,0 +1,33 @@ +/* ===-- ctzti2.c - Implement __ctzti2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __ctzti2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: the number of trailing 0-bits */ + +/* Precondition: a != 0 */ + +COMPILER_RT_ABI si_int +__ctzti2(ti_int a) +{ + twords x; + x.all = a; + const di_int f = -(x.s.low == 0); + return __builtin_ctzll((x.s.high & f) | (x.s.low & ~f)) + + ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT))); +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/divdc3.c b/contrib/compiler-rt/lib/builtins/divdc3.c new file mode 100644 index 000000000000..392d6ecacde1 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/divdc3.c @@ -0,0 +1,62 @@ +/* ===-- divdc3.c - Implement __divdc3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __divdc3 for the compiler_rt library. 
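/*
 * [Editorial sketch -- not part of the imported source. C99 complex division
 *  is what typically lowers to the routine this header introduces: for
 *  `double _Complex` operands the compiler usually emits a call to __divdc3
 *  (and to __divsc3 / __divtc3 for the other widths). Illustrative only.]
 */
#include <complex.h>
#include <stdio.h>

int main(void) {
    double _Complex q = (2.0 + 4.0 * I) / (1.0 - 1.0 * I);  /* typically lowered to __divdc3 */
    printf("%g%+gi\n", creal(q), cimag(q));                  /* prints -1+3i */
    return 0;
}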
+ * + * ===----------------------------------------------------------------------=== + */ + +#define DOUBLE_PRECISION +#include "fp_lib.h" +#include "int_lib.h" +#include "int_math.h" + +/* Returns: the quotient of (a + ib) / (c + id) */ + +COMPILER_RT_ABI Dcomplex +__divdc3(double __a, double __b, double __c, double __d) +{ + int __ilogbw = 0; + double __logbw = __compiler_rt_logb(crt_fmax(crt_fabs(__c), crt_fabs(__d))); + if (crt_isfinite(__logbw)) + { + __ilogbw = (int)__logbw; + __c = crt_scalbn(__c, -__ilogbw); + __d = crt_scalbn(__d, -__ilogbw); + } + double __denom = __c * __c + __d * __d; + Dcomplex z; + COMPLEX_REAL(z) = crt_scalbn((__a * __c + __b * __d) / __denom, -__ilogbw); + COMPLEX_IMAGINARY(z) = crt_scalbn((__b * __c - __a * __d) / __denom, -__ilogbw); + if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) + { + if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b))) + { + COMPLEX_REAL(z) = crt_copysign(CRT_INFINITY, __c) * __a; + COMPLEX_IMAGINARY(z) = crt_copysign(CRT_INFINITY, __c) * __b; + } + else if ((crt_isinf(__a) || crt_isinf(__b)) && + crt_isfinite(__c) && crt_isfinite(__d)) + { + __a = crt_copysign(crt_isinf(__a) ? 1.0 : 0.0, __a); + __b = crt_copysign(crt_isinf(__b) ? 1.0 : 0.0, __b); + COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d); + COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d); + } + else if (crt_isinf(__logbw) && __logbw > 0.0 && + crt_isfinite(__a) && crt_isfinite(__b)) + { + __c = crt_copysign(crt_isinf(__c) ? 1.0 : 0.0, __c); + __d = crt_copysign(crt_isinf(__d) ? 1.0 : 0.0, __d); + COMPLEX_REAL(z) = 0.0 * (__a * __c + __b * __d); + COMPLEX_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d); + } + } + return z; +} diff --git a/contrib/compiler-rt/lib/builtins/divdf3.c b/contrib/compiler-rt/lib/builtins/divdf3.c new file mode 100644 index 000000000000..411c82ebb87a --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/divdf3.c @@ -0,0 +1,193 @@ +//===-- lib/divdf3.c - Double-precision division ------------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements double-precision soft-float division +// with the IEEE-754 default rounding (to nearest, ties to even). +// +// For simplicity, this implementation currently flushes denormals to zero. +// It should be a fairly straightforward exercise to implement gradual +// underflow with correct rounding. +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_lib.h" + +COMPILER_RT_ABI fp_t +__divdf3(fp_t a, fp_t b) { + + const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; + const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; + const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit; + + rep_t aSignificand = toRep(a) & significandMask; + rep_t bSignificand = toRep(b) & significandMask; + int scale = 0; + + // Detect if a or b is zero, denormal, infinity, or NaN. 
+ if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) { + + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + + // NaN / anything = qNaN + if (aAbs > infRep) return fromRep(toRep(a) | quietBit); + // anything / NaN = qNaN + if (bAbs > infRep) return fromRep(toRep(b) | quietBit); + + if (aAbs == infRep) { + // infinity / infinity = NaN + if (bAbs == infRep) return fromRep(qnanRep); + // infinity / anything else = +/- infinity + else return fromRep(aAbs | quotientSign); + } + + // anything else / infinity = +/- 0 + if (bAbs == infRep) return fromRep(quotientSign); + + if (!aAbs) { + // zero / zero = NaN + if (!bAbs) return fromRep(qnanRep); + // zero / anything else = +/- zero + else return fromRep(quotientSign); + } + // anything else / zero = +/- infinity + if (!bAbs) return fromRep(infRep | quotientSign); + + // one or both of a or b is denormal, the other (if applicable) is a + // normal number. Renormalize one or both of a and b, and set scale to + // include the necessary exponent adjustment. + if (aAbs < implicitBit) scale += normalize(&aSignificand); + if (bAbs < implicitBit) scale -= normalize(&bSignificand); + } + + // Or in the implicit significand bit. (If we fell through from the + // denormal path it was already set by normalize( ), but setting it twice + // won't hurt anything.) + aSignificand |= implicitBit; + bSignificand |= implicitBit; + int quotientExponent = aExponent - bExponent + scale; + + // Align the significand of b as a Q31 fixed-point number in the range + // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax + // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This + // is accurate to about 3.5 binary digits. + const uint32_t q31b = bSignificand >> 21; + uint32_t recip32 = UINT32_C(0x7504f333) - q31b; + + // Now refine the reciprocal estimate using a Newton-Raphson iteration: + // + // x1 = x0 * (2 - x0 * b) + // + // This doubles the number of correct binary digits in the approximation + // with each iteration, so after three iterations, we have about 28 binary + // digits of accuracy. + uint32_t correction32; + correction32 = -((uint64_t)recip32 * q31b >> 32); + recip32 = (uint64_t)recip32 * correction32 >> 31; + correction32 = -((uint64_t)recip32 * q31b >> 32); + recip32 = (uint64_t)recip32 * correction32 >> 31; + correction32 = -((uint64_t)recip32 * q31b >> 32); + recip32 = (uint64_t)recip32 * correction32 >> 31; + + // recip32 might have overflowed to exactly zero in the preceding + // computation if the high word of b is exactly 1.0. This would sabotage + // the full-width final stage of the computation that follows, so we adjust + // recip32 downward by one bit. + recip32--; + + // We need to perform one more iteration to get us to 56 binary digits; + // The last iteration needs to happen with extra precision. + const uint32_t q63blo = bSignificand << 11; + uint64_t correction, reciprocal; + correction = -((uint64_t)recip32*q31b + ((uint64_t)recip32*q63blo >> 32)); + uint32_t cHi = correction >> 32; + uint32_t cLo = correction; + reciprocal = (uint64_t)recip32*cHi + ((uint64_t)recip32*cLo >> 32); + + // We already adjusted the 32-bit estimate, now we need to adjust the final + // 64-bit reciprocal estimate downward to ensure that it is strictly smaller + // than the infinitely precise exact reciprocal. Because the computation + // of the Newton-Raphson step is truncating at every step, this adjustment + // is small; most of the work is already done. 
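/*
 * [Editorial sketch -- not part of the imported source; the division code
 *  resumes immediately below. This is a floating-point toy model of the
 *  fixed-point refinement above: starting from the same minimax seed
 *  3/4 + 1/sqrt(2) - b/2, each Newton-Raphson step x <- x*(2 - b*x) roughly
 *  squares the relative error of the reciprocal estimate.]
 */
#include <stdio.h>

int main(void) {
    double b = 1.4142;                      /* some value in [1, 2) */
    double x = 0.75 + 0.70710678 - b / 2;   /* initial estimate of 1/b */
    for (int i = 1; i <= 4; ++i) {
        x = x * (2.0 - b * x);
        printf("step %d: error = %.3e\n", i, 1.0 / b - x);
    }
    return 0;
}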
+ reciprocal -= 2; + + // The numerical reciprocal is accurate to within 2^-56, lies in the + // interval [0.5, 1.0), and is strictly smaller than the true reciprocal + // of b. Multiplying a by this reciprocal thus gives a numerical q = a/b + // in Q53 with the following properties: + // + // 1. q < a/b + // 2. q is in the interval [0.5, 2.0) + // 3. the error in q is bounded away from 2^-53 (actually, we have a + // couple of bits to spare, but this is all we need). + + // We need a 64 x 64 multiply high to compute q, which isn't a basic + // operation in C, so we need to be a little bit fussy. + rep_t quotient, quotientLo; + wideMultiply(aSignificand << 2, reciprocal, "ient, "ientLo); + + // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). + // In either case, we are going to compute a residual of the form + // + // r = a - q*b + // + // We know from the construction of q that r satisfies: + // + // 0 <= r < ulp(q)*b + // + // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we + // already have the correct result. The exact halfway case cannot occur. + // We also take this time to right shift quotient if it falls in the [1,2) + // range and adjust the exponent accordingly. + rep_t residual; + if (quotient < (implicitBit << 1)) { + residual = (aSignificand << 53) - quotient * bSignificand; + quotientExponent--; + } else { + quotient >>= 1; + residual = (aSignificand << 52) - quotient * bSignificand; + } + + const int writtenExponent = quotientExponent + exponentBias; + + if (writtenExponent >= maxExponent) { + // If we have overflowed the exponent, return infinity. + return fromRep(infRep | quotientSign); + } + + else if (writtenExponent < 1) { + // Flush denormals to zero. In the future, it would be nice to add + // code to round them correctly. + return fromRep(quotientSign); + } + + else { + const bool round = (residual << 1) > bSignificand; + // Clear the implicit bit + rep_t absResult = quotient & significandMask; + // Insert the exponent + absResult |= (rep_t)writtenExponent << significandBits; + // Round + absResult += round; + // Insert the sign and return + const double result = fromRep(absResult | quotientSign); + return result; + } +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI fp_t __aeabi_ddiv(fp_t a, fp_t b) { + return __divdf3(a, b); +} +#else +AEABI_RTABI fp_t __aeabi_ddiv(fp_t a, fp_t b) COMPILER_RT_ALIAS(__divdf3); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/divdi3.c b/contrib/compiler-rt/lib/builtins/divdi3.c new file mode 100644 index 000000000000..b8eebcb20465 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/divdi3.c @@ -0,0 +1,29 @@ +/* ===-- divdi3.c - Implement __divdi3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __divdi3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a / b */ + +COMPILER_RT_ABI di_int +__divdi3(di_int a, di_int b) +{ + const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1; + di_int s_a = a >> bits_in_dword_m1; /* s_a = a < 0 ? -1 : 0 */ + di_int s_b = b >> bits_in_dword_m1; /* s_b = b < 0 ? 
-1 : 0 */ + a = (a ^ s_a) - s_a; /* negate if s_a == -1 */ + b = (b ^ s_b) - s_b; /* negate if s_b == -1 */ + s_a ^= s_b; /*sign of quotient */ + return (__udivmoddi4(a, b, (du_int*)0) ^ s_a) - s_a; /* negate if s_a == -1 */ +} diff --git a/contrib/compiler-rt/lib/builtins/divmoddi4.c b/contrib/compiler-rt/lib/builtins/divmoddi4.c new file mode 100644 index 000000000000..0d4df67a63e0 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/divmoddi4.c @@ -0,0 +1,25 @@ +/*===-- divmoddi4.c - Implement __divmoddi4 --------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __divmoddi4 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a / b, *rem = a % b */ + +COMPILER_RT_ABI di_int +__divmoddi4(di_int a, di_int b, di_int* rem) +{ + di_int d = __divdi3(a,b); + *rem = a - (d*b); + return d; +} diff --git a/contrib/compiler-rt/lib/builtins/divmodsi4.c b/contrib/compiler-rt/lib/builtins/divmodsi4.c new file mode 100644 index 000000000000..dabe2874397c --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/divmodsi4.c @@ -0,0 +1,27 @@ +/*===-- divmodsi4.c - Implement __divmodsi4 --------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __divmodsi4 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a / b, *rem = a % b */ + +COMPILER_RT_ABI si_int +__divmodsi4(si_int a, si_int b, si_int* rem) +{ + si_int d = __divsi3(a,b); + *rem = a - (d*b); + return d; +} + + diff --git a/contrib/compiler-rt/lib/builtins/divsc3.c b/contrib/compiler-rt/lib/builtins/divsc3.c new file mode 100644 index 000000000000..0d18a256c3de --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/divsc3.c @@ -0,0 +1,63 @@ +/*===-- divsc3.c - Implement __divsc3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __divsc3 for the compiler_rt library. 
+ * + *===----------------------------------------------------------------------=== + */ + +#define SINGLE_PRECISION +#include "fp_lib.h" +#include "int_lib.h" +#include "int_math.h" + +/* Returns: the quotient of (a + ib) / (c + id) */ + +COMPILER_RT_ABI Fcomplex +__divsc3(float __a, float __b, float __c, float __d) +{ + int __ilogbw = 0; + float __logbw = + __compiler_rt_logbf(crt_fmaxf(crt_fabsf(__c), crt_fabsf(__d))); + if (crt_isfinite(__logbw)) + { + __ilogbw = (int)__logbw; + __c = crt_scalbnf(__c, -__ilogbw); + __d = crt_scalbnf(__d, -__ilogbw); + } + float __denom = __c * __c + __d * __d; + Fcomplex z; + COMPLEX_REAL(z) = crt_scalbnf((__a * __c + __b * __d) / __denom, -__ilogbw); + COMPLEX_IMAGINARY(z) = crt_scalbnf((__b * __c - __a * __d) / __denom, -__ilogbw); + if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) + { + if ((__denom == 0) && (!crt_isnan(__a) || !crt_isnan(__b))) + { + COMPLEX_REAL(z) = crt_copysignf(CRT_INFINITY, __c) * __a; + COMPLEX_IMAGINARY(z) = crt_copysignf(CRT_INFINITY, __c) * __b; + } + else if ((crt_isinf(__a) || crt_isinf(__b)) && + crt_isfinite(__c) && crt_isfinite(__d)) + { + __a = crt_copysignf(crt_isinf(__a) ? 1 : 0, __a); + __b = crt_copysignf(crt_isinf(__b) ? 1 : 0, __b); + COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d); + COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d); + } + else if (crt_isinf(__logbw) && __logbw > 0 && + crt_isfinite(__a) && crt_isfinite(__b)) + { + __c = crt_copysignf(crt_isinf(__c) ? 1 : 0, __c); + __d = crt_copysignf(crt_isinf(__d) ? 1 : 0, __d); + COMPLEX_REAL(z) = 0 * (__a * __c + __b * __d); + COMPLEX_IMAGINARY(z) = 0 * (__b * __c - __a * __d); + } + } + return z; +} diff --git a/contrib/compiler-rt/lib/builtins/divsf3.c b/contrib/compiler-rt/lib/builtins/divsf3.c new file mode 100644 index 000000000000..a74917fd1de5 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/divsf3.c @@ -0,0 +1,177 @@ +//===-- lib/divsf3.c - Single-precision division ------------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements single-precision soft-float division +// with the IEEE-754 default rounding (to nearest, ties to even). +// +// For simplicity, this implementation currently flushes denormals to zero. +// It should be a fairly straightforward exercise to implement gradual +// underflow with correct rounding. +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_lib.h" + +COMPILER_RT_ABI fp_t +__divsf3(fp_t a, fp_t b) { + + const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; + const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; + const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit; + + rep_t aSignificand = toRep(a) & significandMask; + rep_t bSignificand = toRep(b) & significandMask; + int scale = 0; + + // Detect if a or b is zero, denormal, infinity, or NaN. 
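/*
 * [Editorial sketch -- not part of the imported source; the special-case
 *  handling that the comment above introduces follows below. These are the
 *  IEEE-754 division results that the block reproduces in software.
 *  Standalone, illustrative only.]
 */
#include <assert.h>
#include <math.h>

int main(void) {
    volatile float inf = INFINITY, zero = 0.0f;
    assert(isnan(inf / inf));        /* infinity / infinity = NaN */
    assert(isnan(zero / zero));      /* zero / zero = NaN         */
    assert(isinf(1.0f / zero));      /* finite / zero = +/- inf   */
    assert(3.0f / inf == 0.0f);      /* finite / infinity = +/- 0 */
    return 0;
}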
+ if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) { + + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + + // NaN / anything = qNaN + if (aAbs > infRep) return fromRep(toRep(a) | quietBit); + // anything / NaN = qNaN + if (bAbs > infRep) return fromRep(toRep(b) | quietBit); + + if (aAbs == infRep) { + // infinity / infinity = NaN + if (bAbs == infRep) return fromRep(qnanRep); + // infinity / anything else = +/- infinity + else return fromRep(aAbs | quotientSign); + } + + // anything else / infinity = +/- 0 + if (bAbs == infRep) return fromRep(quotientSign); + + if (!aAbs) { + // zero / zero = NaN + if (!bAbs) return fromRep(qnanRep); + // zero / anything else = +/- zero + else return fromRep(quotientSign); + } + // anything else / zero = +/- infinity + if (!bAbs) return fromRep(infRep | quotientSign); + + // one or both of a or b is denormal, the other (if applicable) is a + // normal number. Renormalize one or both of a and b, and set scale to + // include the necessary exponent adjustment. + if (aAbs < implicitBit) scale += normalize(&aSignificand); + if (bAbs < implicitBit) scale -= normalize(&bSignificand); + } + + // Or in the implicit significand bit. (If we fell through from the + // denormal path it was already set by normalize( ), but setting it twice + // won't hurt anything.) + aSignificand |= implicitBit; + bSignificand |= implicitBit; + int quotientExponent = aExponent - bExponent + scale; + + // Align the significand of b as a Q31 fixed-point number in the range + // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax + // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This + // is accurate to about 3.5 binary digits. + uint32_t q31b = bSignificand << 8; + uint32_t reciprocal = UINT32_C(0x7504f333) - q31b; + + // Now refine the reciprocal estimate using a Newton-Raphson iteration: + // + // x1 = x0 * (2 - x0 * b) + // + // This doubles the number of correct binary digits in the approximation + // with each iteration, so after three iterations, we have about 28 binary + // digits of accuracy. + uint32_t correction; + correction = -((uint64_t)reciprocal * q31b >> 32); + reciprocal = (uint64_t)reciprocal * correction >> 31; + correction = -((uint64_t)reciprocal * q31b >> 32); + reciprocal = (uint64_t)reciprocal * correction >> 31; + correction = -((uint64_t)reciprocal * q31b >> 32); + reciprocal = (uint64_t)reciprocal * correction >> 31; + + // Exhaustive testing shows that the error in reciprocal after three steps + // is in the interval [-0x1.f58108p-31, 0x1.d0e48cp-29], in line with our + // expectations. We bump the reciprocal by a tiny value to force the error + // to be strictly positive (in the range [0x1.4fdfp-37,0x1.287246p-29], to + // be specific). This also causes 1/1 to give a sensible approximation + // instead of zero (due to overflow). + reciprocal -= 2; + + // The numerical reciprocal is accurate to within 2^-28, lies in the + // interval [0x1.000000eep-1, 0x1.fffffffcp-1], and is strictly smaller + // than the true reciprocal of b. Multiplying a by this reciprocal thus + // gives a numerical q = a/b in Q24 with the following properties: + // + // 1. q < a/b + // 2. q is in the interval [0x1.000000eep-1, 0x1.fffffffcp0) + // 3. the error in q is at most 2^-24 + 2^-27 -- the 2^24 term comes + // from the fact that we truncate the product, and the 2^27 term + // is the error in the reciprocal of b scaled by the maximum + // possible value of a. 
As a consequence of this error bound, + // either q or nextafter(q) is the correctly rounded + rep_t quotient = (uint64_t)reciprocal*(aSignificand << 1) >> 32; + + // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). + // In either case, we are going to compute a residual of the form + // + // r = a - q*b + // + // We know from the construction of q that r satisfies: + // + // 0 <= r < ulp(q)*b + // + // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we + // already have the correct result. The exact halfway case cannot occur. + // We also take this time to right shift quotient if it falls in the [1,2) + // range and adjust the exponent accordingly. + rep_t residual; + if (quotient < (implicitBit << 1)) { + residual = (aSignificand << 24) - quotient * bSignificand; + quotientExponent--; + } else { + quotient >>= 1; + residual = (aSignificand << 23) - quotient * bSignificand; + } + + const int writtenExponent = quotientExponent + exponentBias; + + if (writtenExponent >= maxExponent) { + // If we have overflowed the exponent, return infinity. + return fromRep(infRep | quotientSign); + } + + else if (writtenExponent < 1) { + // Flush denormals to zero. In the future, it would be nice to add + // code to round them correctly. + return fromRep(quotientSign); + } + + else { + const bool round = (residual << 1) > bSignificand; + // Clear the implicit bit + rep_t absResult = quotient & significandMask; + // Insert the exponent + absResult |= (rep_t)writtenExponent << significandBits; + // Round + absResult += round; + // Insert the sign and return + return fromRep(absResult | quotientSign); + } +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI fp_t __aeabi_fdiv(fp_t a, fp_t b) { + return __divsf3(a, b); +} +#else +AEABI_RTABI fp_t __aeabi_fdiv(fp_t a, fp_t b) COMPILER_RT_ALIAS(__divsf3); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/divsi3.c b/contrib/compiler-rt/lib/builtins/divsi3.c new file mode 100644 index 000000000000..75aea008ddc1 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/divsi3.c @@ -0,0 +1,39 @@ +/* ===-- divsi3.c - Implement __divsi3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __divsi3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a / b */ + +COMPILER_RT_ABI si_int +__divsi3(si_int a, si_int b) +{ + const int bits_in_word_m1 = (int)(sizeof(si_int) * CHAR_BIT) - 1; + si_int s_a = a >> bits_in_word_m1; /* s_a = a < 0 ? -1 : 0 */ + si_int s_b = b >> bits_in_word_m1; /* s_b = b < 0 ? -1 : 0 */ + a = (a ^ s_a) - s_a; /* negate if s_a == -1 */ + b = (b ^ s_b) - s_b; /* negate if s_b == -1 */ + s_a ^= s_b; /* sign of quotient */ + /* + * On CPUs without unsigned hardware division support, + * this calls __udivsi3 (notice the cast to su_int). + * On CPUs with unsigned hardware division support, + * this uses the unsigned division instruction. 
+ */ + return ((su_int)a/(su_int)b ^ s_a) - s_a; /* negate if s_a == -1 */ +} + +#if defined(__ARM_EABI__) +AEABI_RTABI si_int __aeabi_idiv(si_int a, si_int b) COMPILER_RT_ALIAS(__divsi3); +#endif diff --git a/contrib/compiler-rt/lib/builtins/divtc3.c b/contrib/compiler-rt/lib/builtins/divtc3.c new file mode 100644 index 000000000000..e5ea00d841e3 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/divtc3.c @@ -0,0 +1,63 @@ +/*===-- divtc3.c - Implement __divtc3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __divtc3 for the compiler_rt library. + * + *===----------------------------------------------------------------------=== + */ + +#define QUAD_PRECISION +#include "fp_lib.h" +#include "int_lib.h" +#include "int_math.h" + +/* Returns: the quotient of (a + ib) / (c + id) */ + +COMPILER_RT_ABI Lcomplex +__divtc3(long double __a, long double __b, long double __c, long double __d) +{ + int __ilogbw = 0; + long double __logbw = + __compiler_rt_logbl(crt_fmaxl(crt_fabsl(__c), crt_fabsl(__d))); + if (crt_isfinite(__logbw)) + { + __ilogbw = (int)__logbw; + __c = crt_scalbnl(__c, -__ilogbw); + __d = crt_scalbnl(__d, -__ilogbw); + } + long double __denom = __c * __c + __d * __d; + Lcomplex z; + COMPLEX_REAL(z) = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw); + COMPLEX_IMAGINARY(z) = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw); + if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) + { + if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b))) + { + COMPLEX_REAL(z) = crt_copysignl(CRT_INFINITY, __c) * __a; + COMPLEX_IMAGINARY(z) = crt_copysignl(CRT_INFINITY, __c) * __b; + } + else if ((crt_isinf(__a) || crt_isinf(__b)) && + crt_isfinite(__c) && crt_isfinite(__d)) + { + __a = crt_copysignl(crt_isinf(__a) ? 1.0 : 0.0, __a); + __b = crt_copysignl(crt_isinf(__b) ? 1.0 : 0.0, __b); + COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d); + COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d); + } + else if (crt_isinf(__logbw) && __logbw > 0.0 && + crt_isfinite(__a) && crt_isfinite(__b)) + { + __c = crt_copysignl(crt_isinf(__c) ? 1.0 : 0.0, __c); + __d = crt_copysignl(crt_isinf(__d) ? 1.0 : 0.0, __d); + COMPLEX_REAL(z) = 0.0 * (__a * __c + __b * __d); + COMPLEX_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d); + } + } + return z; +} diff --git a/contrib/compiler-rt/lib/builtins/divtf3.c b/contrib/compiler-rt/lib/builtins/divtf3.c new file mode 100644 index 000000000000..e81dab826bdd --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/divtf3.c @@ -0,0 +1,203 @@ +//===-- lib/divtf3.c - Quad-precision division --------------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements quad-precision soft-float division +// with the IEEE-754 default rounding (to nearest, ties to even). +// +// For simplicity, this implementation currently flushes denormals to zero. +// It should be a fairly straightforward exercise to implement gradual +// underflow with correct rounding. 
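/*
 * [Editorial sketch -- not part of the imported source. On targets whose
 *  long double is IEEE binary128 (for example AArch64), an ordinary long
 *  double division is typically lowered to a call to __divtf3; on x86 the
 *  same routine commonly backs the __float128 type instead. Whether either
 *  applies depends on the target ABI. Illustrative only.]
 */
#include <stdio.h>

int main(void) {
    long double a = 1.0L, b = 3.0L;
    printf("%.30Lg\n", a / b);   /* on binary128 targets: a call to __divtf3 */
    return 0;
}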
+// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +COMPILER_RT_ABI fp_t __divtf3(fp_t a, fp_t b) { + + const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; + const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; + const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit; + + rep_t aSignificand = toRep(a) & significandMask; + rep_t bSignificand = toRep(b) & significandMask; + int scale = 0; + + // Detect if a or b is zero, denormal, infinity, or NaN. + if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) { + + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + + // NaN / anything = qNaN + if (aAbs > infRep) return fromRep(toRep(a) | quietBit); + // anything / NaN = qNaN + if (bAbs > infRep) return fromRep(toRep(b) | quietBit); + + if (aAbs == infRep) { + // infinity / infinity = NaN + if (bAbs == infRep) return fromRep(qnanRep); + // infinity / anything else = +/- infinity + else return fromRep(aAbs | quotientSign); + } + + // anything else / infinity = +/- 0 + if (bAbs == infRep) return fromRep(quotientSign); + + if (!aAbs) { + // zero / zero = NaN + if (!bAbs) return fromRep(qnanRep); + // zero / anything else = +/- zero + else return fromRep(quotientSign); + } + // anything else / zero = +/- infinity + if (!bAbs) return fromRep(infRep | quotientSign); + + // one or both of a or b is denormal, the other (if applicable) is a + // normal number. Renormalize one or both of a and b, and set scale to + // include the necessary exponent adjustment. + if (aAbs < implicitBit) scale += normalize(&aSignificand); + if (bAbs < implicitBit) scale -= normalize(&bSignificand); + } + + // Or in the implicit significand bit. (If we fell through from the + // denormal path it was already set by normalize( ), but setting it twice + // won't hurt anything.) + aSignificand |= implicitBit; + bSignificand |= implicitBit; + int quotientExponent = aExponent - bExponent + scale; + + // Align the significand of b as a Q63 fixed-point number in the range + // [1, 2.0) and get a Q64 approximate reciprocal using a small minimax + // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This + // is accurate to about 3.5 binary digits. + const uint64_t q63b = bSignificand >> 49; + uint64_t recip64 = UINT64_C(0x7504f333F9DE6484) - q63b; + // 0x7504f333F9DE6484 / 2^64 + 1 = 3/4 + 1/sqrt(2) + + // Now refine the reciprocal estimate using a Newton-Raphson iteration: + // + // x1 = x0 * (2 - x0 * b) + // + // This doubles the number of correct binary digits in the approximation + // with each iteration. + uint64_t correction64; + correction64 = -((rep_t)recip64 * q63b >> 64); + recip64 = (rep_t)recip64 * correction64 >> 63; + correction64 = -((rep_t)recip64 * q63b >> 64); + recip64 = (rep_t)recip64 * correction64 >> 63; + correction64 = -((rep_t)recip64 * q63b >> 64); + recip64 = (rep_t)recip64 * correction64 >> 63; + correction64 = -((rep_t)recip64 * q63b >> 64); + recip64 = (rep_t)recip64 * correction64 >> 63; + correction64 = -((rep_t)recip64 * q63b >> 64); + recip64 = (rep_t)recip64 * correction64 >> 63; + + // recip64 might have overflowed to exactly zero in the preceeding + // computation if the high word of b is exactly 1.0. This would sabotage + // the full-width final stage of the computation that follows, so we adjust + // recip64 downward by one bit. 
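/* Illustrative sketch (assumption: standalone demo, not part of the patch):
 * the Newton-Raphson reciprocal iteration x1 = x0 * (2 - x0*b) written in
 * plain double arithmetic, seeded with the same 3/4 + 1/sqrt(2) - b/2
 * estimate. Each step roughly doubles the number of correct bits, which is
 * why a handful of fixed-point steps above reach 64-bit accuracy. */
#include <stdio.h>

int main(void) {
    double b = 1.37;                            /* some value in [1.0, 2.0) */
    double x = 0.75 + 0.7071067811865476 - b / 2.0;
    for (int i = 0; i < 5; ++i) {
        x = x * (2.0 - x * b);                  /* one Newton-Raphson step */
        printf("step %d: error %.3g\n", i + 1, 1.0 / b - x);
    }
    return 0;
}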
+ recip64--; + + // We need to perform one more iteration to get us to 112 binary digits; + // The last iteration needs to happen with extra precision. + const uint64_t q127blo = bSignificand << 15; + rep_t correction, reciprocal; + + // NOTE: This operation is equivalent to __multi3, which is not implemented + // in some architechure + rep_t r64q63, r64q127, r64cH, r64cL, dummy; + wideMultiply((rep_t)recip64, (rep_t)q63b, &dummy, &r64q63); + wideMultiply((rep_t)recip64, (rep_t)q127blo, &dummy, &r64q127); + + correction = -(r64q63 + (r64q127 >> 64)); + + uint64_t cHi = correction >> 64; + uint64_t cLo = correction; + + wideMultiply((rep_t)recip64, (rep_t)cHi, &dummy, &r64cH); + wideMultiply((rep_t)recip64, (rep_t)cLo, &dummy, &r64cL); + + reciprocal = r64cH + (r64cL >> 64); + + // We already adjusted the 64-bit estimate, now we need to adjust the final + // 128-bit reciprocal estimate downward to ensure that it is strictly smaller + // than the infinitely precise exact reciprocal. Because the computation + // of the Newton-Raphson step is truncating at every step, this adjustment + // is small; most of the work is already done. + reciprocal -= 2; + + // The numerical reciprocal is accurate to within 2^-112, lies in the + // interval [0.5, 1.0), and is strictly smaller than the true reciprocal + // of b. Multiplying a by this reciprocal thus gives a numerical q = a/b + // in Q127 with the following properties: + // + // 1. q < a/b + // 2. q is in the interval [0.5, 2.0) + // 3. the error in q is bounded away from 2^-113 (actually, we have a + // couple of bits to spare, but this is all we need). + + // We need a 128 x 128 multiply high to compute q, which isn't a basic + // operation in C, so we need to be a little bit fussy. + rep_t quotient, quotientLo; + wideMultiply(aSignificand << 2, reciprocal, "ient, "ientLo); + + // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). + // In either case, we are going to compute a residual of the form + // + // r = a - q*b + // + // We know from the construction of q that r satisfies: + // + // 0 <= r < ulp(q)*b + // + // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we + // already have the correct result. The exact halfway case cannot occur. + // We also take this time to right shift quotient if it falls in the [1,2) + // range and adjust the exponent accordingly. + rep_t residual; + rep_t qb; + + if (quotient < (implicitBit << 1)) { + wideMultiply(quotient, bSignificand, &dummy, &qb); + residual = (aSignificand << 113) - qb; + quotientExponent--; + } else { + quotient >>= 1; + wideMultiply(quotient, bSignificand, &dummy, &qb); + residual = (aSignificand << 112) - qb; + } + + const int writtenExponent = quotientExponent + exponentBias; + + if (writtenExponent >= maxExponent) { + // If we have overflowed the exponent, return infinity. + return fromRep(infRep | quotientSign); + } + else if (writtenExponent < 1) { + // Flush denormals to zero. In the future, it would be nice to add + // code to round them correctly. 
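/* Illustrative sketch (assumption: standalone demo, not part of the patch):
 * the "multiply high" idea behind wideMultiply(). C has no primitive that
 * returns the upper half of a full-width product, so it is rebuilt from
 * narrower limbs; here a 64x64 -> high-64 version using 32-bit pieces. */
#include <stdint.h>
#include <stdio.h>

static uint64_t mulhi64(uint64_t a, uint64_t b) {
    uint64_t alo = (uint32_t)a, ahi = a >> 32;
    uint64_t blo = (uint32_t)b, bhi = b >> 32;
    uint64_t lo   = alo * blo;
    uint64_t mid1 = ahi * blo + (lo >> 32);       /* cannot overflow 64 bits */
    uint64_t mid2 = alo * bhi + (uint32_t)mid1;
    return ahi * bhi + (mid1 >> 32) + (mid2 >> 32);
}

int main(void) {
    /* (2^32 + 1)^2 = 2^64 + 2^33 + 1, so the high 64 bits are exactly 1. */
    printf("%llu\n", (unsigned long long)mulhi64((1ULL << 32) + 1, (1ULL << 32) + 1));
    return 0;
}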
+ return fromRep(quotientSign); + } + else { + const bool round = (residual << 1) >= bSignificand; + // Clear the implicit bit + rep_t absResult = quotient & significandMask; + // Insert the exponent + absResult |= (rep_t)writtenExponent << significandBits; + // Round + absResult += round; + // Insert the sign and return + const long double result = fromRep(absResult | quotientSign); + return result; + } +} + +#endif diff --git a/contrib/compiler-rt/lib/builtins/divti3.c b/contrib/compiler-rt/lib/builtins/divti3.c new file mode 100644 index 000000000000..c73eae28fe08 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/divti3.c @@ -0,0 +1,33 @@ +/* ===-- divti3.c - Implement __divti3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __divti3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: a / b */ + +COMPILER_RT_ABI ti_int +__divti3(ti_int a, ti_int b) +{ + const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1; + ti_int s_a = a >> bits_in_tword_m1; /* s_a = a < 0 ? -1 : 0 */ + ti_int s_b = b >> bits_in_tword_m1; /* s_b = b < 0 ? -1 : 0 */ + a = (a ^ s_a) - s_a; /* negate if s_a == -1 */ + b = (b ^ s_b) - s_b; /* negate if s_b == -1 */ + s_a ^= s_b; /* sign of quotient */ + return (__udivmodti4(a, b, (tu_int*)0) ^ s_a) - s_a; /* negate if s_a == -1 */ +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/divxc3.c b/contrib/compiler-rt/lib/builtins/divxc3.c new file mode 100644 index 000000000000..6f49280e5f61 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/divxc3.c @@ -0,0 +1,63 @@ +/* ===-- divxc3.c - Implement __divxc3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __divxc3 for the compiler_rt library. + * + */ + +#if !_ARCH_PPC + +#include "int_lib.h" +#include "int_math.h" + +/* Returns: the quotient of (a + ib) / (c + id) */ + +COMPILER_RT_ABI Lcomplex +__divxc3(long double __a, long double __b, long double __c, long double __d) +{ + int __ilogbw = 0; + long double __logbw = crt_logbl(crt_fmaxl(crt_fabsl(__c), crt_fabsl(__d))); + if (crt_isfinite(__logbw)) + { + __ilogbw = (int)__logbw; + __c = crt_scalbnl(__c, -__ilogbw); + __d = crt_scalbnl(__d, -__ilogbw); + } + long double __denom = __c * __c + __d * __d; + Lcomplex z; + COMPLEX_REAL(z) = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw); + COMPLEX_IMAGINARY(z) = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw); + if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) + { + if ((__denom == 0) && (!crt_isnan(__a) || !crt_isnan(__b))) + { + COMPLEX_REAL(z) = crt_copysignl(CRT_INFINITY, __c) * __a; + COMPLEX_IMAGINARY(z) = crt_copysignl(CRT_INFINITY, __c) * __b; + } + else if ((crt_isinf(__a) || crt_isinf(__b)) && + crt_isfinite(__c) && crt_isfinite(__d)) + { + __a = crt_copysignl(crt_isinf(__a) ? 1 : 0, __a); + __b = crt_copysignl(crt_isinf(__b) ? 
1 : 0, __b); + COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d); + COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d); + } + else if (crt_isinf(__logbw) && __logbw > 0 && + crt_isfinite(__a) && crt_isfinite(__b)) + { + __c = crt_copysignl(crt_isinf(__c) ? 1 : 0, __c); + __d = crt_copysignl(crt_isinf(__d) ? 1 : 0, __d); + COMPLEX_REAL(z) = 0 * (__a * __c + __b * __d); + COMPLEX_IMAGINARY(z) = 0 * (__b * __c - __a * __d); + } + } + return z; +} + +#endif diff --git a/contrib/compiler-rt/lib/builtins/emutls.c b/contrib/compiler-rt/lib/builtins/emutls.c new file mode 100644 index 000000000000..ef95a1c260c1 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/emutls.c @@ -0,0 +1,405 @@ +/* ===---------- emutls.c - Implements __emutls_get_address ---------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "int_lib.h" +#include "int_util.h" + +#ifdef __BIONIC__ +/* There are 4 pthread key cleanup rounds on Bionic. Delay emutls deallocation + to round 2. We need to delay deallocation because: + - Android versions older than M lack __cxa_thread_atexit_impl, so apps + use a pthread key destructor to call C++ destructors. + - Apps might use __thread/thread_local variables in pthread destructors. + We can't wait until the final two rounds, because jemalloc needs two rounds + after the final malloc/free call to free its thread-specific data (see + https://reviews.llvm.org/D46978#1107507). */ +#define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 1 +#else +#define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 0 +#endif + +typedef struct emutls_address_array { + uintptr_t skip_destructor_rounds; + uintptr_t size; /* number of elements in the 'data' array */ + void* data[]; +} emutls_address_array; + +static void emutls_shutdown(emutls_address_array *array); + +#ifndef _WIN32 + +#include <pthread.h> + +static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_key_t emutls_pthread_key; +static bool emutls_key_created = false; + +typedef unsigned int gcc_word __attribute__((mode(word))); +typedef unsigned int gcc_pointer __attribute__((mode(pointer))); + +/* Default is not to use posix_memalign, so systems like Android + * can use thread local data without heavier POSIX memory allocators. 
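/* Illustrative sketch (assumption: standalone demo, not part of the patch):
 * the complex quotient formula implemented by __divtc3 and __divxc3 above,
 * (a+ib)/(c+id) = ((a*c + b*d) + i(b*c - a*d)) / (c*c + d*d), with the
 * denominator prescaled by a power of two (logb/scalbn) so that c*c + d*d
 * cannot overflow or underflow before the division happens. */
#include <math.h>
#include <stdio.h>

int main(void) {
    double a = 1.0, b = 2.0, c = 3.0, d = 4.0;        /* (1+2i) / (3+4i) */
    int k = (int)logb(fmax(fabs(c), fabs(d)));        /* exponent of larger part */
    double cs = scalbn(c, -k), ds = scalbn(d, -k);
    double denom = cs * cs + ds * ds;
    double re = scalbn((a * cs + b * ds) / denom, -k);
    double im = scalbn((b * cs - a * ds) / denom, -k);
    printf("%g %+gi\n", re, im);                      /* 0.44 +0.08i */
    return 0;
}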
+ */ +#ifndef EMUTLS_USE_POSIX_MEMALIGN +#define EMUTLS_USE_POSIX_MEMALIGN 0 +#endif + +static __inline void *emutls_memalign_alloc(size_t align, size_t size) { + void *base; +#if EMUTLS_USE_POSIX_MEMALIGN + if (posix_memalign(&base, align, size) != 0) + abort(); +#else + #define EXTRA_ALIGN_PTR_BYTES (align - 1 + sizeof(void*)) + char* object; + if ((object = (char*)malloc(EXTRA_ALIGN_PTR_BYTES + size)) == NULL) + abort(); + base = (void*)(((uintptr_t)(object + EXTRA_ALIGN_PTR_BYTES)) + & ~(uintptr_t)(align - 1)); + + ((void**)base)[-1] = object; +#endif + return base; +} + +static __inline void emutls_memalign_free(void *base) { +#if EMUTLS_USE_POSIX_MEMALIGN + free(base); +#else + /* The mallocated address is in ((void**)base)[-1] */ + free(((void**)base)[-1]); +#endif +} + +static __inline void emutls_setspecific(emutls_address_array *value) { + pthread_setspecific(emutls_pthread_key, (void*) value); +} + +static __inline emutls_address_array* emutls_getspecific() { + return (emutls_address_array*) pthread_getspecific(emutls_pthread_key); +} + +static void emutls_key_destructor(void* ptr) { + emutls_address_array *array = (emutls_address_array*)ptr; + if (array->skip_destructor_rounds > 0) { + /* emutls is deallocated using a pthread key destructor. These + * destructors are called in several rounds to accommodate destructor + * functions that (re)initialize key values with pthread_setspecific. + * Delay the emutls deallocation to accommodate other end-of-thread + * cleanup tasks like calling thread_local destructors (e.g. the + * __cxa_thread_atexit fallback in libc++abi). + */ + array->skip_destructor_rounds--; + emutls_setspecific(array); + } else { + emutls_shutdown(array); + free(ptr); + } +} + +static __inline void emutls_init(void) { + if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0) + abort(); + emutls_key_created = true; +} + +static __inline void emutls_init_once(void) { + static pthread_once_t once = PTHREAD_ONCE_INIT; + pthread_once(&once, emutls_init); +} + +static __inline void emutls_lock() { + pthread_mutex_lock(&emutls_mutex); +} + +static __inline void emutls_unlock() { + pthread_mutex_unlock(&emutls_mutex); +} + +#else /* _WIN32 */ + +#include <windows.h> +#include <malloc.h> +#include <stdio.h> +#include <assert.h> + +static LPCRITICAL_SECTION emutls_mutex; +static DWORD emutls_tls_index = TLS_OUT_OF_INDEXES; + +typedef uintptr_t gcc_word; +typedef void * gcc_pointer; + +static void win_error(DWORD last_err, const char *hint) { + char *buffer = NULL; + if (FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_MAX_WIDTH_MASK, + NULL, last_err, 0, (LPSTR)&buffer, 1, NULL)) { + fprintf(stderr, "Windows error: %s\n", buffer); + } else { + fprintf(stderr, "Unkown Windows error: %s\n", hint); + } + LocalFree(buffer); +} + +static __inline void win_abort(DWORD last_err, const char *hint) { + win_error(last_err, hint); + abort(); +} + +static __inline void *emutls_memalign_alloc(size_t align, size_t size) { + void *base = _aligned_malloc(size, align); + if (!base) + win_abort(GetLastError(), "_aligned_malloc"); + return base; +} + +static __inline void emutls_memalign_free(void *base) { + _aligned_free(base); +} + +static void emutls_exit(void) { + if (emutls_mutex) { + DeleteCriticalSection(emutls_mutex); + _aligned_free(emutls_mutex); + emutls_mutex = NULL; + } + if (emutls_tls_index != TLS_OUT_OF_INDEXES) { + emutls_shutdown((emutls_address_array*)TlsGetValue(emutls_tls_index)); + 
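/* Illustrative sketch (assumption: standalone demo, not part of the patch):
 * the over-allocation trick emutls_memalign_alloc() falls back on when
 * posix_memalign is unavailable. It allocates align-1 extra bytes plus room
 * for one pointer, rounds the address up to the requested (power-of-two)
 * alignment, and stashes the original malloc() pointer just below the
 * aligned block so the matching free routine can recover it. */
#include <stdint.h>
#include <stdlib.h>

static void *memalign_demo_alloc(size_t align, size_t size) {
    char *raw = malloc(size + align - 1 + sizeof(void *));
    if (raw == NULL)
        return NULL;
    uintptr_t p = ((uintptr_t)(raw + sizeof(void *) + align - 1))
                  & ~(uintptr_t)(align - 1);
    ((void **)p)[-1] = raw;                 /* remember what malloc returned */
    return (void *)p;
}

static void memalign_demo_free(void *base) {
    if (base != NULL)
        free(((void **)base)[-1]);          /* free the original allocation */
}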
TlsFree(emutls_tls_index); + emutls_tls_index = TLS_OUT_OF_INDEXES; + } +} + +#pragma warning (push) +#pragma warning (disable : 4100) +static BOOL CALLBACK emutls_init(PINIT_ONCE p0, PVOID p1, PVOID *p2) { + emutls_mutex = (LPCRITICAL_SECTION)_aligned_malloc(sizeof(CRITICAL_SECTION), 16); + if (!emutls_mutex) { + win_error(GetLastError(), "_aligned_malloc"); + return FALSE; + } + InitializeCriticalSection(emutls_mutex); + + emutls_tls_index = TlsAlloc(); + if (emutls_tls_index == TLS_OUT_OF_INDEXES) { + emutls_exit(); + win_error(GetLastError(), "TlsAlloc"); + return FALSE; + } + atexit(&emutls_exit); + return TRUE; +} + +static __inline void emutls_init_once(void) { + static INIT_ONCE once; + InitOnceExecuteOnce(&once, emutls_init, NULL, NULL); +} + +static __inline void emutls_lock() { + EnterCriticalSection(emutls_mutex); +} + +static __inline void emutls_unlock() { + LeaveCriticalSection(emutls_mutex); +} + +static __inline void emutls_setspecific(emutls_address_array *value) { + if (TlsSetValue(emutls_tls_index, (LPVOID) value) == 0) + win_abort(GetLastError(), "TlsSetValue"); +} + +static __inline emutls_address_array* emutls_getspecific() { + LPVOID value = TlsGetValue(emutls_tls_index); + if (value == NULL) { + const DWORD err = GetLastError(); + if (err != ERROR_SUCCESS) + win_abort(err, "TlsGetValue"); + } + return (emutls_address_array*) value; +} + +/* Provide atomic load/store functions for emutls_get_index if built with MSVC. + */ +#if !defined(__ATOMIC_RELEASE) +#include <intrin.h> + +enum { __ATOMIC_ACQUIRE = 2, __ATOMIC_RELEASE = 3 }; + +static __inline uintptr_t __atomic_load_n(void *ptr, unsigned type) { + assert(type == __ATOMIC_ACQUIRE); + // These return the previous value - but since we do an OR with 0, + // it's equivalent to a plain load. +#ifdef _WIN64 + return InterlockedOr64(ptr, 0); +#else + return InterlockedOr(ptr, 0); +#endif +} + +static __inline void __atomic_store_n(void *ptr, uintptr_t val, unsigned type) { + assert(type == __ATOMIC_RELEASE); + InterlockedExchangePointer((void *volatile *)ptr, (void *)val); +} + +#endif /* __ATOMIC_RELEASE */ + +#pragma warning (pop) + +#endif /* _WIN32 */ + +static size_t emutls_num_object = 0; /* number of allocated TLS objects */ + +/* Free the allocated TLS data + */ +static void emutls_shutdown(emutls_address_array *array) { + if (array) { + uintptr_t i; + for (i = 0; i < array->size; ++i) { + if (array->data[i]) + emutls_memalign_free(array->data[i]); + } + } +} + +/* For every TLS variable xyz, + * there is one __emutls_control variable named __emutls_v.xyz. + * If xyz has non-zero initial value, __emutls_v.xyz's "value" + * will point to __emutls_t.xyz, which has the initial value. + */ +typedef struct __emutls_control { + /* Must use gcc_word here, instead of size_t, to match GCC. When + gcc_word is larger than size_t, the upper extra bits are all + zeros. We can use variables of size_t to operate on size and + align. */ + gcc_word size; /* size of the object in bytes */ + gcc_word align; /* alignment of the object in bytes */ + union { + uintptr_t index; /* data[index-1] is the object address */ + void* address; /* object address, when in single thread env */ + } object; + void* value; /* null or non-zero initial value for the object */ +} __emutls_control; + +/* Emulated TLS objects are always allocated at run-time. */ +static __inline void *emutls_allocate_object(__emutls_control *control) { + /* Use standard C types, check with gcc's emutls.o. 
*/ + COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(gcc_pointer)); + COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(void*)); + + size_t size = control->size; + size_t align = control->align; + void* base; + if (align < sizeof(void*)) + align = sizeof(void*); + /* Make sure that align is power of 2. */ + if ((align & (align - 1)) != 0) + abort(); + + base = emutls_memalign_alloc(align, size); + if (control->value) + memcpy(base, control->value, size); + else + memset(base, 0, size); + return base; +} + + +/* Returns control->object.index; set index if not allocated yet. */ +static __inline uintptr_t emutls_get_index(__emutls_control *control) { + uintptr_t index = __atomic_load_n(&control->object.index, __ATOMIC_ACQUIRE); + if (!index) { + emutls_init_once(); + emutls_lock(); + index = control->object.index; + if (!index) { + index = ++emutls_num_object; + __atomic_store_n(&control->object.index, index, __ATOMIC_RELEASE); + } + emutls_unlock(); + } + return index; +} + +/* Updates newly allocated thread local emutls_address_array. */ +static __inline void emutls_check_array_set_size(emutls_address_array *array, + uintptr_t size) { + if (array == NULL) + abort(); + array->size = size; + emutls_setspecific(array); +} + +/* Returns the new 'data' array size, number of elements, + * which must be no smaller than the given index. + */ +static __inline uintptr_t emutls_new_data_array_size(uintptr_t index) { + /* Need to allocate emutls_address_array with extra slots + * to store the header. + * Round up the emutls_address_array size to multiple of 16. + */ + uintptr_t header_words = sizeof(emutls_address_array) / sizeof(void *); + return ((index + header_words + 15) & ~((uintptr_t)15)) - header_words; +} + +/* Returns the size in bytes required for an emutls_address_array with + * N number of elements for data field. + */ +static __inline uintptr_t emutls_asize(uintptr_t N) { + return N * sizeof(void *) + sizeof(emutls_address_array); +} + +/* Returns the thread local emutls_address_array. + * Extends its size if necessary to hold address at index. + */ +static __inline emutls_address_array * +emutls_get_address_array(uintptr_t index) { + emutls_address_array* array = emutls_getspecific(); + if (array == NULL) { + uintptr_t new_size = emutls_new_data_array_size(index); + array = (emutls_address_array*) malloc(emutls_asize(new_size)); + if (array) { + memset(array->data, 0, new_size * sizeof(void*)); + array->skip_destructor_rounds = EMUTLS_SKIP_DESTRUCTOR_ROUNDS; + } + emutls_check_array_set_size(array, new_size); + } else if (index > array->size) { + uintptr_t orig_size = array->size; + uintptr_t new_size = emutls_new_data_array_size(index); + array = (emutls_address_array*) realloc(array, emutls_asize(new_size)); + if (array) + memset(array->data + orig_size, 0, + (new_size - orig_size) * sizeof(void*)); + emutls_check_array_set_size(array, new_size); + } + return array; +} + +void* __emutls_get_address(__emutls_control* control) { + uintptr_t index = emutls_get_index(control); + emutls_address_array* array = emutls_get_address_array(index--); + if (array->data[index] == NULL) + array->data[index] = emutls_allocate_object(control); + return array->data[index]; +} + +#ifdef __BIONIC__ +/* Called by Bionic on dlclose to delete the emutls pthread key. 
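/* Illustrative sketch (assumption: standalone demo, not part of the patch):
 * the array-sizing arithmetic used above. The emutls_address_array header
 * occupies two pointer-sized words, and the total allocation (header plus
 * data) is rounded up to a multiple of 16 words, so the very first
 * allocation already has room for 14 objects. */
#include <stdint.h>
#include <stdio.h>

static uintptr_t new_data_array_size(uintptr_t index, uintptr_t header_words) {
    return ((index + header_words + 15) & ~(uintptr_t)15) - header_words;
}

int main(void) {
    uintptr_t header_words = 2;   /* skip_destructor_rounds + size */
    for (uintptr_t index = 1; index <= 33; index += 16)
        printf("index %2lu -> %2lu data slots\n",
               (unsigned long)index,
               (unsigned long)new_data_array_size(index, header_words));
    /* prints: index  1 -> 14, index 17 -> 30, index 33 -> 46 */
    return 0;
}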
*/ +__attribute__((visibility("hidden"))) +void __emutls_unregister_key(void) { + if (emutls_key_created) { + pthread_key_delete(emutls_pthread_key); + emutls_key_created = false; + } +} +#endif diff --git a/contrib/compiler-rt/lib/builtins/enable_execute_stack.c b/contrib/compiler-rt/lib/builtins/enable_execute_stack.c new file mode 100644 index 000000000000..327d460b4253 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/enable_execute_stack.c @@ -0,0 +1,72 @@ +/* ===-- enable_execute_stack.c - Implement __enable_execute_stack ---------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifndef _WIN32 +#include <sys/mman.h> +#endif + +/* #include "config.h" + * FIXME: CMake - include when cmake system is ready. + * Remove #define HAVE_SYSCONF 1 line. + */ +#define HAVE_SYSCONF 1 + +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +#else +#ifndef __APPLE__ +#include <unistd.h> +#endif /* __APPLE__ */ +#endif /* _WIN32 */ + +#if __LP64__ + #define TRAMPOLINE_SIZE 48 +#else + #define TRAMPOLINE_SIZE 40 +#endif + +/* + * The compiler generates calls to __enable_execute_stack() when creating + * trampoline functions on the stack for use with nested functions. + * It is expected to mark the page(s) containing the address + * and the next 48 bytes as executable. Since the stack is normally rw- + * that means changing the protection on those page(s) to rwx. + */ + +COMPILER_RT_ABI void +__enable_execute_stack(void* addr) +{ + +#if _WIN32 + MEMORY_BASIC_INFORMATION mbi; + if (!VirtualQuery (addr, &mbi, sizeof(mbi))) + return; /* We should probably assert here because there is no return value */ + VirtualProtect (mbi.BaseAddress, mbi.RegionSize, PAGE_EXECUTE_READWRITE, &mbi.Protect); +#else +#if __APPLE__ + /* On Darwin, pagesize is always 4096 bytes */ + const uintptr_t pageSize = 4096; +#elif !defined(HAVE_SYSCONF) +#error "HAVE_SYSCONF not defined! See enable_execute_stack.c" +#else + const uintptr_t pageSize = sysconf(_SC_PAGESIZE); +#endif /* __APPLE__ */ + + const uintptr_t pageAlignMask = ~(pageSize-1); + uintptr_t p = (uintptr_t)addr; + unsigned char* startPage = (unsigned char*)(p & pageAlignMask); + unsigned char* endPage = (unsigned char*)((p+TRAMPOLINE_SIZE+pageSize) & pageAlignMask); + size_t length = endPage - startPage; + (void) mprotect((void *)startPage, length, PROT_READ | PROT_WRITE | PROT_EXEC); +#endif +} diff --git a/contrib/compiler-rt/lib/builtins/eprintf.c b/contrib/compiler-rt/lib/builtins/eprintf.c new file mode 100644 index 000000000000..89f34b154577 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/eprintf.c @@ -0,0 +1,35 @@ +/* ===---------- eprintf.c - Implements __eprintf --------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + + + +#include "int_lib.h" +#include <stdio.h> + + +/* + * __eprintf() was used in an old version of <assert.h>. + * It can eventually go away, but it is needed when linking + * .o files built with the old <assert.h>. + * + * It should never be exported from a dylib, so it is marked + * visibility hidden. 
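/* Illustrative sketch (assumption: standalone demo with a made-up address,
 * not part of the patch): the page-rounding arithmetic __enable_execute_stack()
 * uses. Rounding the trampoline address down to a page boundary and its end
 * up past the next boundary guarantees the mprotect() range covers every
 * page the TRAMPOLINE_SIZE bytes can touch. */
#include <stdint.h>
#include <stdio.h>

int main(void) {
    const uintptr_t pageSize = 4096;                 /* assumed page size */
    const uintptr_t mask = ~(pageSize - 1);
    const uintptr_t trampolineSize = 48;             /* LP64 value from above */
    uintptr_t addr = 0x7ffd12345fe0u;                /* trampoline straddles a page */
    uintptr_t startPage = addr & mask;
    uintptr_t endPage = (addr + trampolineSize + pageSize) & mask;
    printf("mprotect %llu bytes at %#llx\n",
           (unsigned long long)(endPage - startPage),
           (unsigned long long)startPage);           /* 8192 bytes: two pages */
    return 0;
}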
+ */ +#ifndef _WIN32 +__attribute__((visibility("hidden"))) +#endif +COMPILER_RT_ABI void +__eprintf(const char* format, const char* assertion_expression, + const char* line, const char* file) +{ + fprintf(stderr, format, assertion_expression, line, file); + fflush(stderr); + compilerrt_abort(); +} diff --git a/contrib/compiler-rt/lib/builtins/extenddftf2.c b/contrib/compiler-rt/lib/builtins/extenddftf2.c new file mode 100644 index 000000000000..86dab8f03a8f --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/extenddftf2.c @@ -0,0 +1,23 @@ +//===-- lib/extenddftf2.c - double -> quad conversion -------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +#define SRC_DOUBLE +#define DST_QUAD +#include "fp_extend_impl.inc" + +COMPILER_RT_ABI long double __extenddftf2(double a) { + return __extendXfYf2__(a); +} + +#endif diff --git a/contrib/compiler-rt/lib/builtins/extendhfsf2.c b/contrib/compiler-rt/lib/builtins/extendhfsf2.c new file mode 100644 index 000000000000..d9c0db84b0ce --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/extendhfsf2.c @@ -0,0 +1,33 @@ +//===-- lib/extendhfsf2.c - half -> single conversion -------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// + +#define SRC_HALF +#define DST_SINGLE +#include "fp_extend_impl.inc" + +// Use a forwarding definition and noinline to implement a poor man's alias, +// as there isn't a good cross-platform way of defining one. +COMPILER_RT_ABI NOINLINE float __extendhfsf2(uint16_t a) { + return __extendXfYf2__(a); +} + +COMPILER_RT_ABI float __gnu_h2f_ieee(uint16_t a) { + return __extendhfsf2(a); +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI float __aeabi_h2f(uint16_t a) { + return __extendhfsf2(a); +} +#else +AEABI_RTABI float __aeabi_h2f(uint16_t a) COMPILER_RT_ALIAS(__extendhfsf2); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/extendsfdf2.c b/contrib/compiler-rt/lib/builtins/extendsfdf2.c new file mode 100644 index 000000000000..3d84529a6c53 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/extendsfdf2.c @@ -0,0 +1,27 @@ +//===-- lib/extendsfdf2.c - single -> double conversion -----------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// + +#define SRC_SINGLE +#define DST_DOUBLE +#include "fp_extend_impl.inc" + +COMPILER_RT_ABI double __extendsfdf2(float a) { + return __extendXfYf2__(a); +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI double __aeabi_f2d(float a) { + return __extendsfdf2(a); +} +#else +AEABI_RTABI double __aeabi_f2d(float a) COMPILER_RT_ALIAS(__extendsfdf2); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/extendsftf2.c b/contrib/compiler-rt/lib/builtins/extendsftf2.c new file mode 100644 index 000000000000..2eeeba284845 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/extendsftf2.c @@ -0,0 +1,23 @@ +//===-- lib/extendsftf2.c - single -> quad conversion -------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +#define SRC_SINGLE +#define DST_QUAD +#include "fp_extend_impl.inc" + +COMPILER_RT_ABI long double __extendsftf2(float a) { + return __extendXfYf2__(a); +} + +#endif diff --git a/contrib/compiler-rt/lib/builtins/ffsdi2.c b/contrib/compiler-rt/lib/builtins/ffsdi2.c new file mode 100644 index 000000000000..a5ac9900ff10 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/ffsdi2.c @@ -0,0 +1,33 @@ +/* ===-- ffsdi2.c - Implement __ffsdi2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __ffsdi2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: the index of the least significant 1-bit in a, or + * the value zero if a is zero. The least significant bit is index one. + */ + +COMPILER_RT_ABI si_int +__ffsdi2(di_int a) +{ + dwords x; + x.all = a; + if (x.s.low == 0) + { + if (x.s.high == 0) + return 0; + return __builtin_ctz(x.s.high) + (1 + sizeof(si_int) * CHAR_BIT); + } + return __builtin_ctz(x.s.low) + 1; +} diff --git a/contrib/compiler-rt/lib/builtins/ffssi2.c b/contrib/compiler-rt/lib/builtins/ffssi2.c new file mode 100644 index 000000000000..e5180eff5e08 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/ffssi2.c @@ -0,0 +1,29 @@ +/* ===-- ffssi2.c - Implement __ffssi2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __ffssi2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: the index of the least significant 1-bit in a, or + * the value zero if a is zero. The least significant bit is index one. 
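/* Illustrative sketch (assumption: standalone demo, not part of the patch):
 * the ffs convention used by __ffssi2 and __ffsdi2 - bit positions are
 * 1-based and 0 means "no bit set" - expressed with the same __builtin_ctz
 * building block the implementations rely on. */
#include <stdio.h>

static int ffs32_demo(unsigned a) {
    return a ? __builtin_ctz(a) + 1 : 0;   /* count trailing zeros, then 1-base */
}

int main(void) {
    printf("%d %d %d\n", ffs32_demo(0), ffs32_demo(1), ffs32_demo(0x80));  /* 0 1 8 */
    return 0;
}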
+ */ + +COMPILER_RT_ABI si_int +__ffssi2(si_int a) +{ + if (a == 0) + { + return 0; + } + return __builtin_ctz(a) + 1; +} diff --git a/contrib/compiler-rt/lib/builtins/ffsti2.c b/contrib/compiler-rt/lib/builtins/ffsti2.c new file mode 100644 index 000000000000..dcdb3bd7f807 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/ffsti2.c @@ -0,0 +1,37 @@ +/* ===-- ffsti2.c - Implement __ffsti2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __ffsti2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: the index of the least significant 1-bit in a, or + * the value zero if a is zero. The least significant bit is index one. + */ + +COMPILER_RT_ABI si_int +__ffsti2(ti_int a) +{ + twords x; + x.all = a; + if (x.s.low == 0) + { + if (x.s.high == 0) + return 0; + return __builtin_ctzll(x.s.high) + (1 + sizeof(di_int) * CHAR_BIT); + } + return __builtin_ctzll(x.s.low) + 1; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/fixdfdi.c b/contrib/compiler-rt/lib/builtins/fixdfdi.c new file mode 100644 index 000000000000..54e312d3c8f7 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fixdfdi.c @@ -0,0 +1,55 @@ +/* ===-- fixdfdi.c - Implement __fixdfdi -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + +#define DOUBLE_PRECISION +#include "fp_lib.h" + +#ifndef __SOFT_FP__ +/* Support for systems that have hardware floating-point; can set the invalid + * flag as a side-effect of computation. + */ + +COMPILER_RT_ABI du_int __fixunsdfdi(double a); + +COMPILER_RT_ABI di_int +__fixdfdi(double a) +{ + if (a < 0.0) { + return -__fixunsdfdi(-a); + } + return __fixunsdfdi(a); +} + +#else +/* Support for systems that don't have hardware floating-point; there are no + * flags to set, and we don't want to code-gen to an unknown soft-float + * implementation. + */ + +typedef di_int fixint_t; +typedef du_int fixuint_t; +#include "fp_fixint_impl.inc" + +COMPILER_RT_ABI di_int +__fixdfdi(fp_t a) { + return __fixint(a); +} + +#endif + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI di_int __aeabi_d2lz(fp_t a) { + return __fixdfdi(a); +} +#else +AEABI_RTABI di_int __aeabi_d2lz(fp_t a) COMPILER_RT_ALIAS(__fixdfdi); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/fixdfsi.c b/contrib/compiler-rt/lib/builtins/fixdfsi.c new file mode 100644 index 000000000000..5b9588175717 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fixdfsi.c @@ -0,0 +1,30 @@ +/* ===-- fixdfsi.c - Implement __fixdfsi -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
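/* Illustrative sketch (assumption: standalone demo with stand-in names, not
 * part of the patch): the wrapper pattern __fixdfdi uses on hardware-FP
 * targets - convert the magnitude with the unsigned helper, then restore the
 * sign - which preserves truncation toward zero for negative inputs. */
#include <stdint.h>
#include <stdio.h>

static uint64_t to_u64_trunc(double a) {      /* stand-in for __fixunsdfdi */
    return (uint64_t)a;
}

static int64_t to_i64_trunc(double a) {       /* mirrors __fixdfdi's logic */
    if (a < 0.0)
        return -(int64_t)to_u64_trunc(-a);
    return (int64_t)to_u64_trunc(a);
}

int main(void) {
    printf("%lld %lld\n",
           (long long)to_i64_trunc(-2.9), (long long)to_i64_trunc(2.9));  /* -2 2 */
    return 0;
}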
+ * + * ===----------------------------------------------------------------------=== + */ + +#define DOUBLE_PRECISION +#include "fp_lib.h" +typedef si_int fixint_t; +typedef su_int fixuint_t; +#include "fp_fixint_impl.inc" + +COMPILER_RT_ABI si_int +__fixdfsi(fp_t a) { + return __fixint(a); +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI si_int __aeabi_d2iz(fp_t a) { + return __fixdfsi(a); +} +#else +AEABI_RTABI si_int __aeabi_d2iz(fp_t a) COMPILER_RT_ALIAS(__fixdfsi); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/fixdfti.c b/contrib/compiler-rt/lib/builtins/fixdfti.c new file mode 100644 index 000000000000..aaf225e74f86 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fixdfti.c @@ -0,0 +1,26 @@ +/* ===-- fixdfti.c - Implement __fixdfti -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT +#define DOUBLE_PRECISION +#include "fp_lib.h" + +typedef ti_int fixint_t; +typedef tu_int fixuint_t; +#include "fp_fixint_impl.inc" + +COMPILER_RT_ABI ti_int +__fixdfti(fp_t a) { + return __fixint(a); +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/fixsfdi.c b/contrib/compiler-rt/lib/builtins/fixsfdi.c new file mode 100644 index 000000000000..32e87c60889f --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fixsfdi.c @@ -0,0 +1,55 @@ +/* ===-- fixsfdi.c - Implement __fixsfdi -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + +#define SINGLE_PRECISION +#include "fp_lib.h" + +#ifndef __SOFT_FP__ +/* Support for systems that have hardware floating-point; can set the invalid + * flag as a side-effect of computation. + */ + +COMPILER_RT_ABI du_int __fixunssfdi(float a); + +COMPILER_RT_ABI di_int +__fixsfdi(float a) +{ + if (a < 0.0f) { + return -__fixunssfdi(-a); + } + return __fixunssfdi(a); +} + +#else +/* Support for systems that don't have hardware floating-point; there are no + * flags to set, and we don't want to code-gen to an unknown soft-float + * implementation. + */ + +typedef di_int fixint_t; +typedef du_int fixuint_t; +#include "fp_fixint_impl.inc" + +COMPILER_RT_ABI di_int +__fixsfdi(fp_t a) { + return __fixint(a); +} + +#endif + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI di_int __aeabi_f2lz(fp_t a) { + return __fixsfdi(a); +} +#else +AEABI_RTABI di_int __aeabi_f2lz(fp_t a) COMPILER_RT_ALIAS(__fixsfdi); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/fixsfsi.c b/contrib/compiler-rt/lib/builtins/fixsfsi.c new file mode 100644 index 000000000000..e94e5f3dcd68 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fixsfsi.c @@ -0,0 +1,30 @@ +/* ===-- fixsfsi.c - Implement __fixsfsi -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#define SINGLE_PRECISION +#include "fp_lib.h" +typedef si_int fixint_t; +typedef su_int fixuint_t; +#include "fp_fixint_impl.inc" + +COMPILER_RT_ABI si_int +__fixsfsi(fp_t a) { + return __fixint(a); +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI si_int __aeabi_f2iz(fp_t a) { + return __fixsfsi(a); +} +#else +AEABI_RTABI si_int __aeabi_f2iz(fp_t a) COMPILER_RT_ALIAS(__fixsfsi); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/fixsfti.c b/contrib/compiler-rt/lib/builtins/fixsfti.c new file mode 100644 index 000000000000..3a159b3e18e4 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fixsfti.c @@ -0,0 +1,26 @@ +/* ===-- fixsfti.c - Implement __fixsfti -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT +#define SINGLE_PRECISION +#include "fp_lib.h" + +typedef ti_int fixint_t; +typedef tu_int fixuint_t; +#include "fp_fixint_impl.inc" + +COMPILER_RT_ABI ti_int +__fixsfti(fp_t a) { + return __fixint(a); +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/fixtfdi.c b/contrib/compiler-rt/lib/builtins/fixtfdi.c new file mode 100644 index 000000000000..bc9dea1f4f81 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fixtfdi.c @@ -0,0 +1,23 @@ +/* ===-- fixtfdi.c - Implement __fixtfdi -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +typedef di_int fixint_t; +typedef du_int fixuint_t; +#include "fp_fixint_impl.inc" + +COMPILER_RT_ABI di_int +__fixtfdi(fp_t a) { + return __fixint(a); +} +#endif diff --git a/contrib/compiler-rt/lib/builtins/fixtfsi.c b/contrib/compiler-rt/lib/builtins/fixtfsi.c new file mode 100644 index 000000000000..feb3de885090 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fixtfsi.c @@ -0,0 +1,23 @@ +/* ===-- fixtfsi.c - Implement __fixtfsi -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +typedef si_int fixint_t; +typedef su_int fixuint_t; +#include "fp_fixint_impl.inc" + +COMPILER_RT_ABI si_int +__fixtfsi(fp_t a) { + return __fixint(a); +} +#endif diff --git a/contrib/compiler-rt/lib/builtins/fixtfti.c b/contrib/compiler-rt/lib/builtins/fixtfti.c new file mode 100644 index 000000000000..ee4ada85cb4a --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fixtfti.c @@ -0,0 +1,23 @@ +/* ===-- fixtfti.c - Implement __fixtfti -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +typedef ti_int fixint_t; +typedef tu_int fixuint_t; +#include "fp_fixint_impl.inc" + +COMPILER_RT_ABI ti_int +__fixtfti(fp_t a) { + return __fixint(a); +} +#endif diff --git a/contrib/compiler-rt/lib/builtins/fixunsdfdi.c b/contrib/compiler-rt/lib/builtins/fixunsdfdi.c new file mode 100644 index 000000000000..bfe4dbb25656 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fixunsdfdi.c @@ -0,0 +1,52 @@ +/* ===-- fixunsdfdi.c - Implement __fixunsdfdi -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + +#define DOUBLE_PRECISION +#include "fp_lib.h" + +#ifndef __SOFT_FP__ +/* Support for systems that have hardware floating-point; can set the invalid + * flag as a side-effect of computation. + */ + +COMPILER_RT_ABI du_int +__fixunsdfdi(double a) +{ + if (a <= 0.0) return 0; + su_int high = a / 4294967296.f; /* a / 0x1p32f; */ + su_int low = a - (double)high * 4294967296.f; /* high * 0x1p32f; */ + return ((du_int)high << 32) | low; +} + +#else +/* Support for systems that don't have hardware floating-point; there are no + * flags to set, and we don't want to code-gen to an unknown soft-float + * implementation. + */ + +typedef du_int fixuint_t; +#include "fp_fixuint_impl.inc" + +COMPILER_RT_ABI du_int +__fixunsdfdi(fp_t a) { + return __fixuint(a); +} + +#endif + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI du_int __aeabi_d2ulz(fp_t a) { + return __fixunsdfdi(a); +} +#else +AEABI_RTABI du_int __aeabi_d2ulz(fp_t a) COMPILER_RT_ALIAS(__fixunsdfdi); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/fixunsdfsi.c b/contrib/compiler-rt/lib/builtins/fixunsdfsi.c new file mode 100644 index 000000000000..3c5355beae1a --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fixunsdfsi.c @@ -0,0 +1,29 @@ +/* ===-- fixunsdfsi.c - Implement __fixunsdfsi -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
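/* Illustrative sketch (assumption: standalone demo, not part of the patch):
 * the high/low split performed by __fixunsdfdi on hardware-FP targets -
 * divide by 2^32 to obtain the upper 32 bits, multiply back and subtract to
 * obtain the lower 32 bits, then reassemble the 64-bit result. */
#include <stdint.h>
#include <stdio.h>

int main(void) {
    double a = 123456789012345.0;                  /* exactly representable (< 2^53) */
    uint32_t high = (uint32_t)(a / 4294967296.0);  /* a / 2^32, truncated */
    uint32_t low  = (uint32_t)(a - (double)high * 4294967296.0);
    uint64_t result = ((uint64_t)high << 32) | low;
    printf("%llu\n", (unsigned long long)result);  /* 123456789012345 */
    return 0;
}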
+ * + * ===----------------------------------------------------------------------=== + */ + +#define DOUBLE_PRECISION +#include "fp_lib.h" +typedef su_int fixuint_t; +#include "fp_fixuint_impl.inc" + +COMPILER_RT_ABI su_int +__fixunsdfsi(fp_t a) { + return __fixuint(a); +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI su_int __aeabi_d2uiz(fp_t a) { + return __fixunsdfsi(a); +} +#else +AEABI_RTABI su_int __aeabi_d2uiz(fp_t a) COMPILER_RT_ALIAS(__fixunsdfsi); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/fixunsdfti.c b/contrib/compiler-rt/lib/builtins/fixunsdfti.c new file mode 100644 index 000000000000..f8046a02632b --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fixunsdfti.c @@ -0,0 +1,23 @@ +/* ===-- fixunsdfti.c - Implement __fixunsdfti -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT +#define DOUBLE_PRECISION +#include "fp_lib.h" +typedef tu_int fixuint_t; +#include "fp_fixuint_impl.inc" + +COMPILER_RT_ABI tu_int +__fixunsdfti(fp_t a) { + return __fixuint(a); +} +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/fixunssfdi.c b/contrib/compiler-rt/lib/builtins/fixunssfdi.c new file mode 100644 index 000000000000..080a25bb1e99 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fixunssfdi.c @@ -0,0 +1,53 @@ +/* ===-- fixunssfdi.c - Implement __fixunssfdi -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + +#define SINGLE_PRECISION +#include "fp_lib.h" + +#ifndef __SOFT_FP__ +/* Support for systems that have hardware floating-point; can set the invalid + * flag as a side-effect of computation. + */ + +COMPILER_RT_ABI du_int +__fixunssfdi(float a) +{ + if (a <= 0.0f) return 0; + double da = a; + su_int high = da / 4294967296.f; /* da / 0x1p32f; */ + su_int low = da - (double)high * 4294967296.f; /* high * 0x1p32f; */ + return ((du_int)high << 32) | low; +} + +#else +/* Support for systems that don't have hardware floating-point; there are no + * flags to set, and we don't want to code-gen to an unknown soft-float + * implementation. + */ + +typedef du_int fixuint_t; +#include "fp_fixuint_impl.inc" + +COMPILER_RT_ABI du_int +__fixunssfdi(fp_t a) { + return __fixuint(a); +} + +#endif + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI du_int __aeabi_f2ulz(fp_t a) { + return __fixunssfdi(a); +} +#else +AEABI_RTABI du_int __aeabi_f2ulz(fp_t a) COMPILER_RT_ALIAS(__fixunssfdi); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/fixunssfsi.c b/contrib/compiler-rt/lib/builtins/fixunssfsi.c new file mode 100644 index 000000000000..eca2916a5c88 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fixunssfsi.c @@ -0,0 +1,33 @@ +/* ===-- fixunssfsi.c - Implement __fixunssfsi -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + * + * This file implements __fixunssfsi for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#define SINGLE_PRECISION +#include "fp_lib.h" +typedef su_int fixuint_t; +#include "fp_fixuint_impl.inc" + +COMPILER_RT_ABI su_int +__fixunssfsi(fp_t a) { + return __fixuint(a); +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI su_int __aeabi_f2uiz(fp_t a) { + return __fixunssfsi(a); +} +#else +AEABI_RTABI su_int __aeabi_f2uiz(fp_t a) COMPILER_RT_ALIAS(__fixunssfsi); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/fixunssfti.c b/contrib/compiler-rt/lib/builtins/fixunssfti.c new file mode 100644 index 000000000000..862d7bd6c7af --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fixunssfti.c @@ -0,0 +1,26 @@ +/* ===-- fixunssfti.c - Implement __fixunssfti -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __fixunssfti for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#define SINGLE_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) +typedef tu_int fixuint_t; +#include "fp_fixuint_impl.inc" + +COMPILER_RT_ABI tu_int +__fixunssfti(fp_t a) { + return __fixuint(a); +} +#endif diff --git a/contrib/compiler-rt/lib/builtins/fixunstfdi.c b/contrib/compiler-rt/lib/builtins/fixunstfdi.c new file mode 100644 index 000000000000..b2995f65834a --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fixunstfdi.c @@ -0,0 +1,22 @@ +/* ===-- fixunstfdi.c - Implement __fixunstfdi -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +typedef du_int fixuint_t; +#include "fp_fixuint_impl.inc" + +COMPILER_RT_ABI du_int +__fixunstfdi(fp_t a) { + return __fixuint(a); +} +#endif diff --git a/contrib/compiler-rt/lib/builtins/fixunstfsi.c b/contrib/compiler-rt/lib/builtins/fixunstfsi.c new file mode 100644 index 000000000000..b5d3f6a7d38d --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fixunstfsi.c @@ -0,0 +1,22 @@ +/* ===-- fixunstfsi.c - Implement __fixunstfsi -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +typedef su_int fixuint_t; +#include "fp_fixuint_impl.inc" + +COMPILER_RT_ABI su_int +__fixunstfsi(fp_t a) { + return __fixuint(a); +} +#endif diff --git a/contrib/compiler-rt/lib/builtins/fixunstfti.c b/contrib/compiler-rt/lib/builtins/fixunstfti.c new file mode 100644 index 000000000000..22ff9dfc0339 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fixunstfti.c @@ -0,0 +1,22 @@ +/* ===-- fixunstfsi.c - Implement __fixunstfsi -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +typedef tu_int fixuint_t; +#include "fp_fixuint_impl.inc" + +COMPILER_RT_ABI tu_int +__fixunstfti(fp_t a) { + return __fixuint(a); +} +#endif diff --git a/contrib/compiler-rt/lib/builtins/fixunsxfdi.c b/contrib/compiler-rt/lib/builtins/fixunsxfdi.c new file mode 100644 index 000000000000..075304e78dc9 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fixunsxfdi.c @@ -0,0 +1,46 @@ +/* ===-- fixunsxfdi.c - Implement __fixunsxfdi -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __fixunsxfdi for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#if !_ARCH_PPC + +#include "int_lib.h" + +/* Returns: convert a to a unsigned long long, rounding toward zero. + * Negative values all become zero. + */ + +/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes + * du_int is a 64 bit integral type + * value in long double is representable in du_int or is negative + * (no range checking performed) + */ + +/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | + * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + */ + +COMPILER_RT_ABI du_int +__fixunsxfdi(long double a) +{ + long_double_bits fb; + fb.f = a; + int e = (fb.u.high.s.low & 0x00007FFF) - 16383; + if (e < 0 || (fb.u.high.s.low & 0x00008000)) + return 0; + if ((unsigned)e > sizeof(du_int) * CHAR_BIT) + return ~(du_int)0; + return fb.u.low.all >> (63 - e); +} + +#endif diff --git a/contrib/compiler-rt/lib/builtins/fixunsxfsi.c b/contrib/compiler-rt/lib/builtins/fixunsxfsi.c new file mode 100644 index 000000000000..c3c70f743de8 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fixunsxfsi.c @@ -0,0 +1,45 @@ +/* ===-- fixunsxfsi.c - Implement __fixunsxfsi -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __fixunsxfsi for the compiler_rt library. 
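/* Illustrative sketch (assumption: x86 with 80-bit long double, little
 * endian; not part of the patch): reading the fields __fixunsxfdi works on -
 * a 15-bit exponent biased by 16383 in the top 16 bits and an explicit
 * 64-bit significand whose leading integer bit is not hidden - and applying
 * the same 63 - e shift to truncate toward zero. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void) {
    long double a = 6.5L;                       /* 1.101b * 2^2 */
    unsigned char raw[16] = {0};
    memcpy(raw, &a, 10);                        /* the 10 significant bytes */
    uint64_t mantissa;
    uint16_t sign_exp;
    memcpy(&mantissa, raw, 8);
    memcpy(&sign_exp, raw + 8, 2);
    int e = (sign_exp & 0x7FFF) - 16383;        /* unbiased exponent, here 2 */
    uint64_t result = mantissa >> (63 - e);     /* same shift as the code above */
    printf("e=%d result=%llu\n", e, (unsigned long long)result);  /* e=2 result=6 */
    return 0;
}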
+ * + * ===----------------------------------------------------------------------=== + */ + +#if !_ARCH_PPC + +#include "int_lib.h" + +/* Returns: convert a to a unsigned int, rounding toward zero. + * Negative values all become zero. + */ + +/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes + * su_int is a 32 bit integral type + * value in long double is representable in su_int or is negative + */ + +/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | + * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + */ + +COMPILER_RT_ABI su_int +__fixunsxfsi(long double a) +{ + long_double_bits fb; + fb.f = a; + int e = (fb.u.high.s.low & 0x00007FFF) - 16383; + if (e < 0 || (fb.u.high.s.low & 0x00008000)) + return 0; + if ((unsigned)e > sizeof(su_int) * CHAR_BIT) + return ~(su_int)0; + return fb.u.low.s.high >> (31 - e); +} + +#endif /* !_ARCH_PPC */ diff --git a/contrib/compiler-rt/lib/builtins/fixunsxfti.c b/contrib/compiler-rt/lib/builtins/fixunsxfti.c new file mode 100644 index 000000000000..fb39d00ff5b2 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fixunsxfti.c @@ -0,0 +1,50 @@ +/* ===-- fixunsxfti.c - Implement __fixunsxfti -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __fixunsxfti for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: convert a to a unsigned long long, rounding toward zero. + * Negative values all become zero. + */ + +/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes + * tu_int is a 128 bit integral type + * value in long double is representable in tu_int or is negative + */ + +/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | + * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + */ + +COMPILER_RT_ABI tu_int +__fixunsxfti(long double a) +{ + long_double_bits fb; + fb.f = a; + int e = (fb.u.high.s.low & 0x00007FFF) - 16383; + if (e < 0 || (fb.u.high.s.low & 0x00008000)) + return 0; + if ((unsigned)e > sizeof(tu_int) * CHAR_BIT) + return ~(tu_int)0; + tu_int r = fb.u.low.all; + if (e > 63) + r <<= (e - 63); + else + r >>= (63 - e); + return r; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/fixxfdi.c b/contrib/compiler-rt/lib/builtins/fixxfdi.c new file mode 100644 index 000000000000..011787f9e4b4 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fixxfdi.c @@ -0,0 +1,48 @@ +/* ===-- fixxfdi.c - Implement __fixxfdi -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __fixxfdi for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#if !_ARCH_PPC + +#include "int_lib.h" + +/* Returns: convert a to a signed long long, rounding toward zero. 
*/ + +/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes + * di_int is a 64 bit integral type + * value in long double is representable in di_int (no range checking performed) + */ + +/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | + * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + */ + +COMPILER_RT_ABI di_int +__fixxfdi(long double a) +{ + const di_int di_max = (di_int)((~(du_int)0) / 2); + const di_int di_min = -di_max - 1; + long_double_bits fb; + fb.f = a; + int e = (fb.u.high.s.low & 0x00007FFF) - 16383; + if (e < 0) + return 0; + if ((unsigned)e >= sizeof(di_int) * CHAR_BIT) + return a > 0 ? di_max : di_min; + di_int s = -(si_int)((fb.u.high.s.low & 0x00008000) >> 15); + di_int r = fb.u.low.all; + r = (du_int)r >> (63 - e); + return (r ^ s) - s; +} + +#endif /* !_ARCH_PPC */ diff --git a/contrib/compiler-rt/lib/builtins/fixxfti.c b/contrib/compiler-rt/lib/builtins/fixxfti.c new file mode 100644 index 000000000000..968a4f0d5eea --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fixxfti.c @@ -0,0 +1,51 @@ +/* ===-- fixxfti.c - Implement __fixxfti -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __fixxfti for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: convert a to a signed long long, rounding toward zero. */ + +/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes + * ti_int is a 128 bit integral type + * value in long double is representable in ti_int + */ + +/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | + * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + */ + +COMPILER_RT_ABI ti_int +__fixxfti(long double a) +{ + const ti_int ti_max = (ti_int)((~(tu_int)0) / 2); + const ti_int ti_min = -ti_max - 1; + long_double_bits fb; + fb.f = a; + int e = (fb.u.high.s.low & 0x00007FFF) - 16383; + if (e < 0) + return 0; + ti_int s = -(si_int)((fb.u.high.s.low & 0x00008000) >> 15); + ti_int r = fb.u.low.all; + if ((unsigned)e >= sizeof(ti_int) * CHAR_BIT) + return a > 0 ? ti_max : ti_min; + if (e > 63) + r <<= (e - 63); + else + r >>= (63 - e); + return (r ^ s) - s; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/floatdidf.c b/contrib/compiler-rt/lib/builtins/floatdidf.c new file mode 100644 index 000000000000..36b856e078d4 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/floatdidf.c @@ -0,0 +1,115 @@ +/*===-- floatdidf.c - Implement __floatdidf -------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------=== + * + * This file implements __floatdidf for the compiler_rt library. + * + *===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: convert a to a double, rounding toward even. 
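+ * Two implementations follow: when hardware floating point is available, the
+ * high 32 bits are converted exactly with a multiply by 0x1.0p32 and the low
+ * 32 bits are planted in the significand of 0x1.0p52, so the only rounding
+ * happens in the final addition; the soft-float fallback assembles the
+ * IEEE-754 bit pattern with integer arithmetic.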
*/ + +/* Assumption: double is a IEEE 64 bit floating point type + * di_int is a 64 bit integral type + */ + +/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */ + +#ifndef __SOFT_FP__ +/* Support for systems that have hardware floating-point; we'll set the inexact flag + * as a side-effect of this computation. + */ + +COMPILER_RT_ABI double +__floatdidf(di_int a) +{ + static const double twop52 = 4503599627370496.0; // 0x1.0p52 + static const double twop32 = 4294967296.0; // 0x1.0p32 + + union { int64_t x; double d; } low = { .d = twop52 }; + + const double high = (int32_t)(a >> 32) * twop32; + low.x |= a & INT64_C(0x00000000ffffffff); + + const double result = (high - twop52) + low.d; + return result; +} + +#else +/* Support for systems that don't have hardware floating-point; there are no flags to + * set, and we don't want to code-gen to an unknown soft-float implementation. + */ + +COMPILER_RT_ABI double +__floatdidf(di_int a) +{ + if (a == 0) + return 0.0; + const unsigned N = sizeof(di_int) * CHAR_BIT; + const di_int s = a >> (N-1); + a = (a ^ s) - s; + int sd = N - __builtin_clzll(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > DBL_MANT_DIG) + { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit DBL_MANT_DIG-1 bits to the right of 1 + * Q = bit DBL_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) + { + case DBL_MANT_DIG + 1: + a <<= 1; + break; + case DBL_MANT_DIG + 2: + break; + default: + a = ((du_int)a >> (sd - (DBL_MANT_DIG+2))) | + ((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */ + if (a & ((du_int)1 << DBL_MANT_DIG)) + { + a >>= 1; + ++e; + } + /* a is now rounded to DBL_MANT_DIG bits */ + } + else + { + a <<= (DBL_MANT_DIG - sd); + /* a is now rounded to DBL_MANT_DIG bits */ + } + double_bits fb; + fb.u.s.high = ((su_int)s & 0x80000000) | /* sign */ + ((e + 1023) << 20) | /* exponent */ + ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */ + fb.u.s.low = (su_int)a; /* mantissa-low */ + return fb.f; +} +#endif + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI double __aeabi_l2d(di_int a) { + return __floatdidf(a); +} +#else +AEABI_RTABI double __aeabi_l2d(di_int a) COMPILER_RT_ALIAS(__floatdidf); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/floatdisf.c b/contrib/compiler-rt/lib/builtins/floatdisf.c new file mode 100644 index 000000000000..a2f09eb2ed2c --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/floatdisf.c @@ -0,0 +1,88 @@ +/*===-- floatdisf.c - Implement __floatdisf -------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------=== + * + * This file implements __floatdisf for the compiler_rt library. 
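+ * The conversion below keeps two guard bits plus a sticky bit and rounds to
+ * nearest, ties to even: for example 33554433 (0x2000001) lies exactly halfway
+ * between two representable floats and converts to 33554432.0F.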
+ * + *===----------------------------------------------------------------------=== + */ + +/* Returns: convert a to a float, rounding toward even.*/ + +/* Assumption: float is a IEEE 32 bit floating point type + * di_int is a 64 bit integral type + */ + +/* seee eeee emmm mmmm mmmm mmmm mmmm mmmm */ + +#include "int_lib.h" + +COMPILER_RT_ABI float +__floatdisf(di_int a) +{ + if (a == 0) + return 0.0F; + const unsigned N = sizeof(di_int) * CHAR_BIT; + const di_int s = a >> (N-1); + a = (a ^ s) - s; + int sd = N - __builtin_clzll(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > FLT_MANT_DIG) + { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit FLT_MANT_DIG-1 bits to the right of 1 + * Q = bit FLT_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) + { + case FLT_MANT_DIG + 1: + a <<= 1; + break; + case FLT_MANT_DIG + 2: + break; + default: + a = ((du_int)a >> (sd - (FLT_MANT_DIG+2))) | + ((a & ((du_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */ + if (a & ((du_int)1 << FLT_MANT_DIG)) + { + a >>= 1; + ++e; + } + /* a is now rounded to FLT_MANT_DIG bits */ + } + else + { + a <<= (FLT_MANT_DIG - sd); + /* a is now rounded to FLT_MANT_DIG bits */ + } + float_bits fb; + fb.u = ((su_int)s & 0x80000000) | /* sign */ + ((e + 127) << 23) | /* exponent */ + ((su_int)a & 0x007FFFFF); /* mantissa */ + return fb.f; +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI float __aeabi_l2f(di_int a) { + return __floatdisf(a); +} +#else +AEABI_RTABI float __aeabi_l2f(di_int a) COMPILER_RT_ALIAS(__floatdisf); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/floatditf.c b/contrib/compiler-rt/lib/builtins/floatditf.c new file mode 100644 index 000000000000..cd51dd8aade4 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/floatditf.c @@ -0,0 +1,50 @@ +//===-- lib/floatditf.c - integer -> quad-precision conversion ----*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements di_int to quad-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +COMPILER_RT_ABI fp_t __floatditf(di_int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) + return fromRep(0); + + // All other cases begin by extracting the sign and absolute value of a + rep_t sign = 0; + du_int aAbs = (du_int)a; + if (a < 0) { + sign = signBit; + aAbs = ~(du_int)a + 1U; + } + + // Exponent of (fp_t)a is the width of abs(a). 
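+  // For example, aAbs = 10 (binary 1010) gives __builtin_clzll(aAbs) = 60 and
+  // exponent = 3.  Since aAbs has at most 64 significant bits and the quad
+  // significand field holds 112, the shift below is always a left shift and
+  // the conversion is exact.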
+ const int exponent = (aWidth - 1) - __builtin_clzll(aAbs); + rep_t result; + + // Shift a into the significand field, rounding if it is a right-shift + const int shift = significandBits - exponent; + result = (rep_t)aAbs << shift ^ implicitBit; + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + // Insert the sign bit and return + return fromRep(result | sign); +} + +#endif diff --git a/contrib/compiler-rt/lib/builtins/floatdixf.c b/contrib/compiler-rt/lib/builtins/floatdixf.c new file mode 100644 index 000000000000..d39e81d7ca75 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/floatdixf.c @@ -0,0 +1,46 @@ +/* ===-- floatdixf.c - Implement __floatdixf -------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __floatdixf for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#if !_ARCH_PPC + +#include "int_lib.h" + +/* Returns: convert a to a long double, rounding toward even. */ + +/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits + * di_int is a 64 bit integral type + */ + +/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | + * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + */ + +COMPILER_RT_ABI long double +__floatdixf(di_int a) +{ + if (a == 0) + return 0.0; + const unsigned N = sizeof(di_int) * CHAR_BIT; + const di_int s = a >> (N-1); + a = (a ^ s) - s; + int clz = __builtin_clzll(a); + int e = (N - 1) - clz ; /* exponent */ + long_double_bits fb; + fb.u.high.s.low = ((su_int)s & 0x00008000) | /* sign */ + (e + 16383); /* exponent */ + fb.u.low.all = a << clz; /* mantissa */ + return fb.f; +} + +#endif /* !_ARCH_PPC */ diff --git a/contrib/compiler-rt/lib/builtins/floatsidf.c b/contrib/compiler-rt/lib/builtins/floatsidf.c new file mode 100644 index 000000000000..fe051123ce7c --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/floatsidf.c @@ -0,0 +1,61 @@ +//===-- lib/floatsidf.c - integer -> double-precision conversion --*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements integer to double-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_lib.h" + +#include "int_lib.h" + +COMPILER_RT_ABI fp_t +__floatsidf(int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) + return fromRep(0); + + // All other cases begin by extracting the sign and absolute value of a + rep_t sign = 0; + if (a < 0) { + sign = signBit; + a = -a; + } + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - __builtin_clz(a); + rep_t result; + + // Shift a into the significand field and clear the implicit bit. 
Extra + // cast to unsigned int is necessary to get the correct behavior for + // the input INT_MIN. + const int shift = significandBits - exponent; + result = (rep_t)(unsigned int)a << shift ^ implicitBit; + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + // Insert the sign bit and return + return fromRep(result | sign); +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI fp_t __aeabi_i2d(int a) { + return __floatsidf(a); +} +#else +AEABI_RTABI fp_t __aeabi_i2d(int a) COMPILER_RT_ALIAS(__floatsidf); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/floatsisf.c b/contrib/compiler-rt/lib/builtins/floatsisf.c new file mode 100644 index 000000000000..bf087ee3c295 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/floatsisf.c @@ -0,0 +1,67 @@ +//===-- lib/floatsisf.c - integer -> single-precision conversion --*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements integer to single-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_lib.h" + +#include "int_lib.h" + +COMPILER_RT_ABI fp_t +__floatsisf(int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) + return fromRep(0); + + // All other cases begin by extracting the sign and absolute value of a + rep_t sign = 0; + if (a < 0) { + sign = signBit; + a = -a; + } + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - __builtin_clz(a); + rep_t result; + + // Shift a into the significand field, rounding if it is a right-shift + if (exponent <= significandBits) { + const int shift = significandBits - exponent; + result = (rep_t)a << shift ^ implicitBit; + } else { + const int shift = exponent - significandBits; + result = (rep_t)a >> shift ^ implicitBit; + rep_t round = (rep_t)a << (typeWidth - shift); + if (round > signBit) result++; + if (round == signBit) result += result & 1; + } + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + // Insert the sign bit and return + return fromRep(result | sign); +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI fp_t __aeabi_i2f(int a) { + return __floatsisf(a); +} +#else +AEABI_RTABI fp_t __aeabi_i2f(int a) COMPILER_RT_ALIAS(__floatsisf); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/floatsitf.c b/contrib/compiler-rt/lib/builtins/floatsitf.c new file mode 100644 index 000000000000..f0abca363b5e --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/floatsitf.c @@ -0,0 +1,50 @@ +//===-- lib/floatsitf.c - integer -> quad-precision conversion ----*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements integer to quad-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. 
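+// A 32-bit source always fits in the 112-bit quad significand field, so the
+// conversion below is exact and needs no rounding step.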
+// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +COMPILER_RT_ABI fp_t __floatsitf(int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) + return fromRep(0); + + // All other cases begin by extracting the sign and absolute value of a + rep_t sign = 0; + unsigned aAbs = (unsigned)a; + if (a < 0) { + sign = signBit; + aAbs = ~(unsigned)a + 1U; + } + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - __builtin_clz(aAbs); + rep_t result; + + // Shift a into the significand field and clear the implicit bit. + const int shift = significandBits - exponent; + result = (rep_t)aAbs << shift ^ implicitBit; + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + // Insert the sign bit and return + return fromRep(result | sign); +} + +#endif diff --git a/contrib/compiler-rt/lib/builtins/floattidf.c b/contrib/compiler-rt/lib/builtins/floattidf.c new file mode 100644 index 000000000000..2702a3c8a2db --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/floattidf.c @@ -0,0 +1,83 @@ +/* ===-- floattidf.c - Implement __floattidf -------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __floattidf for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: convert a to a double, rounding toward even.*/ + +/* Assumption: double is a IEEE 64 bit floating point type + * ti_int is a 128 bit integral type + */ + +/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */ + +COMPILER_RT_ABI double +__floattidf(ti_int a) +{ + if (a == 0) + return 0.0; + const unsigned N = sizeof(ti_int) * CHAR_BIT; + const ti_int s = a >> (N-1); + a = (a ^ s) - s; + int sd = N - __clzti2(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > DBL_MANT_DIG) + { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit DBL_MANT_DIG-1 bits to the right of 1 + * Q = bit DBL_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) + { + case DBL_MANT_DIG + 1: + a <<= 1; + break; + case DBL_MANT_DIG + 2: + break; + default: + a = ((tu_int)a >> (sd - (DBL_MANT_DIG+2))) | + ((a & ((tu_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */ + if (a & ((tu_int)1 << DBL_MANT_DIG)) + { + a >>= 1; + ++e; + } + /* a is now rounded to DBL_MANT_DIG bits */ + } + else + { + a <<= (DBL_MANT_DIG - sd); + /* a is now rounded to DBL_MANT_DIG bits */ + } + double_bits fb; + fb.u.s.high = ((su_int)s & 0x80000000) | /* sign */ + ((e + 1023) << 20) | /* exponent */ + ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */ + fb.u.s.low = (su_int)a; /* 
mantissa-low */ + return fb.f; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/floattisf.c b/contrib/compiler-rt/lib/builtins/floattisf.c new file mode 100644 index 000000000000..f1b585f2c326 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/floattisf.c @@ -0,0 +1,82 @@ +/* ===-- floattisf.c - Implement __floattisf -------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __floattisf for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: convert a to a float, rounding toward even. */ + +/* Assumption: float is a IEEE 32 bit floating point type + * ti_int is a 128 bit integral type + */ + +/* seee eeee emmm mmmm mmmm mmmm mmmm mmmm */ + +COMPILER_RT_ABI float +__floattisf(ti_int a) +{ + if (a == 0) + return 0.0F; + const unsigned N = sizeof(ti_int) * CHAR_BIT; + const ti_int s = a >> (N-1); + a = (a ^ s) - s; + int sd = N - __clzti2(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > FLT_MANT_DIG) + { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit FLT_MANT_DIG-1 bits to the right of 1 + * Q = bit FLT_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) + { + case FLT_MANT_DIG + 1: + a <<= 1; + break; + case FLT_MANT_DIG + 2: + break; + default: + a = ((tu_int)a >> (sd - (FLT_MANT_DIG+2))) | + ((a & ((tu_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */ + if (a & ((tu_int)1 << FLT_MANT_DIG)) + { + a >>= 1; + ++e; + } + /* a is now rounded to FLT_MANT_DIG bits */ + } + else + { + a <<= (FLT_MANT_DIG - sd); + /* a is now rounded to FLT_MANT_DIG bits */ + } + float_bits fb; + fb.u = ((su_int)s & 0x80000000) | /* sign */ + ((e + 127) << 23) | /* exponent */ + ((su_int)a & 0x007FFFFF); /* mantissa */ + return fb.f; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/floattitf.c b/contrib/compiler-rt/lib/builtins/floattitf.c new file mode 100644 index 000000000000..994fded3947b --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/floattitf.c @@ -0,0 +1,82 @@ +//===-- lib/floattitf.c - int128 -> quad-precision conversion -----*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements ti_int to quad-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" +#include "int_lib.h" + +/* Returns: convert a ti_int to a fp_t, rounding toward even. 
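+ * Unlike the narrower integer sources, a ti_int can need up to 128 significant
+ * bits, more than LDBL_MANT_DIG (113), so the guard/sticky rounding path below
+ * is genuinely exercised.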
*/ + +/* Assumption: fp_t is a IEEE 128 bit floating point type + * ti_int is a 128 bit integral type + */ + +/* seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | + * mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + */ + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +COMPILER_RT_ABI fp_t +__floattitf(ti_int a) { + if (a == 0) + return 0.0; + const unsigned N = sizeof(ti_int) * CHAR_BIT; + const ti_int s = a >> (N-1); + a = (a ^ s) - s; + int sd = N - __clzti2(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > LDBL_MANT_DIG) { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit LDBL_MANT_DIG-1 bits to the right of 1 + * Q = bit LDBL_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) { + case LDBL_MANT_DIG + 1: + a <<= 1; + break; + case LDBL_MANT_DIG + 2: + break; + default: + a = ((tu_int)a >> (sd - (LDBL_MANT_DIG+2))) | + ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */ + if (a & ((tu_int)1 << LDBL_MANT_DIG)) { + a >>= 1; + ++e; + } + /* a is now rounded to LDBL_MANT_DIG bits */ + } else { + a <<= (LDBL_MANT_DIG - sd); + /* a is now rounded to LDBL_MANT_DIG bits */ + } + + long_double_bits fb; + fb.u.high.all = (s & 0x8000000000000000LL) /* sign */ + | (du_int)(e + 16383) << 48 /* exponent */ + | ((a >> 64) & 0x0000ffffffffffffLL); /* significand */ + fb.u.low.all = (du_int)(a); + return fb.f; +} + +#endif diff --git a/contrib/compiler-rt/lib/builtins/floattixf.c b/contrib/compiler-rt/lib/builtins/floattixf.c new file mode 100644 index 000000000000..1203b3a96e75 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/floattixf.c @@ -0,0 +1,84 @@ +/* ===-- floattixf.c - Implement __floattixf -------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __floattixf for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: convert a to a long double, rounding toward even. 
*/ + +/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits + * ti_int is a 128 bit integral type + */ + +/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | + * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + */ + +COMPILER_RT_ABI long double +__floattixf(ti_int a) +{ + if (a == 0) + return 0.0; + const unsigned N = sizeof(ti_int) * CHAR_BIT; + const ti_int s = a >> (N-1); + a = (a ^ s) - s; + int sd = N - __clzti2(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > LDBL_MANT_DIG) + { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit LDBL_MANT_DIG-1 bits to the right of 1 + * Q = bit LDBL_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) + { + case LDBL_MANT_DIG + 1: + a <<= 1; + break; + case LDBL_MANT_DIG + 2: + break; + default: + a = ((tu_int)a >> (sd - (LDBL_MANT_DIG+2))) | + ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */ + if (a & ((tu_int)1 << LDBL_MANT_DIG)) + { + a >>= 1; + ++e; + } + /* a is now rounded to LDBL_MANT_DIG bits */ + } + else + { + a <<= (LDBL_MANT_DIG - sd); + /* a is now rounded to LDBL_MANT_DIG bits */ + } + long_double_bits fb; + fb.u.high.s.low = ((su_int)s & 0x8000) | /* sign */ + (e + 16383); /* exponent */ + fb.u.low.all = (du_int)a; /* mantissa */ + return fb.f; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/floatundidf.c b/contrib/compiler-rt/lib/builtins/floatundidf.c new file mode 100644 index 000000000000..8bc2a096324f --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/floatundidf.c @@ -0,0 +1,114 @@ +/* ===-- floatundidf.c - Implement __floatundidf ---------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __floatundidf for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +/* Returns: convert a to a double, rounding toward even. */ + +/* Assumption: double is a IEEE 64 bit floating point type + * du_int is a 64 bit integral type + */ + +/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */ + +#include "int_lib.h" + +#ifndef __SOFT_FP__ +/* Support for systems that have hardware floating-point; we'll set the inexact flag + * as a side-effect of this computation. 
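+ * The trick below plants the high and low 32-bit halves of a into the
+ * significands of 0x1.0p84 and 0x1.0p52; both unions then hold exact values,
+ * the subtraction of twop84_plus_twop52 cancels the scaffolding exactly, and
+ * the only rounding happens in the final addition.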
+ */ + +COMPILER_RT_ABI double +__floatundidf(du_int a) +{ + static const double twop52 = 4503599627370496.0; // 0x1.0p52 + static const double twop84 = 19342813113834066795298816.0; // 0x1.0p84 + static const double twop84_plus_twop52 = 19342813118337666422669312.0; // 0x1.00000001p84 + + union { uint64_t x; double d; } high = { .d = twop84 }; + union { uint64_t x; double d; } low = { .d = twop52 }; + + high.x |= a >> 32; + low.x |= a & UINT64_C(0x00000000ffffffff); + + const double result = (high.d - twop84_plus_twop52) + low.d; + return result; +} + +#else +/* Support for systems that don't have hardware floating-point; there are no flags to + * set, and we don't want to code-gen to an unknown soft-float implementation. + */ + +COMPILER_RT_ABI double +__floatundidf(du_int a) +{ + if (a == 0) + return 0.0; + const unsigned N = sizeof(du_int) * CHAR_BIT; + int sd = N - __builtin_clzll(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > DBL_MANT_DIG) + { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit DBL_MANT_DIG-1 bits to the right of 1 + * Q = bit DBL_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) + { + case DBL_MANT_DIG + 1: + a <<= 1; + break; + case DBL_MANT_DIG + 2: + break; + default: + a = (a >> (sd - (DBL_MANT_DIG+2))) | + ((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */ + if (a & ((du_int)1 << DBL_MANT_DIG)) + { + a >>= 1; + ++e; + } + /* a is now rounded to DBL_MANT_DIG bits */ + } + else + { + a <<= (DBL_MANT_DIG - sd); + /* a is now rounded to DBL_MANT_DIG bits */ + } + double_bits fb; + fb.u.s.high = ((e + 1023) << 20) | /* exponent */ + ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */ + fb.u.s.low = (su_int)a; /* mantissa-low */ + return fb.f; +} +#endif + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI double __aeabi_ul2d(du_int a) { + return __floatundidf(a); +} +#else +AEABI_RTABI double __aeabi_ul2d(du_int a) COMPILER_RT_ALIAS(__floatundidf); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/floatundisf.c b/contrib/compiler-rt/lib/builtins/floatundisf.c new file mode 100644 index 000000000000..844786ea7777 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/floatundisf.c @@ -0,0 +1,85 @@ +/*===-- floatundisf.c - Implement __floatundisf ---------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __floatundisf for the compiler_rt library. + * + *===----------------------------------------------------------------------=== + */ + +/* Returns: convert a to a float, rounding toward even. 
*/ + +/* Assumption: float is a IEEE 32 bit floating point type + * du_int is a 64 bit integral type + */ + +/* seee eeee emmm mmmm mmmm mmmm mmmm mmmm */ + +#include "int_lib.h" + +COMPILER_RT_ABI float +__floatundisf(du_int a) +{ + if (a == 0) + return 0.0F; + const unsigned N = sizeof(du_int) * CHAR_BIT; + int sd = N - __builtin_clzll(a); /* number of significant digits */ + int e = sd - 1; /* 8 exponent */ + if (sd > FLT_MANT_DIG) + { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit FLT_MANT_DIG-1 bits to the right of 1 + * Q = bit FLT_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) + { + case FLT_MANT_DIG + 1: + a <<= 1; + break; + case FLT_MANT_DIG + 2: + break; + default: + a = (a >> (sd - (FLT_MANT_DIG+2))) | + ((a & ((du_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */ + if (a & ((du_int)1 << FLT_MANT_DIG)) + { + a >>= 1; + ++e; + } + /* a is now rounded to FLT_MANT_DIG bits */ + } + else + { + a <<= (FLT_MANT_DIG - sd); + /* a is now rounded to FLT_MANT_DIG bits */ + } + float_bits fb; + fb.u = ((e + 127) << 23) | /* exponent */ + ((su_int)a & 0x007FFFFF); /* mantissa */ + return fb.f; +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI float __aeabi_ul2f(du_int a) { + return __floatundisf(a); +} +#else +AEABI_RTABI float __aeabi_ul2f(du_int a) COMPILER_RT_ALIAS(__floatundisf); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/floatunditf.c b/contrib/compiler-rt/lib/builtins/floatunditf.c new file mode 100644 index 000000000000..8098e95e82bc --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/floatunditf.c @@ -0,0 +1,40 @@ +//===-- lib/floatunditf.c - uint -> quad-precision conversion -----*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements du_int to quad-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +COMPILER_RT_ABI fp_t __floatunditf(du_int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) return fromRep(0); + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - __builtin_clzll(a); + rep_t result; + + // Shift a into the significand field and clear the implicit bit. 
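+  // A du_int has at most 64 significant bits while the quad significand field
+  // holds 112, so the shift below is always a left shift and the result is
+  // exact; no rounding is required.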
+ const int shift = significandBits - exponent; + result = (rep_t)a << shift ^ implicitBit; + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + return fromRep(result); +} + +#endif diff --git a/contrib/compiler-rt/lib/builtins/floatundixf.c b/contrib/compiler-rt/lib/builtins/floatundixf.c new file mode 100644 index 000000000000..ca5e06d64dcd --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/floatundixf.c @@ -0,0 +1,42 @@ +/* ===-- floatundixf.c - Implement __floatundixf ---------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __floatundixf for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#if !_ARCH_PPC + +#include "int_lib.h" + +/* Returns: convert a to a long double, rounding toward even. */ + +/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits + * du_int is a 64 bit integral type + */ + +/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | + * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + */ +COMPILER_RT_ABI long double +__floatundixf(du_int a) +{ + if (a == 0) + return 0.0; + const unsigned N = sizeof(du_int) * CHAR_BIT; + int clz = __builtin_clzll(a); + int e = (N - 1) - clz ; /* exponent */ + long_double_bits fb; + fb.u.high.s.low = (e + 16383); /* exponent */ + fb.u.low.all = a << clz; /* mantissa */ + return fb.f; +} + +#endif /* _ARCH_PPC */ diff --git a/contrib/compiler-rt/lib/builtins/floatunsidf.c b/contrib/compiler-rt/lib/builtins/floatunsidf.c new file mode 100644 index 000000000000..75cf6b9177df --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/floatunsidf.c @@ -0,0 +1,50 @@ +//===-- lib/floatunsidf.c - uint -> double-precision conversion ---*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements unsigned integer to double-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_lib.h" + +#include "int_lib.h" + +COMPILER_RT_ABI fp_t +__floatunsidf(unsigned int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) return fromRep(0); + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - __builtin_clz(a); + rep_t result; + + // Shift a into the significand field and clear the implicit bit. 
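+  // A 32-bit source has at most 32 significant bits, which always fit in the
+  // 52-bit significand field plus implicit bit, so the shift below is a left
+  // shift and no rounding is needed.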
+ const int shift = significandBits - exponent; + result = (rep_t)a << shift ^ implicitBit; + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + return fromRep(result); +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI fp_t __aeabi_ui2d(unsigned int a) { + return __floatunsidf(a); +} +#else +AEABI_RTABI fp_t __aeabi_ui2d(unsigned int a) COMPILER_RT_ALIAS(__floatunsidf); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/floatunsisf.c b/contrib/compiler-rt/lib/builtins/floatunsisf.c new file mode 100644 index 000000000000..29525ccedbbe --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/floatunsisf.c @@ -0,0 +1,58 @@ +//===-- lib/floatunsisf.c - uint -> single-precision conversion ---*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements unsigned integer to single-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_lib.h" + +#include "int_lib.h" + +COMPILER_RT_ABI fp_t +__floatunsisf(unsigned int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) return fromRep(0); + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - __builtin_clz(a); + rep_t result; + + // Shift a into the significand field, rounding if it is a right-shift + if (exponent <= significandBits) { + const int shift = significandBits - exponent; + result = (rep_t)a << shift ^ implicitBit; + } else { + const int shift = exponent - significandBits; + result = (rep_t)a >> shift ^ implicitBit; + rep_t round = (rep_t)a << (typeWidth - shift); + if (round > signBit) result++; + if (round == signBit) result += result & 1; + } + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + return fromRep(result); +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI fp_t __aeabi_ui2f(unsigned int a) { + return __floatunsisf(a); +} +#else +AEABI_RTABI fp_t __aeabi_ui2f(unsigned int a) COMPILER_RT_ALIAS(__floatunsisf); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/floatunsitf.c b/contrib/compiler-rt/lib/builtins/floatunsitf.c new file mode 100644 index 000000000000..1cd1842e709a --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/floatunsitf.c @@ -0,0 +1,40 @@ +//===-- lib/floatunsitf.c - uint -> quad-precision conversion -----*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements unsigned integer to quad-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. 
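+// Unlike the single-precision variant, no rounding branch is needed here: a
+// 32-bit value always fits in the 112-bit quad significand, so the result is
+// exact.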
+// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +COMPILER_RT_ABI fp_t __floatunsitf(unsigned int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) return fromRep(0); + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - __builtin_clz(a); + rep_t result; + + // Shift a into the significand field and clear the implicit bit. + const int shift = significandBits - exponent; + result = (rep_t)a << shift ^ implicitBit; + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + return fromRep(result); +} + +#endif diff --git a/contrib/compiler-rt/lib/builtins/floatuntidf.c b/contrib/compiler-rt/lib/builtins/floatuntidf.c new file mode 100644 index 000000000000..960265d80772 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/floatuntidf.c @@ -0,0 +1,80 @@ +/* ===-- floatuntidf.c - Implement __floatuntidf ---------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __floatuntidf for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: convert a to a double, rounding toward even. */ + +/* Assumption: double is a IEEE 64 bit floating point type + * tu_int is a 128 bit integral type + */ + +/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */ + +COMPILER_RT_ABI double +__floatuntidf(tu_int a) +{ + if (a == 0) + return 0.0; + const unsigned N = sizeof(tu_int) * CHAR_BIT; + int sd = N - __clzti2(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > DBL_MANT_DIG) + { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit DBL_MANT_DIG-1 bits to the right of 1 + * Q = bit DBL_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) + { + case DBL_MANT_DIG + 1: + a <<= 1; + break; + case DBL_MANT_DIG + 2: + break; + default: + a = (a >> (sd - (DBL_MANT_DIG+2))) | + ((a & ((tu_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */ + if (a & ((tu_int)1 << DBL_MANT_DIG)) + { + a >>= 1; + ++e; + } + /* a is now rounded to DBL_MANT_DIG bits */ + } + else + { + a <<= (DBL_MANT_DIG - sd); + /* a is now rounded to DBL_MANT_DIG bits */ + } + double_bits fb; + fb.u.s.high = ((e + 1023) << 20) | /* exponent */ + ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */ + fb.u.s.low = (su_int)a; /* mantissa-low */ + return fb.f; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/floatuntisf.c b/contrib/compiler-rt/lib/builtins/floatuntisf.c new file mode 100644 index 000000000000..c0dd0275ddba --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/floatuntisf.c @@ -0,0 +1,79 @@ +/* 
===-- floatuntisf.c - Implement __floatuntisf ---------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __floatuntisf for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: convert a to a float, rounding toward even. */ + +/* Assumption: float is a IEEE 32 bit floating point type + * tu_int is a 128 bit integral type + */ + +/* seee eeee emmm mmmm mmmm mmmm mmmm mmmm */ + +COMPILER_RT_ABI float +__floatuntisf(tu_int a) +{ + if (a == 0) + return 0.0F; + const unsigned N = sizeof(tu_int) * CHAR_BIT; + int sd = N - __clzti2(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > FLT_MANT_DIG) + { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit FLT_MANT_DIG-1 bits to the right of 1 + * Q = bit FLT_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) + { + case FLT_MANT_DIG + 1: + a <<= 1; + break; + case FLT_MANT_DIG + 2: + break; + default: + a = (a >> (sd - (FLT_MANT_DIG+2))) | + ((a & ((tu_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */ + if (a & ((tu_int)1 << FLT_MANT_DIG)) + { + a >>= 1; + ++e; + } + /* a is now rounded to FLT_MANT_DIG bits */ + } + else + { + a <<= (FLT_MANT_DIG - sd); + /* a is now rounded to FLT_MANT_DIG bits */ + } + float_bits fb; + fb.u = ((e + 127) << 23) | /* exponent */ + ((su_int)a & 0x007FFFFF); /* mantissa */ + return fb.f; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/floatuntitf.c b/contrib/compiler-rt/lib/builtins/floatuntitf.c new file mode 100644 index 000000000000..e2518c93f234 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/floatuntitf.c @@ -0,0 +1,79 @@ +//===-- lib/floatuntitf.c - uint128 -> quad-precision conversion --*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements tu_int to quad-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" +#include "int_lib.h" + +/* Returns: convert a tu_int to a fp_t, rounding toward even. 
*/ + +/* Assumption: fp_t is a IEEE 128 bit floating point type + * tu_int is a 128 bit integral type + */ + +/* seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | + * mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + */ + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +COMPILER_RT_ABI fp_t +__floatuntitf(tu_int a) { + if (a == 0) + return 0.0; + const unsigned N = sizeof(tu_int) * CHAR_BIT; + int sd = N - __clzti2(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > LDBL_MANT_DIG) { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit LDBL_MANT_DIG-1 bits to the right of 1 + * Q = bit LDBL_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) { + case LDBL_MANT_DIG + 1: + a <<= 1; + break; + case LDBL_MANT_DIG + 2: + break; + default: + a = (a >> (sd - (LDBL_MANT_DIG+2))) | + ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */ + if (a & ((tu_int)1 << LDBL_MANT_DIG)) { + a >>= 1; + ++e; + } + /* a is now rounded to LDBL_MANT_DIG bits */ + } else { + a <<= (LDBL_MANT_DIG - sd); + /* a is now rounded to LDBL_MANT_DIG bits */ + } + + long_double_bits fb; + fb.u.high.all = (du_int)(e + 16383) << 48 /* exponent */ + | ((a >> 64) & 0x0000ffffffffffffLL); /* significand */ + fb.u.low.all = (du_int)(a); + return fb.f; +} + +#endif diff --git a/contrib/compiler-rt/lib/builtins/floatuntixf.c b/contrib/compiler-rt/lib/builtins/floatuntixf.c new file mode 100644 index 000000000000..ea81cb1bcdae --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/floatuntixf.c @@ -0,0 +1,81 @@ +/* ===-- floatuntixf.c - Implement __floatuntixf ---------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __floatuntixf for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: convert a to a long double, rounding toward even. 
*/ + +/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits + * tu_int is a 128 bit integral type + */ + +/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | + * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + */ + +COMPILER_RT_ABI long double +__floatuntixf(tu_int a) +{ + if (a == 0) + return 0.0; + const unsigned N = sizeof(tu_int) * CHAR_BIT; + int sd = N - __clzti2(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > LDBL_MANT_DIG) + { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit LDBL_MANT_DIG-1 bits to the right of 1 + * Q = bit LDBL_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) + { + case LDBL_MANT_DIG + 1: + a <<= 1; + break; + case LDBL_MANT_DIG + 2: + break; + default: + a = (a >> (sd - (LDBL_MANT_DIG+2))) | + ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */ + if (a & ((tu_int)1 << LDBL_MANT_DIG)) + { + a >>= 1; + ++e; + } + /* a is now rounded to LDBL_MANT_DIG bits */ + } + else + { + a <<= (LDBL_MANT_DIG - sd); + /* a is now rounded to LDBL_MANT_DIG bits */ + } + long_double_bits fb; + fb.u.high.s.low = (e + 16383); /* exponent */ + fb.u.low.all = (du_int)a; /* mantissa */ + return fb.f; +} + +#endif diff --git a/contrib/compiler-rt/lib/builtins/fp_add_impl.inc b/contrib/compiler-rt/lib/builtins/fp_add_impl.inc new file mode 100644 index 000000000000..b47be1b648e6 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fp_add_impl.inc @@ -0,0 +1,144 @@ +//===----- lib/fp_add_impl.inc - floaing point addition -----------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements soft-float addition with the IEEE-754 default rounding +// (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#include "fp_lib.h" + +static __inline fp_t __addXf3__(fp_t a, fp_t b) { + rep_t aRep = toRep(a); + rep_t bRep = toRep(b); + const rep_t aAbs = aRep & absMask; + const rep_t bAbs = bRep & absMask; + + // Detect if a or b is zero, infinity, or NaN. 
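+    // The unsigned subtraction wraps aAbs == 0 around to the maximum rep_t, so
+    // this single compare is true exactly when aAbs is zero, infinity, or NaN.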
+ if (aAbs - REP_C(1) >= infRep - REP_C(1) || + bAbs - REP_C(1) >= infRep - REP_C(1)) { + // NaN + anything = qNaN + if (aAbs > infRep) return fromRep(toRep(a) | quietBit); + // anything + NaN = qNaN + if (bAbs > infRep) return fromRep(toRep(b) | quietBit); + + if (aAbs == infRep) { + // +/-infinity + -/+infinity = qNaN + if ((toRep(a) ^ toRep(b)) == signBit) return fromRep(qnanRep); + // +/-infinity + anything remaining = +/- infinity + else return a; + } + + // anything remaining + +/-infinity = +/-infinity + if (bAbs == infRep) return b; + + // zero + anything = anything + if (!aAbs) { + // but we need to get the sign right for zero + zero + if (!bAbs) return fromRep(toRep(a) & toRep(b)); + else return b; + } + + // anything + zero = anything + if (!bAbs) return a; + } + + // Swap a and b if necessary so that a has the larger absolute value. + if (bAbs > aAbs) { + const rep_t temp = aRep; + aRep = bRep; + bRep = temp; + } + + // Extract the exponent and significand from the (possibly swapped) a and b. + int aExponent = aRep >> significandBits & maxExponent; + int bExponent = bRep >> significandBits & maxExponent; + rep_t aSignificand = aRep & significandMask; + rep_t bSignificand = bRep & significandMask; + + // Normalize any denormals, and adjust the exponent accordingly. + if (aExponent == 0) aExponent = normalize(&aSignificand); + if (bExponent == 0) bExponent = normalize(&bSignificand); + + // The sign of the result is the sign of the larger operand, a. If they + // have opposite signs, we are performing a subtraction; otherwise addition. + const rep_t resultSign = aRep & signBit; + const bool subtraction = (aRep ^ bRep) & signBit; + + // Shift the significands to give us round, guard and sticky, and or in the + // implicit significand bit. (If we fell through from the denormal path it + // was already set by normalize( ), but setting it twice won't hurt + // anything.) + aSignificand = (aSignificand | implicitBit) << 3; + bSignificand = (bSignificand | implicitBit) << 3; + + // Shift the significand of b by the difference in exponents, with a sticky + // bottom bit to get rounding correct. + const unsigned int align = aExponent - bExponent; + if (align) { + if (align < typeWidth) { + const bool sticky = bSignificand << (typeWidth - align); + bSignificand = bSignificand >> align | sticky; + } else { + bSignificand = 1; // sticky; b is known to be non-zero. + } + } + if (subtraction) { + aSignificand -= bSignificand; + // If a == -b, return +zero. + if (aSignificand == 0) return fromRep(0); + + // If partial cancellation occured, we need to left-shift the result + // and adjust the exponent: + if (aSignificand < implicitBit << 3) { + const int shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3); + aSignificand <<= shift; + aExponent -= shift; + } + } + else /* addition */ { + aSignificand += bSignificand; + + // If the addition carried up, we need to right-shift the result and + // adjust the exponent: + if (aSignificand & implicitBit << 4) { + const bool sticky = aSignificand & 1; + aSignificand = aSignificand >> 1 | sticky; + aExponent += 1; + } + } + + // If we have overflowed the type, return +/- infinity: + if (aExponent >= maxExponent) return fromRep(infRep | resultSign); + + if (aExponent <= 0) { + // Result is denormal before rounding; the exponent is zero and we + // need to shift the significand. 
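+        // Any bits shifted out are folded into the lowest (sticky) bit so that
+        // the final rounding below still observes them.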
+ const int shift = 1 - aExponent; + const bool sticky = aSignificand << (typeWidth - shift); + aSignificand = aSignificand >> shift | sticky; + aExponent = 0; + } + + // Low three bits are round, guard, and sticky. + const int roundGuardSticky = aSignificand & 0x7; + + // Shift the significand into place, and mask off the implicit bit. + rep_t result = aSignificand >> 3 & significandMask; + + // Insert the exponent and sign. + result |= (rep_t)aExponent << significandBits; + result |= resultSign; + + // Final rounding. The result may overflow to infinity, but that is the + // correct result in that case. + if (roundGuardSticky > 0x4) result++; + if (roundGuardSticky == 0x4) result += result & 1; + return fromRep(result); +} diff --git a/contrib/compiler-rt/lib/builtins/fp_extend.h b/contrib/compiler-rt/lib/builtins/fp_extend.h new file mode 100644 index 000000000000..6d95a0680709 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fp_extend.h @@ -0,0 +1,89 @@ +//===-lib/fp_extend.h - low precision -> high precision conversion -*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Set source and destination setting +// +//===----------------------------------------------------------------------===// + +#ifndef FP_EXTEND_HEADER +#define FP_EXTEND_HEADER + +#include "int_lib.h" + +#if defined SRC_SINGLE +typedef float src_t; +typedef uint32_t src_rep_t; +#define SRC_REP_C UINT32_C +static const int srcSigBits = 23; +#define src_rep_t_clz __builtin_clz + +#elif defined SRC_DOUBLE +typedef double src_t; +typedef uint64_t src_rep_t; +#define SRC_REP_C UINT64_C +static const int srcSigBits = 52; +static __inline int src_rep_t_clz(src_rep_t a) { +#if defined __LP64__ + return __builtin_clzl(a); +#else + if (a & REP_C(0xffffffff00000000)) + return __builtin_clz(a >> 32); + else + return 32 + __builtin_clz(a & REP_C(0xffffffff)); +#endif +} + +#elif defined SRC_HALF +typedef uint16_t src_t; +typedef uint16_t src_rep_t; +#define SRC_REP_C UINT16_C +static const int srcSigBits = 10; +#define src_rep_t_clz __builtin_clz + +#else +#error Source should be half, single, or double precision! +#endif //end source precision + +#if defined DST_SINGLE +typedef float dst_t; +typedef uint32_t dst_rep_t; +#define DST_REP_C UINT32_C +static const int dstSigBits = 23; + +#elif defined DST_DOUBLE +typedef double dst_t; +typedef uint64_t dst_rep_t; +#define DST_REP_C UINT64_C +static const int dstSigBits = 52; + +#elif defined DST_QUAD +typedef long double dst_t; +typedef __uint128_t dst_rep_t; +#define DST_REP_C (__uint128_t) +static const int dstSigBits = 112; + +#else +#error Destination should be single, double, or quad precision! +#endif //end destination precision + +// End of specialization parameters. Two helper routines for conversion to and +// from the representation of floating-point data as integer values follow. + +static __inline src_rep_t srcToRep(src_t x) { + const union { src_t f; src_rep_t i; } rep = {.f = x}; + return rep.i; +} + +static __inline dst_t dstFromRep(dst_rep_t x) { + const union { dst_t f; dst_rep_t i; } rep = {.i = x}; + return rep.f; +} +// End helper routines. Conversion implementation follows. 
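+// Rough usage sketch (illustrative, not part of this header): a concrete
+// widening builtin is typically produced by choosing the source and
+// destination precisions before including fp_extend_impl.inc, e.g. for
+// float -> double:
+//
+//   #define SRC_SINGLE
+//   #define DST_DOUBLE
+//   #include "fp_extend_impl.inc"
+//
+//   COMPILER_RT_ABI double __extendsfdf2(float a) {
+//     return __extendXfYf2__(a);
+//   }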
+ +#endif //FP_EXTEND_HEADER diff --git a/contrib/compiler-rt/lib/builtins/fp_extend_impl.inc b/contrib/compiler-rt/lib/builtins/fp_extend_impl.inc new file mode 100644 index 000000000000..b785cc7687ad --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fp_extend_impl.inc @@ -0,0 +1,108 @@ +//=-lib/fp_extend_impl.inc - low precision -> high precision conversion -*-- -// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a fairly generic conversion from a narrower to a wider +// IEEE-754 floating-point type. The constants and types defined following the +// includes below parameterize the conversion. +// +// It does not support types that don't use the usual IEEE-754 interchange +// formats; specifically, some work would be needed to adapt it to +// (for example) the Intel 80-bit format or PowerPC double-double format. +// +// Note please, however, that this implementation is only intended to support +// *widening* operations; if you need to convert to a *narrower* floating-point +// type (e.g. double -> float), then this routine will not do what you want it +// to. +// +// It also requires that integer types at least as large as both formats +// are available on the target platform; this may pose a problem when trying +// to add support for quad on some 32-bit systems, for example. You also may +// run into trouble finding an appropriate CLZ function for wide source types; +// you will likely need to roll your own on some platforms. +// +// Finally, the following assumptions are made: +// +// 1. floating-point types and integer types have the same endianness on the +// target platform +// +// 2. quiet NaNs, if supported, are indicated by the leading bit of the +// significand field being set +// +//===----------------------------------------------------------------------===// + +#include "fp_extend.h" + +static __inline dst_t __extendXfYf2__(src_t a) { + // Various constants whose values follow from the type parameters. + // Any reasonable optimizer will fold and propagate all of these. + const int srcBits = sizeof(src_t)*CHAR_BIT; + const int srcExpBits = srcBits - srcSigBits - 1; + const int srcInfExp = (1 << srcExpBits) - 1; + const int srcExpBias = srcInfExp >> 1; + + const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits; + const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits; + const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits); + const src_rep_t srcAbsMask = srcSignMask - 1; + const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1); + const src_rep_t srcNaNCode = srcQNaN - 1; + + const int dstBits = sizeof(dst_t)*CHAR_BIT; + const int dstExpBits = dstBits - dstSigBits - 1; + const int dstInfExp = (1 << dstExpBits) - 1; + const int dstExpBias = dstInfExp >> 1; + + const dst_rep_t dstMinNormal = DST_REP_C(1) << dstSigBits; + + // Break a into a sign and representation of the absolute value + const src_rep_t aRep = srcToRep(a); + const src_rep_t aAbs = aRep & srcAbsMask; + const src_rep_t sign = aRep & srcSignMask; + dst_rep_t absResult; + + // If sizeof(src_rep_t) < sizeof(int), the subtraction result is promoted + // to (signed) int. To avoid that, explicitly cast to src_rep_t. + if ((src_rep_t)(aAbs - srcMinNormal) < srcInfinity - srcMinNormal) { + // a is a normal number. 
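+    // (This case is exact: the trailing significand is top-aligned in the
+    // wider field with zero fill below it, and only the bias difference is
+    // added to the exponent; for float -> double that adjustment is
+    // 1023 - 127 = 896.)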
+ // Extend to the destination type by shifting the significand and + // exponent into the proper position and rebiasing the exponent. + absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits); + absResult += (dst_rep_t)(dstExpBias - srcExpBias) << dstSigBits; + } + + else if (aAbs >= srcInfinity) { + // a is NaN or infinity. + // Conjure the result by beginning with infinity, then setting the qNaN + // bit (if needed) and right-aligning the rest of the trailing NaN + // payload field. + absResult = (dst_rep_t)dstInfExp << dstSigBits; + absResult |= (dst_rep_t)(aAbs & srcQNaN) << (dstSigBits - srcSigBits); + absResult |= (dst_rep_t)(aAbs & srcNaNCode) << (dstSigBits - srcSigBits); + } + + else if (aAbs) { + // a is denormal. + // renormalize the significand and clear the leading bit, then insert + // the correct adjusted exponent in the destination type. + const int scale = src_rep_t_clz(aAbs) - src_rep_t_clz(srcMinNormal); + absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits + scale); + absResult ^= dstMinNormal; + const int resultExponent = dstExpBias - srcExpBias - scale + 1; + absResult |= (dst_rep_t)resultExponent << dstSigBits; + } + + else { + // a is zero. + absResult = 0; + } + + // Apply the signbit to (dst_t)abs(a). + const dst_rep_t result = absResult | (dst_rep_t)sign << (dstBits - srcBits); + return dstFromRep(result); +} diff --git a/contrib/compiler-rt/lib/builtins/fp_fixint_impl.inc b/contrib/compiler-rt/lib/builtins/fp_fixint_impl.inc new file mode 100644 index 000000000000..da70d4d39301 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fp_fixint_impl.inc @@ -0,0 +1,41 @@ +//===-- lib/fixdfsi.c - Double-precision -> integer conversion ----*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements float to integer conversion for the +// compiler-rt library. +// +//===----------------------------------------------------------------------===// + +#include "fp_lib.h" + +static __inline fixint_t __fixint(fp_t a) { + const fixint_t fixint_max = (fixint_t)((~(fixuint_t)0) / 2); + const fixint_t fixint_min = -fixint_max - 1; + // Break a into sign, exponent, significand + const rep_t aRep = toRep(a); + const rep_t aAbs = aRep & absMask; + const fixint_t sign = aRep & signBit ? -1 : 1; + const int exponent = (aAbs >> significandBits) - exponentBias; + const rep_t significand = (aAbs & significandMask) | implicitBit; + + // If exponent is negative, the result is zero. + if (exponent < 0) + return 0; + + // If the value is too large for the integer type, saturate. + if ((unsigned)exponent >= sizeof(fixint_t) * CHAR_BIT) + return sign == 1 ? fixint_max : fixint_min; + + // If 0 <= exponent < significandBits, right shift to get the result. + // Otherwise, shift left. 
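+  // (Worked example for a double argument of 2.5: exponent = 1 and the
+  // significand, with the implicit bit, is 1.01b scaled by 2^52; shifting
+  // right by 52 - 1 = 51 truncates toward zero and yields 2, and the sign is
+  // applied afterwards.)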
+ if (exponent < significandBits) + return sign * (significand >> (significandBits - exponent)); + else + return sign * ((fixint_t)significand << (exponent - significandBits)); +} diff --git a/contrib/compiler-rt/lib/builtins/fp_fixuint_impl.inc b/contrib/compiler-rt/lib/builtins/fp_fixuint_impl.inc new file mode 100644 index 000000000000..d68ccf27a79c --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fp_fixuint_impl.inc @@ -0,0 +1,39 @@ +//===-- lib/fixdfsi.c - Double-precision -> integer conversion ----*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements float to unsigned integer conversion for the +// compiler-rt library. +// +//===----------------------------------------------------------------------===// + +#include "fp_lib.h" + +static __inline fixuint_t __fixuint(fp_t a) { + // Break a into sign, exponent, significand + const rep_t aRep = toRep(a); + const rep_t aAbs = aRep & absMask; + const int sign = aRep & signBit ? -1 : 1; + const int exponent = (aAbs >> significandBits) - exponentBias; + const rep_t significand = (aAbs & significandMask) | implicitBit; + + // If either the value or the exponent is negative, the result is zero. + if (sign == -1 || exponent < 0) + return 0; + + // If the value is too large for the integer type, saturate. + if ((unsigned)exponent >= sizeof(fixuint_t) * CHAR_BIT) + return ~(fixuint_t)0; + + // If 0 <= exponent < significandBits, right shift to get the result. + // Otherwise, shift left. + if (exponent < significandBits) + return significand >> (significandBits - exponent); + else + return (fixuint_t)significand << (exponent - significandBits); +} diff --git a/contrib/compiler-rt/lib/builtins/fp_lib.h b/contrib/compiler-rt/lib/builtins/fp_lib.h new file mode 100644 index 000000000000..a0e19ab6a8f6 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fp_lib.h @@ -0,0 +1,327 @@ +//===-- lib/fp_lib.h - Floating-point utilities -------------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a configuration header for soft-float routines in compiler-rt. +// This file does not provide any part of the compiler-rt interface, but defines +// many useful constants and utility routines that are used in the +// implementation of the soft-float routines in compiler-rt. +// +// Assumes that float, double and long double correspond to the IEEE-754 +// binary32, binary64 and binary 128 types, respectively, and that integer +// endianness matches floating point endianness on the target platform. +// +//===----------------------------------------------------------------------===// + +#ifndef FP_LIB_HEADER +#define FP_LIB_HEADER + +#include <stdint.h> +#include <stdbool.h> +#include <limits.h> +#include "int_lib.h" +#include "int_math.h" + +// x86_64 FreeBSD prior v9.3 define fixed-width types incorrectly in +// 32-bit mode. 
+#if defined(__FreeBSD__) && defined(__i386__) +# include <sys/param.h> +# if __FreeBSD_version < 903000 // v9.3 +# define uint64_t unsigned long long +# define int64_t long long +# undef UINT64_C +# define UINT64_C(c) (c ## ULL) +# endif +#endif + +#if defined SINGLE_PRECISION + +typedef uint32_t rep_t; +typedef int32_t srep_t; +typedef float fp_t; +#define REP_C UINT32_C +#define significandBits 23 + +static __inline int rep_clz(rep_t a) { + return __builtin_clz(a); +} + +// 32x32 --> 64 bit multiply +static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { + const uint64_t product = (uint64_t)a*b; + *hi = product >> 32; + *lo = product; +} +COMPILER_RT_ABI fp_t __addsf3(fp_t a, fp_t b); + +#elif defined DOUBLE_PRECISION + +typedef uint64_t rep_t; +typedef int64_t srep_t; +typedef double fp_t; +#define REP_C UINT64_C +#define significandBits 52 + +static __inline int rep_clz(rep_t a) { +#if defined __LP64__ + return __builtin_clzl(a); +#else + if (a & REP_C(0xffffffff00000000)) + return __builtin_clz(a >> 32); + else + return 32 + __builtin_clz(a & REP_C(0xffffffff)); +#endif +} + +#define loWord(a) (a & 0xffffffffU) +#define hiWord(a) (a >> 32) + +// 64x64 -> 128 wide multiply for platforms that don't have such an operation; +// many 64-bit platforms have this operation, but they tend to have hardware +// floating-point, so we don't bother with a special case for them here. +static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { + // Each of the component 32x32 -> 64 products + const uint64_t plolo = loWord(a) * loWord(b); + const uint64_t plohi = loWord(a) * hiWord(b); + const uint64_t philo = hiWord(a) * loWord(b); + const uint64_t phihi = hiWord(a) * hiWord(b); + // Sum terms that contribute to lo in a way that allows us to get the carry + const uint64_t r0 = loWord(plolo); + const uint64_t r1 = hiWord(plolo) + loWord(plohi) + loWord(philo); + *lo = r0 + (r1 << 32); + // Sum terms contributing to hi with the carry from lo + *hi = hiWord(plohi) + hiWord(philo) + hiWord(r1) + phihi; +} +#undef loWord +#undef hiWord + +COMPILER_RT_ABI fp_t __adddf3(fp_t a, fp_t b); + +#elif defined QUAD_PRECISION +#if __LDBL_MANT_DIG__ == 113 +#define CRT_LDBL_128BIT +typedef __uint128_t rep_t; +typedef __int128_t srep_t; +typedef long double fp_t; +#define REP_C (__uint128_t) +// Note: Since there is no explicit way to tell compiler the constant is a +// 128-bit integer, we let the constant be casted to 128-bit integer +#define significandBits 112 + +static __inline int rep_clz(rep_t a) { + const union + { + __uint128_t ll; +#if _YUGA_BIG_ENDIAN + struct { uint64_t high, low; } s; +#else + struct { uint64_t low, high; } s; +#endif + } uu = { .ll = a }; + + uint64_t word; + uint64_t add; + + if (uu.s.high){ + word = uu.s.high; + add = 0; + } + else{ + word = uu.s.low; + add = 64; + } + return __builtin_clzll(word) + add; +} + +#define Word_LoMask UINT64_C(0x00000000ffffffff) +#define Word_HiMask UINT64_C(0xffffffff00000000) +#define Word_FullMask UINT64_C(0xffffffffffffffff) +#define Word_1(a) (uint64_t)((a >> 96) & Word_LoMask) +#define Word_2(a) (uint64_t)((a >> 64) & Word_LoMask) +#define Word_3(a) (uint64_t)((a >> 32) & Word_LoMask) +#define Word_4(a) (uint64_t)(a & Word_LoMask) + +// 128x128 -> 256 wide multiply for platforms that don't have such an operation; +// many 64-bit platforms have this operation, but they tend to have hardware +// floating-point, so we don't bother with a special case for them here. 
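+// The routine below is schoolbook multiplication on 32-bit limbs: Word_1
+// through Word_4 split each operand from most to least significant, each
+// cross product productIJ has weight 2^(32*(8-I-J)), and sum0 through sum6
+// collect the partial products of equal weight before the carries are folded
+// into *hi and *lo.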
+static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { + + const uint64_t product11 = Word_1(a) * Word_1(b); + const uint64_t product12 = Word_1(a) * Word_2(b); + const uint64_t product13 = Word_1(a) * Word_3(b); + const uint64_t product14 = Word_1(a) * Word_4(b); + const uint64_t product21 = Word_2(a) * Word_1(b); + const uint64_t product22 = Word_2(a) * Word_2(b); + const uint64_t product23 = Word_2(a) * Word_3(b); + const uint64_t product24 = Word_2(a) * Word_4(b); + const uint64_t product31 = Word_3(a) * Word_1(b); + const uint64_t product32 = Word_3(a) * Word_2(b); + const uint64_t product33 = Word_3(a) * Word_3(b); + const uint64_t product34 = Word_3(a) * Word_4(b); + const uint64_t product41 = Word_4(a) * Word_1(b); + const uint64_t product42 = Word_4(a) * Word_2(b); + const uint64_t product43 = Word_4(a) * Word_3(b); + const uint64_t product44 = Word_4(a) * Word_4(b); + + const __uint128_t sum0 = (__uint128_t)product44; + const __uint128_t sum1 = (__uint128_t)product34 + + (__uint128_t)product43; + const __uint128_t sum2 = (__uint128_t)product24 + + (__uint128_t)product33 + + (__uint128_t)product42; + const __uint128_t sum3 = (__uint128_t)product14 + + (__uint128_t)product23 + + (__uint128_t)product32 + + (__uint128_t)product41; + const __uint128_t sum4 = (__uint128_t)product13 + + (__uint128_t)product22 + + (__uint128_t)product31; + const __uint128_t sum5 = (__uint128_t)product12 + + (__uint128_t)product21; + const __uint128_t sum6 = (__uint128_t)product11; + + const __uint128_t r0 = (sum0 & Word_FullMask) + + ((sum1 & Word_LoMask) << 32); + const __uint128_t r1 = (sum0 >> 64) + + ((sum1 >> 32) & Word_FullMask) + + (sum2 & Word_FullMask) + + ((sum3 << 32) & Word_HiMask); + + *lo = r0 + (r1 << 64); + *hi = (r1 >> 64) + + (sum1 >> 96) + + (sum2 >> 64) + + (sum3 >> 32) + + sum4 + + (sum5 << 32) + + (sum6 << 64); +} +#undef Word_1 +#undef Word_2 +#undef Word_3 +#undef Word_4 +#undef Word_HiMask +#undef Word_LoMask +#undef Word_FullMask +#endif // __LDBL_MANT_DIG__ == 113 +#else +#error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined. 
+#endif + +#if defined(SINGLE_PRECISION) || defined(DOUBLE_PRECISION) || defined(CRT_LDBL_128BIT) +#define typeWidth (sizeof(rep_t)*CHAR_BIT) +#define exponentBits (typeWidth - significandBits - 1) +#define maxExponent ((1 << exponentBits) - 1) +#define exponentBias (maxExponent >> 1) + +#define implicitBit (REP_C(1) << significandBits) +#define significandMask (implicitBit - 1U) +#define signBit (REP_C(1) << (significandBits + exponentBits)) +#define absMask (signBit - 1U) +#define exponentMask (absMask ^ significandMask) +#define oneRep ((rep_t)exponentBias << significandBits) +#define infRep exponentMask +#define quietBit (implicitBit >> 1) +#define qnanRep (exponentMask | quietBit) + +static __inline rep_t toRep(fp_t x) { + const union { fp_t f; rep_t i; } rep = {.f = x}; + return rep.i; +} + +static __inline fp_t fromRep(rep_t x) { + const union { fp_t f; rep_t i; } rep = {.i = x}; + return rep.f; +} + +static __inline int normalize(rep_t *significand) { + const int shift = rep_clz(*significand) - rep_clz(implicitBit); + *significand <<= shift; + return 1 - shift; +} + +static __inline void wideLeftShift(rep_t *hi, rep_t *lo, int count) { + *hi = *hi << count | *lo >> (typeWidth - count); + *lo = *lo << count; +} + +static __inline void wideRightShiftWithSticky(rep_t *hi, rep_t *lo, unsigned int count) { + if (count < typeWidth) { + const bool sticky = *lo << (typeWidth - count); + *lo = *hi << (typeWidth - count) | *lo >> count | sticky; + *hi = *hi >> count; + } + else if (count < 2*typeWidth) { + const bool sticky = *hi << (2*typeWidth - count) | *lo; + *lo = *hi >> (count - typeWidth) | sticky; + *hi = 0; + } else { + const bool sticky = *hi | *lo; + *lo = sticky; + *hi = 0; + } +} + +// Implements logb methods (logb, logbf, logbl) for IEEE-754. This avoids +// pulling in a libm dependency from compiler-rt, but is not meant to replace +// it (i.e. code calling logb() should get the one from libm, not this), hence +// the __compiler_rt prefix. +static __inline fp_t __compiler_rt_logbX(fp_t x) { + rep_t rep = toRep(x); + int exp = (rep & exponentMask) >> significandBits; + + // Abnormal cases: + // 1) +/- inf returns +inf; NaN returns NaN + // 2) 0.0 returns -inf + if (exp == maxExponent) { + if (((rep & signBit) == 0) || (x != x)) { + return x; // NaN or +inf: return x + } else { + return -x; // -inf: return -x + } + } else if (x == 0.0) { + // 0.0: return -inf + return fromRep(infRep | signBit); + } + + if (exp != 0) { + // Normal number + return exp - exponentBias; // Unbias exponent + } else { + // Subnormal number; normalize and repeat + rep &= absMask; + const int shift = 1 - normalize(&rep); + exp = (rep & exponentMask) >> significandBits; + return exp - exponentBias - shift; // Unbias exponent + } +} +#endif + +#if defined(SINGLE_PRECISION) +static __inline fp_t __compiler_rt_logbf(fp_t x) { + return __compiler_rt_logbX(x); +} +#elif defined(DOUBLE_PRECISION) +static __inline fp_t __compiler_rt_logb(fp_t x) { + return __compiler_rt_logbX(x); +} +#elif defined(QUAD_PRECISION) + #if defined(CRT_LDBL_128BIT) +static __inline fp_t __compiler_rt_logbl(fp_t x) { + return __compiler_rt_logbX(x); +} + #else +// The generic implementation only works for ieee754 floating point. For other +// floating point types, continue to rely on the libm implementation for now. 
+static __inline long double __compiler_rt_logbl(long double x) { + return crt_logbl(x); +} + #endif +#endif + +#endif // FP_LIB_HEADER diff --git a/contrib/compiler-rt/lib/builtins/fp_mul_impl.inc b/contrib/compiler-rt/lib/builtins/fp_mul_impl.inc new file mode 100644 index 000000000000..b34aa1b8f544 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fp_mul_impl.inc @@ -0,0 +1,116 @@ +//===---- lib/fp_mul_impl.inc - floating point multiplication -----*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements soft-float multiplication with the IEEE-754 default +// rounding (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#include "fp_lib.h" + +static __inline fp_t __mulXf3__(fp_t a, fp_t b) { + const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; + const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; + const rep_t productSign = (toRep(a) ^ toRep(b)) & signBit; + + rep_t aSignificand = toRep(a) & significandMask; + rep_t bSignificand = toRep(b) & significandMask; + int scale = 0; + + // Detect if a or b is zero, denormal, infinity, or NaN. + if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) { + + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + + // NaN * anything = qNaN + if (aAbs > infRep) return fromRep(toRep(a) | quietBit); + // anything * NaN = qNaN + if (bAbs > infRep) return fromRep(toRep(b) | quietBit); + + if (aAbs == infRep) { + // infinity * non-zero = +/- infinity + if (bAbs) return fromRep(aAbs | productSign); + // infinity * zero = NaN + else return fromRep(qnanRep); + } + + if (bAbs == infRep) { + //? non-zero * infinity = +/- infinity + if (aAbs) return fromRep(bAbs | productSign); + // zero * infinity = NaN + else return fromRep(qnanRep); + } + + // zero * anything = +/- zero + if (!aAbs) return fromRep(productSign); + // anything * zero = +/- zero + if (!bAbs) return fromRep(productSign); + + // one or both of a or b is denormal, the other (if applicable) is a + // normal number. Renormalize one or both of a and b, and set scale to + // include the necessary exponent adjustment. + if (aAbs < implicitBit) scale += normalize(&aSignificand); + if (bAbs < implicitBit) scale += normalize(&bSignificand); + } + + // Or in the implicit significand bit. (If we fell through from the + // denormal path it was already set by normalize( ), but setting it twice + // won't hurt anything.) + aSignificand |= implicitBit; + bSignificand |= implicitBit; + + // Get the significand of a*b. Before multiplying the significands, shift + // one of them left to left-align it in the field. Thus, the product will + // have (exponentBits + 2) integral digits, all but two of which must be + // zero. Normalizing this result is just a conditional left-shift by one + // and bumping the exponent accordingly. + rep_t productHi, productLo; + wideMultiply(aSignificand, bSignificand << exponentBits, + &productHi, &productLo); + + int productExponent = aExponent + bExponent - exponentBias + scale; + + // Normalize the significand, adjust exponent if needed. 
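+    // (The product of two significands in [1, 2) lies in [1, 4). If the bit at
+    // implicitBit of productHi is set, the product is in [2, 4) and the
+    // exponent grows by one; otherwise a one-bit left shift puts the leading 1
+    // back at the implicitBit position.)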
+ if (productHi & implicitBit) productExponent++; + else wideLeftShift(&productHi, &productLo, 1); + + // If we have overflowed the type, return +/- infinity. + if (productExponent >= maxExponent) return fromRep(infRep | productSign); + + if (productExponent <= 0) { + // Result is denormal before rounding + // + // If the result is so small that it just underflows to zero, return + // a zero of the appropriate sign. Mathematically there is no need to + // handle this case separately, but we make it a special case to + // simplify the shift logic. + const unsigned int shift = REP_C(1) - (unsigned int)productExponent; + if (shift >= typeWidth) return fromRep(productSign); + + // Otherwise, shift the significand of the result so that the round + // bit is the high bit of productLo. + wideRightShiftWithSticky(&productHi, &productLo, shift); + } + else { + // Result is normal before rounding; insert the exponent. + productHi &= significandMask; + productHi |= (rep_t)productExponent << significandBits; + } + + // Insert the sign of the result: + productHi |= productSign; + + // Final rounding. The final result may overflow to infinity, or underflow + // to zero, but those are the correct results in those cases. We use the + // default IEEE-754 round-to-nearest, ties-to-even rounding mode. + if (productLo > signBit) productHi++; + if (productLo == signBit) productHi += productHi & 1; + return fromRep(productHi); +} diff --git a/contrib/compiler-rt/lib/builtins/fp_trunc.h b/contrib/compiler-rt/lib/builtins/fp_trunc.h new file mode 100644 index 000000000000..d5e79bb5b863 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fp_trunc.h @@ -0,0 +1,76 @@ +//=== lib/fp_trunc.h - high precision -> low precision conversion *- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Set source and destination precision setting +// +//===----------------------------------------------------------------------===// + +#ifndef FP_TRUNC_HEADER +#define FP_TRUNC_HEADER + +#include "int_lib.h" + +#if defined SRC_SINGLE +typedef float src_t; +typedef uint32_t src_rep_t; +#define SRC_REP_C UINT32_C +static const int srcSigBits = 23; + +#elif defined SRC_DOUBLE +typedef double src_t; +typedef uint64_t src_rep_t; +#define SRC_REP_C UINT64_C +static const int srcSigBits = 52; + +#elif defined SRC_QUAD +typedef long double src_t; +typedef __uint128_t src_rep_t; +#define SRC_REP_C (__uint128_t) +static const int srcSigBits = 112; + +#else +#error Source should be double precision or quad precision! +#endif //end source precision + +#if defined DST_DOUBLE +typedef double dst_t; +typedef uint64_t dst_rep_t; +#define DST_REP_C UINT64_C +static const int dstSigBits = 52; + +#elif defined DST_SINGLE +typedef float dst_t; +typedef uint32_t dst_rep_t; +#define DST_REP_C UINT32_C +static const int dstSigBits = 23; + +#elif defined DST_HALF +typedef uint16_t dst_t; +typedef uint16_t dst_rep_t; +#define DST_REP_C UINT16_C +static const int dstSigBits = 10; + +#else +#error Destination should be single precision or double precision! +#endif //end destination precision + +// End of specialization parameters. Two helper routines for conversion to and +// from the representation of floating-point data as integer values follow. 
+ +static __inline src_rep_t srcToRep(src_t x) { + const union { src_t f; src_rep_t i; } rep = {.f = x}; + return rep.i; +} + +static __inline dst_t dstFromRep(dst_rep_t x) { + const union { dst_t f; dst_rep_t i; } rep = {.i = x}; + return rep.f; +} + +#endif // FP_TRUNC_HEADER diff --git a/contrib/compiler-rt/lib/builtins/fp_trunc_impl.inc b/contrib/compiler-rt/lib/builtins/fp_trunc_impl.inc new file mode 100644 index 000000000000..d88ae060913f --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/fp_trunc_impl.inc @@ -0,0 +1,135 @@ +//= lib/fp_trunc_impl.inc - high precision -> low precision conversion *-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a fairly generic conversion from a wider to a narrower +// IEEE-754 floating-point type in the default (round to nearest, ties to even) +// rounding mode. The constants and types defined following the includes below +// parameterize the conversion. +// +// This routine can be trivially adapted to support conversions to +// half-precision or from quad-precision. It does not support types that don't +// use the usual IEEE-754 interchange formats; specifically, some work would be +// needed to adapt it to (for example) the Intel 80-bit format or PowerPC +// double-double format. +// +// Note please, however, that this implementation is only intended to support +// *narrowing* operations; if you need to convert to a *wider* floating-point +// type (e.g. float -> double), then this routine will not do what you want it +// to. +// +// It also requires that integer types at least as large as both formats +// are available on the target platform; this may pose a problem when trying +// to add support for quad on some 32-bit systems, for example. +// +// Finally, the following assumptions are made: +// +// 1. floating-point types and integer types have the same endianness on the +// target platform +// +// 2. quiet NaNs, if supported, are indicated by the leading bit of the +// significand field being set +// +//===----------------------------------------------------------------------===// + +#include "fp_trunc.h" + +static __inline dst_t __truncXfYf2__(src_t a) { + // Various constants whose values follow from the type parameters. + // Any reasonable optimizer will fold and propagate all of these. 
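+    // (Among these, roundMask covers the srcSigBits - dstSigBits low-order
+    // bits that will be dropped, and halfway is the value of the round bit
+    // alone; comparing the dropped bits against halfway is what implements
+    // round-to-nearest with ties to even further down.)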
+ const int srcBits = sizeof(src_t)*CHAR_BIT; + const int srcExpBits = srcBits - srcSigBits - 1; + const int srcInfExp = (1 << srcExpBits) - 1; + const int srcExpBias = srcInfExp >> 1; + + const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits; + const src_rep_t srcSignificandMask = srcMinNormal - 1; + const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits; + const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits); + const src_rep_t srcAbsMask = srcSignMask - 1; + const src_rep_t roundMask = (SRC_REP_C(1) << (srcSigBits - dstSigBits)) - 1; + const src_rep_t halfway = SRC_REP_C(1) << (srcSigBits - dstSigBits - 1); + const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1); + const src_rep_t srcNaNCode = srcQNaN - 1; + + const int dstBits = sizeof(dst_t)*CHAR_BIT; + const int dstExpBits = dstBits - dstSigBits - 1; + const int dstInfExp = (1 << dstExpBits) - 1; + const int dstExpBias = dstInfExp >> 1; + + const int underflowExponent = srcExpBias + 1 - dstExpBias; + const int overflowExponent = srcExpBias + dstInfExp - dstExpBias; + const src_rep_t underflow = (src_rep_t)underflowExponent << srcSigBits; + const src_rep_t overflow = (src_rep_t)overflowExponent << srcSigBits; + + const dst_rep_t dstQNaN = DST_REP_C(1) << (dstSigBits - 1); + const dst_rep_t dstNaNCode = dstQNaN - 1; + + // Break a into a sign and representation of the absolute value + const src_rep_t aRep = srcToRep(a); + const src_rep_t aAbs = aRep & srcAbsMask; + const src_rep_t sign = aRep & srcSignMask; + dst_rep_t absResult; + + if (aAbs - underflow < aAbs - overflow) { + // The exponent of a is within the range of normal numbers in the + // destination format. We can convert by simply right-shifting with + // rounding and adjusting the exponent. + absResult = aAbs >> (srcSigBits - dstSigBits); + absResult -= (dst_rep_t)(srcExpBias - dstExpBias) << dstSigBits; + + const src_rep_t roundBits = aAbs & roundMask; + // Round to nearest + if (roundBits > halfway) + absResult++; + // Ties to even + else if (roundBits == halfway) + absResult += absResult & 1; + } + else if (aAbs > srcInfinity) { + // a is NaN. + // Conjure the result by beginning with infinity, setting the qNaN + // bit and inserting the (truncated) trailing NaN field. + absResult = (dst_rep_t)dstInfExp << dstSigBits; + absResult |= dstQNaN; + absResult |= ((aAbs & srcNaNCode) >> (srcSigBits - dstSigBits)) & dstNaNCode; + } + else if (aAbs >= overflow) { + // a overflows to infinity. + absResult = (dst_rep_t)dstInfExp << dstSigBits; + } + else { + // a underflows on conversion to the destination type or is an exact + // zero. The result may be a denormal or zero. Extract the exponent + // to get the shift amount for the denormalization. + const int aExp = aAbs >> srcSigBits; + const int shift = srcExpBias - dstExpBias - aExp + 1; + + const src_rep_t significand = (aRep & srcSignificandMask) | srcMinNormal; + + // Right shift by the denormalization amount with sticky. + if (shift > srcSigBits) { + absResult = 0; + } else { + const bool sticky = significand << (srcBits - shift); + src_rep_t denormalizedSignificand = significand >> shift | sticky; + absResult = denormalizedSignificand >> (srcSigBits - dstSigBits); + const src_rep_t roundBits = denormalizedSignificand & roundMask; + // Round to nearest + if (roundBits > halfway) + absResult++; + // Ties to even + else if (roundBits == halfway) + absResult += absResult & 1; + } + } + + // Apply the signbit to (dst_t)abs(a). 
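+    // (The sign bit sits at position srcBits - 1 in the source representation
+    // and must land at position dstBits - 1 in the destination, hence the
+    // right shift by srcBits - dstBits.)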
+ const dst_rep_t result = absResult | sign >> (srcBits - dstBits); + return dstFromRep(result); +} diff --git a/contrib/compiler-rt/lib/builtins/gcc_personality_v0.c b/contrib/compiler-rt/lib/builtins/gcc_personality_v0.c new file mode 100644 index 000000000000..68581ef16434 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/gcc_personality_v0.c @@ -0,0 +1,251 @@ +/* ===-- gcc_personality_v0.c - Implement __gcc_personality_v0 -------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + */ + +#include "int_lib.h" + +#include <unwind.h> +#if defined(__arm__) && !defined(__ARM_DWARF_EH__) && !defined(__USING_SJLJ_EXCEPTIONS__) +/* + * When building with older compilers (e.g. clang <3.9), it is possible that we + * have a version of unwind.h which does not provide the EHABI declarations + * which are quired for the C personality to conform to the specification. In + * order to provide forward compatibility for such compilers, we re-declare the + * necessary interfaces in the helper to permit a standalone compilation of the + * builtins (which contains the C unwinding personality for historical reasons). + */ +#include "unwind-ehabi-helpers.h" +#endif + +/* + * Pointer encodings documented at: + * http://refspecs.freestandards.org/LSB_1.3.0/gLSB/gLSB/ehframehdr.html + */ + +#define DW_EH_PE_omit 0xff /* no data follows */ + +#define DW_EH_PE_absptr 0x00 +#define DW_EH_PE_uleb128 0x01 +#define DW_EH_PE_udata2 0x02 +#define DW_EH_PE_udata4 0x03 +#define DW_EH_PE_udata8 0x04 +#define DW_EH_PE_sleb128 0x09 +#define DW_EH_PE_sdata2 0x0A +#define DW_EH_PE_sdata4 0x0B +#define DW_EH_PE_sdata8 0x0C + +#define DW_EH_PE_pcrel 0x10 +#define DW_EH_PE_textrel 0x20 +#define DW_EH_PE_datarel 0x30 +#define DW_EH_PE_funcrel 0x40 +#define DW_EH_PE_aligned 0x50 +#define DW_EH_PE_indirect 0x80 /* gcc extension */ + + + +/* read a uleb128 encoded value and advance pointer */ +static uintptr_t readULEB128(const uint8_t** data) +{ + uintptr_t result = 0; + uintptr_t shift = 0; + unsigned char byte; + const uint8_t* p = *data; + do { + byte = *p++; + result |= (byte & 0x7f) << shift; + shift += 7; + } while (byte & 0x80); + *data = p; + return result; +} + +/* read a pointer encoded value and advance pointer */ +static uintptr_t readEncodedPointer(const uint8_t** data, uint8_t encoding) +{ + const uint8_t* p = *data; + uintptr_t result = 0; + + if ( encoding == DW_EH_PE_omit ) + return 0; + + /* first get value */ + switch (encoding & 0x0F) { + case DW_EH_PE_absptr: + result = *((const uintptr_t*)p); + p += sizeof(uintptr_t); + break; + case DW_EH_PE_uleb128: + result = readULEB128(&p); + break; + case DW_EH_PE_udata2: + result = *((const uint16_t*)p); + p += sizeof(uint16_t); + break; + case DW_EH_PE_udata4: + result = *((const uint32_t*)p); + p += sizeof(uint32_t); + break; + case DW_EH_PE_udata8: + result = *((const uint64_t*)p); + p += sizeof(uint64_t); + break; + case DW_EH_PE_sdata2: + result = *((const int16_t*)p); + p += sizeof(int16_t); + break; + case DW_EH_PE_sdata4: + result = *((const int32_t*)p); + p += sizeof(int32_t); + break; + case DW_EH_PE_sdata8: + result = *((const int64_t*)p); + p += sizeof(int64_t); + break; + case DW_EH_PE_sleb128: + default: + /* not supported */ + compilerrt_abort(); + break; + } + + /* then add relative offset */ + switch ( encoding & 0x70 ) { + case 
DW_EH_PE_absptr: + /* do nothing */ + break; + case DW_EH_PE_pcrel: + result += (uintptr_t)(*data); + break; + case DW_EH_PE_textrel: + case DW_EH_PE_datarel: + case DW_EH_PE_funcrel: + case DW_EH_PE_aligned: + default: + /* not supported */ + compilerrt_abort(); + break; + } + + /* then apply indirection */ + if (encoding & DW_EH_PE_indirect) { + result = *((const uintptr_t*)result); + } + + *data = p; + return result; +} + +#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \ + !defined(__ARM_DWARF_EH__) +#define USING_ARM_EHABI 1 +_Unwind_Reason_Code __gnu_unwind_frame(struct _Unwind_Exception *, + struct _Unwind_Context *); +#endif + +static inline _Unwind_Reason_Code +continueUnwind(struct _Unwind_Exception *exceptionObject, + struct _Unwind_Context *context) { +#if USING_ARM_EHABI + /* + * On ARM EHABI the personality routine is responsible for actually + * unwinding a single stack frame before returning (ARM EHABI Sec. 6.1). + */ + if (__gnu_unwind_frame(exceptionObject, context) != _URC_OK) + return _URC_FAILURE; +#endif + return _URC_CONTINUE_UNWIND; +} + +/* + * The C compiler makes references to __gcc_personality_v0 in + * the dwarf unwind information for translation units that use + * __attribute__((cleanup(xx))) on local variables. + * This personality routine is called by the system unwinder + * on each frame as the stack is unwound during a C++ exception + * throw through a C function compiled with -fexceptions. + */ +#if __USING_SJLJ_EXCEPTIONS__ +/* the setjump-longjump based exceptions personality routine has a + * different name */ +COMPILER_RT_ABI _Unwind_Reason_Code +__gcc_personality_sj0(int version, _Unwind_Action actions, + uint64_t exceptionClass, struct _Unwind_Exception* exceptionObject, + struct _Unwind_Context *context) +#elif USING_ARM_EHABI +/* The ARM EHABI personality routine has a different signature. */ +COMPILER_RT_ABI _Unwind_Reason_Code __gcc_personality_v0( + _Unwind_State state, struct _Unwind_Exception *exceptionObject, + struct _Unwind_Context *context) +#else +COMPILER_RT_ABI _Unwind_Reason_Code +__gcc_personality_v0(int version, _Unwind_Action actions, + uint64_t exceptionClass, struct _Unwind_Exception* exceptionObject, + struct _Unwind_Context *context) +#endif +{ + /* Since C does not have catch clauses, there is nothing to do during */ + /* phase 1 (the search phase). */ +#if USING_ARM_EHABI + /* After resuming from a cleanup we should also continue on to the next + * frame straight away. */ + if ((state & _US_ACTION_MASK) != _US_UNWIND_FRAME_STARTING) +#else + if ( actions & _UA_SEARCH_PHASE ) +#endif + return continueUnwind(exceptionObject, context); + + /* There is nothing to do if there is no LSDA for this frame. */ + const uint8_t* lsda = (uint8_t*)_Unwind_GetLanguageSpecificData(context); + if ( lsda == (uint8_t*) 0 ) + return continueUnwind(exceptionObject, context); + + uintptr_t pc = (uintptr_t)_Unwind_GetIP(context)-1; + uintptr_t funcStart = (uintptr_t)_Unwind_GetRegionStart(context); + uintptr_t pcOffset = pc - funcStart; + + /* Parse LSDA header. */ + uint8_t lpStartEncoding = *lsda++; + if (lpStartEncoding != DW_EH_PE_omit) { + readEncodedPointer(&lsda, lpStartEncoding); + } + uint8_t ttypeEncoding = *lsda++; + if (ttypeEncoding != DW_EH_PE_omit) { + readULEB128(&lsda); + } + /* Walk call-site table looking for range that includes current PC. 
*/ + uint8_t callSiteEncoding = *lsda++; + uint32_t callSiteTableLength = readULEB128(&lsda); + const uint8_t* callSiteTableStart = lsda; + const uint8_t* callSiteTableEnd = callSiteTableStart + callSiteTableLength; + const uint8_t* p=callSiteTableStart; + while (p < callSiteTableEnd) { + uintptr_t start = readEncodedPointer(&p, callSiteEncoding); + uintptr_t length = readEncodedPointer(&p, callSiteEncoding); + uintptr_t landingPad = readEncodedPointer(&p, callSiteEncoding); + readULEB128(&p); /* action value not used for C code */ + if ( landingPad == 0 ) + continue; /* no landing pad for this entry */ + if ( (start <= pcOffset) && (pcOffset < (start+length)) ) { + /* Found landing pad for the PC. + * Set Instruction Pointer to so we re-enter function + * at landing pad. The landing pad is created by the compiler + * to take two parameters in registers. + */ + _Unwind_SetGR(context, __builtin_eh_return_data_regno(0), + (uintptr_t)exceptionObject); + _Unwind_SetGR(context, __builtin_eh_return_data_regno(1), 0); + _Unwind_SetIP(context, (funcStart + landingPad)); + return _URC_INSTALL_CONTEXT; + } + } + + /* No landing pad found, continue unwinding. */ + return continueUnwind(exceptionObject, context); +} diff --git a/contrib/compiler-rt/lib/builtins/hexagon/common_entry_exit_abi1.S b/contrib/compiler-rt/lib/builtins/hexagon/common_entry_exit_abi1.S new file mode 100644 index 000000000000..d5479d2a52fa --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/hexagon/common_entry_exit_abi1.S @@ -0,0 +1,103 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +/* Functions that implement common sequences in function prologues and epilogues + used to save code size */ + + .macro FUNCTION_BEGIN name + .text + .globl \name + .type \name, @function + .falign +\name: + .endm + + .macro FUNCTION_END name + .size \name, . - \name + .endm + + .macro FALLTHROUGH_TAIL_CALL name0 name1 + .size \name0, . - \name0 + .globl \name1 + .type \name1, @function + .falign +\name1: + .endm + + + + +/* Save r25:24 at fp+#-8 and r27:26 at fp+#-16. */ + + + + +/* The compiler knows that the __save_* functions clobber LR. No other + registers should be used without informing the compiler. */ + +/* Since we can only issue one store per packet, we don't hurt performance by + simply jumping to the right point in this sequence of stores. */ + +FUNCTION_BEGIN __save_r24_through_r27 + memd(fp+#-16) = r27:26 +FALLTHROUGH_TAIL_CALL __save_r24_through_r27 __save_r24_through_r25 + { + memd(fp+#-8) = r25:24 + jumpr lr + } +FUNCTION_END __save_r24_through_r25 + + + + +/* For each of the *_before_tailcall functions, jumpr lr is executed in parallel + with deallocframe. That way, the return gets the old value of lr, which is + where these functions need to return, and at the same time, lr gets the value + it needs going into the tail call. 
*/ + +FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe_before_tailcall + r27:26 = memd(fp+#-16) +FALLTHROUGH_TAIL_CALL __restore_r24_through_r27_and_deallocframe_before_tailcall __restore_r24_through_r25_and_deallocframe_before_tailcall + { + r25:24 = memd(fp+#-8) + deallocframe + jumpr lr + } +FUNCTION_END __restore_r24_through_r25_and_deallocframe_before_tailcall + + + + +/* Here we use the extra load bandwidth to restore LR early, allowing the return + to occur in parallel with the deallocframe. */ + +FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe + { + lr = memw(fp+#4) + r27:26 = memd(fp+#-16) + } + { + r25:24 = memd(fp+#-8) + deallocframe + jumpr lr + } +FUNCTION_END __restore_r24_through_r27_and_deallocframe + + + + +/* Here the load bandwidth is maximized. */ + +FUNCTION_BEGIN __restore_r24_through_r25_and_deallocframe + { + r25:24 = memd(fp+#-8) + deallocframe + } + jumpr lr +FUNCTION_END __restore_r24_through_r25_and_deallocframe diff --git a/contrib/compiler-rt/lib/builtins/hexagon/common_entry_exit_abi2.S b/contrib/compiler-rt/lib/builtins/hexagon/common_entry_exit_abi2.S new file mode 100644 index 000000000000..6f470343db49 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/hexagon/common_entry_exit_abi2.S @@ -0,0 +1,268 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +/* Functions that implement common sequences in function prologues and epilogues + used to save code size */ + + .macro FUNCTION_BEGIN name + .p2align 2 + .section .text.\name,"ax",@progbits + .globl \name + .type \name, @function +\name: + .endm + + .macro FUNCTION_END name + .size \name, . - \name + .endm + + .macro FALLTHROUGH_TAIL_CALL name0 name1 + .p2align 2 + .size \name0, . - \name0 + .globl \name1 + .type \name1, @function +\name1: + .endm + + + + +/* Save r17:16 at fp+#-8, r19:18 at fp+#-16, r21:20 at fp+#-24, r23:22 at + fp+#-32, r25:24 at fp+#-40, and r27:26 at fp+#-48. + The compiler knows that the __save_* functions clobber LR. No other + registers should be used without informing the compiler. 
*/ + +FUNCTION_BEGIN __save_r16_through_r27 + { + memd(fp+#-48) = r27:26 + memd(fp+#-40) = r25:24 + } + { + memd(fp+#-32) = r23:22 + memd(fp+#-24) = r21:20 + } + { + memd(fp+#-16) = r19:18 + memd(fp+#-8) = r17:16 + jumpr lr + } +FUNCTION_END __save_r16_through_r27 + +FUNCTION_BEGIN __save_r16_through_r25 + { + memd(fp+#-40) = r25:24 + memd(fp+#-32) = r23:22 + } + { + memd(fp+#-24) = r21:20 + memd(fp+#-16) = r19:18 + } + { + memd(fp+#-8) = r17:16 + jumpr lr + } +FUNCTION_END __save_r16_through_r25 + +FUNCTION_BEGIN __save_r16_through_r23 + { + memd(fp+#-32) = r23:22 + memd(fp+#-24) = r21:20 + } + { + memd(fp+#-16) = r19:18 + memd(fp+#-8) = r17:16 + jumpr lr + } +FUNCTION_END __save_r16_through_r23 + +FUNCTION_BEGIN __save_r16_through_r21 + { + memd(fp+#-24) = r21:20 + memd(fp+#-16) = r19:18 + } + { + memd(fp+#-8) = r17:16 + jumpr lr + } +FUNCTION_END __save_r16_through_r21 + +FUNCTION_BEGIN __save_r16_through_r19 + { + memd(fp+#-16) = r19:18 + memd(fp+#-8) = r17:16 + jumpr lr + } +FUNCTION_END __save_r16_through_r19 + +FUNCTION_BEGIN __save_r16_through_r17 + { + memd(fp+#-8) = r17:16 + jumpr lr + } +FUNCTION_END __save_r16_through_r17 + +/* For each of the *_before_tailcall functions, jumpr lr is executed in parallel + with deallocframe. That way, the return gets the old value of lr, which is + where these functions need to return, and at the same time, lr gets the value + it needs going into the tail call. */ + + +FUNCTION_BEGIN __restore_r16_through_r27_and_deallocframe_before_tailcall + r27:26 = memd(fp+#-48) + { + r25:24 = memd(fp+#-40) + r23:22 = memd(fp+#-32) + } + { + r21:20 = memd(fp+#-24) + r19:18 = memd(fp+#-16) + } + { + r17:16 = memd(fp+#-8) + deallocframe + jumpr lr + } +FUNCTION_END __restore_r16_through_r27_and_deallocframe_before_tailcall + +FUNCTION_BEGIN __restore_r16_through_r25_and_deallocframe_before_tailcall + { + r25:24 = memd(fp+#-40) + r23:22 = memd(fp+#-32) + } + { + r21:20 = memd(fp+#-24) + r19:18 = memd(fp+#-16) + } + { + r17:16 = memd(fp+#-8) + deallocframe + jumpr lr + } +FUNCTION_END __restore_r16_through_r25_and_deallocframe_before_tailcall + +FUNCTION_BEGIN __restore_r16_through_r23_and_deallocframe_before_tailcall + { + r23:22 = memd(fp+#-32) + r21:20 = memd(fp+#-24) + } + r19:18 = memd(fp+#-16) + { + r17:16 = memd(fp+#-8) + deallocframe + jumpr lr + } +FUNCTION_END __restore_r16_through_r23_and_deallocframe_before_tailcall + + +FUNCTION_BEGIN __restore_r16_through_r21_and_deallocframe_before_tailcall + { + r21:20 = memd(fp+#-24) + r19:18 = memd(fp+#-16) + } + { + r17:16 = memd(fp+#-8) + deallocframe + jumpr lr + } +FUNCTION_END __restore_r16_through_r19_and_deallocframe_before_tailcall + +FUNCTION_BEGIN __restore_r16_through_r19_and_deallocframe_before_tailcall + r19:18 = memd(fp+#-16) + { + r17:16 = memd(fp+#-8) + deallocframe + jumpr lr + } +FUNCTION_END __restore_r16_through_r19_and_deallocframe_before_tailcall + +FUNCTION_BEGIN __restore_r16_through_r17_and_deallocframe_before_tailcall + { + r17:16 = memd(fp+#-8) + deallocframe + jumpr lr + } +FUNCTION_END __restore_r16_through_r17_and_deallocframe_before_tailcall + + +FUNCTION_BEGIN __restore_r16_through_r27_and_deallocframe + r27:26 = memd(fp+#-48) + { + r25:24 = memd(fp+#-40) + r23:22 = memd(fp+#-32) + } + { + r21:20 = memd(fp+#-24) + r19:18 = memd(fp+#-16) + } + { + r17:16 = memd(fp+#-8) + dealloc_return + } +FUNCTION_END __restore_r16_through_r27_and_deallocframe + +FUNCTION_BEGIN __restore_r16_through_r25_and_deallocframe + { + r25:24 = memd(fp+#-40) + r23:22 = memd(fp+#-32) + } + { + r21:20 = 
memd(fp+#-24) + r19:18 = memd(fp+#-16) + } + { + r17:16 = memd(fp+#-8) + dealloc_return + } +FUNCTION_END __restore_r16_through_r25_and_deallocframe + +FUNCTION_BEGIN __restore_r16_through_r23_and_deallocframe + { + r23:22 = memd(fp+#-32) + } + { + r21:20 = memd(fp+#-24) + r19:18 = memd(fp+#-16) + } + { + r17:16 = memd(fp+#-8) + dealloc_return + } +FUNCTION_END __restore_r16_through_r23_and_deallocframe + +FUNCTION_BEGIN __restore_r16_through_r21_and_deallocframe + { + r21:20 = memd(fp+#-24) + r19:18 = memd(fp+#-16) + } + { + r17:16 = memd(fp+#-8) + dealloc_return + } +FUNCTION_END __restore_r16_through_r21_and_deallocframe + +FUNCTION_BEGIN __restore_r16_through_r19_and_deallocframe + { + r19:18 = memd(fp+#-16) + r17:16 = memd(fp+#-8) + } + { + dealloc_return + } +FUNCTION_END __restore_r16_through_r19_and_deallocframe + +FUNCTION_BEGIN __restore_r16_through_r17_and_deallocframe + { + r17:16 = memd(fp+#-8) + dealloc_return + } +FUNCTION_END __restore_r16_through_r17_and_deallocframe + +FUNCTION_BEGIN __deallocframe + dealloc_return +FUNCTION_END __deallocframe diff --git a/contrib/compiler-rt/lib/builtins/hexagon/common_entry_exit_legacy.S b/contrib/compiler-rt/lib/builtins/hexagon/common_entry_exit_legacy.S new file mode 100644 index 000000000000..3258f15a3267 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/hexagon/common_entry_exit_legacy.S @@ -0,0 +1,157 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + +/* Functions that implement common sequences in function prologues and epilogues + used to save code size */ + + .macro FUNCTION_BEGIN name + .text + .globl \name + .type \name, @function + .falign +\name: + .endm + + .macro FUNCTION_END name + .size \name, . - \name + .endm + + .macro FALLTHROUGH_TAIL_CALL name0 name1 + .size \name0, . - \name0 + .globl \name1 + .type \name1, @function + .falign +\name1: + .endm + + + + +/* Save r27:26 at fp+#-8, r25:24 at fp+#-16, r23:22 at fp+#-24, r21:20 at + fp+#-32, r19:18 at fp+#-40, and r17:16 at fp+#-48. */ + + + + +/* The compiler knows that the __save_* functions clobber LR. No other + registers should be used without informing the compiler. */ + +/* Since we can only issue one store per packet, we don't hurt performance by + simply jumping to the right point in this sequence of stores. */ + +FUNCTION_BEGIN __save_r27_through_r16 + memd(fp+#-48) = r17:16 +FALLTHROUGH_TAIL_CALL __save_r27_through_r16 __save_r27_through_r18 + memd(fp+#-40) = r19:18 +FALLTHROUGH_TAIL_CALL __save_r27_through_r18 __save_r27_through_r20 + memd(fp+#-32) = r21:20 +FALLTHROUGH_TAIL_CALL __save_r27_through_r20 __save_r27_through_r22 + memd(fp+#-24) = r23:22 +FALLTHROUGH_TAIL_CALL __save_r27_through_r22 __save_r27_through_r24 + memd(fp+#-16) = r25:24 + { + memd(fp+#-8) = r27:26 + jumpr lr + } +FUNCTION_END __save_r27_through_r24 + + + + +/* For each of the *_before_sibcall functions, jumpr lr is executed in parallel + with deallocframe. That way, the return gets the old value of lr, which is + where these functions need to return, and at the same time, lr gets the value + it needs going into the sibcall. 
*/ + +FUNCTION_BEGIN __restore_r27_through_r20_and_deallocframe_before_sibcall + { + r21:20 = memd(fp+#-32) + r23:22 = memd(fp+#-24) + } +FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe_before_sibcall __restore_r27_through_r24_and_deallocframe_before_sibcall + { + r25:24 = memd(fp+#-16) + jump __restore_r27_through_r26_and_deallocframe_before_sibcall + } +FUNCTION_END __restore_r27_through_r24_and_deallocframe_before_sibcall + + + + +FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe_before_sibcall + r17:16 = memd(fp+#-48) +FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe_before_sibcall __restore_r27_through_r18_and_deallocframe_before_sibcall + { + r19:18 = memd(fp+#-40) + r21:20 = memd(fp+#-32) + } +FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe_before_sibcall __restore_r27_through_r22_and_deallocframe_before_sibcall + { + r23:22 = memd(fp+#-24) + r25:24 = memd(fp+#-16) + } +FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe_before_sibcall __restore_r27_through_r26_and_deallocframe_before_sibcall + { + r27:26 = memd(fp+#-8) + deallocframe + jumpr lr + } +FUNCTION_END __restore_r27_through_r26_and_deallocframe_before_sibcall + + + + +/* Here we use the extra load bandwidth to restore LR early, allowing the return + to occur in parallel with the deallocframe. */ + +FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe + { + r17:16 = memd(fp+#-48) + r19:18 = memd(fp+#-40) + } +FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe __restore_r27_through_r20_and_deallocframe + { + r21:20 = memd(fp+#-32) + r23:22 = memd(fp+#-24) + } +FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe __restore_r27_through_r24_and_deallocframe + { + lr = memw(fp+#4) + r25:24 = memd(fp+#-16) + } + { + r27:26 = memd(fp+#-8) + deallocframe + jumpr lr + } +FUNCTION_END __restore_r27_through_r24_and_deallocframe + + + + +/* Here the load bandwidth is maximized for all three functions. */ + +FUNCTION_BEGIN __restore_r27_through_r18_and_deallocframe + { + r19:18 = memd(fp+#-40) + r21:20 = memd(fp+#-32) + } +FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe __restore_r27_through_r22_and_deallocframe + { + r23:22 = memd(fp+#-24) + r25:24 = memd(fp+#-16) + } +FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe __restore_r27_through_r26_and_deallocframe + { + r27:26 = memd(fp+#-8) + deallocframe + } + jumpr lr +FUNCTION_END __restore_r27_through_r26_and_deallocframe diff --git a/contrib/compiler-rt/lib/builtins/hexagon/dfaddsub.S b/contrib/compiler-rt/lib/builtins/hexagon/dfaddsub.S new file mode 100644 index 000000000000..4173f86a4f54 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/hexagon/dfaddsub.S @@ -0,0 +1,398 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +/* Double Precision Multiply */ + +#define A r1:0 +#define AH r1 +#define AL r0 +#define B r3:2 +#define BH r3 +#define BL r2 + +#define EXPA r4 +#define EXPB r5 +#define EXPB_A r5:4 + +#define ZTMP r7:6 +#define ZTMPH r7 +#define ZTMPL r6 + +#define ATMP r13:12 +#define ATMPH r13 +#define ATMPL r12 + +#define BTMP r9:8 +#define BTMPH r9 +#define BTMPL r8 + +#define ATMP2 r11:10 +#define ATMP2H r11 +#define ATMP2L r10 + +#define EXPDIFF r15 +#define EXTRACTOFF r14 +#define EXTRACTAMT r15:14 + +#define TMP r28 + +#define MANTBITS 52 +#define HI_MANTBITS 20 +#define EXPBITS 11 +#define BIAS 1024 +#define MANTISSA_TO_INT_BIAS 52 +#define SR_BIT_INEXACT 5 + +#ifndef SR_ROUND_OFF +#define SR_ROUND_OFF 22 +#endif + +#define NORMAL p3 +#define BIGB p2 + +#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG +#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG +#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG +#define END(TAG) .size TAG,.-TAG + + .text + .global __hexagon_adddf3 + .global __hexagon_subdf3 + .type __hexagon_adddf3, @function + .type __hexagon_subdf3, @function + +Q6_ALIAS(adddf3) +FAST_ALIAS(adddf3) +FAST2_ALIAS(adddf3) +Q6_ALIAS(subdf3) +FAST_ALIAS(subdf3) +FAST2_ALIAS(subdf3) + + .p2align 5 +__hexagon_adddf3: + { + EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS) + EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS) + ATMP = combine(##0x20000000,#0) + } + { + NORMAL = dfclass(A,#2) + NORMAL = dfclass(B,#2) + BTMP = ATMP + BIGB = cmp.gtu(EXPB,EXPA) // Is B substantially greater than A? + } + { + if (!NORMAL) jump .Ladd_abnormal // If abnormal, go to special code + if (BIGB) A = B // if B >> A, swap A and B + if (BIGB) B = A // If B >> A, swap A and B + if (BIGB) EXPB_A = combine(EXPA,EXPB) // swap exponents + } + { + ATMP = insert(A,#MANTBITS,#EXPBITS-2) // Q1.62 + BTMP = insert(B,#MANTBITS,#EXPBITS-2) // Q1.62 + EXPDIFF = sub(EXPA,EXPB) + ZTMP = combine(#62,#1) + } +#undef BIGB +#undef NORMAL +#define B_POS p3 +#define A_POS p2 +#define NO_STICKIES p1 +.Ladd_continue: + { + EXPDIFF = min(EXPDIFF,ZTMPH) // If exponent difference >= ~60, + // will collapse to sticky bit + ATMP2 = neg(ATMP) + A_POS = cmp.gt(AH,#-1) + EXTRACTOFF = #0 + } + { + if (!A_POS) ATMP = ATMP2 + ATMP2 = extractu(BTMP,EXTRACTAMT) + BTMP = ASR(BTMP,EXPDIFF) +#undef EXTRACTAMT +#undef EXPDIFF +#undef EXTRACTOFF +#define ZERO r15:14 + ZERO = #0 + } + { + NO_STICKIES = cmp.eq(ATMP2,ZERO) + if (!NO_STICKIES.new) BTMPL = or(BTMPL,ZTMPL) + EXPB = add(EXPA,#-BIAS-60) + B_POS = cmp.gt(BH,#-1) + } + { + ATMP = add(ATMP,BTMP) // ADD!!! + ATMP2 = sub(ATMP,BTMP) // Negate and ADD --> SUB!!! + ZTMP = combine(#54,##2045) + } + { + p0 = cmp.gtu(EXPA,ZTMPH) // must be pretty high in case of large cancellation + p0 = !cmp.gtu(EXPA,ZTMPL) + if (!p0.new) jump:nt .Ladd_ovf_unf + if (!B_POS) ATMP = ATMP2 // if B neg, pick difference + } + { + A = convert_d2df(ATMP) // Convert to Double Precision, taking care of flags, etc. So nice! + p0 = cmp.eq(ATMPH,#0) + p0 = cmp.eq(ATMPL,#0) + if (p0.new) jump:nt .Ladd_zero // or maybe conversion handles zero case correctly? 
+ } + { + AH += asl(EXPB,#HI_MANTBITS) + jumpr r31 + } + .falign +__hexagon_subdf3: + { + BH = togglebit(BH,#31) + jump __qdsp_adddf3 + } + + + .falign +.Ladd_zero: + // True zero, full cancellation + // +0 unless round towards negative infinity + { + TMP = USR + A = #0 + BH = #1 + } + { + TMP = extractu(TMP,#2,#22) + BH = asl(BH,#31) + } + { + p0 = cmp.eq(TMP,#2) + if (p0.new) AH = xor(AH,BH) + jumpr r31 + } + .falign +.Ladd_ovf_unf: + // Overflow or Denormal is possible + // Good news: Underflow flag is not possible! + /* + * ATMP has 2's complement value + * + * EXPA has A's exponent, EXPB has EXPA-BIAS-60 + * + * Convert, extract exponent, add adjustment. + * If > 2046, overflow + * If <= 0, denormal + * + * Note that we've not done our zero check yet, so do that too + * + */ + { + A = convert_d2df(ATMP) + p0 = cmp.eq(ATMPH,#0) + p0 = cmp.eq(ATMPL,#0) + if (p0.new) jump:nt .Ladd_zero + } + { + TMP = extractu(AH,#EXPBITS,#HI_MANTBITS) + AH += asl(EXPB,#HI_MANTBITS) + } + { + EXPB = add(EXPB,TMP) + B = combine(##0x00100000,#0) + } + { + p0 = cmp.gt(EXPB,##BIAS+BIAS-2) + if (p0.new) jump:nt .Ladd_ovf + } + { + p0 = cmp.gt(EXPB,#0) + if (p0.new) jumpr:t r31 + TMP = sub(#1,EXPB) + } + { + B = insert(A,#MANTBITS,#0) + A = ATMP + } + { + B = lsr(B,TMP) + } + { + A = insert(B,#63,#0) + jumpr r31 + } + .falign +.Ladd_ovf: + // We get either max finite value or infinity. Either way, overflow+inexact + { + A = ATMP // 2's complement value + TMP = USR + ATMP = combine(##0x7fefffff,#-1) // positive max finite + } + { + EXPB = extractu(TMP,#2,#SR_ROUND_OFF) // rounding bits + TMP = or(TMP,#0x28) // inexact + overflow + BTMP = combine(##0x7ff00000,#0) // positive infinity + } + { + USR = TMP + EXPB ^= lsr(AH,#31) // Does sign match rounding? + TMP = EXPB // unmodified rounding mode + } + { + p0 = !cmp.eq(TMP,#1) // If not round-to-zero and + p0 = !cmp.eq(EXPB,#2) // Not rounding the other way, + if (p0.new) ATMP = BTMP // we should get infinity + } + { + A = insert(ATMP,#63,#0) // insert inf/maxfinite, leave sign + } + { + p0 = dfcmp.eq(A,A) + jumpr r31 + } + +.Ladd_abnormal: + { + ATMP = extractu(A,#63,#0) // strip off sign + BTMP = extractu(B,#63,#0) // strip off sign + } + { + p3 = cmp.gtu(ATMP,BTMP) + if (!p3.new) A = B // sort values + if (!p3.new) B = A // sort values + } + { + // Any NaN --> NaN, possibly raise invalid if sNaN + p0 = dfclass(A,#0x0f) // A not NaN? 
+ if (!p0.new) jump:nt .Linvalid_nan_add + if (!p3) ATMP = BTMP + if (!p3) BTMP = ATMP + } + { + // Infinity + non-infinity number is infinity + // Infinity + infinity --> inf or nan + p1 = dfclass(A,#0x08) // A is infinity + if (p1.new) jump:nt .Linf_add + } + { + p2 = dfclass(B,#0x01) // B is zero + if (p2.new) jump:nt .LB_zero // so return A or special 0+0 + ATMP = #0 + } + // We are left with adding one or more subnormals + { + p0 = dfclass(A,#4) + if (p0.new) jump:nt .Ladd_two_subnormal + ATMP = combine(##0x20000000,#0) + } + { + EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS) + EXPB = #1 + // BTMP already ABS(B) + BTMP = asl(BTMP,#EXPBITS-2) + } +#undef ZERO +#define EXTRACTOFF r14 +#define EXPDIFF r15 + { + ATMP = insert(A,#MANTBITS,#EXPBITS-2) + EXPDIFF = sub(EXPA,EXPB) + ZTMP = combine(#62,#1) + jump .Ladd_continue + } + +.Ladd_two_subnormal: + { + ATMP = extractu(A,#63,#0) + BTMP = extractu(B,#63,#0) + } + { + ATMP = neg(ATMP) + BTMP = neg(BTMP) + p0 = cmp.gt(AH,#-1) + p1 = cmp.gt(BH,#-1) + } + { + if (p0) ATMP = A + if (p1) BTMP = B + } + { + ATMP = add(ATMP,BTMP) + } + { + BTMP = neg(ATMP) + p0 = cmp.gt(ATMPH,#-1) + B = #0 + } + { + if (!p0) A = BTMP + if (p0) A = ATMP + BH = ##0x80000000 + } + { + if (!p0) AH = or(AH,BH) + p0 = dfcmp.eq(A,B) + if (p0.new) jump:nt .Lzero_plus_zero + } + { + jumpr r31 + } + +.Linvalid_nan_add: + { + TMP = convert_df2sf(A) // will generate invalid if sNaN + p0 = dfclass(B,#0x0f) // if B is not NaN + if (p0.new) B = A // make it whatever A is + } + { + BL = convert_df2sf(B) // will generate invalid if sNaN + A = #-1 + jumpr r31 + } + .falign +.LB_zero: + { + p0 = dfcmp.eq(ATMP,A) // is A also zero? + if (!p0.new) jumpr:t r31 // If not, just return A + } + // 0 + 0 is special + // if equal integral values, they have the same sign, which is fine for all rounding + // modes. + // If unequal in sign, we get +0 for all rounding modes except round down +.Lzero_plus_zero: + { + p0 = cmp.eq(A,B) + if (p0.new) jumpr:t r31 + } + { + TMP = USR + } + { + TMP = extractu(TMP,#2,#SR_ROUND_OFF) + A = #0 + } + { + p0 = cmp.eq(TMP,#2) + if (p0.new) AH = ##0x80000000 + jumpr r31 + } +.Linf_add: + // adding infinities is only OK if they are equal + { + p0 = !cmp.eq(AH,BH) // Do they have different signs + p0 = dfclass(B,#8) // And is B also infinite? + if (!p0.new) jumpr:t r31 // If not, just a normal inf + } + { + BL = ##0x7f800001 // sNAN + } + { + A = convert_sf2df(BL) // trigger invalid, set NaN + jumpr r31 + } +END(__hexagon_adddf3) diff --git a/contrib/compiler-rt/lib/builtins/hexagon/dfdiv.S b/contrib/compiler-rt/lib/builtins/hexagon/dfdiv.S new file mode 100644 index 000000000000..0c5dbe272c89 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/hexagon/dfdiv.S @@ -0,0 +1,492 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +/* Double Precision Divide */ + +#define A r1:0 +#define AH r1 +#define AL r0 + +#define B r3:2 +#define BH r3 +#define BL r2 + +#define Q r5:4 +#define QH r5 +#define QL r4 + +#define PROD r7:6 +#define PRODHI r7 +#define PRODLO r6 + +#define SFONE r8 +#define SFDEN r9 +#define SFERROR r10 +#define SFRECIP r11 + +#define EXPBA r13:12 +#define EXPB r13 +#define EXPA r12 + +#define REMSUB2 r15:14 + + + +#define SIGN r28 + +#define Q_POSITIVE p3 +#define NORMAL p2 +#define NO_OVF_UNF p1 +#define P_TMP p0 + +#define RECIPEST_SHIFT 3 +#define QADJ 61 + +#define DFCLASS_NORMAL 0x02 +#define DFCLASS_NUMBER 0x0F +#define DFCLASS_INFINITE 0x08 +#define DFCLASS_ZERO 0x01 +#define DFCLASS_NONZERO (DFCLASS_NUMBER ^ DFCLASS_ZERO) +#define DFCLASS_NONINFINITE (DFCLASS_NUMBER ^ DFCLASS_INFINITE) + +#define DF_MANTBITS 52 +#define DF_EXPBITS 11 +#define SF_MANTBITS 23 +#define SF_EXPBITS 8 +#define DF_BIAS 0x3ff + +#define SR_ROUND_OFF 22 + +#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG +#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG +#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG +#define END(TAG) .size TAG,.-TAG + + .text + .global __hexagon_divdf3 + .type __hexagon_divdf3,@function + Q6_ALIAS(divdf3) + FAST_ALIAS(divdf3) + FAST2_ALIAS(divdf3) + .p2align 5 +__hexagon_divdf3: + { + NORMAL = dfclass(A,#DFCLASS_NORMAL) + NORMAL = dfclass(B,#DFCLASS_NORMAL) + EXPBA = combine(BH,AH) + SIGN = xor(AH,BH) + } +#undef A +#undef AH +#undef AL +#undef B +#undef BH +#undef BL +#define REM r1:0 +#define REMHI r1 +#define REMLO r0 +#define DENOM r3:2 +#define DENOMHI r3 +#define DENOMLO r2 + { + if (!NORMAL) jump .Ldiv_abnormal + PROD = extractu(DENOM,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS) + SFONE = ##0x3f800001 + } + { + SFDEN = or(SFONE,PRODLO) + EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32) + EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32) + Q_POSITIVE = cmp.gt(SIGN,#-1) + } +#undef SIGN +#define ONE r28 +.Ldenorm_continue: + { + SFRECIP,P_TMP = sfrecipa(SFONE,SFDEN) + SFERROR = and(SFONE,#-2) + ONE = #1 + EXPA = sub(EXPA,EXPB) + } +#undef EXPB +#define RECIPEST r13 + { + SFERROR -= sfmpy(SFRECIP,SFDEN):lib + REMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32) + RECIPEST = ##0x00800000 << RECIPEST_SHIFT + } + { + SFRECIP += sfmpy(SFRECIP,SFERROR):lib + DENOMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32) + SFERROR = and(SFONE,#-2) + } + { + SFERROR -= sfmpy(SFRECIP,SFDEN):lib + QH = #-DF_BIAS+1 + QL = #DF_BIAS-1 + } + { + SFRECIP += sfmpy(SFRECIP,SFERROR):lib + NO_OVF_UNF = cmp.gt(EXPA,QH) + NO_OVF_UNF = !cmp.gt(EXPA,QL) + } + { + RECIPEST = insert(SFRECIP,#SF_MANTBITS,#RECIPEST_SHIFT) + Q = #0 + EXPA = add(EXPA,#-QADJ) + } +#undef SFERROR +#undef SFRECIP +#define TMP r10 +#define TMP1 r11 + { + RECIPEST = add(RECIPEST,#((-3) << RECIPEST_SHIFT)) + } + +#define DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA) \ + { \ + PROD = mpyu(RECIPEST,REMHI); \ + REM = asl(REM,# ## ( REMSHIFT )); \ + }; \ + { \ + PRODLO = # ## 0; \ + REM -= mpyu(PRODHI,DENOMLO); \ + REMSUB2 = mpyu(PRODHI,DENOMHI); \ + }; \ + { \ + Q += QSHIFTINSN(PROD, # ## ( QSHIFT )); \ + REM -= asl(REMSUB2, # ## 32); \ + EXTRA \ + } + + + DIV_ITER1B(ASL,14,15,) + DIV_ITER1B(ASR,1,15,) + DIV_ITER1B(ASR,16,15,) + DIV_ITER1B(ASR,31,15,PROD=# ( 0 );) + +#undef REMSUB2 +#define TMPPAIR r15:14 +#define TMPPAIRHI r15 +#define TMPPAIRLO r14 
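+
+/* What follows is the final correction and rounding step of the divide loop
+   above.  In rough C terms (an illustrative reading of the packets below, not
+   a separate implementation; q, rem, denom are the values left in Q, REM and
+   DENOM by the iterations, and q_positive is the sign predicate computed at
+   entry):
+
+       if (rem >= denom) { rem -= denom; q += 2; }  // one more quotient step
+       if (rem != 0)       q |= 1;                  // sticky bit so the final
+                                                    //   conversion rounds right
+       if (!q_positive)    q = -q;                  // apply the quotient sign
+*/
+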
+#undef RECIPEST +#define EXPB r13 + { + // compare or sub with carry + TMPPAIR = sub(REM,DENOM) + P_TMP = cmp.gtu(DENOM,REM) + // set up amt to add to q + if (!P_TMP.new) PRODLO = #2 + } + { + Q = add(Q,PROD) + if (!P_TMP) REM = TMPPAIR + TMPPAIR = #0 + } + { + P_TMP = cmp.eq(REM,TMPPAIR) + if (!P_TMP.new) QL = or(QL,ONE) + } + { + PROD = neg(Q) + } + { + if (!Q_POSITIVE) Q = PROD + } +#undef REM +#undef REMHI +#undef REMLO +#undef DENOM +#undef DENOMLO +#undef DENOMHI +#define A r1:0 +#define AH r1 +#define AL r0 +#define B r3:2 +#define BH r3 +#define BL r2 + { + A = convert_d2df(Q) + if (!NO_OVF_UNF) jump .Ldiv_ovf_unf + } + { + AH += asl(EXPA,#DF_MANTBITS-32) + jumpr r31 + } + +.Ldiv_ovf_unf: + { + AH += asl(EXPA,#DF_MANTBITS-32) + EXPB = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32) + } + { + PROD = abs(Q) + EXPA = add(EXPA,EXPB) + } + { + P_TMP = cmp.gt(EXPA,##DF_BIAS+DF_BIAS) // overflow + if (P_TMP.new) jump:nt .Ldiv_ovf + } + { + P_TMP = cmp.gt(EXPA,#0) + if (P_TMP.new) jump:nt .Lpossible_unf // round up to normal possible... + } + /* Underflow */ + /* We know what the infinite range exponent should be (EXPA) */ + /* Q is 2's complement, PROD is abs(Q) */ + /* Normalize Q, shift right, add a high bit, convert, change exponent */ + +#define FUDGE1 7 // how much to shift right +#define FUDGE2 4 // how many guard/round to keep at lsbs + + { + EXPB = add(clb(PROD),#-1) // doesn't need to be added in since + EXPA = sub(#FUDGE1,EXPA) // we extract post-converted exponent + TMP = USR + TMP1 = #63 + } + { + EXPB = min(EXPA,TMP1) + TMP1 = or(TMP,#0x030) + PROD = asl(PROD,EXPB) + EXPA = #0 + } + { + TMPPAIR = extractu(PROD,EXPBA) // bits that will get shifted out + PROD = lsr(PROD,EXPB) // shift out bits + B = #1 + } + { + P_TMP = cmp.gtu(B,TMPPAIR) + if (!P_TMP.new) PRODLO = or(BL,PRODLO) + PRODHI = setbit(PRODHI,#DF_MANTBITS-32+FUDGE2) + } + { + Q = neg(PROD) + P_TMP = bitsclr(PRODLO,#(1<<FUDGE2)-1) + if (!P_TMP.new) TMP = TMP1 + } + { + USR = TMP + if (Q_POSITIVE) Q = PROD + TMP = #-DF_BIAS-(DF_MANTBITS+FUDGE2) + } + { + A = convert_d2df(Q) + } + { + AH += asl(TMP,#DF_MANTBITS-32) + jumpr r31 + } + + +.Lpossible_unf: + /* If upper parts of Q were all F's, but abs(A) == 0x00100000_00000000, we rounded up to min_normal */ + /* The answer is correct, but we need to raise Underflow */ + { + B = extractu(A,#63,#0) + TMPPAIR = combine(##0x00100000,#0) // min normal + TMP = #0x7FFF + } + { + P_TMP = dfcmp.eq(TMPPAIR,B) // Is everything zero in the rounded value... + P_TMP = bitsset(PRODHI,TMP) // but a bunch of bits set in the unrounded abs(quotient)? + } + +#if (__HEXAGON_ARCH__ == 60) + TMP = USR // If not, just return + if (!P_TMP) jumpr r31 // Else, we want to set Unf+Inexact + // Note that inexact is already set... +#else + { + if (!P_TMP) jumpr r31 // If not, just return + TMP = USR // Else, we want to set Unf+Inexact + } // Note that inexact is already set... 
+#endif + { + TMP = or(TMP,#0x30) + } + { + USR = TMP + } + { + p0 = dfcmp.eq(A,A) + jumpr r31 + } + +.Ldiv_ovf: + /* + * Raise Overflow, and choose the correct overflow value (saturated normal or infinity) + */ + { + TMP = USR + B = combine(##0x7fefffff,#-1) + AH = mux(Q_POSITIVE,#0,#-1) + } + { + PROD = combine(##0x7ff00000,#0) + QH = extractu(TMP,#2,#SR_ROUND_OFF) + TMP = or(TMP,#0x28) + } + { + USR = TMP + QH ^= lsr(AH,#31) + QL = QH + } + { + p0 = !cmp.eq(QL,#1) // if not round-to-zero + p0 = !cmp.eq(QH,#2) // and not rounding the other way + if (p0.new) B = PROD // go to inf + p0 = dfcmp.eq(B,B) // get exceptions + } + { + A = insert(B,#63,#0) + jumpr r31 + } + +#undef ONE +#define SIGN r28 +#undef NORMAL +#undef NO_OVF_UNF +#define P_INF p1 +#define P_ZERO p2 +.Ldiv_abnormal: + { + P_TMP = dfclass(A,#DFCLASS_NUMBER) + P_TMP = dfclass(B,#DFCLASS_NUMBER) + Q_POSITIVE = cmp.gt(SIGN,#-1) + } + { + P_INF = dfclass(A,#DFCLASS_INFINITE) + P_INF = dfclass(B,#DFCLASS_INFINITE) + } + { + P_ZERO = dfclass(A,#DFCLASS_ZERO) + P_ZERO = dfclass(B,#DFCLASS_ZERO) + } + { + if (!P_TMP) jump .Ldiv_nan + if (P_INF) jump .Ldiv_invalid + } + { + if (P_ZERO) jump .Ldiv_invalid + } + { + P_ZERO = dfclass(A,#DFCLASS_NONZERO) // nonzero + P_ZERO = dfclass(B,#DFCLASS_NONINFINITE) // non-infinite + } + { + P_INF = dfclass(A,#DFCLASS_NONINFINITE) // non-infinite + P_INF = dfclass(B,#DFCLASS_NONZERO) // nonzero + } + { + if (!P_ZERO) jump .Ldiv_zero_result + if (!P_INF) jump .Ldiv_inf_result + } + /* Now we've narrowed it down to (de)normal / (de)normal */ + /* Set up A/EXPA B/EXPB and go back */ +#undef P_ZERO +#undef P_INF +#define P_TMP2 p1 + { + P_TMP = dfclass(A,#DFCLASS_NORMAL) + P_TMP2 = dfclass(B,#DFCLASS_NORMAL) + TMP = ##0x00100000 + } + { + EXPBA = combine(BH,AH) + AH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32) // clear out hidden bit, sign bit + BH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32) // clear out hidden bit, sign bit + } + { + if (P_TMP) AH = or(AH,TMP) // if normal, add back in hidden bit + if (P_TMP2) BH = or(BH,TMP) // if normal, add back in hidden bit + } + { + QH = add(clb(A),#-DF_EXPBITS) + QL = add(clb(B),#-DF_EXPBITS) + TMP = #1 + } + { + EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32) + EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32) + } + { + A = asl(A,QH) + B = asl(B,QL) + if (!P_TMP) EXPA = sub(TMP,QH) + if (!P_TMP2) EXPB = sub(TMP,QL) + } // recreate values needed by resume coke + { + PROD = extractu(B,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS) + } + { + SFDEN = or(SFONE,PRODLO) + jump .Ldenorm_continue + } + +.Ldiv_zero_result: + { + AH = xor(AH,BH) + B = #0 + } + { + A = insert(B,#63,#0) + jumpr r31 + } +.Ldiv_inf_result: + { + p2 = dfclass(B,#DFCLASS_ZERO) + p2 = dfclass(A,#DFCLASS_NONINFINITE) + } + { + TMP = USR + if (!p2) jump 1f + AH = xor(AH,BH) + } + { + TMP = or(TMP,#0x04) // DBZ + } + { + USR = TMP + } +1: + { + B = combine(##0x7ff00000,#0) + p0 = dfcmp.uo(B,B) // take possible exception + } + { + A = insert(B,#63,#0) + jumpr r31 + } +.Ldiv_nan: + { + p0 = dfclass(A,#0x10) + p1 = dfclass(B,#0x10) + if (!p0.new) A = B + if (!p1.new) B = A + } + { + QH = convert_df2sf(A) // get possible invalid exceptions + QL = convert_df2sf(B) + } + { + A = #-1 + jumpr r31 + } + +.Ldiv_invalid: + { + TMP = ##0x7f800001 + } + { + A = convert_sf2df(TMP) // get invalid, get DF qNaN + jumpr r31 + } +END(__hexagon_divdf3) diff --git a/contrib/compiler-rt/lib/builtins/hexagon/dffma.S b/contrib/compiler-rt/lib/builtins/hexagon/dffma.S new file mode 100644 index 000000000000..97b885a3bf27 --- 
/dev/null +++ b/contrib/compiler-rt/lib/builtins/hexagon/dffma.S @@ -0,0 +1,705 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG +#define END(TAG) .size TAG,.-TAG + +/* Double Precision Multiply */ + + +#define A r1:0 +#define AH r1 +#define AL r0 +#define B r3:2 +#define BH r3 +#define BL r2 +#define C r5:4 +#define CH r5 +#define CL r4 + + + +#define BTMP r15:14 +#define BTMPH r15 +#define BTMPL r14 + +#define ATMP r13:12 +#define ATMPH r13 +#define ATMPL r12 + +#define CTMP r11:10 +#define CTMPH r11 +#define CTMPL r10 + +#define PP_LL r9:8 +#define PP_LL_H r9 +#define PP_LL_L r8 + +#define PP_ODD r7:6 +#define PP_ODD_H r7 +#define PP_ODD_L r6 + + +#define PP_HH r17:16 +#define PP_HH_H r17 +#define PP_HH_L r16 + +#define EXPA r18 +#define EXPB r19 +#define EXPBA r19:18 + +#define TMP r28 + +#define P_TMP p0 +#define PROD_NEG p3 +#define EXACT p2 +#define SWAP p1 + +#define MANTBITS 52 +#define HI_MANTBITS 20 +#define EXPBITS 11 +#define BIAS 1023 +#define STACKSPACE 32 + +#define ADJUST 4 + +#define FUDGE 7 +#define FUDGE2 3 + +#ifndef SR_ROUND_OFF +#define SR_ROUND_OFF 22 +#endif + + /* + * First, classify for normal values, and abort if abnormal + * + * Next, unpack mantissa into 0x1000_0000_0000_0000 + mant<<8 + * + * Since we know that the 2 MSBs of the H registers is zero, we should never carry + * the partial products that involve the H registers + * + * Try to buy X slots, at the expense of latency if needed + * + * We will have PP_HH with the upper bits of the product, PP_LL with the lower + * PP_HH can have a maximum of 0x03FF_FFFF_FFFF_FFFF or thereabouts + * PP_HH can have a minimum of 0x0100_0000_0000_0000 + * + * 0x0100_0000_0000_0000 has EXP of EXPA+EXPB-BIAS + * + * We need to align CTMP. + * If CTMP >> PP, convert PP to 64 bit with sticky, align CTMP, and follow normal add + * If CTMP << PP align CTMP and add 128 bits. Then compute sticky + * If CTMP ~= PP, align CTMP and add 128 bits. May have massive cancellation. + * + * Convert partial product and CTMP to 2's complement prior to addition + * + * After we add, we need to normalize into upper 64 bits, then compute sticky. 
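+ *
+ * The "with sticky" shifts mentioned above can be pictured with a small C
+ * helper (an illustrative sketch, not part of this file; the code below gets
+ * the same effect with extract/lsr plus a conditional OR):
+ *
+ *     static inline unsigned long long shr_sticky(unsigned long long x, int n)
+ *     {
+ *         if (n <= 0)  return x;
+ *         if (n > 63)  return x != 0;           // everything collapses to sticky
+ *         unsigned long long lost = x & ((1ULL << n) - 1);
+ *         return (x >> n) | (lost != 0);        // OR the lost bits into the LSB
+ *     }
+ *
+ * Keeping that single sticky bit is enough for the rounding at the end to see
+ * whether anything nonzero was discarded during alignment.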
+ * + * + */ + + .text + .global __hexagon_fmadf4 + .type __hexagon_fmadf4,@function + .global __hexagon_fmadf5 + .type __hexagon_fmadf5,@function + .global fma + .type fma,@function + Q6_ALIAS(fmadf5) + .p2align 5 +__hexagon_fmadf4: +__hexagon_fmadf5: +fma: + { + P_TMP = dfclass(A,#2) + P_TMP = dfclass(B,#2) + ATMP = #0 + BTMP = #0 + } + { + ATMP = insert(A,#MANTBITS,#EXPBITS-3) + BTMP = insert(B,#MANTBITS,#EXPBITS-3) + PP_ODD_H = ##0x10000000 + allocframe(#STACKSPACE) + } + { + PP_LL = mpyu(ATMPL,BTMPL) + if (!P_TMP) jump .Lfma_abnormal_ab + ATMPH = or(ATMPH,PP_ODD_H) + BTMPH = or(BTMPH,PP_ODD_H) + } + { + P_TMP = dfclass(C,#2) + if (!P_TMP.new) jump:nt .Lfma_abnormal_c + CTMP = combine(PP_ODD_H,#0) + PP_ODD = combine(#0,PP_LL_H) + } +.Lfma_abnormal_c_restart: + { + PP_ODD += mpyu(BTMPL,ATMPH) + CTMP = insert(C,#MANTBITS,#EXPBITS-3) + memd(r29+#0) = PP_HH + memd(r29+#8) = EXPBA + } + { + PP_ODD += mpyu(ATMPL,BTMPH) + EXPBA = neg(CTMP) + P_TMP = cmp.gt(CH,#-1) + TMP = xor(AH,BH) + } + { + EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS) + EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS) + PP_HH = combine(#0,PP_ODD_H) + if (!P_TMP) CTMP = EXPBA + } + { + PP_HH += mpyu(ATMPH,BTMPH) + PP_LL = combine(PP_ODD_L,PP_LL_L) +#undef PP_ODD +#undef PP_ODD_H +#undef PP_ODD_L +#undef ATMP +#undef ATMPL +#undef ATMPH +#undef BTMP +#undef BTMPL +#undef BTMPH +#define RIGHTLEFTSHIFT r13:12 +#define RIGHTSHIFT r13 +#define LEFTSHIFT r12 + + EXPA = add(EXPA,EXPB) +#undef EXPB +#undef EXPBA +#define EXPC r19 +#define EXPCA r19:18 + EXPC = extractu(CH,#EXPBITS,#HI_MANTBITS) + } + /* PP_HH:PP_LL now has product */ + /* CTMP is negated */ + /* EXPA,B,C are extracted */ + /* + * We need to negate PP + * Since we will be adding with carry later, if we need to negate, + * just invert all bits now, which we can do conditionally and in parallel + */ +#define PP_HH_TMP r15:14 +#define PP_LL_TMP r7:6 + { + EXPA = add(EXPA,#-BIAS+(ADJUST)) + PROD_NEG = !cmp.gt(TMP,#-1) + PP_LL_TMP = #0 + PP_HH_TMP = #0 + } + { + PP_LL_TMP = sub(PP_LL_TMP,PP_LL,PROD_NEG):carry + P_TMP = !cmp.gt(TMP,#-1) + SWAP = cmp.gt(EXPC,EXPA) // If C >> PP + if (SWAP.new) EXPCA = combine(EXPA,EXPC) + } + { + PP_HH_TMP = sub(PP_HH_TMP,PP_HH,PROD_NEG):carry + if (P_TMP) PP_LL = PP_LL_TMP +#undef PP_LL_TMP +#define CTMP2 r7:6 +#define CTMP2H r7 +#define CTMP2L r6 + CTMP2 = #0 + EXPC = sub(EXPA,EXPC) + } + { + if (P_TMP) PP_HH = PP_HH_TMP + P_TMP = cmp.gt(EXPC,#63) + if (SWAP) PP_LL = CTMP2 + if (SWAP) CTMP2 = PP_LL + } +#undef PP_HH_TMP +//#define ONE r15:14 +//#define S_ONE r14 +#define ZERO r15:14 +#define S_ZERO r15 +#undef PROD_NEG +#define P_CARRY p3 + { + if (SWAP) PP_HH = CTMP // Swap C and PP + if (SWAP) CTMP = PP_HH + if (P_TMP) EXPC = add(EXPC,#-64) + TMP = #63 + } + { + // If diff > 63, pre-shift-right by 64... 
+ if (P_TMP) CTMP2 = CTMP + TMP = asr(CTMPH,#31) + RIGHTSHIFT = min(EXPC,TMP) + LEFTSHIFT = #0 + } +#undef C +#undef CH +#undef CL +#define STICKIES r5:4 +#define STICKIESH r5 +#define STICKIESL r4 + { + if (P_TMP) CTMP = combine(TMP,TMP) // sign extension of pre-shift-right-64 + STICKIES = extract(CTMP2,RIGHTLEFTSHIFT) + CTMP2 = lsr(CTMP2,RIGHTSHIFT) + LEFTSHIFT = sub(#64,RIGHTSHIFT) + } + { + ZERO = #0 + TMP = #-2 + CTMP2 |= lsl(CTMP,LEFTSHIFT) + CTMP = asr(CTMP,RIGHTSHIFT) + } + { + P_CARRY = cmp.gtu(STICKIES,ZERO) // If we have sticky bits from C shift + if (P_CARRY.new) CTMP2L = and(CTMP2L,TMP) // make sure adding 1 == OR +#undef ZERO +#define ONE r15:14 +#define S_ONE r14 + ONE = #1 + STICKIES = #0 + } + { + PP_LL = add(CTMP2,PP_LL,P_CARRY):carry // use the carry to add the sticky + } + { + PP_HH = add(CTMP,PP_HH,P_CARRY):carry + TMP = #62 + } + /* + * PP_HH:PP_LL now holds the sum + * We may need to normalize left, up to ??? bits. + * + * I think that if we have massive cancellation, the range we normalize by + * is still limited + */ + { + LEFTSHIFT = add(clb(PP_HH),#-2) + if (!cmp.eq(LEFTSHIFT.new,TMP)) jump:t 1f // all sign bits? + } + /* We had all sign bits, shift left by 62. */ + { + CTMP = extractu(PP_LL,#62,#2) + PP_LL = asl(PP_LL,#62) + EXPA = add(EXPA,#-62) // And adjust exponent of result + } + { + PP_HH = insert(CTMP,#62,#0) // Then shift 63 + } + { + LEFTSHIFT = add(clb(PP_HH),#-2) + } + .falign +1: + { + CTMP = asl(PP_HH,LEFTSHIFT) + STICKIES |= asl(PP_LL,LEFTSHIFT) + RIGHTSHIFT = sub(#64,LEFTSHIFT) + EXPA = sub(EXPA,LEFTSHIFT) + } + { + CTMP |= lsr(PP_LL,RIGHTSHIFT) + EXACT = cmp.gtu(ONE,STICKIES) + TMP = #BIAS+BIAS-2 + } + { + if (!EXACT) CTMPL = or(CTMPL,S_ONE) + // If EXPA is overflow/underflow, jump to ovf_unf + P_TMP = !cmp.gt(EXPA,TMP) + P_TMP = cmp.gt(EXPA,#1) + if (!P_TMP.new) jump:nt .Lfma_ovf_unf + } + { + // XXX: FIXME: should PP_HH for check of zero be CTMP? + P_TMP = cmp.gtu(ONE,CTMP) // is result true zero? + A = convert_d2df(CTMP) + EXPA = add(EXPA,#-BIAS-60) + PP_HH = memd(r29+#0) + } + { + AH += asl(EXPA,#HI_MANTBITS) + EXPCA = memd(r29+#8) + if (!P_TMP) dealloc_return // not zero, return + } +.Ladd_yields_zero: + /* We had full cancellation. Return +/- zero (-0 when round-down) */ + { + TMP = USR + A = #0 + } + { + TMP = extractu(TMP,#2,#SR_ROUND_OFF) + PP_HH = memd(r29+#0) + EXPCA = memd(r29+#8) + } + { + p0 = cmp.eq(TMP,#2) + if (p0.new) AH = ##0x80000000 + dealloc_return + } + +#undef RIGHTLEFTSHIFT +#undef RIGHTSHIFT +#undef LEFTSHIFT +#undef CTMP2 +#undef CTMP2H +#undef CTMP2L + +.Lfma_ovf_unf: + { + p0 = cmp.gtu(ONE,CTMP) + if (p0.new) jump:nt .Ladd_yields_zero + } + { + A = convert_d2df(CTMP) + EXPA = add(EXPA,#-BIAS-60) + TMP = EXPA + } +#define NEW_EXPB r7 +#define NEW_EXPA r6 + { + AH += asl(EXPA,#HI_MANTBITS) + NEW_EXPB = extractu(AH,#EXPBITS,#HI_MANTBITS) + } + { + NEW_EXPA = add(EXPA,NEW_EXPB) + PP_HH = memd(r29+#0) + EXPCA = memd(r29+#8) +#undef PP_HH +#undef PP_HH_H +#undef PP_HH_L +#undef EXPCA +#undef EXPC +#undef EXPA +#undef PP_LL +#undef PP_LL_H +#undef PP_LL_L +#define EXPA r6 +#define EXPB r7 +#define EXPBA r7:6 +#define ATMP r9:8 +#define ATMPH r9 +#define ATMPL r8 +#undef NEW_EXPB +#undef NEW_EXPA + ATMP = abs(CTMP) + } + { + p0 = cmp.gt(EXPA,##BIAS+BIAS) + if (p0.new) jump:nt .Lfma_ovf + } + { + p0 = cmp.gt(EXPA,#0) + if (p0.new) jump:nt .Lpossible_unf + } + { + // TMP has original EXPA. 
+ // ATMP is corresponding value + // Normalize ATMP and shift right to correct location + EXPB = add(clb(ATMP),#-2) // Amount to left shift to normalize + EXPA = sub(#1+5,TMP) // Amount to right shift to denormalize + p3 = cmp.gt(CTMPH,#-1) + } + /* Underflow */ + /* We know that the infinte range exponent should be EXPA */ + /* CTMP is 2's complement, ATMP is abs(CTMP) */ + { + EXPA = add(EXPA,EXPB) // how much to shift back right + ATMP = asl(ATMP,EXPB) // shift left + AH = USR + TMP = #63 + } + { + EXPB = min(EXPA,TMP) + EXPA = #0 + AL = #0x0030 + } + { + B = extractu(ATMP,EXPBA) + ATMP = asr(ATMP,EXPB) + } + { + p0 = cmp.gtu(ONE,B) + if (!p0.new) ATMPL = or(ATMPL,S_ONE) + ATMPH = setbit(ATMPH,#HI_MANTBITS+FUDGE2) + } + { + CTMP = neg(ATMP) + p1 = bitsclr(ATMPL,#(1<<FUDGE2)-1) + if (!p1.new) AH = or(AH,AL) + B = #0 + } + { + if (p3) CTMP = ATMP + USR = AH + TMP = #-BIAS-(MANTBITS+FUDGE2) + } + { + A = convert_d2df(CTMP) + } + { + AH += asl(TMP,#HI_MANTBITS) + dealloc_return + } +.Lpossible_unf: + { + TMP = ##0x7fefffff + ATMP = abs(CTMP) + } + { + p0 = cmp.eq(AL,#0) + p0 = bitsclr(AH,TMP) + if (!p0.new) dealloc_return:t + TMP = #0x7fff + } + { + p0 = bitsset(ATMPH,TMP) + BH = USR + BL = #0x0030 + } + { + if (p0) BH = or(BH,BL) + } + { + USR = BH + } + { + p0 = dfcmp.eq(A,A) + dealloc_return + } +.Lfma_ovf: + { + TMP = USR + CTMP = combine(##0x7fefffff,#-1) + A = CTMP + } + { + ATMP = combine(##0x7ff00000,#0) + BH = extractu(TMP,#2,#SR_ROUND_OFF) + TMP = or(TMP,#0x28) + } + { + USR = TMP + BH ^= lsr(AH,#31) + BL = BH + } + { + p0 = !cmp.eq(BL,#1) + p0 = !cmp.eq(BH,#2) + } + { + p0 = dfcmp.eq(ATMP,ATMP) + if (p0.new) CTMP = ATMP + } + { + A = insert(CTMP,#63,#0) + dealloc_return + } +#undef CTMP +#undef CTMPH +#undef CTMPL +#define BTMP r11:10 +#define BTMPH r11 +#define BTMPL r10 + +#undef STICKIES +#undef STICKIESH +#undef STICKIESL +#define C r5:4 +#define CH r5 +#define CL r4 + +.Lfma_abnormal_ab: + { + ATMP = extractu(A,#63,#0) + BTMP = extractu(B,#63,#0) + deallocframe + } + { + p3 = cmp.gtu(ATMP,BTMP) + if (!p3.new) A = B // sort values + if (!p3.new) B = A + } + { + p0 = dfclass(A,#0x0f) // A NaN? + if (!p0.new) jump:nt .Lnan + if (!p3) ATMP = BTMP + if (!p3) BTMP = ATMP + } + { + p1 = dfclass(A,#0x08) // A is infinity + p1 = dfclass(B,#0x0e) // B is nonzero + } + { + p0 = dfclass(A,#0x08) // a is inf + p0 = dfclass(B,#0x01) // b is zero + } + { + if (p1) jump .Lab_inf + p2 = dfclass(B,#0x01) + } + { + if (p0) jump .Linvalid + if (p2) jump .Lab_true_zero + TMP = ##0x7c000000 + } + // We are left with a normal or subnormal times a subnormal, A > B + // If A and B are both very small, we will go to a single sticky bit; replace + // A and B lower 63 bits with 0x0010_0000_0000_0000, which yields equivalent results + // if A and B might multiply to something bigger, decrease A exp and increase B exp + // and start over + { + p0 = bitsclr(AH,TMP) + if (p0.new) jump:nt .Lfma_ab_tiny + } + { + TMP = add(clb(BTMP),#-EXPBITS) + } + { + BTMP = asl(BTMP,TMP) + } + { + B = insert(BTMP,#63,#0) + AH -= asl(TMP,#HI_MANTBITS) + } + jump fma + +.Lfma_ab_tiny: + ATMP = combine(##0x00100000,#0) + { + A = insert(ATMP,#63,#0) + B = insert(ATMP,#63,#0) + } + jump fma + +.Lab_inf: + { + B = lsr(B,#63) + p0 = dfclass(C,#0x10) + } + { + A ^= asl(B,#63) + if (p0) jump .Lnan + } + { + p1 = dfclass(C,#0x08) + if (p1.new) jump:nt .Lfma_inf_plus_inf + } + /* A*B is +/- inf, C is finite. 
Return A */ + { + jumpr r31 + } + .falign +.Lfma_inf_plus_inf: + { // adding infinities of different signs is invalid + p0 = dfcmp.eq(A,C) + if (!p0.new) jump:nt .Linvalid + } + { + jumpr r31 + } + +.Lnan: + { + p0 = dfclass(B,#0x10) + p1 = dfclass(C,#0x10) + if (!p0.new) B = A + if (!p1.new) C = A + } + { // find sNaNs + BH = convert_df2sf(B) + BL = convert_df2sf(C) + } + { + BH = convert_df2sf(A) + A = #-1 + jumpr r31 + } + +.Linvalid: + { + TMP = ##0x7f800001 // sp snan + } + { + A = convert_sf2df(TMP) + jumpr r31 + } + +.Lab_true_zero: + // B is zero, A is finite number + { + p0 = dfclass(C,#0x10) + if (p0.new) jump:nt .Lnan + if (p0.new) A = C + } + { + p0 = dfcmp.eq(B,C) // is C also zero? + AH = lsr(AH,#31) // get sign + } + { + BH ^= asl(AH,#31) // form correctly signed zero in B + if (!p0) A = C // If C is not zero, return C + if (!p0) jumpr r31 + } + /* B has correctly signed zero, C is also zero */ +.Lzero_plus_zero: + { + p0 = cmp.eq(B,C) // yes, scalar equals. +0++0 or -0+-0 + if (p0.new) jumpr:t r31 + A = B + } + { + TMP = USR + } + { + TMP = extractu(TMP,#2,#SR_ROUND_OFF) + A = #0 + } + { + p0 = cmp.eq(TMP,#2) + if (p0.new) AH = ##0x80000000 + jumpr r31 + } +#undef BTMP +#undef BTMPH +#undef BTMPL +#define CTMP r11:10 + .falign +.Lfma_abnormal_c: + /* We know that AB is normal * normal */ + /* C is not normal: zero, subnormal, inf, or NaN. */ + { + p0 = dfclass(C,#0x10) // is C NaN? + if (p0.new) jump:nt .Lnan + if (p0.new) A = C // move NaN to A + deallocframe + } + { + p0 = dfclass(C,#0x08) // is C inf? + if (p0.new) A = C // return C + if (p0.new) jumpr:nt r31 + } + // zero or subnormal + // If we have a zero, and we know AB is normal*normal, we can just call normal multiply + { + p0 = dfclass(C,#0x01) // is C zero? + if (p0.new) jump:nt __hexagon_muldf3 + TMP = #1 + } + // Left with: subnormal + // Adjust C and jump back to restart + { + allocframe(#STACKSPACE) // oops, deallocated above, re-allocate frame + CTMP = #0 + CH = insert(TMP,#EXPBITS,#HI_MANTBITS) + jump .Lfma_abnormal_c_restart + } +END(fma) diff --git a/contrib/compiler-rt/lib/builtins/hexagon/dfminmax.S b/contrib/compiler-rt/lib/builtins/hexagon/dfminmax.S new file mode 100644 index 000000000000..41122911f183 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/hexagon/dfminmax.S @@ -0,0 +1,79 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define A r1:0 +#define B r3:2 +#define ATMP r5:4 + + +#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG +#define END(TAG) .size TAG,.-TAG + +/* + * Min and Max return A if B is NaN, or B if A is NaN + * Otherwise, they return the smaller or bigger value + * + * If values are equal, we want to favor -0.0 for min and +0.0 for max. + */ + +/* + * Compares always return false for NaN + * if (isnan(A)) A = B; if (A > B) A = B will only trigger at most one of those options. 
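+ *
+ * In C the same idea reads roughly as follows (an illustrative sketch only;
+ * the routines below do the selects with predicated moves instead):
+ *
+ *     #include <math.h>
+ *     #include <string.h>
+ *     #include <stdint.h>
+ *
+ *     static double fmin_sketch(double a, double b)
+ *     {
+ *         if (isnan(a)) a = b;       // NaN operand: take the other value
+ *         if (a > b)    a = b;       // '>' is false if either side is NaN
+ *         if (a == b) {              // +0.0 == -0.0, so OR the bit patterns
+ *             uint64_t ua, ub;       //   to make -0.0 win for min
+ *             memcpy(&ua, &a, 8);
+ *             memcpy(&ub, &b, 8);
+ *             ua |= ub;
+ *             memcpy(&a, &ua, 8);
+ *         }
+ *         return a;
+ *     }
+ *
+ * fmax is the mirror image: compare the other way around and AND the bit
+ * patterns in the equal case so that +0.0 wins over -0.0.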
+ */ + .text + .global __hexagon_mindf3 + .global __hexagon_maxdf3 + .global fmin + .type fmin,@function + .global fmax + .type fmax,@function + .type __hexagon_mindf3,@function + .type __hexagon_maxdf3,@function + Q6_ALIAS(mindf3) + Q6_ALIAS(maxdf3) + .p2align 5 +__hexagon_mindf3: +fmin: + { + p0 = dfclass(A,#0x10) // If A is a number + p1 = dfcmp.gt(A,B) // AND B > A, don't swap + ATMP = A + } + { + if (p0) A = B // if A is NaN use B + if (p1) A = B // gt is always false if either is NaN + p2 = dfcmp.eq(A,B) // if A == B + if (!p2.new) jumpr:t r31 + } + /* A == B, return A|B to select -0.0 over 0.0 */ + { + A = or(ATMP,B) + jumpr r31 + } +END(__hexagon_mindf3) + .falign +__hexagon_maxdf3: +fmax: + { + p0 = dfclass(A,#0x10) + p1 = dfcmp.gt(B,A) + ATMP = A + } + { + if (p0) A = B + if (p1) A = B + p2 = dfcmp.eq(A,B) + if (!p2.new) jumpr:t r31 + } + /* A == B, return A&B to select 0.0 over -0.0 */ + { + A = and(ATMP,B) + jumpr r31 + } +END(__hexagon_maxdf3) diff --git a/contrib/compiler-rt/lib/builtins/hexagon/dfmul.S b/contrib/compiler-rt/lib/builtins/hexagon/dfmul.S new file mode 100644 index 000000000000..fde6d77bdcff --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/hexagon/dfmul.S @@ -0,0 +1,418 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +/* Double Precision Multiply */ +#define A r1:0 +#define AH r1 +#define AL r0 +#define B r3:2 +#define BH r3 +#define BL r2 + +#define BTMP r5:4 +#define BTMPH r5 +#define BTMPL r4 + +#define PP_ODD r7:6 +#define PP_ODD_H r7 +#define PP_ODD_L r6 + +#define ONE r9:8 +#define S_ONE r8 +#define S_ZERO r9 + +#define PP_HH r11:10 +#define PP_HH_H r11 +#define PP_HH_L r10 + +#define ATMP r13:12 +#define ATMPH r13 +#define ATMPL r12 + +#define PP_LL r15:14 +#define PP_LL_H r15 +#define PP_LL_L r14 + +#define TMP r28 + +#define MANTBITS 52 +#define HI_MANTBITS 20 +#define EXPBITS 11 +#define BIAS 1024 +#define MANTISSA_TO_INT_BIAS 52 + +/* Some constant to adjust normalization amount in error code */ +/* Amount to right shift the partial product to get to a denorm */ +#define FUDGE 5 + +#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG +#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG +#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG +#define END(TAG) .size TAG,.-TAG + +#define SR_ROUND_OFF 22 + .text + .global __hexagon_muldf3 + .type __hexagon_muldf3,@function + Q6_ALIAS(muldf3) + FAST_ALIAS(muldf3) + FAST2_ALIAS(muldf3) + .p2align 5 +__hexagon_muldf3: + { + p0 = dfclass(A,#2) + p0 = dfclass(B,#2) + ATMP = combine(##0x40000000,#0) + } + { + ATMP = insert(A,#MANTBITS,#EXPBITS-1) + BTMP = asl(B,#EXPBITS-1) + TMP = #-BIAS + ONE = #1 + } + { + PP_ODD = mpyu(BTMPL,ATMPH) + BTMP = insert(ONE,#2,#62) + } + /* since we know that the MSB of the H registers is zero, we should never carry */ + /* H <= 2^31-1. L <= 2^32-1. Therefore, HL <= 2^63-2^32-2^31+1 */ + /* Adding 2 HLs, we get 2^64-3*2^32+2 maximum. */ + /* Therefore, we can add 3 2^32-1 values safely without carry. We only need one. 
*/ + { + PP_LL = mpyu(ATMPL,BTMPL) + PP_ODD += mpyu(ATMPL,BTMPH) + } + { + PP_ODD += lsr(PP_LL,#32) + PP_HH = mpyu(ATMPH,BTMPH) + BTMP = combine(##BIAS+BIAS-4,#0) + } + { + PP_HH += lsr(PP_ODD,#32) + if (!p0) jump .Lmul_abnormal + p1 = cmp.eq(PP_LL_L,#0) // 64 lsb's 0? + p1 = cmp.eq(PP_ODD_L,#0) // 64 lsb's 0? + } + /* + * PP_HH can have a maximum of 0x3FFF_FFFF_FFFF_FFFF or thereabouts + * PP_HH can have a minimum of 0x1000_0000_0000_0000 or so + */ +#undef PP_ODD +#undef PP_ODD_H +#undef PP_ODD_L +#define EXP10 r7:6 +#define EXP1 r7 +#define EXP0 r6 + { + if (!p1) PP_HH_L = or(PP_HH_L,S_ONE) + EXP0 = extractu(AH,#EXPBITS,#HI_MANTBITS) + EXP1 = extractu(BH,#EXPBITS,#HI_MANTBITS) + } + { + PP_LL = neg(PP_HH) + EXP0 += add(TMP,EXP1) + TMP = xor(AH,BH) + } + { + if (!p2.new) PP_HH = PP_LL + p2 = cmp.gt(TMP,#-1) + p0 = !cmp.gt(EXP0,BTMPH) + p0 = cmp.gt(EXP0,BTMPL) + if (!p0.new) jump:nt .Lmul_ovf_unf + } + { + A = convert_d2df(PP_HH) + EXP0 = add(EXP0,#-BIAS-58) + } + { + AH += asl(EXP0,#HI_MANTBITS) + jumpr r31 + } + + .falign +.Lpossible_unf: + /* We end up with a positive exponent */ + /* But we may have rounded up to an exponent of 1. */ + /* If the exponent is 1, if we rounded up to it + * we need to also raise underflow + * Fortunately, this is pretty easy to detect, we must have +/- 0x0010_0000_0000_0000 + * And the PP should also have more than one bit set + */ + /* Note: ATMP should have abs(PP_HH) */ + /* Note: BTMPL should have 0x7FEFFFFF */ + { + p0 = cmp.eq(AL,#0) + p0 = bitsclr(AH,BTMPL) + if (!p0.new) jumpr:t r31 + BTMPH = #0x7fff + } + { + p0 = bitsset(ATMPH,BTMPH) + BTMPL = USR + BTMPH = #0x030 + } + { + if (p0) BTMPL = or(BTMPL,BTMPH) + } + { + USR = BTMPL + } + { + p0 = dfcmp.eq(A,A) + jumpr r31 + } + .falign +.Lmul_ovf_unf: + { + A = convert_d2df(PP_HH) + ATMP = abs(PP_HH) // take absolute value + EXP1 = add(EXP0,#-BIAS-58) + } + { + AH += asl(EXP1,#HI_MANTBITS) + EXP1 = extractu(AH,#EXPBITS,#HI_MANTBITS) + BTMPL = ##0x7FEFFFFF + } + { + EXP1 += add(EXP0,##-BIAS-58) + //BTMPH = add(clb(ATMP),#-2) + BTMPH = #0 + } + { + p0 = cmp.gt(EXP1,##BIAS+BIAS-2) // overflow + if (p0.new) jump:nt .Lmul_ovf + } + { + p0 = cmp.gt(EXP1,#0) + if (p0.new) jump:nt .Lpossible_unf + BTMPH = sub(EXP0,BTMPH) + TMP = #63 // max amount to shift + } + /* Underflow */ + /* + * PP_HH has the partial product with sticky LSB. + * PP_HH can have a maximum of 0x3FFF_FFFF_FFFF_FFFF or thereabouts + * PP_HH can have a minimum of 0x1000_0000_0000_0000 or so + * The exponent of PP_HH is in EXP1, which is non-positive (0 or negative) + * That's the exponent that happens after the normalization + * + * EXP0 has the exponent that, when added to the normalized value, is out of range. + * + * Strategy: + * + * * Shift down bits, with sticky bit, such that the bits are aligned according + * to the LZ count and appropriate exponent, but not all the way to mantissa + * field, keep around the last few bits. + * * Put a 1 near the MSB + * * Check the LSBs for inexact; if inexact also set underflow + * * Convert [u]d2df -- will correctly round according to rounding mode + * * Replace exponent field with zero + * + * + */ + + + { + BTMPL = #0 // offset for extract + BTMPH = sub(#FUDGE,BTMPH) // amount to right shift + } + { + p3 = cmp.gt(PP_HH_H,#-1) // is it positive? 
+ BTMPH = min(BTMPH,TMP) // Don't shift more than 63 + PP_HH = ATMP + } + { + TMP = USR + PP_LL = extractu(PP_HH,BTMP) + } + { + PP_HH = asr(PP_HH,BTMPH) + BTMPL = #0x0030 // underflow flag + AH = insert(S_ZERO,#EXPBITS,#HI_MANTBITS) + } + { + p0 = cmp.gtu(ONE,PP_LL) // Did we extract all zeros? + if (!p0.new) PP_HH_L = or(PP_HH_L,S_ONE) // add sticky bit + PP_HH_H = setbit(PP_HH_H,#HI_MANTBITS+3) // Add back in a bit so we can use convert instruction + } + { + PP_LL = neg(PP_HH) + p1 = bitsclr(PP_HH_L,#0x7) // Are the LSB's clear? + if (!p1.new) TMP = or(BTMPL,TMP) // If not, Inexact+Underflow + } + { + if (!p3) PP_HH = PP_LL + USR = TMP + } + { + A = convert_d2df(PP_HH) // Do rounding + p0 = dfcmp.eq(A,A) // realize exception + } + { + AH = insert(S_ZERO,#EXPBITS-1,#HI_MANTBITS+1) // Insert correct exponent + jumpr r31 + } + .falign +.Lmul_ovf: + // We get either max finite value or infinity. Either way, overflow+inexact + { + TMP = USR + ATMP = combine(##0x7fefffff,#-1) // positive max finite + A = PP_HH + } + { + PP_LL_L = extractu(TMP,#2,#SR_ROUND_OFF) // rounding bits + TMP = or(TMP,#0x28) // inexact + overflow + BTMP = combine(##0x7ff00000,#0) // positive infinity + } + { + USR = TMP + PP_LL_L ^= lsr(AH,#31) // Does sign match rounding? + TMP = PP_LL_L // unmodified rounding mode + } + { + p0 = !cmp.eq(TMP,#1) // If not round-to-zero and + p0 = !cmp.eq(PP_LL_L,#2) // Not rounding the other way, + if (p0.new) ATMP = BTMP // we should get infinity + p0 = dfcmp.eq(A,A) // Realize FP exception if enabled + } + { + A = insert(ATMP,#63,#0) // insert inf/maxfinite, leave sign + jumpr r31 + } + +.Lmul_abnormal: + { + ATMP = extractu(A,#63,#0) // strip off sign + BTMP = extractu(B,#63,#0) // strip off sign + } + { + p3 = cmp.gtu(ATMP,BTMP) + if (!p3.new) A = B // sort values + if (!p3.new) B = A // sort values + } + { + // Any NaN --> NaN, possibly raise invalid if sNaN + p0 = dfclass(A,#0x0f) // A not NaN? + if (!p0.new) jump:nt .Linvalid_nan + if (!p3) ATMP = BTMP + if (!p3) BTMP = ATMP + } + { + // Infinity * nonzero number is infinity + p1 = dfclass(A,#0x08) // A is infinity + p1 = dfclass(B,#0x0e) // B is nonzero + } + { + // Infinity * zero --> NaN, raise invalid + // Other zeros return zero + p0 = dfclass(A,#0x08) // A is infinity + p0 = dfclass(B,#0x01) // B is zero + } + { + if (p1) jump .Ltrue_inf + p2 = dfclass(B,#0x01) + } + { + if (p0) jump .Linvalid_zeroinf + if (p2) jump .Ltrue_zero // so return zero + TMP = ##0x7c000000 + } + // We are left with a normal or subnormal times a subnormal. A > B + // If A and B are both very small (exp(a) < BIAS-MANTBITS), + // we go to a single sticky bit, which we can round easily. + // If A and B might multiply to something bigger, decrease A exponent and increase + // B exponent and try again + { + p0 = bitsclr(AH,TMP) + if (p0.new) jump:nt .Lmul_tiny + } + { + TMP = cl0(BTMP) + } + { + TMP = add(TMP,#-EXPBITS) + } + { + BTMP = asl(BTMP,TMP) + } + { + B = insert(BTMP,#63,#0) + AH -= asl(TMP,#HI_MANTBITS) + } + jump __hexagon_muldf3 +.Lmul_tiny: + { + TMP = USR + A = xor(A,B) // get sign bit + } + { + TMP = or(TMP,#0x30) // Inexact + Underflow + A = insert(ONE,#63,#0) // put in rounded up value + BTMPH = extractu(TMP,#2,#SR_ROUND_OFF) // get rounding mode + } + { + USR = TMP + p0 = cmp.gt(BTMPH,#1) // Round towards pos/neg inf? 
+ if (!p0.new) AL = #0 // If not, zero + BTMPH ^= lsr(AH,#31) // rounding my way --> set LSB + } + { + p0 = cmp.eq(BTMPH,#3) // if rounding towards right inf + if (!p0.new) AL = #0 // don't go to zero + jumpr r31 + } +.Linvalid_zeroinf: + { + TMP = USR + } + { + A = #-1 + TMP = or(TMP,#2) + } + { + USR = TMP + } + { + p0 = dfcmp.uo(A,A) // force exception if enabled + jumpr r31 + } +.Linvalid_nan: + { + p0 = dfclass(B,#0x0f) // if B is not NaN + TMP = convert_df2sf(A) // will generate invalid if sNaN + if (p0.new) B = A // make it whatever A is + } + { + BL = convert_df2sf(B) // will generate invalid if sNaN + A = #-1 + jumpr r31 + } + .falign +.Ltrue_zero: + { + A = B + B = A + } +.Ltrue_inf: + { + BH = extract(BH,#1,#31) + } + { + AH ^= asl(BH,#31) + jumpr r31 + } +END(__hexagon_muldf3) + +#undef ATMP +#undef ATMPL +#undef ATMPH +#undef BTMP +#undef BTMPL +#undef BTMPH diff --git a/contrib/compiler-rt/lib/builtins/hexagon/dfsqrt.S b/contrib/compiler-rt/lib/builtins/hexagon/dfsqrt.S new file mode 100644 index 000000000000..027d9e1fde43 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/hexagon/dfsqrt.S @@ -0,0 +1,406 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +/* Double Precision square root */ + +#define EXP r28 + +#define A r1:0 +#define AH r1 +#define AL r0 + +#define SFSH r3:2 +#define SF_S r3 +#define SF_H r2 + +#define SFHALF_SONE r5:4 +#define S_ONE r4 +#define SFHALF r5 +#define SF_D r6 +#define SF_E r7 +#define RECIPEST r8 +#define SFRAD r9 + +#define FRACRAD r11:10 +#define FRACRADH r11 +#define FRACRADL r10 + +#define ROOT r13:12 +#define ROOTHI r13 +#define ROOTLO r12 + +#define PROD r15:14 +#define PRODHI r15 +#define PRODLO r14 + +#define P_TMP p0 +#define P_EXP1 p1 +#define NORMAL p2 + +#define SF_EXPBITS 8 +#define SF_MANTBITS 23 + +#define DF_EXPBITS 11 +#define DF_MANTBITS 52 + +#define DF_BIAS 0x3ff + +#define DFCLASS_ZERO 0x01 +#define DFCLASS_NORMAL 0x02 +#define DFCLASS_DENORMAL 0x02 +#define DFCLASS_INFINITE 0x08 +#define DFCLASS_NAN 0x10 + +#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG; .type __qdsp_##TAG,@function +#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG; .type __hexagon_fast_##TAG,@function +#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG; .type __hexagon_fast2_##TAG,@function +#define END(TAG) .size TAG,.-TAG + + .text + .global __hexagon_sqrtdf2 + .type __hexagon_sqrtdf2,@function + .global __hexagon_sqrt + .type __hexagon_sqrt,@function + Q6_ALIAS(sqrtdf2) + Q6_ALIAS(sqrt) + FAST_ALIAS(sqrtdf2) + FAST_ALIAS(sqrt) + FAST2_ALIAS(sqrtdf2) + FAST2_ALIAS(sqrt) + .type sqrt,@function + .p2align 5 +__hexagon_sqrtdf2: +__hexagon_sqrt: + { + PROD = extractu(A,#SF_MANTBITS+1,#DF_MANTBITS-SF_MANTBITS) + EXP = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32) + SFHALF_SONE = combine(##0x3f000004,#1) + } + { + NORMAL = dfclass(A,#DFCLASS_NORMAL) // Is it normal + NORMAL = cmp.gt(AH,#-1) // and positive? 
+ if (!NORMAL.new) jump:nt .Lsqrt_abnormal + SFRAD = or(SFHALF,PRODLO) + } +#undef NORMAL +.Ldenormal_restart: + { + FRACRAD = A + SF_E,P_TMP = sfinvsqrta(SFRAD) + SFHALF = and(SFHALF,#-16) + SFSH = #0 + } +#undef A +#undef AH +#undef AL +#define ERROR r1:0 +#define ERRORHI r1 +#define ERRORLO r0 + // SF_E : reciprocal square root + // SF_H : half rsqrt + // sf_S : square root + // SF_D : error term + // SFHALF: 0.5 + { + SF_S += sfmpy(SF_E,SFRAD):lib // s0: root + SF_H += sfmpy(SF_E,SFHALF):lib // h0: 0.5*y0. Could also decrement exponent... + SF_D = SFHALF +#undef SFRAD +#define SHIFTAMT r9 + SHIFTAMT = and(EXP,#1) + } + { + SF_D -= sfmpy(SF_S,SF_H):lib // d0: 0.5-H*S = 0.5-0.5*~1 + FRACRADH = insert(S_ONE,#DF_EXPBITS+1,#DF_MANTBITS-32) // replace upper bits with hidden + P_EXP1 = cmp.gtu(SHIFTAMT,#0) + } + { + SF_S += sfmpy(SF_S,SF_D):lib // s1: refine sqrt + SF_H += sfmpy(SF_H,SF_D):lib // h1: refine half-recip + SF_D = SFHALF + SHIFTAMT = mux(P_EXP1,#8,#9) + } + { + SF_D -= sfmpy(SF_S,SF_H):lib // d1: error term + FRACRAD = asl(FRACRAD,SHIFTAMT) // Move fracrad bits to right place + SHIFTAMT = mux(P_EXP1,#3,#2) + } + { + SF_H += sfmpy(SF_H,SF_D):lib // d2: rsqrt + // cool trick: half of 1/sqrt(x) has same mantissa as 1/sqrt(x). + PROD = asl(FRACRAD,SHIFTAMT) // fracrad<<(2+exp1) + } + { + SF_H = and(SF_H,##0x007fffff) + } + { + SF_H = add(SF_H,##0x00800000 - 3) + SHIFTAMT = mux(P_EXP1,#7,#8) + } + { + RECIPEST = asl(SF_H,SHIFTAMT) + SHIFTAMT = mux(P_EXP1,#15-(1+1),#15-(1+0)) + } + { + ROOT = mpyu(RECIPEST,PRODHI) // root = mpyu_full(recipest,hi(fracrad<<(2+exp1))) + } + +#undef SFSH // r3:2 +#undef SF_H // r2 +#undef SF_S // r3 +#undef S_ONE // r4 +#undef SFHALF // r5 +#undef SFHALF_SONE // r5:4 +#undef SF_D // r6 +#undef SF_E // r7 + +#define HL r3:2 +#define LL r5:4 +#define HH r7:6 + +#undef P_EXP1 +#define P_CARRY0 p1 +#define P_CARRY1 p2 +#define P_CARRY2 p3 + + /* Iteration 0 */ + /* Maybe we can save a cycle by starting with ERROR=asl(fracrad), then as we multiply */ + /* We can shift and subtract instead of shift and add? 
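+
+      Each of the three iteration blocks below is one fixed-point Newton step
+      for the square root.  As a rough sketch of the math (not of the exact
+      fixed-point scaling; frac is the normalized input mantissa in FRACRAD and
+      recipest was built from the half-reciprocal-sqrt seed, i.e. roughly
+      0.5/sqrt(frac)):
+
+          err   = frac - root * root;   // remainder at the current precision
+          root += err * recipest;       // root += (frac - root^2) / (2*root)
+
+      The remainder is formed with shifts of 15, then 31, then 47 bits, so each
+      pass refines ROOT at successively higher precision.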
*/ + { + ERROR = asl(FRACRAD,#15) + PROD = mpyu(ROOTHI,ROOTHI) + P_CARRY0 = cmp.eq(r0,r0) + } + { + ERROR -= asl(PROD,#15) + PROD = mpyu(ROOTHI,ROOTLO) + P_CARRY1 = cmp.eq(r0,r0) + } + { + ERROR -= lsr(PROD,#16) + P_CARRY2 = cmp.eq(r0,r0) + } + { + ERROR = mpyu(ERRORHI,RECIPEST) + } + { + ROOT += lsr(ERROR,SHIFTAMT) + SHIFTAMT = add(SHIFTAMT,#16) + ERROR = asl(FRACRAD,#31) // for next iter + } + /* Iteration 1 */ + { + PROD = mpyu(ROOTHI,ROOTHI) + ERROR -= mpyu(ROOTHI,ROOTLO) // amount is 31, no shift needed + } + { + ERROR -= asl(PROD,#31) + PROD = mpyu(ROOTLO,ROOTLO) + } + { + ERROR -= lsr(PROD,#33) + } + { + ERROR = mpyu(ERRORHI,RECIPEST) + } + { + ROOT += lsr(ERROR,SHIFTAMT) + SHIFTAMT = add(SHIFTAMT,#16) + ERROR = asl(FRACRAD,#47) // for next iter + } + /* Iteration 2 */ + { + PROD = mpyu(ROOTHI,ROOTHI) + } + { + ERROR -= asl(PROD,#47) + PROD = mpyu(ROOTHI,ROOTLO) + } + { + ERROR -= asl(PROD,#16) // bidir shr 31-47 + PROD = mpyu(ROOTLO,ROOTLO) + } + { + ERROR -= lsr(PROD,#17) // 64-47 + } + { + ERROR = mpyu(ERRORHI,RECIPEST) + } + { + ROOT += lsr(ERROR,SHIFTAMT) + } +#undef ERROR +#undef PROD +#undef PRODHI +#undef PRODLO +#define REM_HI r15:14 +#define REM_HI_HI r15 +#define REM_LO r1:0 +#undef RECIPEST +#undef SHIFTAMT +#define TWOROOT_LO r9:8 + /* Adjust Root */ + { + HL = mpyu(ROOTHI,ROOTLO) + LL = mpyu(ROOTLO,ROOTLO) + REM_HI = #0 + REM_LO = #0 + } + { + HL += lsr(LL,#33) + LL += asl(HL,#33) + P_CARRY0 = cmp.eq(r0,r0) + } + { + HH = mpyu(ROOTHI,ROOTHI) + REM_LO = sub(REM_LO,LL,P_CARRY0):carry + TWOROOT_LO = #1 + } + { + HH += lsr(HL,#31) + TWOROOT_LO += asl(ROOT,#1) + } +#undef HL +#undef LL +#define REM_HI_TMP r3:2 +#define REM_HI_TMP_HI r3 +#define REM_LO_TMP r5:4 + { + REM_HI = sub(FRACRAD,HH,P_CARRY0):carry + REM_LO_TMP = sub(REM_LO,TWOROOT_LO,P_CARRY1):carry +#undef FRACRAD +#undef HH +#define ZERO r11:10 +#define ONE r7:6 + ONE = #1 + ZERO = #0 + } + { + REM_HI_TMP = sub(REM_HI,ZERO,P_CARRY1):carry + ONE = add(ROOT,ONE) + EXP = add(EXP,#-DF_BIAS) // subtract bias --> signed exp + } + { + // If carry set, no borrow: result was still positive + if (P_CARRY1) ROOT = ONE + if (P_CARRY1) REM_LO = REM_LO_TMP + if (P_CARRY1) REM_HI = REM_HI_TMP + } + { + REM_LO_TMP = sub(REM_LO,TWOROOT_LO,P_CARRY2):carry + ONE = #1 + EXP = asr(EXP,#1) // divide signed exp by 2 + } + { + REM_HI_TMP = sub(REM_HI,ZERO,P_CARRY2):carry + ONE = add(ROOT,ONE) + } + { + if (P_CARRY2) ROOT = ONE + if (P_CARRY2) REM_LO = REM_LO_TMP + // since tworoot <= 2^32, remhi must be zero +#undef REM_HI_TMP +#undef REM_HI_TMP_HI +#define S_ONE r2 +#define ADJ r3 + S_ONE = #1 + } + { + P_TMP = cmp.eq(REM_LO,ZERO) // is the low part zero + if (!P_TMP.new) ROOTLO = or(ROOTLO,S_ONE) // if so, it's exact... hopefully + ADJ = cl0(ROOT) + EXP = add(EXP,#-63) + } +#undef REM_LO +#define RET r1:0 +#define RETHI r1 + { + RET = convert_ud2df(ROOT) // set up mantissa, maybe set inexact flag + EXP = add(EXP,ADJ) // add back bias + } + { + RETHI += asl(EXP,#DF_MANTBITS-32) // add exponent adjust + jumpr r31 + } +#undef REM_LO_TMP +#undef REM_HI_TMP +#undef REM_HI_TMP_HI +#undef REM_LO +#undef REM_HI +#undef TWOROOT_LO + +#undef RET +#define A r1:0 +#define AH r1 +#define AL r1 +#undef S_ONE +#define TMP r3:2 +#define TMPHI r3 +#define TMPLO r2 +#undef P_CARRY0 +#define P_NEG p1 + + +#define SFHALF r5 +#define SFRAD r9 +.Lsqrt_abnormal: + { + P_TMP = dfclass(A,#DFCLASS_ZERO) // zero? 
+ if (P_TMP.new) jumpr:t r31 + } + { + P_TMP = dfclass(A,#DFCLASS_NAN) + if (P_TMP.new) jump:nt .Lsqrt_nan + } + { + P_TMP = cmp.gt(AH,#-1) + if (!P_TMP.new) jump:nt .Lsqrt_invalid_neg + if (!P_TMP.new) EXP = ##0x7F800001 // sNaN + } + { + P_TMP = dfclass(A,#DFCLASS_INFINITE) + if (P_TMP.new) jumpr:nt r31 + } + // If we got here, we're denormal + // prepare to restart + { + A = extractu(A,#DF_MANTBITS,#0) // Extract mantissa + } + { + EXP = add(clb(A),#-DF_EXPBITS) // how much to normalize? + } + { + A = asl(A,EXP) // Shift mantissa + EXP = sub(#1,EXP) // Form exponent + } + { + AH = insert(EXP,#1,#DF_MANTBITS-32) // insert lsb of exponent + } + { + TMP = extractu(A,#SF_MANTBITS+1,#DF_MANTBITS-SF_MANTBITS) // get sf value (mant+exp1) + SFHALF = ##0x3f000004 // form half constant + } + { + SFRAD = or(SFHALF,TMPLO) // form sf value + SFHALF = and(SFHALF,#-16) + jump .Ldenormal_restart // restart + } +.Lsqrt_nan: + { + EXP = convert_df2sf(A) // if sNaN, get invalid + A = #-1 // qNaN + jumpr r31 + } +.Lsqrt_invalid_neg: + { + A = convert_sf2df(EXP) // Invalid,NaNval + jumpr r31 + } +END(__hexagon_sqrt) +END(__hexagon_sqrtdf2) diff --git a/contrib/compiler-rt/lib/builtins/hexagon/divdi3.S b/contrib/compiler-rt/lib/builtins/hexagon/divdi3.S new file mode 100644 index 000000000000..49ee8104f305 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/hexagon/divdi3.S @@ -0,0 +1,85 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + .macro FUNCTION_BEGIN name + .text + .p2align 5 + .globl \name + .type \name, @function +\name: + .endm + + .macro FUNCTION_END name + .size \name, . 
- \name + .endm + + +FUNCTION_BEGIN __hexagon_divdi3 + { + p2 = tstbit(r1,#31) + p3 = tstbit(r3,#31) + } + { + r1:0 = abs(r1:0) + r3:2 = abs(r3:2) + } + { + r6 = cl0(r1:0) // count leading 0's of dividend (numerator) + r7 = cl0(r3:2) // count leading 0's of divisor (denominator) + r5:4 = r3:2 // divisor moved into working registers + r3:2 = r1:0 // dividend is the initial remainder, r3:2 contains remainder + } + { + p3 = xor(p2,p3) + r10 = sub(r7,r6) // left shift count for bit & divisor + r1:0 = #0 // initialize quotient to 0 + r15:14 = #1 // initialize bit to 1 + } + { + r11 = add(r10,#1) // loop count is 1 more than shift count + r13:12 = lsl(r5:4,r10) // shift divisor msb into same bit position as dividend msb + r15:14 = lsl(r15:14,r10) // shift the bit left by same amount as divisor + } + { + p0 = cmp.gtu(r5:4,r3:2) // check if divisor > dividend + loop0(1f,r11) // register loop + } + { + if (p0) jump .hexagon_divdi3_return // if divisor > dividend, we're done, so return + } + .falign +1: + { + p0 = cmp.gtu(r13:12,r3:2) // set predicate reg if shifted divisor > current remainder + } + { + r7:6 = sub(r3:2, r13:12) // subtract shifted divisor from current remainder + r9:8 = add(r1:0, r15:14) // save current quotient to temp (r9:8) + } + { + r1:0 = vmux(p0, r1:0, r9:8) // choose either current quotient or new quotient (r9:8) + r3:2 = vmux(p0, r3:2, r7:6) // choose either current remainder or new remainder (r7:6) + } + { + r15:14 = lsr(r15:14, #1) // shift bit right by 1 for next iteration + r13:12 = lsr(r13:12, #1) // shift "shifted divisor" right by 1 for next iteration + }:endloop0 + +.hexagon_divdi3_return: + { + r3:2 = neg(r1:0) + } + { + r1:0 = vmux(p3,r3:2,r1:0) + jumpr r31 + } +FUNCTION_END __hexagon_divdi3 + + .globl __qdsp_divdi3 + .set __qdsp_divdi3, __hexagon_divdi3 diff --git a/contrib/compiler-rt/lib/builtins/hexagon/divsi3.S b/contrib/compiler-rt/lib/builtins/hexagon/divsi3.S new file mode 100644 index 000000000000..8e159baa192f --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/hexagon/divsi3.S @@ -0,0 +1,84 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + + + .macro FUNCTION_BEGIN name + .text + .p2align 5 + .globl \name + .type \name, @function +\name: + .endm + + .macro FUNCTION_END name + .size \name, . 
- \name + .endm + + +FUNCTION_BEGIN __hexagon_divsi3 + { + p0 = cmp.ge(r0,#0) + p1 = cmp.ge(r1,#0) + r1 = abs(r0) + r2 = abs(r1) + } + { + r3 = cl0(r1) + r4 = cl0(r2) + r5 = sub(r1,r2) + p2 = cmp.gtu(r2,r1) + } +#if (__HEXAGON_ARCH__ == 60) + { + r0 = #0 + p1 = xor(p0,p1) + p0 = cmp.gtu(r2,r5) + } + if (p2) jumpr r31 +#else + { + r0 = #0 + p1 = xor(p0,p1) + p0 = cmp.gtu(r2,r5) + if (p2) jumpr r31 + } +#endif + { + r0 = mux(p1,#-1,#1) + if (p0) jumpr r31 + r4 = sub(r4,r3) + r3 = #1 + } + { + r0 = #0 + r3:2 = vlslw(r3:2,r4) + loop0(1f,r4) + } + .falign +1: + { + p0 = cmp.gtu(r2,r1) + if (!p0.new) r1 = sub(r1,r2) + if (!p0.new) r0 = add(r0,r3) + r3:2 = vlsrw(r3:2,#1) + }:endloop0 + { + p0 = cmp.gtu(r2,r1) + if (!p0.new) r0 = add(r0,r3) + if (!p1) jumpr r31 + } + { + r0 = neg(r0) + jumpr r31 + } +FUNCTION_END __hexagon_divsi3 + + .globl __qdsp_divsi3 + .set __qdsp_divsi3, __hexagon_divsi3 diff --git a/contrib/compiler-rt/lib/builtins/hexagon/fabs_opt.S b/contrib/compiler-rt/lib/builtins/hexagon/fabs_opt.S new file mode 100644 index 000000000000..b09b00734d98 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/hexagon/fabs_opt.S @@ -0,0 +1,37 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +.macro FUNCTION_BEGIN name +.text +.p2align 5 +.globl \name +.type \name, @function +\name: +.endm + +.macro FUNCTION_END name +.size \name, . - \name +.endm + +FUNCTION_BEGIN fabs + { + r1 = clrbit(r1, #31) + jumpr r31 + } +FUNCTION_END fabs + +FUNCTION_BEGIN fabsf + { + r0 = clrbit(r0, #31) + jumpr r31 + } +FUNCTION_END fabsf + + .globl fabsl + .set fabsl, fabs diff --git a/contrib/compiler-rt/lib/builtins/hexagon/fastmath2_dlib_asm.S b/contrib/compiler-rt/lib/builtins/hexagon/fastmath2_dlib_asm.S new file mode 100644 index 000000000000..9286df06c26d --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/hexagon/fastmath2_dlib_asm.S @@ -0,0 +1,491 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/* ==================================================================== */ +/* FUNCTIONS Optimized double floating point operators */ +/* ==================================================================== */ +/* c = dadd_asm(a, b) */ +/* ==================================================================== * +fast2_QDOUBLE fast2_dadd(fast2_QDOUBLE a,fast2_QDOUBLE b) { + fast2_QDOUBLE c; + lint manta = a & MANTMASK; + int expa = Q6_R_sxth_R(a) ; + lint mantb = b & MANTMASK; + int expb = Q6_R_sxth_R(b) ; + int exp, expdiff, j, k, hi, lo, cn; + lint mant; + + expdiff = (int) Q6_P_vabsdiffh_PP(a, b); + expdiff = Q6_R_sxth_R(expdiff) ; + if (expdiff > 63) { expdiff = 62;} + if (expa > expb) { + exp = expa + 1; + expa = 1; + expb = expdiff + 1; + } else { + exp = expb + 1; + expb = 1; + expa = expdiff + 1; + } + mant = (manta>>expa) + (mantb>>expb); + + hi = (int) (mant>>32); + lo = (int) (mant); + + k = Q6_R_normamt_R(hi); + if(hi == 0 || hi == -1) k = 31+Q6_R_normamt_R(lo); + + mant = (mant << k); + cn = (mant == 0x8000000000000000LL); + exp = exp - k + cn; + + if (mant == 0 || mant == -1) exp = 0x8001; + c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK); + return(c); + } + * ==================================================================== */ + .text + .global fast2_dadd_asm + .type fast2_dadd_asm, @function +fast2_dadd_asm: +#define manta R0 +#define mantexpa R1:0 +#define lmanta R1:0 +#define mantb R2 +#define mantexpb R3:2 +#define lmantb R3:2 +#define expa R4 +#define expb R5 +#define mantexpd R7:6 +#define expd R6 +#define exp R8 +#define c63 R9 +#define lmant R1:0 +#define manth R1 +#define mantl R0 +#define minmin R11:10 // exactly 0x000000000000008001LL +#define minminl R10 +#define k R4 +#define ce P0 + .falign + { + mantexpd = VABSDIFFH(mantexpa, mantexpb) //represented as 0x08001LL + c63 = #62 + expa = SXTH(manta) + expb = SXTH(mantb) + } { + expd = SXTH(expd) + ce = CMP.GT(expa, expb); + if ( ce.new) exp = add(expa, #1) + if (!ce.new) exp = add(expb, #1) + } { + if ( ce) expa = #1 + if (!ce) expb = #1 + manta.L = #0 + expd = MIN(expd, c63) + } { + if (!ce) expa = add(expd, #1) + if ( ce) expb = add(expd, #1) + mantb.L = #0 + minmin = #0 + } { + lmanta = ASR(lmanta, expa) + lmantb = ASR(lmantb, expb) + } { + lmant = add(lmanta, lmantb) + minminl.L = #0x8001 + } { + k = clb(lmant) + c63 = #58 + } { + k = add(k, #-1) + p0 = cmp.gt(k, c63) + } { + mantexpa = ASL(lmant, k) + exp = SUB(exp, k) + if(p0) jump .Ldenorma + } { + manta = insert(exp, #16, #0) + jumpr r31 + } +.Ldenorma: + { + mantexpa = minmin + jumpr r31 + } +/* =================================================================== * + fast2_QDOUBLE fast2_dsub(fast2_QDOUBLE a,fast2_QDOUBLE b) { + fast2_QDOUBLE c; + lint manta = a & MANTMASK; + int expa = Q6_R_sxth_R(a) ; + lint mantb = b & MANTMASK; + int expb = Q6_R_sxth_R(b) ; + int exp, expdiff, j, k; + lint mant; + + expdiff = (int) Q6_P_vabsdiffh_PP(a, b); + expdiff = Q6_R_sxth_R(expdiff) ; + if (expdiff > 63) { expdiff = 62;} + if (expa > expb) { + exp = expa + 1; + expa = 1; + expb = expdiff + 1; + } else { + exp = expb + 1; + expb = 1; + expa = expdiff + 1; + } + mant = (manta>>expa) - (mantb>>expb); + k = Q6_R_clb_P(mant)-1; + mant = (mant << k); + exp = exp - k; + if (mant == 0 || mant == -1) exp = 0x8001; + c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK); + return(c); + } + * ==================================================================== */ + .text + .global 
fast2_dsub_asm + .type fast2_dsub_asm, @function +fast2_dsub_asm: + +#define manta R0 +#define mantexpa R1:0 +#define lmanta R1:0 +#define mantb R2 +#define mantexpb R3:2 +#define lmantb R3:2 +#define expa R4 +#define expb R5 +#define mantexpd R7:6 +#define expd R6 +#define exp R8 +#define c63 R9 +#define lmant R1:0 +#define manth R1 +#define mantl R0 +#define minmin R11:10 // exactly 0x000000000000008001LL +#define minminl R10 +#define k R4 +#define ce P0 + .falign + { + mantexpd = VABSDIFFH(mantexpa, mantexpb) //represented as 0x08001LL + c63 = #62 + expa = SXTH(manta) + expb = SXTH(mantb) + } { + expd = SXTH(expd) + ce = CMP.GT(expa, expb); + if ( ce.new) exp = add(expa, #1) + if (!ce.new) exp = add(expb, #1) + } { + if ( ce) expa = #1 + if (!ce) expb = #1 + manta.L = #0 + expd = MIN(expd, c63) + } { + if (!ce) expa = add(expd, #1) + if ( ce) expb = add(expd, #1) + mantb.L = #0 + minmin = #0 + } { + lmanta = ASR(lmanta, expa) + lmantb = ASR(lmantb, expb) + } { + lmant = sub(lmanta, lmantb) + minminl.L = #0x8001 + } { + k = clb(lmant) + c63 = #58 + } { + k = add(k, #-1) + p0 = cmp.gt(k, c63) + } { + mantexpa = ASL(lmant, k) + exp = SUB(exp, k) + if(p0) jump .Ldenorm + } { + manta = insert(exp, #16, #0) + jumpr r31 + } +.Ldenorm: + { + mantexpa = minmin + jumpr r31 + } +/* ==================================================================== * + fast2_QDOUBLE fast2_dmpy(fast2_QDOUBLE a,fast2_QDOUBLE b) { + fast2_QDOUBLE c; + lint manta = a & MANTMASK; + int expa = Q6_R_sxth_R(a) ; + lint mantb = b & MANTMASK; + int expb = Q6_R_sxth_R(b) ; + int exp, k; + lint mant; + int hia, hib, hi, lo; + unsigned int loa, lob; + + hia = (int)(a >> 32); + loa = Q6_R_extractu_RII((int)manta, 31, 1); + hib = (int)(b >> 32); + lob = Q6_R_extractu_RII((int)mantb, 31, 1); + + mant = Q6_P_mpy_RR(hia, lob); + mant = Q6_P_mpyacc_RR(mant,hib, loa); + mant = (mant >> 30) + (Q6_P_mpy_RR(hia, hib)<<1); + + hi = (int) (mant>>32); + + k = Q6_R_normamt_R(hi); + mant = mant << k; + exp = expa + expb - k; + if (mant == 0 || mant == -1) exp = 0x8001; + c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK); + return(c); + } + * ==================================================================== */ + .text + .global fast2_dmpy_asm + .type fast2_dmpy_asm, @function +fast2_dmpy_asm: + +#define mantal R0 +#define mantah R1 +#define mantexpa R1:0 +#define mantbl R2 +#define mantbh R3 +#define mantexpb R3:2 +#define expa R4 +#define expb R5 +#define c8001 R12 +#define mantexpd R7:6 +#define mantdh R7 +#define exp R8 +#define lmantc R11:10 +#define kb R9 +#define guard R11 +#define mantal_ R12 +#define mantbl_ R13 +#define min R15:14 +#define minh R15 + + .falign + { + mantbl_= lsr(mantbl, #16) + expb = sxth(mantbl) + expa = sxth(mantal) + mantal_= lsr(mantal, #16) + } + { + lmantc = mpy(mantah, mantbh) + mantexpd = mpy(mantah, mantbl_) + mantal.L = #0x0 + min = #0 + } + { + lmantc = add(lmantc, lmantc) + mantexpd+= mpy(mantbh, mantal_) + mantbl.L = #0x0 + minh.H = #0x8000 + } + { + mantexpd = asr(mantexpd, #15) + c8001.L = #0x8001 + p1 = cmp.eq(mantexpa, mantexpb) + } + { + mantexpd = add(mantexpd, lmantc) + exp = add(expa, expb) + p2 = cmp.eq(mantexpa, min) + } + { + kb = clb(mantexpd) + mantexpb = abs(mantexpd) + guard = #58 + } + { + p1 = and(p1, p2) + exp = sub(exp, kb) + kb = add(kb, #-1) + p0 = cmp.gt(kb, guard) + } + { + exp = add(exp, #1) + mantexpa = asl(mantexpd, kb) + if(p1) jump .Lsat //rarely happens + } + { + mantal = insert(exp,#16, #0) + if(!p0) jumpr r31 + } + { + mantal = insert(c8001,#16, #0) + jumpr r31 + } 
+.Lsat: + { + mantexpa = #-1 + } + { + mantexpa = lsr(mantexpa, #1) + } + { + mantal = insert(exp,#16, #0) + jumpr r31 + } + +/* ==================================================================== * + int fast2_qd2f(fast2_QDOUBLE a) { + int exp; + long long int manta; + int ic, rnd, mantb; + + manta = a>>32; + exp = Q6_R_sxth_R(a) ; + ic = 0x80000000 & manta; + manta = Q6_R_abs_R_sat(manta); + mantb = (manta + rnd)>>7; + rnd = 0x40 + exp = (exp + 126); + if((manta & 0xff) == rnd) rnd = 0x00; + if((manta & 0x7fffffc0) == 0x7fffffc0) { + manta = 0x0; exp++; + } else { + manta= mantb & 0x007fffff; + } + exp = (exp << 23) & 0x7fffffc0; + ic = Q6_R_addacc_RR(ic, exp, manta); + return (ic); + } + * ==================================================================== */ + + .text + .global fast2_qd2f_asm + .type fast2_qd2f_asm, @function +fast2_qd2f_asm: +#define mantah R1 +#define mantal R0 +#define cff R0 +#define mant R3 +#define expo R4 +#define rnd R5 +#define mask R6 +#define c07f R7 +#define c80 R0 +#define mantb R2 +#define ic R0 + + .falign + { + mant = abs(mantah):sat + expo = sxth(mantal) + rnd = #0x40 + mask.L = #0xffc0 + } + { + cff = extractu(mant, #8, #0) + p2 = cmp.gt(expo, #126) + p3 = cmp.ge(expo, #-126) + mask.H = #0x7fff + } + { + p1 = cmp.eq(cff,#0x40) + if(p1.new) rnd = #0 + expo = add(expo, #126) + if(!p3) jump .Lmin + } + { + p0 = bitsset(mant, mask) + c80.L = #0x0000 + mantb = add(mant, rnd) + c07f = lsr(mask, #8) + } + { + if(p0) expo = add(expo, #1) + if(p0) mant = #0 + mantb = lsr(mantb, #7) + c80.H = #0x8000 + } + { + ic = and(c80, mantah) + mask &= asl(expo, #23) + if(!p0) mant = and(mantb, c07f) + if(p2) jump .Lmax + } + { + ic += add(mask, mant) + jumpr r31 + } +.Lmax: + { + ic.L = #0xffff; + } + { + ic.H = #0x7f7f; + jumpr r31 + } +.Lmin: + { + ic = #0x0 + jumpr r31 + } + +/* ==================================================================== * +fast2_QDOUBLE fast2_f2qd(int ia) { + lint exp; + lint mant; + fast2_QDOUBLE c; + + mant = ((ia << 7) | 0x40000000)&0x7fffff80 ; + if (ia & 0x80000000) mant = -mant; + exp = ((ia >> 23) & 0xFFLL) - 126; + c = (mant<<32) | Q6_R_zxth_R(exp);; + return(c); +} + * ==================================================================== */ + .text + .global fast2_f2qd_asm + .type fast2_f2qd_asm, @function +fast2_f2qd_asm: +#define ia R0 +#define mag R3 +#define mantr R1 +#define expr R0 +#define zero R2 +#define maxneg R5:4 +#define maxnegl R4 + .falign + { + mantr = asl(ia, #7) + p0 = tstbit(ia, #31) + maxneg = #0 + mag = add(ia,ia) + } + { + mantr = setbit(mantr, #30) + expr= extractu(ia,#8,#23) + maxnegl.L = #0x8001 + p1 = cmp.eq(mag, #0) + } + { + mantr= extractu(mantr, #31, #0) + expr= add(expr, #-126) + zero = #0 + if(p1) jump .Lminqd + } + { + expr = zxth(expr) + if(p0) mantr= sub(zero, mantr) + jumpr r31 + } +.Lminqd: + { + R1:0 = maxneg + jumpr r31 + } diff --git a/contrib/compiler-rt/lib/builtins/hexagon/fastmath2_ldlib_asm.S b/contrib/compiler-rt/lib/builtins/hexagon/fastmath2_ldlib_asm.S new file mode 100644 index 000000000000..4192555351a6 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/hexagon/fastmath2_ldlib_asm.S @@ -0,0 +1,345 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/* ==================================================================== * + +fast2_QLDOUBLE fast2_ldadd(fast2_QLDOUBLE a,fast2_QLDOUBLE b) { + fast2_QLDOUBLE c; + lint manta = a & MANTMASK; + int expa = Q6_R_sxth_R(a) ; + lint mantb = b & MANTMASK; + int expb = Q6_R_sxth_R(b) ; + int exp, expdiff, j, k, hi, lo, cn; + lint mant; + + expdiff = (int) Q6_P_vabsdiffh_PP(a, b); + expdiff = Q6_R_sxth_R(expdiff) ; + if (expdiff > 63) { expdiff = 62;} + if (expa > expb) { + exp = expa + 1; + expa = 1; + expb = expdiff + 1; + } else { + exp = expb + 1; + expb = 1; + expa = expdiff + 1; + } + mant = (manta>>expa) + (mantb>>expb); + + hi = (int) (mant>>32); + lo = (int) (mant); + + k = Q6_R_normamt_R(hi); + if(hi == 0 || hi == -1) k = 31+Q6_R_normamt_R(lo); + + mant = (mant << k); + cn = (mant == 0x8000000000000000LL); + exp = exp - k + cn; + + if (mant == 0 || mant == -1) exp = 0x8001; + c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK); + return(c); + } + * ==================================================================== */ + .text + .global fast2_ldadd_asm + .type fast2_ldadd_asm, @function +fast2_ldadd_asm: +#define manta R1:0 +#define lmanta R1:0 +#define mantb R3:2 +#define lmantb R3:2 +#define expa R4 +#define expb R5 +#define expd R6 +#define exp R8 +#define c63 R9 +#define lmant R1:0 +#define k R4 +#define ce P0 +#define zero R3:2 + .falign + { + expa = memw(r29+#8) + expb = memw(r29+#24) + r7 = r0 + } + { + expd = sub(expa, expb):sat + ce = CMP.GT(expa, expb); + if ( ce.new) exp = add(expa, #1) + if (!ce.new) exp = add(expb, #1) + } { + expd = abs(expd):sat + if ( ce) expa = #1 + if (!ce) expb = #1 + c63 = #62 + } { + expd = MIN(expd, c63) + manta = memd(r29+#0) + mantb = memd(r29+#16) + } { + if (!ce) expa = add(expd, #1) + if ( ce) expb = add(expd, #1) + } { + lmanta = ASR(lmanta, expa) + lmantb = ASR(lmantb, expb) + } { + lmant = add(lmanta, lmantb) + zero = #0 + } { + k = clb(lmant) + c63.L =#0x0001 + } { + exp -= add(k, #-1) //exp = exp - (k-1) + k = add(k, #-1) + p0 = cmp.gt(k, #58) + c63.H =#0x8000 + } { + if(!p0)memw(r7+#8) = exp + lmant = ASL(lmant, k) + if(p0) jump .Ldenorma + } { + memd(r7+#0) = lmant + jumpr r31 + } +.Ldenorma: + memd(r7+#0) = zero + { + memw(r7+#8) = c63 + jumpr r31 + } +/* =================================================================== * + fast2_QLDOUBLE fast2_ldsub(fast2_QLDOUBLE a,fast2_QLDOUBLE b) { + fast2_QLDOUBLE c; + lint manta = a & MANTMASK; + int expa = Q6_R_sxth_R(a) ; + lint mantb = b & MANTMASK; + int expb = Q6_R_sxth_R(b) ; + int exp, expdiff, j, k; + lint mant; + + expdiff = (int) Q6_P_vabsdiffh_PP(a, b); + expdiff = Q6_R_sxth_R(expdiff) ; + if (expdiff > 63) { expdiff = 62;} + if (expa > expb) { + exp = expa + 1; + expa = 1; + expb = expdiff + 1; + } else { + exp = expb + 1; + expb = 1; + expa = expdiff + 1; + } + mant = (manta>>expa) - (mantb>>expb); + k = Q6_R_clb_P(mant)-1; + mant = (mant << k); + exp = exp - k; + if (mant == 0 || mant == -1) exp = 0x8001; + c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK); + return(c); + } + * ==================================================================== */ + .text + .global fast2_ldsub_asm + .type fast2_ldsub_asm, @function +fast2_ldsub_asm: +#define manta R1:0 +#define lmanta R1:0 +#define mantb R3:2 +#define lmantb R3:2 +#define expa R4 +#define expb R5 +#define expd R6 +#define exp R8 +#define c63 R9 +#define lmant R1:0 +#define k R4 +#define ce P0 +#define zero R3:2 + .falign + { + expa = memw(r29+#8) 
+ expb = memw(r29+#24) + r7 = r0 + } + { + expd = sub(expa, expb):sat + ce = CMP.GT(expa, expb); + if ( ce.new) exp = add(expa, #1) + if (!ce.new) exp = add(expb, #1) + } { + expd = abs(expd):sat + if ( ce) expa = #1 + if (!ce) expb = #1 + c63 = #62 + } { + expd = min(expd, c63) + manta = memd(r29+#0) + mantb = memd(r29+#16) + } { + if (!ce) expa = add(expd, #1) + if ( ce) expb = add(expd, #1) + } { + lmanta = ASR(lmanta, expa) + lmantb = ASR(lmantb, expb) + } { + lmant = sub(lmanta, lmantb) + zero = #0 + } { + k = clb(lmant) + c63.L =#0x0001 + } { + exp -= add(k, #-1) //exp = exp - (k+1) + k = add(k, #-1) + p0 = cmp.gt(k, #58) + c63.H =#0x8000 + } { + if(!p0)memw(r7+#8) = exp + lmant = asl(lmant, k) + if(p0) jump .Ldenorma_s + } { + memd(r7+#0) = lmant + jumpr r31 + } +.Ldenorma_s: + memd(r7+#0) = zero + { + memw(r7+#8) = c63 + jumpr r31 + } + +/* ==================================================================== * + fast2_QLDOUBLE fast2_ldmpy(fast2_QLDOUBLE a,fast2_QLDOUBLE b) { + fast2_QLDOUBLE c; + lint manta = a & MANTMASK; + int expa = Q6_R_sxth_R(a) ; + lint mantb = b & MANTMASK; + int expb = Q6_R_sxth_R(b) ; + int exp, k; + lint mant; + int hia, hib, hi, lo; + unsigned int loa, lob; + + hia = (int)(a >> 32); + loa = Q6_R_extractu_RII((int)manta, 31, 1); + hib = (int)(b >> 32); + lob = Q6_R_extractu_RII((int)mantb, 31, 1); + + mant = Q6_P_mpy_RR(hia, lob); + mant = Q6_P_mpyacc_RR(mant,hib, loa); + mant = (mant >> 30) + (Q6_P_mpy_RR(hia, hib)<<1); + + hi = (int) (mant>>32); + + k = Q6_R_normamt_R(hi); + mant = mant << k; + exp = expa + expb - k; + if (mant == 0 || mant == -1) exp = 0x8001; + c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK); + return(c); + } + * ==================================================================== */ + .text + .global fast2_ldmpy_asm + .type fast2_ldmpy_asm, @function +fast2_ldmpy_asm: + +#define mantxl_ R9 +#define mantxl R14 +#define mantxh R15 +#define mantx R15:14 +#define mantbl R2 +#define mantbl_ R8 +#define mantbh R3 +#define mantb R3:2 +#define expa R4 +#define expb R5 +#define c8001 R8 +#define mantd R7:6 +#define lmantc R11:10 +#define kp R9 +#define min R13:12 +#define minh R13 +#define max R13:12 +#define maxh R13 +#define ret R0 + + .falign + { + mantx = memd(r29+#0) + mantb = memd(r29+#16) + min = #0 + } + { + mantbl_= extractu(mantbl, #31, #1) + mantxl_= extractu(mantxl, #31, #1) + minh.H = #0x8000 + } + { + lmantc = mpy(mantxh, mantbh) + mantd = mpy(mantxh, mantbl_) + expa = memw(r29+#8) + expb = memw(r29+#24) + } + { + lmantc = add(lmantc, lmantc) + mantd += mpy(mantbh, mantxl_) + } + { + mantd = asr(mantd, #30) + c8001.L = #0x0001 + p1 = cmp.eq(mantx, mantb) + } + { + mantd = add(mantd, lmantc) + expa= add(expa, expb) + p2 = cmp.eq(mantb, min) + } + { + kp = clb(mantd) + c8001.H = #0x8000 + p1 = and(p1, p2) + } + { + expa-= add(kp, #-1) + kp = add(kp, #-1) + if(p1) jump .Lsat + } + { + mantd = asl(mantd, kp) + memw(ret+#8) = expa + p0 = cmp.gt(kp, #58) + if(p0.new) jump:NT .Ldenorm //rarely happens + } + { + memd(ret+#0) = mantd + jumpr r31 + } +.Lsat: + { + max = #0 + expa+= add(kp, #1) + } + { + maxh.H = #0x4000 + memw(ret+#8) = expa + } + { + memd(ret+#0) = max + jumpr r31 + } +.Ldenorm: + { + memw(ret+#8) = c8001 + mantx = #0 + } + { + memd(ret+#0) = mantx + jumpr r31 + } diff --git a/contrib/compiler-rt/lib/builtins/hexagon/fastmath_dlib_asm.S b/contrib/compiler-rt/lib/builtins/hexagon/fastmath_dlib_asm.S new file mode 100644 index 000000000000..215936b783ca --- /dev/null +++ 
b/contrib/compiler-rt/lib/builtins/hexagon/fastmath_dlib_asm.S @@ -0,0 +1,400 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/* ==================================================================== */ +/* FUNCTIONS Optimized double floating point operators */ +/* ==================================================================== */ +/* c = dadd_asm(a, b) */ +/* ==================================================================== + +QDOUBLE dadd(QDOUBLE a,QDOUBLE b) { + QDOUBLE c; + lint manta = a & MANTMASK; + int expa = HEXAGON_R_sxth_R(a) ; + lint mantb = b & MANTMASK; + int expb = HEXAGON_R_sxth_R(b) ; + int exp, expdiff, j, k, hi, lo, cn; + lint mant; + + expdiff = (int) HEXAGON_P_vabsdiffh_PP(a, b); + expdiff = HEXAGON_R_sxth_R(expdiff) ; + if (expdiff > 63) { expdiff = 62;} + if (expa > expb) { + exp = expa + 1; + expa = 1; + expb = expdiff + 1; + } else { + exp = expb + 1; + expb = 1; + expa = expdiff + 1; + } + mant = (manta>>expa) + (mantb>>expb); + + hi = (int) (mant>>32); + lo = (int) (mant); + + k = HEXAGON_R_normamt_R(hi); + if(hi == 0 || hi == -1) k = 31+HEXAGON_R_normamt_R(lo); + + mant = (mant << k); + cn = (mant == 0x8000000000000000LL); + exp = exp - k + cn; + + if (mant == 0 || mant == -1) exp = 0x8001; + c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK); + return(c); + } + * ==================================================================== */ + .text + .global dadd_asm + .type dadd_asm, @function +dadd_asm: + +#define manta R0 +#define mantexpa R1:0 +#define lmanta R1:0 +#define mantb R2 +#define mantexpb R3:2 +#define lmantb R3:2 +#define expa R4 +#define expb R5 +#define mantexpd R7:6 +#define expd R6 +#define exp R8 +#define c63 R9 +#define lmant R1:0 +#define manth R1 +#define mantl R0 +#define zero R7:6 +#define zerol R6 +#define minus R3:2 +#define minusl R2 +#define maxneg R9 +#define minmin R11:10 // exactly 0x800000000000000000LL +#define minminh R11 +#define k R4 +#define kl R5 +#define ce P0 + .falign + { + mantexpd = VABSDIFFH(mantexpa, mantexpb) //represented as 0x08001LL + c63 = #62 + expa = SXTH(manta) + expb = SXTH(mantb) + } { + expd = SXTH(expd) + ce = CMP.GT(expa, expb); + if ( ce.new) exp = add(expa, #1) + if (!ce.new) exp = add(expb, #1) + } { + if ( ce) expa = #1 + if (!ce) expb = #1 + manta.L = #0 + expd = MIN(expd, c63) + } { + if (!ce) expa = add(expd, #1) + if ( ce) expb = add(expd, #1) + mantb.L = #0 + zero = #0 + } { + lmanta = ASR(lmanta, expa) + lmantb = ASR(lmantb, expb) + minmin = #0 + } { + lmant = add(lmanta, lmantb) + minus = #-1 + minminh.H = #0x8000 + } { + k = NORMAMT(manth) + kl = NORMAMT(mantl) + p0 = cmp.eq(manth, zerol) + p1 = cmp.eq(manth, minusl) + } { + p0 = OR(p0, p1) + if(p0.new) k = add(kl, #31) + maxneg.H = #0 + } { + mantexpa = ASL(lmant, k) + exp = SUB(exp, k) + maxneg.L = #0x8001 + } { + p0 = cmp.eq(mantexpa, zero) + p1 = cmp.eq(mantexpa, minus) + manta.L = #0 + exp = ZXTH(exp) + } { + p2 = cmp.eq(mantexpa, minmin) //is result 0x80....0 + if(p2.new) exp = add(exp, #1) + } +#if (__HEXAGON_ARCH__ == 60) + { + p0 = OR(p0, p1) + if( p0.new) manta = OR(manta,maxneg) + if(!p0.new) manta = OR(manta,exp) + } + jumpr r31 +#else + { + p0 = OR(p0, p1) + if( p0.new) manta = OR(manta,maxneg) + if(!p0.new) manta = OR(manta,exp) + 
jumpr r31 + } +#endif +/* =================================================================== * + QDOUBLE dsub(QDOUBLE a,QDOUBLE b) { + QDOUBLE c; + lint manta = a & MANTMASK; + int expa = HEXAGON_R_sxth_R(a) ; + lint mantb = b & MANTMASK; + int expb = HEXAGON_R_sxth_R(b) ; + int exp, expdiff, j, k, hi, lo, cn; + lint mant; + + expdiff = (int) HEXAGON_P_vabsdiffh_PP(a, b); + expdiff = HEXAGON_R_sxth_R(expdiff) ; + if (expdiff > 63) { expdiff = 62;} + if (expa > expb) { + exp = expa + 1; + expa = 1; + expb = expdiff + 1; + } else { + exp = expb + 1; + expb = 1; + expa = expdiff + 1; + } + mant = (manta>>expa) - (mantb>>expb); + + hi = (int) (mant>>32); + lo = (int) (mant); + + k = HEXAGON_R_normamt_R(hi); + if(hi == 0 || hi == -1) k = 31+HEXAGON_R_normamt_R(lo); + + mant = (mant << k); + cn = (mant == 0x8000000000000000LL); + exp = exp - k + cn; + + if (mant == 0 || mant == -1) exp = 0x8001; + c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK); + return(c); + } + * ==================================================================== */ + .text + .global dsub_asm + .type dsub_asm, @function +dsub_asm: + +#define manta R0 +#define mantexpa R1:0 +#define lmanta R1:0 +#define mantb R2 +#define mantexpb R3:2 +#define lmantb R3:2 +#define expa R4 +#define expb R5 +#define mantexpd R7:6 +#define expd R6 +#define exp R8 +#define c63 R9 +#define lmant R1:0 +#define manth R1 +#define mantl R0 +#define zero R7:6 +#define zerol R6 +#define minus R3:2 +#define minusl R2 +#define maxneg R9 +#define minmin R11:10 // exactly 0x800000000000000000LL +#define minminh R11 +#define k R4 +#define kl R5 +#define ce P0 + .falign + { + mantexpd = VABSDIFFH(mantexpa, mantexpb) //represented as 0x08001LL + c63 = #62 + expa = SXTH(manta) + expb = SXTH(mantb) + } { + expd = SXTH(expd) + ce = CMP.GT(expa, expb); + if ( ce.new) exp = add(expa, #1) + if (!ce.new) exp = add(expb, #1) + } { + if ( ce) expa = #1 + if (!ce) expb = #1 + manta.L = #0 + expd = MIN(expd, c63) + } { + if (!ce) expa = add(expd, #1) + if ( ce) expb = add(expd, #1) + mantb.L = #0 + zero = #0 + } { + lmanta = ASR(lmanta, expa) + lmantb = ASR(lmantb, expb) + minmin = #0 + } { + lmant = sub(lmanta, lmantb) + minus = #-1 + minminh.H = #0x8000 + } { + k = NORMAMT(manth) + kl = NORMAMT(mantl) + p0 = cmp.eq(manth, zerol) + p1 = cmp.eq(manth, minusl) + } { + p0 = OR(p0, p1) + if(p0.new) k = add(kl, #31) + maxneg.H = #0 + } { + mantexpa = ASL(lmant, k) + exp = SUB(exp, k) + maxneg.L = #0x8001 + } { + p0 = cmp.eq(mantexpa, zero) + p1 = cmp.eq(mantexpa, minus) + manta.L = #0 + exp = ZXTH(exp) + } { + p2 = cmp.eq(mantexpa, minmin) //is result 0x80....0 + if(p2.new) exp = add(exp, #1) + } +#if (__HEXAGON_ARCH__ == 60) + { + p0 = OR(p0, p1) + if( p0.new) manta = OR(manta,maxneg) + if(!p0.new) manta = OR(manta,exp) + } + jumpr r31 +#else + { + p0 = OR(p0, p1) + if( p0.new) manta = OR(manta,maxneg) + if(!p0.new) manta = OR(manta,exp) + jumpr r31 + } +#endif +/* ==================================================================== * + QDOUBLE dmpy(QDOUBLE a,QDOUBLE b) { + QDOUBLE c; + lint manta = a & MANTMASK; + int expa = HEXAGON_R_sxth_R(a) ; + lint mantb = b & MANTMASK; + int expb = HEXAGON_R_sxth_R(b) ; + int exp, k; + lint mant; + int hia, hib, hi, lo; + unsigned int loa, lob; + + hia = (int)(a >> 32); + loa = HEXAGON_R_extractu_RII((int)manta, 31, 1); + hib = (int)(b >> 32); + lob = HEXAGON_R_extractu_RII((int)mantb, 31, 1); + + mant = HEXAGON_P_mpy_RR(hia, lob); + mant = HEXAGON_P_mpyacc_RR(mant,hib, loa); + mant = (mant >> 30) + (HEXAGON_P_mpy_RR(hia, 
hib)<<1); + + hi = (int) (mant>>32); + lo = (int) (mant); + + k = HEXAGON_R_normamt_R(hi); + if(hi == 0 || hi == -1) k = 31+HEXAGON_R_normamt_R(lo); + mant = mant << k; + exp = expa + expb - k; + if (mant == 0 || mant == -1) exp = 0x8001; + c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK); + return(c); + } + * ==================================================================== */ + .text + .global dmpy_asm + .type dmpy_asm, @function +dmpy_asm: + +#define mantal R0 +#define mantah R1 +#define mantexpa R1:0 +#define mantbl R2 +#define mantbh R3 +#define mantexpb R3:2 +#define expa R4 +#define expb R5 +#define mantexpd R7:6 +#define exp R8 +#define lmantc R11:10 +#define mantch R11 +#define mantcl R10 +#define zero0 R7:6 +#define zero0l R6 +#define minus1 R3:2 +#define minus1l R2 +#define maxneg R9 +#define k R4 +#define kl R5 + + .falign + { + mantbl = lsr(mantbl, #16) + mantal = lsr(mantal, #16) + expa = sxth(mantal) + expb = sxth(mantbl) + } + { + lmantc = mpy(mantah, mantbh) + mantexpd = mpy(mantah, mantbl) + } + { + lmantc = add(lmantc, lmantc) //<<1 + mantexpd+= mpy(mantbh, mantal) + } + { + lmantc += asr(mantexpd, #15) + exp = add(expa, expb) + zero0 = #0 + minus1 = #-1 + } + { + k = normamt(mantch) + kl = normamt(mantcl) + p0 = cmp.eq(mantch, zero0l) + p1 = cmp.eq(mantch, minus1l) + } + { + p0 = or(p0, p1) + if(p0.new) k = add(kl, #31) + maxneg.H = #0 + } + { + mantexpa = asl(lmantc, k) + exp = sub(exp, k) + maxneg.L = #0x8001 + } + { + p0 = cmp.eq(mantexpa, zero0) + p1 = cmp.eq(mantexpa, minus1) + mantal.L = #0 + exp = zxth(exp) + } +#if (__HEXAGON_ARCH__ == 60) + { + p0 = or(p0, p1) + if( p0.new) mantal = or(mantal,maxneg) + if(!p0.new) mantal = or(mantal,exp) + } + jumpr r31 +#else + { + p0 = or(p0, p1) + if( p0.new) mantal = or(mantal,maxneg) + if(!p0.new) mantal = or(mantal,exp) + jumpr r31 + } +#endif diff --git a/contrib/compiler-rt/lib/builtins/hexagon/fma_opt.S b/contrib/compiler-rt/lib/builtins/hexagon/fma_opt.S new file mode 100644 index 000000000000..12378f0da04e --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/hexagon/fma_opt.S @@ -0,0 +1,31 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +.macro FUNCTION_BEGIN name +.text +.p2align 5 +.globl \name +.type \name, @function +\name: +.endm + +.macro FUNCTION_END name +.size \name, . - \name +.endm + +FUNCTION_BEGIN fmaf + r2 += sfmpy(r0, r1) + { + r0 = r2 + jumpr r31 + } +FUNCTION_END fmaf + + .globl fmal + .set fmal, fma diff --git a/contrib/compiler-rt/lib/builtins/hexagon/fmax_opt.S b/contrib/compiler-rt/lib/builtins/hexagon/fmax_opt.S new file mode 100644 index 000000000000..f3a218c9769b --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/hexagon/fmax_opt.S @@ -0,0 +1,30 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +.macro FUNCTION_BEGIN name +.text +.p2align 5 +.globl \name +.type \name, @function +\name: +.endm + +.macro FUNCTION_END name +.size \name, . 
- \name
+.endm
+
+FUNCTION_BEGIN fmaxf
+  {
+    r0 = sfmax(r0, r1)
+    jumpr r31
+  }
+FUNCTION_END fmaxf
+
+  .globl fmaxl
+  .set fmaxl, fmax
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/fmin_opt.S b/contrib/compiler-rt/lib/builtins/hexagon/fmin_opt.S new file mode 100644 index 000000000000..ef9b0ff854a2 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/hexagon/fmin_opt.S @@ -0,0 +1,30 @@ +//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+.macro FUNCTION_BEGIN name
+.text
+.p2align 5
+.globl \name
+.type \name, @function
+\name:
+.endm
+
+.macro FUNCTION_END name
+.size \name, . - \name
+.endm
+
+FUNCTION_BEGIN fminf
+  {
+    r0 = sfmin(r0, r1)
+    jumpr r31
+  }
+FUNCTION_END fminf
+
+  .globl fminl
+  .set fminl, fmin
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/memcpy_forward_vp4cp4n2.S b/contrib/compiler-rt/lib/builtins/hexagon/memcpy_forward_vp4cp4n2.S new file mode 100644 index 000000000000..fbe09086cd33 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/hexagon/memcpy_forward_vp4cp4n2.S @@ -0,0 +1,125 @@ +//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// An optimized version of a memcpy which is equivalent to the following loop:
+//
+//   volatile unsigned *dest;
+//   unsigned *src;
+//
+//   for (i = 0; i < num_words; ++i)
+//     *dest++ = *src++;
+//
+// The corresponding C prototype for this function would be
+//   void hexagon_memcpy_forward_vp4cp4n2(volatile unsigned *dest,
+//                                        const unsigned *src,
+//                                        unsigned num_words);
+//
+// *** Both dest and src must be aligned to 32-bit boundaries. ***
+// The code does not perform any runtime checks for this, and will fail
+// in bad ways if this requirement is not met.
+//
+// The "forward" in the name refers to the fact that the function copies
+// the words going forward in memory. It is incorrect to use this function
+// for cases where the original code copied words in any other order.
+//
+// *** This function is only for use by the compiler. ***
+// The only intended use is for the LLVM compiler to generate calls to
+// this function, when a mem-copy loop, like the one above, is detected.
+
+  .text
+
+// Inputs:
+//   r0: dest
+//   r1: src
+//   r2: num_words
+
+  .globl hexagon_memcpy_forward_vp4cp4n2
+  .balign 32
+  .type hexagon_memcpy_forward_vp4cp4n2,@function
+hexagon_memcpy_forward_vp4cp4n2:
+
+  // Compute r3 to be the number of words remaining in the current page.
+  // At the same time, compute r4 to be the number of 32-byte blocks
+  // remaining in the page (for prefetch).
+  {
+    r3 = sub(##4096, r1)
+    r5 = lsr(r2, #3)
+  }
+  {
+    // The byte count before end-of-page is in the 12 lowest bits of r3.
+    // (If the address in r1 was already page-aligned, the bits are 0.)
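+    // Both extracts below read that byte count: bits 2-11 give the number
+    // of whole words left before the page boundary, and bits 5-11 give the
+    // number of 32-byte blocks, which feeds the l2fetch prefetch set up below.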
+ r3 = extractu(r3, #10, #2) + r4 = extractu(r3, #7, #5) + } + { + r3 = minu(r2, r3) + r4 = minu(r5, r4) + } + { + r4 = or(r4, ##2105344) // 2105344 = 0x202000 + p0 = cmp.eq(r3, #0) + if (p0.new) jump:nt .Lskipprolog + } + l2fetch(r1, r4) + { + loop0(.Lprolog, r3) + r2 = sub(r2, r3) // r2 = number of words left after the prolog. + } + .falign +.Lprolog: + { + r4 = memw(r1++#4) + memw(r0++#4) = r4.new + } :endloop0 +.Lskipprolog: + { + // Let r3 = number of whole pages left (page = 1024 words). + r3 = lsr(r2, #10) + if (cmp.eq(r3.new, #0)) jump:nt .Lskipmain + } + { + loop1(.Lout, r3) + r2 = extractu(r2, #10, #0) // r2 = r2 & 1023 + r3 = ##2105472 // r3 = 0x202080 (prefetch info) + } + // Iterate over pages. + .falign +.Lout: + // Prefetch each individual page. + l2fetch(r1, r3) + loop0(.Lpage, #512) + .falign +.Lpage: + r5:4 = memd(r1++#8) + { + memw(r0++#8) = r4 + memw(r0+#4) = r5 + } :endloop0:endloop1 +.Lskipmain: + { + r3 = ##2105344 // r3 = 0x202000 (prefetch info) + r4 = lsr(r2, #3) // r4 = number of 32-byte blocks remaining. + p0 = cmp.eq(r2, #0) + if (p0.new) jumpr:nt r31 + } + { + r3 = or(r3, r4) + loop0(.Lepilog, r2) + } + l2fetch(r1, r3) + .falign +.Lepilog: + { + r4 = memw(r1++#4) + memw(r0++#4) = r4.new + } :endloop0 + + jumpr r31 + +.size hexagon_memcpy_forward_vp4cp4n2, . - hexagon_memcpy_forward_vp4cp4n2 diff --git a/contrib/compiler-rt/lib/builtins/hexagon/memcpy_likely_aligned.S b/contrib/compiler-rt/lib/builtins/hexagon/memcpy_likely_aligned.S new file mode 100644 index 000000000000..bbc85c22db08 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/hexagon/memcpy_likely_aligned.S @@ -0,0 +1,64 @@ +//===------------------------- memcopy routines ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + + .macro FUNCTION_BEGIN name + .text + .p2align 5 + .globl \name + .type \name, @function +\name: + .endm + + .macro FUNCTION_END name + .size \name, . - \name + .endm + +FUNCTION_BEGIN __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes + { + p0 = bitsclr(r1,#7) + p0 = bitsclr(r0,#7) + if (p0.new) r5:4 = memd(r1) + r3 = #-3 + } + { + if (!p0) jump .Lmemcpy_call + if (p0) memd(r0++#8) = r5:4 + if (p0) r5:4 = memd(r1+#8) + r3 += lsr(r2,#3) + } + { + memd(r0++#8) = r5:4 + r5:4 = memd(r1+#16) + r1 = add(r1,#24) + loop0(1f,r3) + } + .falign +1: + { + memd(r0++#8) = r5:4 + r5:4 = memd(r1++#8) + }:endloop0 + { + memd(r0) = r5:4 + r0 -= add(r2,#-8) + jumpr r31 + } +FUNCTION_END __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes + +.Lmemcpy_call: +#ifdef __PIC__ + jump memcpy@PLT +#else + jump memcpy +#endif + + .globl __qdsp_memcpy_likely_aligned_min32bytes_mult8bytes + .set __qdsp_memcpy_likely_aligned_min32bytes_mult8bytes, \ + __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes diff --git a/contrib/compiler-rt/lib/builtins/hexagon/moddi3.S b/contrib/compiler-rt/lib/builtins/hexagon/moddi3.S new file mode 100644 index 000000000000..12a0595fe465 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/hexagon/moddi3.S @@ -0,0 +1,83 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + + .macro FUNCTION_BEGIN name + .text + .p2align 5 + .globl \name + .type \name, @function +\name: + .endm + + .macro FUNCTION_END name + .size \name, . - \name + .endm + + +FUNCTION_BEGIN __hexagon_moddi3 + { + p3 = tstbit(r1,#31) + } + { + r1:0 = abs(r1:0) + r3:2 = abs(r3:2) + } + { + r6 = cl0(r1:0) // count leading 0's of dividend (numerator) + r7 = cl0(r3:2) // count leading 0's of divisor (denominator) + r5:4 = r3:2 // divisor moved into working registers + r3:2 = r1:0 // dividend is the initial remainder, r3:2 contains remainder + } + { + r10 = sub(r7,r6) // left shift count for bit & divisor + r1:0 = #0 // initialize quotient to 0 + r15:14 = #1 // initialize bit to 1 + } + { + r11 = add(r10,#1) // loop count is 1 more than shift count + r13:12 = lsl(r5:4,r10) // shift divisor msb into same bit position as dividend msb + r15:14 = lsl(r15:14,r10) // shift the bit left by same amount as divisor + } + { + p0 = cmp.gtu(r5:4,r3:2) // check if divisor > dividend + loop0(1f,r11) // register loop + } + { + if (p0) jump .hexagon_moddi3_return // if divisor > dividend, we're done, so return + } + .falign +1: + { + p0 = cmp.gtu(r13:12,r3:2) // set predicate reg if shifted divisor > current remainder + } + { + r7:6 = sub(r3:2, r13:12) // subtract shifted divisor from current remainder + r9:8 = add(r1:0, r15:14) // save current quotient to temp (r9:8) + } + { + r1:0 = vmux(p0, r1:0, r9:8) // choose either current quotient or new quotient (r9:8) + r3:2 = vmux(p0, r3:2, r7:6) // choose either current remainder or new remainder (r7:6) + } + { + r15:14 = lsr(r15:14, #1) // shift bit right by 1 for next iteration + r13:12 = lsr(r13:12, #1) // shift "shifted divisor" right by 1 for next iteration + }:endloop0 + +.hexagon_moddi3_return: + { + r1:0 = neg(r3:2) + } + { + r1:0 = vmux(p3,r1:0,r3:2) + jumpr r31 + } +FUNCTION_END __hexagon_moddi3 + + .globl __qdsp_moddi3 + .set __qdsp_moddi3, __hexagon_moddi3 diff --git a/contrib/compiler-rt/lib/builtins/hexagon/modsi3.S b/contrib/compiler-rt/lib/builtins/hexagon/modsi3.S new file mode 100644 index 000000000000..5afda9e2978b --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/hexagon/modsi3.S @@ -0,0 +1,66 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + + + .macro FUNCTION_BEGIN name + .text + .p2align 5 + .globl \name + .type \name, @function +\name: + .endm + + .macro FUNCTION_END name + .size \name, . 
- \name + .endm + + +FUNCTION_BEGIN __hexagon_modsi3 + { + p2 = cmp.ge(r0,#0) + r2 = abs(r0) + r1 = abs(r1) + } + { + r3 = cl0(r2) + r4 = cl0(r1) + p0 = cmp.gtu(r1,r2) + } + { + r3 = sub(r4,r3) + if (p0) jumpr r31 + } + { + p1 = cmp.eq(r3,#0) + loop0(1f,r3) + r0 = r2 + r2 = lsl(r1,r3) + } + .falign +1: + { + p0 = cmp.gtu(r2,r0) + if (!p0.new) r0 = sub(r0,r2) + r2 = lsr(r2,#1) + if (p1) r1 = #0 + }:endloop0 + { + p0 = cmp.gtu(r2,r0) + if (!p0.new) r0 = sub(r0,r1) + if (p2) jumpr r31 + } + { + r0 = neg(r0) + jumpr r31 + } +FUNCTION_END __hexagon_modsi3 + + .globl __qdsp_modsi3 + .set __qdsp_modsi3, __hexagon_modsi3 diff --git a/contrib/compiler-rt/lib/builtins/hexagon/sfdiv_opt.S b/contrib/compiler-rt/lib/builtins/hexagon/sfdiv_opt.S new file mode 100644 index 000000000000..6bdd4808c2b8 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/hexagon/sfdiv_opt.S @@ -0,0 +1,66 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + .macro FUNCTION_BEGIN name + .text + .p2align 5 + .globl \name + .type \name, @function +\name: + .endm + + .macro FUNCTION_END name + .size \name, . - \name + .endm + +#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG +#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG +#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG + +FUNCTION_BEGIN __hexagon_divsf3 + { + r2,p0 = sfrecipa(r0,r1) + r4 = sffixupd(r0,r1) + r3 = ##0x3f800000 // 1.0 + } + { + r5 = sffixupn(r0,r1) + r3 -= sfmpy(r4,r2):lib // 1-(den/recip) yields error? + r6 = ##0x80000000 + r7 = r3 + } + { + r2 += sfmpy(r3,r2):lib + r3 = r7 + r6 = r5 + r0 = and(r6,r5) + } + { + r3 -= sfmpy(r4,r2):lib + r0 += sfmpy(r5,r2):lib + } + { + r2 += sfmpy(r3,r2):lib + r6 -= sfmpy(r0,r4):lib + } + { + r0 += sfmpy(r6,r2):lib + } + { + r5 -= sfmpy(r0,r4):lib + } + { + r0 += sfmpy(r5,r2,p0):scale + jumpr r31 + } +FUNCTION_END __hexagon_divsf3 + +Q6_ALIAS(divsf3) +FAST_ALIAS(divsf3) +FAST2_ALIAS(divsf3) diff --git a/contrib/compiler-rt/lib/builtins/hexagon/sfsqrt_opt.S b/contrib/compiler-rt/lib/builtins/hexagon/sfsqrt_opt.S new file mode 100644 index 000000000000..7f619002774f --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/hexagon/sfsqrt_opt.S @@ -0,0 +1,82 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + .macro FUNCTION_BEGIN name + .text + .p2align 5 + .globl \name + .type \name, @function +\name: + .endm + + .macro FUNCTION_END name + .size \name, . 
- \name + .endm + +#define RIN r0 +#define S r0 +#define H r1 +#define D r2 +#define E r3 +#define HALF r4 +#define R r5 + +#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG +#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG +#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG + +FUNCTION_BEGIN __hexagon_sqrtf + { + E,p0 = sfinvsqrta(RIN) + R = sffixupr(RIN) + HALF = ##0x3f000000 // 0.5 + r1:0 = combine(#0,#0) // clear S/H + } + { + S += sfmpy(E,R):lib // S0 + H += sfmpy(E,HALF):lib // H0 + D = HALF + E = R + } + { + D -= sfmpy(S,H):lib // d0 + p1 = sfclass(R,#1) // is zero? + //E -= sfmpy(S,S):lib // e0 + } + { + S += sfmpy(S,D):lib // S1 + H += sfmpy(H,D):lib // H1 + D = HALF + E = R + } + { + D -= sfmpy(S,H):lib // d0 + E -= sfmpy(S,S):lib // e0 + } + { + S += sfmpy(H,E):lib // S2 + H += sfmpy(H,D):lib // H2 + D = HALF + E = R + } + { + //D -= sfmpy(S,H):lib // d2 + E -= sfmpy(S,S):lib // e2 + if (p1) r0 = or(r0,R) // sqrt(-0.0) = -0.0 + } + { + S += sfmpy(H,E,p0):scale // S3 + jumpr r31 + } + +FUNCTION_END __hexagon_sqrtf + +Q6_ALIAS(sqrtf) +FAST_ALIAS(sqrtf) +FAST2_ALIAS(sqrtf) diff --git a/contrib/compiler-rt/lib/builtins/hexagon/udivdi3.S b/contrib/compiler-rt/lib/builtins/hexagon/udivdi3.S new file mode 100644 index 000000000000..1ca326b75208 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/hexagon/udivdi3.S @@ -0,0 +1,71 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + + .macro FUNCTION_BEGIN name + .text + .p2align 5 + .globl \name + .type \name, @function +\name: + .endm + + .macro FUNCTION_END name + .size \name, . 
- \name + .endm + + +FUNCTION_BEGIN __hexagon_udivdi3 + { + r6 = cl0(r1:0) // count leading 0's of dividend (numerator) + r7 = cl0(r3:2) // count leading 0's of divisor (denominator) + r5:4 = r3:2 // divisor moved into working registers + r3:2 = r1:0 // dividend is the initial remainder, r3:2 contains remainder + } + { + r10 = sub(r7,r6) // left shift count for bit & divisor + r1:0 = #0 // initialize quotient to 0 + r15:14 = #1 // initialize bit to 1 + } + { + r11 = add(r10,#1) // loop count is 1 more than shift count + r13:12 = lsl(r5:4,r10) // shift divisor msb into same bit position as dividend msb + r15:14 = lsl(r15:14,r10) // shift the bit left by same amount as divisor + } + { + p0 = cmp.gtu(r5:4,r3:2) // check if divisor > dividend + loop0(1f,r11) // register loop + } + { + if (p0) jumpr r31 // if divisor > dividend, we're done, so return + } + .falign +1: + { + p0 = cmp.gtu(r13:12,r3:2) // set predicate reg if shifted divisor > current remainder + } + { + r7:6 = sub(r3:2, r13:12) // subtract shifted divisor from current remainder + r9:8 = add(r1:0, r15:14) // save current quotient to temp (r9:8) + } + { + r1:0 = vmux(p0, r1:0, r9:8) // choose either current quotient or new quotient (r9:8) + r3:2 = vmux(p0, r3:2, r7:6) // choose either current remainder or new remainder (r7:6) + } + { + r15:14 = lsr(r15:14, #1) // shift bit right by 1 for next iteration + r13:12 = lsr(r13:12, #1) // shift "shifted divisor" right by 1 for next iteration + }:endloop0 + { + jumpr r31 // return + } +FUNCTION_END __hexagon_udivdi3 + + .globl __qdsp_udivdi3 + .set __qdsp_udivdi3, __hexagon_udivdi3 diff --git a/contrib/compiler-rt/lib/builtins/hexagon/udivmoddi4.S b/contrib/compiler-rt/lib/builtins/hexagon/udivmoddi4.S new file mode 100644 index 000000000000..deb5aae0924d --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/hexagon/udivmoddi4.S @@ -0,0 +1,71 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + + .macro FUNCTION_BEGIN name + .text + .p2align 5 + .globl \name + .type \name, @function +\name: + .endm + + .macro FUNCTION_END name + .size \name, . 
- \name + .endm + + +FUNCTION_BEGIN __hexagon_udivmoddi4 + { + r6 = cl0(r1:0) // count leading 0's of dividend (numerator) + r7 = cl0(r3:2) // count leading 0's of divisor (denominator) + r5:4 = r3:2 // divisor moved into working registers + r3:2 = r1:0 // dividend is the initial remainder, r3:2 contains remainder + } + { + r10 = sub(r7,r6) // left shift count for bit & divisor + r1:0 = #0 // initialize quotient to 0 + r15:14 = #1 // initialize bit to 1 + } + { + r11 = add(r10,#1) // loop count is 1 more than shift count + r13:12 = lsl(r5:4,r10) // shift divisor msb into same bit position as dividend msb + r15:14 = lsl(r15:14,r10) // shift the bit left by same amount as divisor + } + { + p0 = cmp.gtu(r5:4,r3:2) // check if divisor > dividend + loop0(1f,r11) // register loop + } + { + if (p0) jumpr r31 // if divisor > dividend, we're done, so return + } + .falign +1: + { + p0 = cmp.gtu(r13:12,r3:2) // set predicate reg if shifted divisor > current remainder + } + { + r7:6 = sub(r3:2, r13:12) // subtract shifted divisor from current remainder + r9:8 = add(r1:0, r15:14) // save current quotient to temp (r9:8) + } + { + r1:0 = vmux(p0, r1:0, r9:8) // choose either current quotient or new quotient (r9:8) + r3:2 = vmux(p0, r3:2, r7:6) // choose either current remainder or new remainder (r7:6) + } + { + r15:14 = lsr(r15:14, #1) // shift bit right by 1 for next iteration + r13:12 = lsr(r13:12, #1) // shift "shifted divisor" right by 1 for next iteration + }:endloop0 + { + jumpr r31 // return + } +FUNCTION_END __hexagon_udivmoddi4 + + .globl __qdsp_udivmoddi4 + .set __qdsp_udivmoddi4, __hexagon_udivmoddi4 diff --git a/contrib/compiler-rt/lib/builtins/hexagon/udivmodsi4.S b/contrib/compiler-rt/lib/builtins/hexagon/udivmodsi4.S new file mode 100644 index 000000000000..25bbe7cd5925 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/hexagon/udivmodsi4.S @@ -0,0 +1,60 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + + .macro FUNCTION_BEGIN name + .text + .p2align 5 + .globl \name + .type \name, @function +\name: + .endm + + .macro FUNCTION_END name + .size \name, . - \name + .endm + + +FUNCTION_BEGIN __hexagon_udivmodsi4 + { + r2 = cl0(r0) + r3 = cl0(r1) + r5:4 = combine(#1,#0) + p0 = cmp.gtu(r1,r0) + } + { + r6 = sub(r3,r2) + r4 = r1 + r1:0 = combine(r0,r4) + if (p0) jumpr r31 + } + { + r3:2 = vlslw(r5:4,r6) + loop0(1f,r6) + p0 = cmp.eq(r6,#0) + if (p0.new) r4 = #0 + } + .falign +1: + { + p0 = cmp.gtu(r2,r1) + if (!p0.new) r1 = sub(r1,r2) + if (!p0.new) r0 = add(r0,r3) + r3:2 = vlsrw(r3:2,#1) + }:endloop0 + { + p0 = cmp.gtu(r2,r1) + if (!p0.new) r1 = sub(r1,r4) + if (!p0.new) r0 = add(r0,r3) + jumpr r31 + } +FUNCTION_END __hexagon_udivmodsi4 + + .globl __qdsp_udivmodsi4 + .set __qdsp_udivmodsi4, __hexagon_udivmodsi4 diff --git a/contrib/compiler-rt/lib/builtins/hexagon/udivsi3.S b/contrib/compiler-rt/lib/builtins/hexagon/udivsi3.S new file mode 100644 index 000000000000..54f0aa409f93 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/hexagon/udivsi3.S @@ -0,0 +1,56 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + .macro FUNCTION_BEGIN name + .text + .p2align 5 + .globl \name + .type \name, @function +\name: + .endm + + .macro FUNCTION_END name + .size \name, . - \name + .endm + + +FUNCTION_BEGIN __hexagon_udivsi3 + { + r2 = cl0(r0) + r3 = cl0(r1) + r5:4 = combine(#1,#0) + p0 = cmp.gtu(r1,r0) + } + { + r6 = sub(r3,r2) + r4 = r1 + r1:0 = combine(r0,r4) + if (p0) jumpr r31 + } + { + r3:2 = vlslw(r5:4,r6) + loop0(1f,r6) + } + .falign +1: + { + p0 = cmp.gtu(r2,r1) + if (!p0.new) r1 = sub(r1,r2) + if (!p0.new) r0 = add(r0,r3) + r3:2 = vlsrw(r3:2,#1) + }:endloop0 + { + p0 = cmp.gtu(r2,r1) + if (!p0.new) r0 = add(r0,r3) + jumpr r31 + } +FUNCTION_END __hexagon_udivsi3 + + .globl __qdsp_udivsi3 + .set __qdsp_udivsi3, __hexagon_udivsi3 diff --git a/contrib/compiler-rt/lib/builtins/hexagon/umoddi3.S b/contrib/compiler-rt/lib/builtins/hexagon/umoddi3.S new file mode 100644 index 000000000000..f091521414af --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/hexagon/umoddi3.S @@ -0,0 +1,74 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + + .macro FUNCTION_BEGIN name + .text + .p2align 5 + .globl \name + .type \name, @function +\name: + .endm + + .macro FUNCTION_END name + .size \name, . - \name + .endm + + +FUNCTION_BEGIN __hexagon_umoddi3 + { + r6 = cl0(r1:0) // count leading 0's of dividend (numerator) + r7 = cl0(r3:2) // count leading 0's of divisor (denominator) + r5:4 = r3:2 // divisor moved into working registers + r3:2 = r1:0 // dividend is the initial remainder, r3:2 contains remainder + } + { + r10 = sub(r7,r6) // left shift count for bit & divisor + r1:0 = #0 // initialize quotient to 0 + r15:14 = #1 // initialize bit to 1 + } + { + r11 = add(r10,#1) // loop count is 1 more than shift count + r13:12 = lsl(r5:4,r10) // shift divisor msb into same bit position as dividend msb + r15:14 = lsl(r15:14,r10) // shift the bit left by same amount as divisor + } + { + p0 = cmp.gtu(r5:4,r3:2) // check if divisor > dividend + loop0(1f,r11) // register loop + } + { + if (p0) jump .hexagon_umoddi3_return // if divisor > dividend, we're done, so return + } + .falign +1: + { + p0 = cmp.gtu(r13:12,r3:2) // set predicate reg if shifted divisor > current remainder + } + { + r7:6 = sub(r3:2, r13:12) // subtract shifted divisor from current remainder + r9:8 = add(r1:0, r15:14) // save current quotient to temp (r9:8) + } + { + r1:0 = vmux(p0, r1:0, r9:8) // choose either current quotient or new quotient (r9:8) + r3:2 = vmux(p0, r3:2, r7:6) // choose either current remainder or new remainder (r7:6) + } + { + r15:14 = lsr(r15:14, #1) // shift bit right by 1 for next iteration + r13:12 = lsr(r13:12, #1) // shift "shifted divisor" right by 1 for next iteration + }:endloop0 + +.hexagon_umoddi3_return: + { + r1:0 = r3:2 + jumpr r31 + } +FUNCTION_END __hexagon_umoddi3 + + .globl __qdsp_umoddi3 + .set __qdsp_umoddi3, __hexagon_umoddi3 diff --git a/contrib/compiler-rt/lib/builtins/hexagon/umodsi3.S b/contrib/compiler-rt/lib/builtins/hexagon/umodsi3.S new file mode 100644 index 000000000000..a8270c2030d5 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/hexagon/umodsi3.S @@ -0,0 +1,55 @@ 
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+  .macro FUNCTION_BEGIN name
+    .text
+    .p2align 5
+    .globl \name
+    .type \name, @function
+\name:
+  .endm
+
+  .macro FUNCTION_END name
+    .size \name, . - \name
+  .endm
+
+
+FUNCTION_BEGIN __hexagon_umodsi3
+  {
+    r2 = cl0(r0)
+    r3 = cl0(r1)
+    p0 = cmp.gtu(r1,r0)
+  }
+  {
+    r2 = sub(r3,r2)
+    if (p0) jumpr r31
+  }
+  {
+    loop0(1f,r2)
+    p1 = cmp.eq(r2,#0)
+    r2 = lsl(r1,r2)
+  }
+  .falign
+1:
+  {
+    p0 = cmp.gtu(r2,r0)
+    if (!p0.new) r0 = sub(r0,r2)
+    r2 = lsr(r2,#1)
+    if (p1) r1 = #0
+  }:endloop0
+  {
+    p0 = cmp.gtu(r2,r0)
+    if (!p0.new) r0 = sub(r0,r1)
+    jumpr r31
+  }
+FUNCTION_END __hexagon_umodsi3
+
+  .globl __qdsp_umodsi3
+  .set __qdsp_umodsi3, __hexagon_umodsi3
diff --git a/contrib/compiler-rt/lib/builtins/i386/ashldi3.S b/contrib/compiler-rt/lib/builtins/i386/ashldi3.S new file mode 100644 index 000000000000..6f05dcf74443 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/i386/ashldi3.S @@ -0,0 +1,61 @@ +// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// di_int __ashldi3(di_int input, int count);
+
+// This routine has some extra memory traffic, loading the 64-bit input via two
+// 32-bit loads, then immediately storing it back to the stack via a single 64-bit
+// store. This is to avoid a write-small, read-large stall.
+// However, if callers of this routine can be safely assumed to store the argument
+// via a 64-bit store, this is unnecessary memory traffic, and should be avoided.
+// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro.
+
+#ifdef __i386__
+#ifdef __SSE2__
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__ashldi3)
+  movd 12(%esp), %xmm2 // Load count
+#ifndef TRUST_CALLERS_USE_64_BIT_STORES
+  movd 4(%esp), %xmm0
+  movd 8(%esp), %xmm1
+  punpckldq %xmm1, %xmm0 // Load input
+#else
+  movq 4(%esp), %xmm0 // Load input
+#endif
+  psllq %xmm2, %xmm0 // shift input by count
+  movd %xmm0, %eax
+  psrlq $32, %xmm0
+  movd %xmm0, %edx
+  ret
+END_COMPILERRT_FUNCTION(__ashldi3)
+
+#else // Use GPRs instead of SSE2 instructions, if they aren't available.
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__ashldi3)
+  movl 12(%esp), %ecx // Load count
+  movl 8(%esp), %edx // Load high
+  movl 4(%esp), %eax // Load low
+
+  testl $0x20, %ecx // If count >= 32
+  jnz 1f // goto 1
+  shldl %cl, %eax, %edx // left shift high by count
+  shll %cl, %eax // left shift low by count
+  ret
+
+1: movl %eax, %edx // Move low to high
+  xorl %eax, %eax // clear low
+  shll %cl, %edx // shift high by count - 32
+  ret
+END_COMPILERRT_FUNCTION(__ashldi3)
+
+#endif // __SSE2__
+#endif // __i386__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/i386/ashrdi3.S b/contrib/compiler-rt/lib/builtins/i386/ashrdi3.S new file mode 100644 index 000000000000..206369f360aa --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/i386/ashrdi3.S @@ -0,0 +1,72 @@ +// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+ +#include "../assembly.h" + +// di_int __ashrdi3(di_int input, int count); + +#ifdef __i386__ +#ifdef __SSE2__ + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__ashrdi3) + movd 12(%esp), %xmm2 // Load count + movl 8(%esp), %eax +#ifndef TRUST_CALLERS_USE_64_BIT_STORES + movd 4(%esp), %xmm0 + movd 8(%esp), %xmm1 + punpckldq %xmm1, %xmm0 // Load input +#else + movq 4(%esp), %xmm0 // Load input +#endif + + psrlq %xmm2, %xmm0 // unsigned shift input by count + + testl %eax, %eax // check the sign-bit of the input + jns 1f // early out for positive inputs + + // If the input is negative, we need to construct the shifted sign bit + // to or into the result, as xmm does not have a signed right shift. + pcmpeqb %xmm1, %xmm1 // -1ULL + psrlq $58, %xmm1 // 0x3f + pandn %xmm1, %xmm2 // 63 - count + pcmpeqb %xmm1, %xmm1 // -1ULL + psubq %xmm1, %xmm2 // 64 - count + psllq %xmm2, %xmm1 // -1 << (64 - count) = leading sign bits + por %xmm1, %xmm0 + + // Move the result back to the general purpose registers and return +1: movd %xmm0, %eax + psrlq $32, %xmm0 + movd %xmm0, %edx + ret +END_COMPILERRT_FUNCTION(__ashrdi3) + +#else // Use GPRs instead of SSE2 instructions, if they aren't available. + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__ashrdi3) + movl 12(%esp), %ecx // Load count + movl 8(%esp), %edx // Load high + movl 4(%esp), %eax // Load low + + testl $0x20, %ecx // If count >= 32 + jnz 1f // goto 1 + + shrdl %cl, %edx, %eax // right shift low by count + sarl %cl, %edx // right shift high by count + ret + +1: movl %edx, %eax // Move high to low + sarl $31, %edx // clear high + sarl %cl, %eax // shift low by count - 32 + ret +END_COMPILERRT_FUNCTION(__ashrdi3) + +#endif // __SSE2__ +#endif // __i386__ + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/i386/chkstk.S b/contrib/compiler-rt/lib/builtins/i386/chkstk.S new file mode 100644 index 000000000000..b59974868f21 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/i386/chkstk.S @@ -0,0 +1,34 @@ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// _chkstk routine +// This routine is windows specific +// http://msdn.microsoft.com/en-us/library/ms648426.aspx + +#ifdef __i386__ + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__chkstk_ms) + push %ecx + push %eax + cmp $0x1000,%eax + lea 12(%esp),%ecx + jb 1f +2: + sub $0x1000,%ecx + test %ecx,(%ecx) + sub $0x1000,%eax + cmp $0x1000,%eax + ja 2b +1: + sub %eax,%ecx + test %ecx,(%ecx) + pop %eax + pop %ecx + ret +END_COMPILERRT_FUNCTION(__chkstk_ms) + +#endif // __i386__ diff --git a/contrib/compiler-rt/lib/builtins/i386/chkstk2.S b/contrib/compiler-rt/lib/builtins/i386/chkstk2.S new file mode 100644 index 000000000000..7d65bb088928 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/i386/chkstk2.S @@ -0,0 +1,40 @@ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +#ifdef __i386__ + +// _chkstk (_alloca) routine - probe stack between %esp and (%esp-%eax) in 4k increments, +// then decrement %esp by %eax. Preserves all registers except %esp and flags. 
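The probe loop used by __chkstk_ms above, and by the _chkstk/_alloca variant whose description this note follows, touches one word in every 4 KiB page between the current stack pointer and the requested new top, so that Windows' guard page is faulted in order and the stack can grow. In C-like pseudocode (hypothetical, for illustration only):

/* "touch" corresponds to the "test %ecx,(%ecx)" reads in the assembly. */
static void chkstk_sketch(char *sp, unsigned long request) {
    char *p = sp;
    while (request >= 0x1000) {
        p -= 0x1000;
        (void)*(volatile char *)p;     /* probe this page       */
        request -= 0x1000;
    }
    p -= request;
    (void)*(volatile char *)p;         /* probe the final page  */
}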
+// This routine is windows specific +// http://msdn.microsoft.com/en-us/library/ms648426.aspx + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(_alloca) // _chkstk and _alloca are the same function +DEFINE_COMPILERRT_FUNCTION(__chkstk) + push %ecx + cmp $0x1000,%eax + lea 8(%esp),%ecx // esp before calling this routine -> ecx + jb 1f +2: + sub $0x1000,%ecx + test %ecx,(%ecx) + sub $0x1000,%eax + cmp $0x1000,%eax + ja 2b +1: + sub %eax,%ecx + test %ecx,(%ecx) + + lea 4(%esp),%eax // load pointer to the return address into eax + mov %ecx,%esp // install the new top of stack pointer into esp + mov -4(%eax),%ecx // restore ecx + push (%eax) // push return address onto the stack + sub %esp,%eax // restore the original value in eax + ret +END_COMPILERRT_FUNCTION(__chkstk) +END_COMPILERRT_FUNCTION(_alloca) + +#endif // __i386__ diff --git a/contrib/compiler-rt/lib/builtins/i386/divdi3.S b/contrib/compiler-rt/lib/builtins/i386/divdi3.S new file mode 100644 index 000000000000..2fb4bdcad90d --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/i386/divdi3.S @@ -0,0 +1,165 @@ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// di_int __divdi3(di_int a, di_int b); + +// result = a / b. +// both inputs and the output are 64-bit signed integers. +// This will do whatever the underlying hardware is set to do on division by zero. +// No other exceptions are generated, as the divide cannot overflow. +// +// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware +// on x86_64. The performance goal is ~40 cycles per divide, which is faster than +// currently possible via simulation of integer divides on the x87 unit. +// +// Stephen Canon, December 2008 + +#ifdef __i386__ + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__divdi3) + +/* This is currently implemented by wrapping the unsigned divide up in an absolute + value, then restoring the correct sign at the end of the computation. This could + certainly be improved upon. */ + + pushl %esi + movl 20(%esp), %edx // high word of b + movl 16(%esp), %eax // low word of b + movl %edx, %ecx + sarl $31, %ecx // (b < 0) ? -1 : 0 + xorl %ecx, %eax + xorl %ecx, %edx // EDX:EAX = (b < 0) ? not(b) : b + subl %ecx, %eax + sbbl %ecx, %edx // EDX:EAX = abs(b) + movl %edx, 20(%esp) + movl %eax, 16(%esp) // store abs(b) back to stack + movl %ecx, %esi // set aside sign of b + + movl 12(%esp), %edx // high word of b + movl 8(%esp), %eax // low word of b + movl %edx, %ecx + sarl $31, %ecx // (a < 0) ? -1 : 0 + xorl %ecx, %eax + xorl %ecx, %edx // EDX:EAX = (a < 0) ? not(a) : a + subl %ecx, %eax + sbbl %ecx, %edx // EDX:EAX = abs(a) + movl %edx, 12(%esp) + movl %eax, 8(%esp) // store abs(a) back to stack + xorl %ecx, %esi // sign of result = (sign of a) ^ (sign of b) + + pushl %ebx + movl 24(%esp), %ebx // Find the index i of the leading bit in b. + bsrl %ebx, %ecx // If the high word of b is zero, jump to + jz 9f // the code to handle that special case [9]. 
+ + /* High word of b is known to be non-zero on this branch */ + + movl 20(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b + + shrl %cl, %eax // Practically, this means that bhi is given by: + shrl %eax // + notl %ecx // bhi = (high word of b) << (31 - i) | + shll %cl, %ebx // (low word of b) >> (1 + i) + orl %eax, %ebx // + movl 16(%esp), %edx // Load the high and low words of a, and jump + movl 12(%esp), %eax // to [1] if the high word is larger than bhi + cmpl %ebx, %edx // to avoid overflowing the upcoming divide. + jae 1f + + /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ + + divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r + + pushl %edi + notl %ecx + shrl %eax + shrl %cl, %eax // q = qs >> (1 + i) + movl %eax, %edi + mull 24(%esp) // q*blo + movl 16(%esp), %ebx + movl 20(%esp), %ecx // ECX:EBX = a + subl %eax, %ebx + sbbl %edx, %ecx // ECX:EBX = a - q*blo + movl 28(%esp), %eax + imull %edi, %eax // q*bhi + subl %eax, %ecx // ECX:EBX = a - q*b + sbbl $0, %edi // decrement q if remainder is negative + xorl %edx, %edx + movl %edi, %eax + + addl %esi, %eax // Restore correct sign to result + adcl %esi, %edx + xorl %esi, %eax + xorl %esi, %edx + popl %edi // Restore callee-save registers + popl %ebx + popl %esi + retl // Return + + +1: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ + + subl %ebx, %edx // subtract bhi from ahi so that divide will not + divl %ebx // overflow, and find q and r such that + // + // ahi:alo = (1:q)*bhi + r + // + // Note that q is a number in (31-i).(1+i) + // fix point. + + pushl %edi + notl %ecx + shrl %eax + orl $0x80000000, %eax + shrl %cl, %eax // q = (1:qs) >> (1 + i) + movl %eax, %edi + mull 24(%esp) // q*blo + movl 16(%esp), %ebx + movl 20(%esp), %ecx // ECX:EBX = a + subl %eax, %ebx + sbbl %edx, %ecx // ECX:EBX = a - q*blo + movl 28(%esp), %eax + imull %edi, %eax // q*bhi + subl %eax, %ecx // ECX:EBX = a - q*b + sbbl $0, %edi // decrement q if remainder is negative + xorl %edx, %edx + movl %edi, %eax + + addl %esi, %eax // Restore correct sign to result + adcl %esi, %edx + xorl %esi, %eax + xorl %esi, %edx + popl %edi // Restore callee-save registers + popl %ebx + popl %esi + retl // Return + + +9: /* High word of b is zero on this branch */ + + movl 16(%esp), %eax // Find qhi and rhi such that + movl 20(%esp), %ecx // + xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b + divl %ecx // + movl %eax, %ebx // + movl 12(%esp), %eax // Find qlo such that + divl %ecx // + movl %ebx, %edx // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b + + addl %esi, %eax // Restore correct sign to result + adcl %esi, %edx + xorl %esi, %eax + xorl %esi, %edx + popl %ebx // Restore callee-save registers + popl %esi + retl // Return +END_COMPILERRT_FUNCTION(__divdi3) + +#endif // __i386__ + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/i386/floatdidf.S b/contrib/compiler-rt/lib/builtins/i386/floatdidf.S new file mode 100644 index 000000000000..d75dfe62d6a7 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/i386/floatdidf.S @@ -0,0 +1,42 @@ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
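As the comment at the top of __divdi3 above says, the signed divide is wrapped around an unsigned one: take absolute values branch-free, divide, then restore the sign of the result. That wrapper looks like this in C (a sketch; udiv64 stands in for the unsigned core and is assumed, not defined here, and arithmetic right shift of negative values is assumed, as the library itself assumes):

#include <stdint.h>

extern uint64_t udiv64(uint64_t a, uint64_t b);      /* hypothetical unsigned core */

static int64_t sdiv64_sketch(int64_t a, int64_t b) {
    int64_t sa = a >> 63;                            /* a < 0 ? -1 : 0         */
    int64_t sb = b >> 63;                            /* b < 0 ? -1 : 0         */
    uint64_t ua = (uint64_t)((a ^ sa) - sa);         /* abs(a), branch-free    */
    uint64_t ub = (uint64_t)((b ^ sb) - sb);         /* abs(b), branch-free    */
    int64_t s = sa ^ sb;                             /* sign of the quotient   */
    return ((int64_t)udiv64(ua, ub) ^ s) - s;        /* negate if s == -1      */
}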
+ +#include "../assembly.h" + +// double __floatundidf(du_int a); + +#ifdef __i386__ + +CONST_SECTION + + .balign 16 +twop52: + .quad 0x4330000000000000 + + .balign 16 +twop32: + .quad 0x41f0000000000000 + +#define REL_ADDR(_a) (_a)-0b(%eax) + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__floatdidf) + cvtsi2sd 8(%esp), %xmm1 + movss 4(%esp), %xmm0 // low 32 bits of a + calll 0f +0: popl %eax + mulsd REL_ADDR(twop32), %xmm1 // a_hi as a double (without rounding) + movsd REL_ADDR(twop52), %xmm2 // 0x1.0p52 + subsd %xmm2, %xmm1 // a_hi - 0x1p52 (no rounding occurs) + orpd %xmm2, %xmm0 // 0x1p52 + a_lo (no rounding occurs) + addsd %xmm1, %xmm0 // a_hi + a_lo (round happens here) + movsd %xmm0, 4(%esp) + fldl 4(%esp) + ret +END_COMPILERRT_FUNCTION(__floatdidf) + +#endif // __i386__ + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/i386/floatdisf.S b/contrib/compiler-rt/lib/builtins/i386/floatdisf.S new file mode 100644 index 000000000000..0874eaaa9a98 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/i386/floatdisf.S @@ -0,0 +1,35 @@ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// float __floatdisf(di_int a); + +// This routine has some extra memory traffic, loading the 64-bit input via two +// 32-bit loads, then immediately storing it back to the stack via a single 64-bit +// store. This is to avoid a write-small, read-large stall. +// However, if callers of this routine can be safely assumed to store the argument +// via a 64-bt store, this is unnecessary memory traffic, and should be avoided. +// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro. + +#ifdef __i386__ + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__floatdisf) +#ifndef TRUST_CALLERS_USE_64_BIT_STORES + movd 4(%esp), %xmm0 + movd 8(%esp), %xmm1 + punpckldq %xmm1, %xmm0 + movq %xmm0, 4(%esp) +#endif + fildll 4(%esp) + fstps 4(%esp) + flds 4(%esp) + ret +END_COMPILERRT_FUNCTION(__floatdisf) + +#endif // __i386__ + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/i386/floatdixf.S b/contrib/compiler-rt/lib/builtins/i386/floatdixf.S new file mode 100644 index 000000000000..1044ef55a1a8 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/i386/floatdixf.S @@ -0,0 +1,33 @@ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// float __floatdixf(di_int a); + +#ifdef __i386__ + +// This routine has some extra memory traffic, loading the 64-bit input via two +// 32-bit loads, then immediately storing it back to the stack via a single 64-bit +// store. This is to avoid a write-small, read-large stall. +// However, if callers of this routine can be safely assumed to store the argument +// via a 64-bt store, this is unnecessary memory traffic, and should be avoided. +// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro. 
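The twop52/twop32 constants used by __floatdidf above implement a standard trick: OR-ing a 32-bit value into the mantissa of 0x1.0p52 yields 0x1p52 + value exactly, so the low half of the 64-bit input becomes a double with no rounding, and the only rounding happens in the final add. Roughly, in C (illustrative; the bits_to_double helper is hypothetical, and arithmetic right shift of negatives is assumed, as elsewhere in the library):

#include <stdint.h>
#include <string.h>

static double bits_to_double(uint64_t bits) {
    double d;
    memcpy(&d, &bits, sizeof d);
    return d;
}

static double floatdidf_sketch(int64_t a) {
    double hi = (double)(int32_t)(a >> 32) * 0x1p32;      /* exact product        */
    double lo = bits_to_double(0x4330000000000000ULL      /* 0x1.0p52 ...         */
                               | (uint32_t)a)             /* ... plus low 32 bits */
                - 0x1p52;                                  /* remove the offset    */
    return hi + lo;                                        /* single rounding      */
}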
+ +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__floatdixf) +#ifndef TRUST_CALLERS_USE_64_BIT_STORES + movd 4(%esp), %xmm0 + movd 8(%esp), %xmm1 + punpckldq %xmm1, %xmm0 + movq %xmm0, 4(%esp) +#endif + fildll 4(%esp) + ret +END_COMPILERRT_FUNCTION(__floatdixf) + +#endif // __i386__ + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/i386/floatundidf.S b/contrib/compiler-rt/lib/builtins/i386/floatundidf.S new file mode 100644 index 000000000000..fe032348e829 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/i386/floatundidf.S @@ -0,0 +1,55 @@ +//===-- floatundidf.S - Implement __floatundidf for i386 ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __floatundidf for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// double __floatundidf(du_int a); + +#ifdef __i386__ + +CONST_SECTION + + .balign 16 +twop52: + .quad 0x4330000000000000 + + .balign 16 +twop84_plus_twop52: + .quad 0x4530000000100000 + + .balign 16 +twop84: + .quad 0x4530000000000000 + +#define REL_ADDR(_a) (_a)-0b(%eax) + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__floatundidf) + movss 8(%esp), %xmm1 // high 32 bits of a + movss 4(%esp), %xmm0 // low 32 bits of a + calll 0f +0: popl %eax + orpd REL_ADDR(twop84), %xmm1 // 0x1p84 + a_hi (no rounding occurs) + subsd REL_ADDR(twop84_plus_twop52), %xmm1 // a_hi - 0x1p52 (no rounding occurs) + orpd REL_ADDR(twop52), %xmm0 // 0x1p52 + a_lo (no rounding occurs) + addsd %xmm1, %xmm0 // a_hi + a_lo (round happens here) + movsd %xmm0, 4(%esp) + fldl 4(%esp) + ret +END_COMPILERRT_FUNCTION(__floatundidf) + +#endif // __i386__ + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/i386/floatundisf.S b/contrib/compiler-rt/lib/builtins/i386/floatundisf.S new file mode 100644 index 000000000000..16000b576026 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/i386/floatundisf.S @@ -0,0 +1,108 @@ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// float __floatundisf(du_int a); + +// Note that there is a hardware instruction, fildll, that does most of what +// this function needs to do. However, because of our ia32 ABI, it will take +// a write-small read-large stall, so the software implementation here is +// actually several cycles faster. + +// This is a branch-free implementation. A branchy implementation might be +// faster for the common case if you know something a priori about the input +// distribution. + +/* branch-free x87 implementation - one cycle slower than without x87. 
+ +#ifdef __i386__ + +CONST_SECTION +.balign 3 + + .quad 0x43f0000000000000 +twop64: .quad 0x0000000000000000 + +#define TWOp64 twop64-0b(%ecx,%eax,8) + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__floatundisf) + movl 8(%esp), %eax + movd 8(%esp), %xmm1 + movd 4(%esp), %xmm0 + punpckldq %xmm1, %xmm0 + calll 0f +0: popl %ecx + sarl $31, %eax + movq %xmm0, 4(%esp) + fildll 4(%esp) + faddl TWOp64 + fstps 4(%esp) + flds 4(%esp) + ret +END_COMPILERRT_FUNCTION(__floatundisf) + +#endif // __i386__ + +*/ + +/* branch-free, x87-free implementation - faster at the expense of code size */ + +#ifdef __i386__ + +CONST_SECTION + + .balign 16 +twop52: + .quad 0x4330000000000000 + .quad 0x0000000000000fff + + .balign 16 +sticky: + .quad 0x0000000000000000 + .long 0x00000012 + + .balign 16 +twelve: + .long 0x00000000 + +#define TWOp52 twop52-0b(%ecx) +#define STICKY sticky-0b(%ecx,%eax,8) + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__floatundisf) + movl 8(%esp), %eax + movd 8(%esp), %xmm1 + movd 4(%esp), %xmm0 + punpckldq %xmm1, %xmm0 + + calll 0f +0: popl %ecx + shrl %eax // high 31 bits of input as sint32 + addl $0x7ff80000, %eax + sarl $31, %eax // (big input) ? -1 : 0 + movsd STICKY, %xmm1 // (big input) ? 0xfff : 0 + movl $12, %edx + andl %eax, %edx // (big input) ? 12 : 0 + movd %edx, %xmm3 + andpd %xmm0, %xmm1 // (big input) ? input & 0xfff : 0 + movsd TWOp52, %xmm2 // 0x1.0p52 + psrlq %xmm3, %xmm0 // (big input) ? input >> 12 : input + orpd %xmm2, %xmm1 // 0x1.0p52 + ((big input) ? input & 0xfff : input) + orpd %xmm1, %xmm0 // 0x1.0p52 + ((big input) ? (input >> 12 | input & 0xfff) : input) + subsd %xmm2, %xmm0 // (double)((big input) ? (input >> 12 | input & 0xfff) : input) + cvtsd2ss %xmm0, %xmm0 // (float)((big input) ? (input >> 12 | input & 0xfff) : input) + pslld $23, %xmm3 + paddd %xmm3, %xmm0 // (float)input + movd %xmm0, 4(%esp) + flds 4(%esp) + ret +END_COMPILERRT_FUNCTION(__floatundisf) + +#endif // __i386__ + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/i386/floatundixf.S b/contrib/compiler-rt/lib/builtins/i386/floatundixf.S new file mode 100644 index 000000000000..c935670cb52f --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/i386/floatundixf.S @@ -0,0 +1,46 @@ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
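The branch-free __floatundisf above hinges on a sticky-bit reduction: an input too wide to pass through double without double rounding is shifted right by 12, anything shifted out is folded into one low "sticky" bit, and the exponent is patched back up by 12 at the end (the pslld $23 / paddd pair). A hypothetical portable rendering of that idea, not the routine itself:

#include <stdint.h>

static float floatundisf_sketch(uint64_t a) {
    if (a < (1ULL << 52))                    /* representable exactly in a double */
        return (float)(double)a;
    /* Keep the top bits; remember whether any non-zero bits were discarded. */
    uint64_t reduced = (a >> 12) | ((a & 0xfff) != 0);
    /* reduced < 2^52, so the double conversion is exact, the float conversion
     * performs the only rounding, and scaling by 2^12 afterwards is exact.   */
    return (float)(double)reduced * 0x1p12f;
}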
+ +#include "../assembly.h" + +// long double __floatundixf(du_int a);16 + +#ifdef __i386__ + +CONST_SECTION + + .balign 16 +twop52: + .quad 0x4330000000000000 + + .balign 16 +twop84_plus_twop52_neg: + .quad 0xc530000000100000 + + .balign 16 +twop84: + .quad 0x4530000000000000 + +#define REL_ADDR(_a) (_a)-0b(%eax) + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__floatundixf) + calll 0f +0: popl %eax + movss 8(%esp), %xmm0 // hi 32 bits of input + movss 4(%esp), %xmm1 // lo 32 bits of input + orpd REL_ADDR(twop84), %xmm0 // 2^84 + hi (as a double) + orpd REL_ADDR(twop52), %xmm1 // 2^52 + lo (as a double) + addsd REL_ADDR(twop84_plus_twop52_neg), %xmm0 // hi - 2^52 (no rounding occurs) + movsd %xmm1, 4(%esp) + fldl 4(%esp) + movsd %xmm0, 4(%esp) + faddl 4(%esp) + ret +END_COMPILERRT_FUNCTION(__floatundixf) + +#endif // __i386__ + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/i386/lshrdi3.S b/contrib/compiler-rt/lib/builtins/i386/lshrdi3.S new file mode 100644 index 000000000000..53e95cf76527 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/i386/lshrdi3.S @@ -0,0 +1,62 @@ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// di_int __lshrdi3(di_int input, int count); + +// This routine has some extra memory traffic, loading the 64-bit input via two +// 32-bit loads, then immediately storing it back to the stack via a single 64-bit +// store. This is to avoid a write-small, read-large stall. +// However, if callers of this routine can be safely assumed to store the argument +// via a 64-bt store, this is unnecessary memory traffic, and should be avoided. +// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro. + +#ifdef __i386__ +#ifdef __SSE2__ + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__lshrdi3) + movd 12(%esp), %xmm2 // Load count +#ifndef TRUST_CALLERS_USE_64_BIT_STORES + movd 4(%esp), %xmm0 + movd 8(%esp), %xmm1 + punpckldq %xmm1, %xmm0 // Load input +#else + movq 4(%esp), %xmm0 // Load input +#endif + psrlq %xmm2, %xmm0 // shift input by count + movd %xmm0, %eax + psrlq $32, %xmm0 + movd %xmm0, %edx + ret +END_COMPILERRT_FUNCTION(__lshrdi3) + +#else // Use GPRs instead of SSE2 instructions, if they aren't available. + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__lshrdi3) + movl 12(%esp), %ecx // Load count + movl 8(%esp), %edx // Load high + movl 4(%esp), %eax // Load low + + testl $0x20, %ecx // If count >= 32 + jnz 1f // goto 1 + + shrdl %cl, %edx, %eax // right shift low by count + shrl %cl, %edx // right shift high by count + ret + +1: movl %edx, %eax // Move high to low + xorl %edx, %edx // clear high + shrl %cl, %eax // shift low by count - 32 + ret +END_COMPILERRT_FUNCTION(__lshrdi3) + +#endif // __SSE2__ +#endif // __i386__ + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/i386/moddi3.S b/contrib/compiler-rt/lib/builtins/i386/moddi3.S new file mode 100644 index 000000000000..a5bf9ce8ea0f --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/i386/moddi3.S @@ -0,0 +1,169 @@ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// di_int __moddi3(di_int a, di_int b); + +// result = remainder of a / b. +// both inputs and the output are 64-bit signed integers. +// This will do whatever the underlying hardware is set to do on division by zero. 
+// No other exceptions are generated, as the divide cannot overflow. +// +// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware +// on x86_64. The performance goal is ~40 cycles per divide, which is faster than +// currently possible via simulation of integer divides on the x87 unit. +// + +// Stephen Canon, December 2008 + +#ifdef __i386__ + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__moddi3) + +/* This is currently implemented by wrapping the unsigned modulus up in an absolute + value. This could certainly be improved upon. */ + + pushl %esi + movl 20(%esp), %edx // high word of b + movl 16(%esp), %eax // low word of b + movl %edx, %ecx + sarl $31, %ecx // (b < 0) ? -1 : 0 + xorl %ecx, %eax + xorl %ecx, %edx // EDX:EAX = (b < 0) ? not(b) : b + subl %ecx, %eax + sbbl %ecx, %edx // EDX:EAX = abs(b) + movl %edx, 20(%esp) + movl %eax, 16(%esp) // store abs(b) back to stack + + movl 12(%esp), %edx // high word of b + movl 8(%esp), %eax // low word of b + movl %edx, %ecx + sarl $31, %ecx // (a < 0) ? -1 : 0 + xorl %ecx, %eax + xorl %ecx, %edx // EDX:EAX = (a < 0) ? not(a) : a + subl %ecx, %eax + sbbl %ecx, %edx // EDX:EAX = abs(a) + movl %edx, 12(%esp) + movl %eax, 8(%esp) // store abs(a) back to stack + movl %ecx, %esi // set aside sign of a + + pushl %ebx + movl 24(%esp), %ebx // Find the index i of the leading bit in b. + bsrl %ebx, %ecx // If the high word of b is zero, jump to + jz 9f // the code to handle that special case [9]. + + /* High word of b is known to be non-zero on this branch */ + + movl 20(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b + + shrl %cl, %eax // Practically, this means that bhi is given by: + shrl %eax // + notl %ecx // bhi = (high word of b) << (31 - i) | + shll %cl, %ebx // (low word of b) >> (1 + i) + orl %eax, %ebx // + movl 16(%esp), %edx // Load the high and low words of a, and jump + movl 12(%esp), %eax // to [2] if the high word is larger than bhi + cmpl %ebx, %edx // to avoid overflowing the upcoming divide. + jae 2f + + /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ + + divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r + + pushl %edi + notl %ecx + shrl %eax + shrl %cl, %eax // q = qs >> (1 + i) + movl %eax, %edi + mull 24(%esp) // q*blo + movl 16(%esp), %ebx + movl 20(%esp), %ecx // ECX:EBX = a + subl %eax, %ebx + sbbl %edx, %ecx // ECX:EBX = a - q*blo + movl 28(%esp), %eax + imull %edi, %eax // q*bhi + subl %eax, %ecx // ECX:EBX = a - q*b + + jnc 1f // if positive, this is the result. + addl 24(%esp), %ebx // otherwise + adcl 28(%esp), %ecx // ECX:EBX = a - (q-1)*b = result +1: movl %ebx, %eax + movl %ecx, %edx + + addl %esi, %eax // Restore correct sign to result + adcl %esi, %edx + xorl %esi, %eax + xorl %esi, %edx + popl %edi // Restore callee-save registers + popl %ebx + popl %esi + retl // Return + +2: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ + + subl %ebx, %edx // subtract bhi from ahi so that divide will not + divl %ebx // overflow, and find q and r such that + // + // ahi:alo = (1:q)*bhi + r + // + // Note that q is a number in (31-i).(1+i) + // fix point. 
+ + pushl %edi + notl %ecx + shrl %eax + orl $0x80000000, %eax + shrl %cl, %eax // q = (1:qs) >> (1 + i) + movl %eax, %edi + mull 24(%esp) // q*blo + movl 16(%esp), %ebx + movl 20(%esp), %ecx // ECX:EBX = a + subl %eax, %ebx + sbbl %edx, %ecx // ECX:EBX = a - q*blo + movl 28(%esp), %eax + imull %edi, %eax // q*bhi + subl %eax, %ecx // ECX:EBX = a - q*b + + jnc 3f // if positive, this is the result. + addl 24(%esp), %ebx // otherwise + adcl 28(%esp), %ecx // ECX:EBX = a - (q-1)*b = result +3: movl %ebx, %eax + movl %ecx, %edx + + addl %esi, %eax // Restore correct sign to result + adcl %esi, %edx + xorl %esi, %eax + xorl %esi, %edx + popl %edi // Restore callee-save registers + popl %ebx + popl %esi + retl // Return + +9: /* High word of b is zero on this branch */ + + movl 16(%esp), %eax // Find qhi and rhi such that + movl 20(%esp), %ecx // + xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b + divl %ecx // + movl %eax, %ebx // + movl 12(%esp), %eax // Find rlo such that + divl %ecx // + movl %edx, %eax // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b + popl %ebx // + xorl %edx, %edx // and return 0:rlo + + addl %esi, %eax // Restore correct sign to result + adcl %esi, %edx + xorl %esi, %eax + xorl %esi, %edx + popl %esi + retl // Return +END_COMPILERRT_FUNCTION(__moddi3) + +#endif // __i386__ + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/i386/muldi3.S b/contrib/compiler-rt/lib/builtins/i386/muldi3.S new file mode 100644 index 000000000000..12394606421c --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/i386/muldi3.S @@ -0,0 +1,33 @@ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// di_int __muldi3(di_int a, di_int b); + +#ifdef __i386__ + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__muldi3) + pushl %ebx + movl 16(%esp), %eax // b.lo + movl 12(%esp), %ecx // a.hi + imull %eax, %ecx // b.lo * a.hi + + movl 8(%esp), %edx // a.lo + movl 20(%esp), %ebx // b.hi + imull %edx, %ebx // a.lo * b.hi + + mull %edx // EDX:EAX = a.lo * b.lo + addl %ecx, %ebx // EBX = (a.lo*b.hi + a.hi*b.lo) + addl %ebx, %edx + + popl %ebx + retl +END_COMPILERRT_FUNCTION(__muldi3) + +#endif // __i386__ + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/i386/udivdi3.S b/contrib/compiler-rt/lib/builtins/i386/udivdi3.S new file mode 100644 index 000000000000..727613639b12 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/i386/udivdi3.S @@ -0,0 +1,118 @@ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// du_int __udivdi3(du_int a, du_int b); + +// result = a / b. +// both inputs and the output are 64-bit unsigned integers. +// This will do whatever the underlying hardware is set to do on division by zero. +// No other exceptions are generated, as the divide cannot overflow. +// +// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware +// on x86_64. The performance goal is ~40 cycles per divide, which is faster than +// currently possible via simulation of integer divides on the x87 unit. +// +// Stephen Canon, December 2008 + +#ifdef __i386__ + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__udivdi3) + + pushl %ebx + movl 20(%esp), %ebx // Find the index i of the leading bit in b. + bsrl %ebx, %ecx // If the high word of b is zero, jump to + jz 9f // the code to handle that special case [9]. 
+ + /* High word of b is known to be non-zero on this branch */ + + movl 16(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b + + shrl %cl, %eax // Practically, this means that bhi is given by: + shrl %eax // + notl %ecx // bhi = (high word of b) << (31 - i) | + shll %cl, %ebx // (low word of b) >> (1 + i) + orl %eax, %ebx // + movl 12(%esp), %edx // Load the high and low words of a, and jump + movl 8(%esp), %eax // to [1] if the high word is larger than bhi + cmpl %ebx, %edx // to avoid overflowing the upcoming divide. + jae 1f + + /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ + + divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r + + pushl %edi + notl %ecx + shrl %eax + shrl %cl, %eax // q = qs >> (1 + i) + movl %eax, %edi + mull 20(%esp) // q*blo + movl 12(%esp), %ebx + movl 16(%esp), %ecx // ECX:EBX = a + subl %eax, %ebx + sbbl %edx, %ecx // ECX:EBX = a - q*blo + movl 24(%esp), %eax + imull %edi, %eax // q*bhi + subl %eax, %ecx // ECX:EBX = a - q*b + sbbl $0, %edi // decrement q if remainder is negative + xorl %edx, %edx + movl %edi, %eax + popl %edi + popl %ebx + retl + + +1: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ + + subl %ebx, %edx // subtract bhi from ahi so that divide will not + divl %ebx // overflow, and find q and r such that + // + // ahi:alo = (1:q)*bhi + r + // + // Note that q is a number in (31-i).(1+i) + // fix point. + + pushl %edi + notl %ecx + shrl %eax + orl $0x80000000, %eax + shrl %cl, %eax // q = (1:qs) >> (1 + i) + movl %eax, %edi + mull 20(%esp) // q*blo + movl 12(%esp), %ebx + movl 16(%esp), %ecx // ECX:EBX = a + subl %eax, %ebx + sbbl %edx, %ecx // ECX:EBX = a - q*blo + movl 24(%esp), %eax + imull %edi, %eax // q*bhi + subl %eax, %ecx // ECX:EBX = a - q*b + sbbl $0, %edi // decrement q if remainder is negative + xorl %edx, %edx + movl %edi, %eax + popl %edi + popl %ebx + retl + + +9: /* High word of b is zero on this branch */ + + movl 12(%esp), %eax // Find qhi and rhi such that + movl 16(%esp), %ecx // + xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b + divl %ecx // + movl %eax, %ebx // + movl 8(%esp), %eax // Find qlo such that + divl %ecx // + movl %ebx, %edx // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b + popl %ebx // + retl // and return qhi:qlo +END_COMPILERRT_FUNCTION(__udivdi3) + +#endif // __i386__ + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/i386/umoddi3.S b/contrib/compiler-rt/lib/builtins/i386/umoddi3.S new file mode 100644 index 000000000000..763e821946c0 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/i386/umoddi3.S @@ -0,0 +1,129 @@ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// du_int __umoddi3(du_int a, du_int b); + +// result = remainder of a / b. +// both inputs and the output are 64-bit unsigned integers. +// This will do whatever the underlying hardware is set to do on division by zero. +// No other exceptions are generated, as the divide cannot overflow. +// +// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware +// on x86_64. The performance goal is ~40 cycles per divide, which is faster than +// currently possible via simulation of integer divides on the x87 unit. +// + +// Stephen Canon, December 2008 + +#ifdef __i386__ + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__umoddi3) + + pushl %ebx + movl 20(%esp), %ebx // Find the index i of the leading bit in b. 
+ bsrl %ebx, %ecx // If the high word of b is zero, jump to + jz 9f // the code to handle that special case [9]. + + /* High word of b is known to be non-zero on this branch */ + + movl 16(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b + + shrl %cl, %eax // Practically, this means that bhi is given by: + shrl %eax // + notl %ecx // bhi = (high word of b) << (31 - i) | + shll %cl, %ebx // (low word of b) >> (1 + i) + orl %eax, %ebx // + movl 12(%esp), %edx // Load the high and low words of a, and jump + movl 8(%esp), %eax // to [2] if the high word is larger than bhi + cmpl %ebx, %edx // to avoid overflowing the upcoming divide. + jae 2f + + /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ + + divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r + + pushl %edi + notl %ecx + shrl %eax + shrl %cl, %eax // q = qs >> (1 + i) + movl %eax, %edi + mull 20(%esp) // q*blo + movl 12(%esp), %ebx + movl 16(%esp), %ecx // ECX:EBX = a + subl %eax, %ebx + sbbl %edx, %ecx // ECX:EBX = a - q*blo + movl 24(%esp), %eax + imull %edi, %eax // q*bhi + subl %eax, %ecx // ECX:EBX = a - q*b + + jnc 1f // if positive, this is the result. + addl 20(%esp), %ebx // otherwise + adcl 24(%esp), %ecx // ECX:EBX = a - (q-1)*b = result +1: movl %ebx, %eax + movl %ecx, %edx + + popl %edi + popl %ebx + retl + + +2: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ + + subl %ebx, %edx // subtract bhi from ahi so that divide will not + divl %ebx // overflow, and find q and r such that + // + // ahi:alo = (1:q)*bhi + r + // + // Note that q is a number in (31-i).(1+i) + // fix point. + + pushl %edi + notl %ecx + shrl %eax + orl $0x80000000, %eax + shrl %cl, %eax // q = (1:qs) >> (1 + i) + movl %eax, %edi + mull 20(%esp) // q*blo + movl 12(%esp), %ebx + movl 16(%esp), %ecx // ECX:EBX = a + subl %eax, %ebx + sbbl %edx, %ecx // ECX:EBX = a - q*blo + movl 24(%esp), %eax + imull %edi, %eax // q*bhi + subl %eax, %ecx // ECX:EBX = a - q*b + + jnc 3f // if positive, this is the result. + addl 20(%esp), %ebx // otherwise + adcl 24(%esp), %ecx // ECX:EBX = a - (q-1)*b = result +3: movl %ebx, %eax + movl %ecx, %edx + + popl %edi + popl %ebx + retl + + + +9: /* High word of b is zero on this branch */ + + movl 12(%esp), %eax // Find qhi and rhi such that + movl 16(%esp), %ecx // + xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b + divl %ecx // + movl %eax, %ebx // + movl 8(%esp), %eax // Find rlo such that + divl %ecx // + movl %edx, %eax // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b + popl %ebx // + xorl %edx, %edx // and return 0:rlo + retl // +END_COMPILERRT_FUNCTION(__umoddi3) + +#endif // __i386__ + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/int_endianness.h b/contrib/compiler-rt/lib/builtins/int_endianness.h new file mode 100644 index 000000000000..e2586c56bac8 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/int_endianness.h @@ -0,0 +1,116 @@ +/* ===-- int_endianness.h - configuration header for compiler-rt ------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file is a configuration header for compiler-rt. + * This file is not part of the interface of this library. 
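The two-word division in __udivdi3 and __umoddi3 above estimates the quotient by dividing by just the top 32 significant bits of b, shifted back into place; that estimate is never low and at most one too high, so a single conditional decrement fixes it. The shape of the computation in C (illustrative only; the assembly stays in 32-bit registers and uses the borrow of a - q*b for the correction):

#include <stdint.h>

/* Case where the high word of b is non-zero; b_hi == 0 is handled above by
 * two chained 32-bit divides instead. */
static uint64_t udivdi3_sketch(uint64_t a, uint64_t b) {
    uint32_t b_hi = (uint32_t)(b >> 32);           /* precondition: b_hi != 0     */
    int i = 31 - __builtin_clz(b_hi);              /* leading bit of b is bit 32+i */
    uint32_t bhi = (uint32_t)(b >> (i + 1));       /* bits [1+i:32+i] of b        */
    uint64_t q = (a / bhi) >> (i + 1);             /* divide by (bhi << (i+1))    */
    if (q != 0 && a / q < b)                       /* q*b > a, tested without overflow */
        q -= 1;                                    /* estimate was one too high   */
    return q;                                      /* the remainder would be a - q*b */
}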
+ * + * ===----------------------------------------------------------------------=== + */ + +#ifndef INT_ENDIANNESS_H +#define INT_ENDIANNESS_H + +#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \ + defined(__ORDER_LITTLE_ENDIAN__) + +/* Clang and GCC provide built-in endianness definitions. */ +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define _YUGA_LITTLE_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 1 +#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 +#endif /* __BYTE_ORDER__ */ + +#else /* Compilers other than Clang or GCC. */ + +#if defined(__SVR4) && defined(__sun) +#include <sys/byteorder.h> + +#if defined(_BIG_ENDIAN) +#define _YUGA_LITTLE_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 1 +#elif defined(_LITTLE_ENDIAN) +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 +#else /* !_LITTLE_ENDIAN */ +#error "unknown endianness" +#endif /* !_LITTLE_ENDIAN */ + +#endif /* Solaris and AuroraUX. */ + +/* .. */ + +#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || \ + defined(__minix) +#include <sys/endian.h> + +#if _BYTE_ORDER == _BIG_ENDIAN +#define _YUGA_LITTLE_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 1 +#elif _BYTE_ORDER == _LITTLE_ENDIAN +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 +#endif /* _BYTE_ORDER */ + +#endif /* *BSD */ + +#if defined(__OpenBSD__) +#include <machine/endian.h> + +#if _BYTE_ORDER == _BIG_ENDIAN +#define _YUGA_LITTLE_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 1 +#elif _BYTE_ORDER == _LITTLE_ENDIAN +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 +#endif /* _BYTE_ORDER */ + +#endif /* OpenBSD */ + +/* .. */ + +/* Mac OSX has __BIG_ENDIAN__ or __LITTLE_ENDIAN__ automatically set by the + * compiler (at least with GCC) */ +#if defined(__APPLE__) || defined(__ellcc__ ) + +#ifdef __BIG_ENDIAN__ +#if __BIG_ENDIAN__ +#define _YUGA_LITTLE_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 1 +#endif +#endif /* __BIG_ENDIAN__ */ + +#ifdef __LITTLE_ENDIAN__ +#if __LITTLE_ENDIAN__ +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 +#endif +#endif /* __LITTLE_ENDIAN__ */ + +#endif /* Mac OSX */ + +/* .. */ + +#if defined(_WIN32) + +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 + +#endif /* Windows */ + +#endif /* Clang or GCC. */ + +/* . */ + +#if !defined(_YUGA_LITTLE_ENDIAN) || !defined(_YUGA_BIG_ENDIAN) +#error Unable to determine endian +#endif /* Check we found an endianness correctly. */ + +#endif /* INT_ENDIANNESS_H */ diff --git a/contrib/compiler-rt/lib/builtins/int_lib.h b/contrib/compiler-rt/lib/builtins/int_lib.h new file mode 100644 index 000000000000..e07867d5580f --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/int_lib.h @@ -0,0 +1,163 @@ +/* ===-- int_lib.h - configuration header for compiler-rt -----------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file is a configuration header for compiler-rt. + * This file is not part of the interface of this library. + * + * ===----------------------------------------------------------------------=== + */ + +#ifndef INT_LIB_H +#define INT_LIB_H + +/* Assumption: Signed integral is 2's complement. */ +/* Assumption: Right shift of signed negative is arithmetic shift. */ +/* Assumption: Endianness is little or big (not mixed). 
*/ + +#if defined(__ELF__) +#define FNALIAS(alias_name, original_name) \ + void alias_name() __attribute__((__alias__(#original_name))) +#define COMPILER_RT_ALIAS(aliasee) __attribute__((__alias__(#aliasee))) +#else +#define FNALIAS(alias, name) _Pragma("GCC error(\"alias unsupported on this file format\")") +#define COMPILER_RT_ALIAS(aliasee) _Pragma("GCC error(\"alias unsupported on this file format\")") +#endif + +/* ABI macro definitions */ + +#if __ARM_EABI__ +# if defined(COMPILER_RT_ARMHF_TARGET) || (!defined(__clang__) && \ + defined(__GNUC__) && (__GNUC__ < 4 || __GNUC__ == 4 && __GNUC_MINOR__ < 5)) +/* The pcs attribute was introduced in GCC 4.5.0 */ +# define COMPILER_RT_ABI +# else +# define COMPILER_RT_ABI __attribute__((__pcs__("aapcs"))) +# endif +#else +# define COMPILER_RT_ABI +#endif + +#define AEABI_RTABI __attribute__((__pcs__("aapcs"))) + +#if defined(_MSC_VER) && !defined(__clang__) +#define ALWAYS_INLINE __forceinline +#define NOINLINE __declspec(noinline) +#define NORETURN __declspec(noreturn) +#define UNUSED +#else +#define ALWAYS_INLINE __attribute__((always_inline)) +#define NOINLINE __attribute__((noinline)) +#define NORETURN __attribute__((noreturn)) +#define UNUSED __attribute__((unused)) +#endif + +#if (defined(__FreeBSD__) || defined(__NetBSD__)) && (defined(_KERNEL) || defined(_STANDALONE)) +/* + * Kernel and boot environment can't use normal headers, + * so use the equivalent system headers. + */ +#ifdef __FreeBSD__ +# include <sys/limits.h> +#else +# include <machine/limits.h> +#endif +# include <sys/stdint.h> +# include <sys/types.h> +#else +/* Include the standard compiler builtin headers we use functionality from. */ +# include <limits.h> +# include <stdint.h> +# include <stdbool.h> +# include <float.h> +#endif + +/* Include the commonly used internal type definitions. */ +#include "int_types.h" + +/* Include internal utility function declarations. */ +#include "int_util.h" + +/* + * Workaround for LLVM bug 11663. Prevent endless recursion in + * __c?zdi2(), where calls to __builtin_c?z() are expanded to + * __c?zdi2() instead of __c?zsi2(). + * + * Instead of placing this workaround in c?zdi2.c, put it in this + * global header to prevent other C files from making the detour + * through __c?zdi2() as well. + * + * This problem has been observed on FreeBSD for sparc64 and + * mips64 with GCC 4.2.1, and for riscv with GCC 5.2.0. + * Presumably it's any version of GCC, and targeting an arch that + * does not have dedicated bit counting instructions. 
+ */ +#if defined(__FreeBSD__) && (defined(__sparc64__) || \ + defined(__mips_n32) || defined(__mips_n64) || defined(__mips_o64) || \ + defined(__riscv)) +si_int __clzsi2(si_int); +si_int __ctzsi2(si_int); +#define __builtin_clz __clzsi2 +#define __builtin_ctz __ctzsi2 +#endif /* FreeBSD && (sparc64 || mips_n32 || mips_n64 || mips_o64 || riscv) */ + +COMPILER_RT_ABI si_int __paritysi2(si_int a); +COMPILER_RT_ABI si_int __paritydi2(di_int a); + +COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b); +COMPILER_RT_ABI si_int __divsi3(si_int a, si_int b); +COMPILER_RT_ABI su_int __udivsi3(su_int n, su_int d); + +COMPILER_RT_ABI su_int __udivmodsi4(su_int a, su_int b, su_int* rem); +COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int* rem); +#ifdef CRT_HAS_128BIT +COMPILER_RT_ABI si_int __clzti2(ti_int a); +COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem); +#endif + +/* Definitions for builtins unavailable on MSVC */ +#if defined(_MSC_VER) && !defined(__clang__) +#include <intrin.h> + +uint32_t __inline __builtin_ctz(uint32_t value) { + unsigned long trailing_zero = 0; + if (_BitScanForward(&trailing_zero, value)) + return trailing_zero; + return 32; +} + +uint32_t __inline __builtin_clz(uint32_t value) { + unsigned long leading_zero = 0; + if (_BitScanReverse(&leading_zero, value)) + return 31 - leading_zero; + return 32; +} + +#if defined(_M_ARM) || defined(_M_X64) +uint32_t __inline __builtin_clzll(uint64_t value) { + unsigned long leading_zero = 0; + if (_BitScanReverse64(&leading_zero, value)) + return 63 - leading_zero; + return 64; +} +#else +uint32_t __inline __builtin_clzll(uint64_t value) { + if (value == 0) + return 64; + uint32_t msh = (uint32_t)(value >> 32); + uint32_t lsh = (uint32_t)(value & 0xFFFFFFFF); + if (msh != 0) + return __builtin_clz(msh); + return 32 + __builtin_clz(lsh); +} +#endif + +#define __builtin_clzl __builtin_clzll +#endif /* defined(_MSC_VER) && !defined(__clang__) */ + +#endif /* INT_LIB_H */ diff --git a/contrib/compiler-rt/lib/builtins/int_math.h b/contrib/compiler-rt/lib/builtins/int_math.h new file mode 100644 index 000000000000..aa3d0721a8ab --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/int_math.h @@ -0,0 +1,110 @@ +/* ===-- int_math.h - internal math inlines ---------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===-----------------------------------------------------------------------=== + * + * This file is not part of the interface of this library. + * + * This file defines substitutes for the libm functions used in some of the + * compiler-rt implementations, defined in such a way that there is not a direct + * dependency on libm or math.h. Instead, we use the compiler builtin versions + * where available. This reduces our dependencies on the system SDK by foisting + * the responsibility onto the compiler. 
+ * + * ===-----------------------------------------------------------------------=== + */ + +#ifndef INT_MATH_H +#define INT_MATH_H + +#ifndef __has_builtin +# define __has_builtin(x) 0 +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#include <math.h> +#include <stdlib.h> +#include <ymath.h> +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#define CRT_INFINITY INFINITY +#else +#define CRT_INFINITY __builtin_huge_valf() +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#define crt_isfinite(x) _finite((x)) +#define crt_isinf(x) !_finite((x)) +#define crt_isnan(x) _isnan((x)) +#else +/* Define crt_isfinite in terms of the builtin if available, otherwise provide + * an alternate version in terms of our other functions. This supports some + * versions of GCC which didn't have __builtin_isfinite. + */ +#if __has_builtin(__builtin_isfinite) +# define crt_isfinite(x) __builtin_isfinite((x)) +#elif defined(__GNUC__) +# define crt_isfinite(x) \ + __extension__(({ \ + __typeof((x)) x_ = (x); \ + !crt_isinf(x_) && !crt_isnan(x_); \ + })) +#else +# error "Do not know how to check for infinity" +#endif /* __has_builtin(__builtin_isfinite) */ +#define crt_isinf(x) __builtin_isinf((x)) +#define crt_isnan(x) __builtin_isnan((x)) +#endif /* _MSC_VER */ + +#if defined(_MSC_VER) && !defined(__clang__) +#define crt_copysign(x, y) copysign((x), (y)) +#define crt_copysignf(x, y) copysignf((x), (y)) +#define crt_copysignl(x, y) copysignl((x), (y)) +#else +#define crt_copysign(x, y) __builtin_copysign((x), (y)) +#define crt_copysignf(x, y) __builtin_copysignf((x), (y)) +#define crt_copysignl(x, y) __builtin_copysignl((x), (y)) +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#define crt_fabs(x) fabs((x)) +#define crt_fabsf(x) fabsf((x)) +#define crt_fabsl(x) fabs((x)) +#else +#define crt_fabs(x) __builtin_fabs((x)) +#define crt_fabsf(x) __builtin_fabsf((x)) +#define crt_fabsl(x) __builtin_fabsl((x)) +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#define crt_fmax(x, y) __max((x), (y)) +#define crt_fmaxf(x, y) __max((x), (y)) +#define crt_fmaxl(x, y) __max((x), (y)) +#else +#define crt_fmax(x, y) __builtin_fmax((x), (y)) +#define crt_fmaxf(x, y) __builtin_fmaxf((x), (y)) +#define crt_fmaxl(x, y) __builtin_fmaxl((x), (y)) +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#define crt_logbl(x) logbl((x)) +#else +#define crt_logbl(x) __builtin_logbl((x)) +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#define crt_scalbn(x, y) scalbn((x), (y)) +#define crt_scalbnf(x, y) scalbnf((x), (y)) +#define crt_scalbnl(x, y) scalbnl((x), (y)) +#else +#define crt_scalbn(x, y) __builtin_scalbn((x), (y)) +#define crt_scalbnf(x, y) __builtin_scalbnf((x), (y)) +#define crt_scalbnl(x, y) __builtin_scalbnl((x), (y)) +#endif + +#endif /* INT_MATH_H */ diff --git a/contrib/compiler-rt/lib/builtins/int_types.h b/contrib/compiler-rt/lib/builtins/int_types.h new file mode 100644 index 000000000000..4c91b75bac6c --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/int_types.h @@ -0,0 +1,189 @@ +/* ===-- int_lib.h - configuration header for compiler-rt -----------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file is not part of the interface of this library. 
+ * + * This file defines various standard types, most importantly a number of unions + * used to access parts of larger types. + * + * ===----------------------------------------------------------------------=== + */ + +#ifndef INT_TYPES_H +#define INT_TYPES_H + +#include "int_endianness.h" + +/* si_int is defined in Linux sysroot's asm-generic/siginfo.h */ +#ifdef si_int +#undef si_int +#endif +typedef int si_int; +typedef unsigned su_int; + +typedef long long di_int; +typedef unsigned long long du_int; + +typedef union +{ + di_int all; + struct + { +#if _YUGA_LITTLE_ENDIAN + su_int low; + si_int high; +#else + si_int high; + su_int low; +#endif /* _YUGA_LITTLE_ENDIAN */ + }s; +} dwords; + +typedef union +{ + du_int all; + struct + { +#if _YUGA_LITTLE_ENDIAN + su_int low; + su_int high; +#else + su_int high; + su_int low; +#endif /* _YUGA_LITTLE_ENDIAN */ + }s; +} udwords; + +#if defined(__LP64__) || defined(__wasm__) || defined(__mips64) || \ + defined(__riscv) || defined(_WIN64) +#define CRT_HAS_128BIT +#endif + +/* MSVC doesn't have a working 128bit integer type. Users should really compile + * compiler-rt with clang, but if they happen to be doing a standalone build for + * asan or something else, disable the 128 bit parts so things sort of work. + */ +#if defined(_MSC_VER) && !defined(__clang__) +#undef CRT_HAS_128BIT +#endif + +#ifdef CRT_HAS_128BIT +typedef int ti_int __attribute__ ((mode (TI))); +typedef unsigned tu_int __attribute__ ((mode (TI))); + +typedef union +{ + ti_int all; + struct + { +#if _YUGA_LITTLE_ENDIAN + du_int low; + di_int high; +#else + di_int high; + du_int low; +#endif /* _YUGA_LITTLE_ENDIAN */ + }s; +} twords; + +typedef union +{ + tu_int all; + struct + { +#if _YUGA_LITTLE_ENDIAN + du_int low; + du_int high; +#else + du_int high; + du_int low; +#endif /* _YUGA_LITTLE_ENDIAN */ + }s; +} utwords; + +static __inline ti_int make_ti(di_int h, di_int l) { + twords r; + r.s.high = h; + r.s.low = l; + return r.all; +} + +static __inline tu_int make_tu(du_int h, du_int l) { + utwords r; + r.s.high = h; + r.s.low = l; + return r.all; +} + +#endif /* CRT_HAS_128BIT */ + +#ifndef _STANDALONE +typedef union +{ + su_int u; + float f; +} float_bits; + +typedef union +{ + udwords u; + double f; +} double_bits; +#endif + +typedef struct +{ +#if _YUGA_LITTLE_ENDIAN + udwords low; + udwords high; +#else + udwords high; + udwords low; +#endif /* _YUGA_LITTLE_ENDIAN */ +} uqwords; + +/* Check if the target supports 80 bit extended precision long doubles. + * Notably, on x86 Windows, MSVC only provides a 64-bit long double, but GCC + * still makes it 80 bits. Clang will match whatever compiler it is trying to + * be compatible with. 
+ */ +#if ((defined(__i386__) || defined(__x86_64__)) && !defined(_MSC_VER)) || \ + defined(__m68k__) || defined(__ia64__) +#define HAS_80_BIT_LONG_DOUBLE 1 +#else +#define HAS_80_BIT_LONG_DOUBLE 0 +#endif + +#ifndef _STANDALONE +typedef union +{ + uqwords u; + long double f; +} long_double_bits; + +#if __STDC_VERSION__ >= 199901L +typedef float _Complex Fcomplex; +typedef double _Complex Dcomplex; +typedef long double _Complex Lcomplex; + +#define COMPLEX_REAL(x) __real__(x) +#define COMPLEX_IMAGINARY(x) __imag__(x) +#else +typedef struct { float real, imaginary; } Fcomplex; + +typedef struct { double real, imaginary; } Dcomplex; + +typedef struct { long double real, imaginary; } Lcomplex; + +#define COMPLEX_REAL(x) (x).real +#define COMPLEX_IMAGINARY(x) (x).imaginary +#endif +#endif +#endif /* INT_TYPES_H */ + diff --git a/contrib/compiler-rt/lib/builtins/int_util.c b/contrib/compiler-rt/lib/builtins/int_util.c new file mode 100644 index 000000000000..752f2015580e --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/int_util.c @@ -0,0 +1,71 @@ +/* ===-- int_util.c - Implement internal utilities --------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" +#include "int_util.h" + +/* NOTE: The definitions in this file are declared weak because we clients to be + * able to arbitrarily package individual functions into separate .a files. If + * we did not declare these weak, some link situations might end up seeing + * duplicate strong definitions of the same symbol. + * + * We can't use this solution for kernel use (which may not support weak), but + * currently expect that when built for kernel use all the functionality is + * packaged into a single library. 
+ */ + +#ifdef KERNEL_USE + +NORETURN extern void panic(const char *, ...); +#ifndef _WIN32 +__attribute__((visibility("hidden"))) +#endif +void __compilerrt_abort_impl(const char *file, int line, const char *function) { + panic("%s:%d: abort in %s", file, line, function); +} + +#elif __APPLE__ + +/* from libSystem.dylib */ +NORETURN extern void __assert_rtn(const char *func, const char *file, int line, + const char *message); + +#ifndef _WIN32 +__attribute__((weak)) +__attribute__((visibility("hidden"))) +#endif +void __compilerrt_abort_impl(const char *file, int line, const char *function) { + __assert_rtn(function, file, line, "libcompiler_rt abort"); +} + +#elif __Fuchsia__ + +#ifndef _WIN32 +__attribute__((weak)) +__attribute__((visibility("hidden"))) +#endif +void __compilerrt_abort_impl(const char *file, int line, const char *function) { + __builtin_trap(); +} + +#else + +/* Get the system definition of abort() */ +#include <stdlib.h> + +#ifndef _WIN32 +__attribute__((weak)) +__attribute__((visibility("hidden"))) +#endif +void __compilerrt_abort_impl(const char *file, int line, const char *function) { + abort(); +} + +#endif diff --git a/contrib/compiler-rt/lib/builtins/int_util.h b/contrib/compiler-rt/lib/builtins/int_util.h new file mode 100644 index 000000000000..c3c87381ad8a --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/int_util.h @@ -0,0 +1,33 @@ +/* ===-- int_util.h - internal utility functions ----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===-----------------------------------------------------------------------=== + * + * This file is not part of the interface of this library. + * + * This file defines non-inline utilities which are available for use in the + * library. The function definitions themselves are all contained in int_util.c + * which will always be compiled into any compiler-rt library. + * + * ===-----------------------------------------------------------------------=== + */ + +#ifndef INT_UTIL_H +#define INT_UTIL_H + +/** \brief Trigger a program abort (or panic for kernel code). */ +#define compilerrt_abort() __compilerrt_abort_impl(__FILE__, __LINE__, __func__) + +NORETURN void __compilerrt_abort_impl(const char *file, int line, + const char *function); + +#define COMPILE_TIME_ASSERT(expr) COMPILE_TIME_ASSERT1(expr, __COUNTER__) +#define COMPILE_TIME_ASSERT1(expr, cnt) COMPILE_TIME_ASSERT2(expr, cnt) +#define COMPILE_TIME_ASSERT2(expr, cnt) \ + typedef char ct_assert_##cnt[(expr) ? 1 : -1] UNUSED + +#endif /* INT_UTIL_H */ diff --git a/contrib/compiler-rt/lib/builtins/lshrdi3.c b/contrib/compiler-rt/lib/builtins/lshrdi3.c new file mode 100644 index 000000000000..67b2a7668345 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/lshrdi3.c @@ -0,0 +1,45 @@ +/* ===-- lshrdi3.c - Implement __lshrdi3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __lshrdi3 for the compiler_rt library. 
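COMPILE_TIME_ASSERT, defined in int_util.h above, turns a false condition into a negatively sized array type and therefore a compile error. A hypothetical use, not taken from the library:

/* Fails to compile unless di_int really is twice as wide as si_int. */
COMPILE_TIME_ASSERT(sizeof(di_int) == 2 * sizeof(si_int));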
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: logical a >> b */ + +/* Precondition: 0 <= b < bits_in_dword */ + +COMPILER_RT_ABI di_int +__lshrdi3(di_int a, si_int b) +{ + const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT); + udwords input; + udwords result; + input.all = a; + if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */ + { + result.s.high = 0; + result.s.low = input.s.high >> (b - bits_in_word); + } + else /* 0 <= b < bits_in_word */ + { + if (b == 0) + return a; + result.s.high = input.s.high >> b; + result.s.low = (input.s.high << (bits_in_word - b)) | (input.s.low >> b); + } + return result.all; +} + +#if defined(__ARM_EABI__) +AEABI_RTABI di_int __aeabi_llsr(di_int a, si_int b) COMPILER_RT_ALIAS(__lshrdi3); +#endif diff --git a/contrib/compiler-rt/lib/builtins/lshrti3.c b/contrib/compiler-rt/lib/builtins/lshrti3.c new file mode 100644 index 000000000000..e4170ff84a5a --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/lshrti3.c @@ -0,0 +1,45 @@ +/* ===-- lshrti3.c - Implement __lshrti3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __lshrti3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: logical a >> b */ + +/* Precondition: 0 <= b < bits_in_tword */ + +COMPILER_RT_ABI ti_int +__lshrti3(ti_int a, si_int b) +{ + const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT); + utwords input; + utwords result; + input.all = a; + if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ + { + result.s.high = 0; + result.s.low = input.s.high >> (b - bits_in_dword); + } + else /* 0 <= b < bits_in_dword */ + { + if (b == 0) + return a; + result.s.high = input.s.high >> b; + result.s.low = (input.s.high << (bits_in_dword - b)) | (input.s.low >> b); + } + return result.all; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/mingw_fixfloat.c b/contrib/compiler-rt/lib/builtins/mingw_fixfloat.c new file mode 100644 index 000000000000..c462e0dbf654 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/mingw_fixfloat.c @@ -0,0 +1,36 @@ +/* ===-- mingw_fixfloat.c - Wrap int/float conversions for arm/windows -----=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
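A quick numeric check of the two branches of __lshrdi3 above (hypothetical test values, for illustration only):

#include <assert.h>
#include <stdint.h>

int main(void) {
    uint64_t x = 0x0123456789abcdefULL;
    assert((x >> 40) == 0x0000000000012345ULL);   /* b >= 32: only the high word survives  */
    assert((x >> 8)  == 0x000123456789abcdULL);   /* 0 < b < 32: high bits cross into low  */
    return 0;
}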
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +COMPILER_RT_ABI di_int __fixdfdi(double a); +COMPILER_RT_ABI di_int __fixsfdi(float a); +COMPILER_RT_ABI du_int __fixunsdfdi(double a); +COMPILER_RT_ABI du_int __fixunssfdi(float a); +COMPILER_RT_ABI double __floatdidf(di_int a); +COMPILER_RT_ABI float __floatdisf(di_int a); +COMPILER_RT_ABI double __floatundidf(du_int a); +COMPILER_RT_ABI float __floatundisf(du_int a); + +COMPILER_RT_ABI di_int __dtoi64(double a) { return __fixdfdi(a); } + +COMPILER_RT_ABI di_int __stoi64(float a) { return __fixsfdi(a); } + +COMPILER_RT_ABI du_int __dtou64(double a) { return __fixunsdfdi(a); } + +COMPILER_RT_ABI du_int __stou64(float a) { return __fixunssfdi(a); } + +COMPILER_RT_ABI double __i64tod(di_int a) { return __floatdidf(a); } + +COMPILER_RT_ABI float __i64tos(di_int a) { return __floatdisf(a); } + +COMPILER_RT_ABI double __u64tod(du_int a) { return __floatundidf(a); } + +COMPILER_RT_ABI float __u64tos(du_int a) { return __floatundisf(a); } diff --git a/contrib/compiler-rt/lib/builtins/moddi3.c b/contrib/compiler-rt/lib/builtins/moddi3.c new file mode 100644 index 000000000000..a04279e3875f --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/moddi3.c @@ -0,0 +1,30 @@ +/*===-- moddi3.c - Implement __moddi3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __moddi3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a % b */ + +COMPILER_RT_ABI di_int +__moddi3(di_int a, di_int b) +{ + const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1; + di_int s = b >> bits_in_dword_m1; /* s = b < 0 ? -1 : 0 */ + b = (b ^ s) - s; /* negate if s == -1 */ + s = a >> bits_in_dword_m1; /* s = a < 0 ? -1 : 0 */ + a = (a ^ s) - s; /* negate if s == -1 */ + du_int r; + __udivmoddi4(a, b, &r); + return ((di_int)r ^ s) - s; /* negate if s == -1 */ +} diff --git a/contrib/compiler-rt/lib/builtins/modsi3.c b/contrib/compiler-rt/lib/builtins/modsi3.c new file mode 100644 index 000000000000..86c73ce13777 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/modsi3.c @@ -0,0 +1,23 @@ +/* ===-- modsi3.c - Implement __modsi3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __modsi3 for the compiler_rt library. 
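+ *
+ * A quick worked example: C integer division truncates toward zero, so in
+ * __modsi3(-7, 3) the quotient __divsi3(-7, 3) is -2 and the remainder is
+ * -7 - (-2 * 3) = -1; the remainder always carries the sign of the
+ * dividend (or is zero).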
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a % b */ + +COMPILER_RT_ABI si_int +__modsi3(si_int a, si_int b) +{ + return a - __divsi3(a, b) * b; +} diff --git a/contrib/compiler-rt/lib/builtins/modti3.c b/contrib/compiler-rt/lib/builtins/modti3.c new file mode 100644 index 000000000000..d505c07ac162 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/modti3.c @@ -0,0 +1,34 @@ +/* ===-- modti3.c - Implement __modti3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __modti3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/*Returns: a % b */ + +COMPILER_RT_ABI ti_int +__modti3(ti_int a, ti_int b) +{ + const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1; + ti_int s = b >> bits_in_tword_m1; /* s = b < 0 ? -1 : 0 */ + b = (b ^ s) - s; /* negate if s == -1 */ + s = a >> bits_in_tword_m1; /* s = a < 0 ? -1 : 0 */ + a = (a ^ s) - s; /* negate if s == -1 */ + tu_int r; + __udivmodti4(a, b, &r); + return ((ti_int)r ^ s) - s; /* negate if s == -1 */ +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/muldc3.c b/contrib/compiler-rt/lib/builtins/muldc3.c new file mode 100644 index 000000000000..16d8e98390a3 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/muldc3.c @@ -0,0 +1,73 @@ +/* ===-- muldc3.c - Implement __muldc3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __muldc3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" +#include "int_math.h" + +/* Returns: the product of a + ib and c + id */ + +COMPILER_RT_ABI Dcomplex +__muldc3(double __a, double __b, double __c, double __d) +{ + double __ac = __a * __c; + double __bd = __b * __d; + double __ad = __a * __d; + double __bc = __b * __c; + Dcomplex z; + COMPLEX_REAL(z) = __ac - __bd; + COMPLEX_IMAGINARY(z) = __ad + __bc; + if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) + { + int __recalc = 0; + if (crt_isinf(__a) || crt_isinf(__b)) + { + __a = crt_copysign(crt_isinf(__a) ? 1 : 0, __a); + __b = crt_copysign(crt_isinf(__b) ? 1 : 0, __b); + if (crt_isnan(__c)) + __c = crt_copysign(0, __c); + if (crt_isnan(__d)) + __d = crt_copysign(0, __d); + __recalc = 1; + } + if (crt_isinf(__c) || crt_isinf(__d)) + { + __c = crt_copysign(crt_isinf(__c) ? 1 : 0, __c); + __d = crt_copysign(crt_isinf(__d) ? 
1 : 0, __d); + if (crt_isnan(__a)) + __a = crt_copysign(0, __a); + if (crt_isnan(__b)) + __b = crt_copysign(0, __b); + __recalc = 1; + } + if (!__recalc && (crt_isinf(__ac) || crt_isinf(__bd) || + crt_isinf(__ad) || crt_isinf(__bc))) + { + if (crt_isnan(__a)) + __a = crt_copysign(0, __a); + if (crt_isnan(__b)) + __b = crt_copysign(0, __b); + if (crt_isnan(__c)) + __c = crt_copysign(0, __c); + if (crt_isnan(__d)) + __d = crt_copysign(0, __d); + __recalc = 1; + } + if (__recalc) + { + COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d); + COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c); + } + } + return z; +} diff --git a/contrib/compiler-rt/lib/builtins/muldf3.c b/contrib/compiler-rt/lib/builtins/muldf3.c new file mode 100644 index 000000000000..1bb103e38c13 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/muldf3.c @@ -0,0 +1,30 @@ +//===-- lib/muldf3.c - Double-precision multiplication ------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements double-precision soft-float multiplication +// with the IEEE-754 default rounding (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_mul_impl.inc" + +COMPILER_RT_ABI fp_t __muldf3(fp_t a, fp_t b) { + return __mulXf3__(a, b); +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI fp_t __aeabi_dmul(fp_t a, fp_t b) { + return __muldf3(a, b); +} +#else +AEABI_RTABI fp_t __aeabi_dmul(fp_t a, fp_t b) COMPILER_RT_ALIAS(__muldf3); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/muldi3.c b/contrib/compiler-rt/lib/builtins/muldi3.c new file mode 100644 index 000000000000..a187315e9165 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/muldi3.c @@ -0,0 +1,58 @@ +/* ===-- muldi3.c - Implement __muldi3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __muldi3 for the compiler_rt library. 
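+ *
+ * The implementation uses the schoolbook decomposition: with each operand
+ * split into 32-bit halves, a * b = (ah*bh << 64) + ((ah*bl + al*bh) << 32)
+ * + al*bl, and only the low 64 bits are kept, so the ah*bh term drops out.
+ * __muldsi3 below forms the full 32 x 32 -> 64 product of the low halves by
+ * the same trick applied to 16-bit halves.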
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a * b */ + +static +di_int +__muldsi3(su_int a, su_int b) +{ + dwords r; + const int bits_in_word_2 = (int)(sizeof(si_int) * CHAR_BIT) / 2; + const su_int lower_mask = (su_int)~0 >> bits_in_word_2; + r.s.low = (a & lower_mask) * (b & lower_mask); + su_int t = r.s.low >> bits_in_word_2; + r.s.low &= lower_mask; + t += (a >> bits_in_word_2) * (b & lower_mask); + r.s.low += (t & lower_mask) << bits_in_word_2; + r.s.high = t >> bits_in_word_2; + t = r.s.low >> bits_in_word_2; + r.s.low &= lower_mask; + t += (b >> bits_in_word_2) * (a & lower_mask); + r.s.low += (t & lower_mask) << bits_in_word_2; + r.s.high += t >> bits_in_word_2; + r.s.high += (a >> bits_in_word_2) * (b >> bits_in_word_2); + return r.all; +} + +/* Returns: a * b */ + +COMPILER_RT_ABI di_int +__muldi3(di_int a, di_int b) +{ + dwords x; + x.all = a; + dwords y; + y.all = b; + dwords r; + r.all = __muldsi3(x.s.low, y.s.low); + r.s.high += x.s.high * y.s.low + x.s.low * y.s.high; + return r.all; +} + +#if defined(__ARM_EABI__) +AEABI_RTABI di_int __aeabi_lmul(di_int a, di_int b) COMPILER_RT_ALIAS(__muldi3); +#endif diff --git a/contrib/compiler-rt/lib/builtins/mulodi4.c b/contrib/compiler-rt/lib/builtins/mulodi4.c new file mode 100644 index 000000000000..d2fd7db2bcd3 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/mulodi4.c @@ -0,0 +1,58 @@ +/*===-- mulodi4.c - Implement __mulodi4 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __mulodi4 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a * b */ + +/* Effects: sets *overflow to 1 if a * b overflows */ + +COMPILER_RT_ABI di_int +__mulodi4(di_int a, di_int b, int* overflow) +{ + const int N = (int)(sizeof(di_int) * CHAR_BIT); + const di_int MIN = (di_int)1 << (N-1); + const di_int MAX = ~MIN; + *overflow = 0; + di_int result = a * b; + if (a == MIN) + { + if (b != 0 && b != 1) + *overflow = 1; + return result; + } + if (b == MIN) + { + if (a != 0 && a != 1) + *overflow = 1; + return result; + } + di_int sa = a >> (N - 1); + di_int abs_a = (a ^ sa) - sa; + di_int sb = b >> (N - 1); + di_int abs_b = (b ^ sb) - sb; + if (abs_a < 2 || abs_b < 2) + return result; + if (sa == sb) + { + if (abs_a > MAX / abs_b) + *overflow = 1; + } + else + { + if (abs_a > MIN / -abs_b) + *overflow = 1; + } + return result; +} diff --git a/contrib/compiler-rt/lib/builtins/mulosi4.c b/contrib/compiler-rt/lib/builtins/mulosi4.c new file mode 100644 index 000000000000..422528085c44 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/mulosi4.c @@ -0,0 +1,58 @@ +/*===-- mulosi4.c - Implement __mulosi4 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __mulosi4 for the compiler_rt library. 
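+ *
+ * A worked example of the overflow test: in
+ * __mulosi4(0x10000, 0x10000, &overflow) both operands are positive and
+ * abs_a = 0x10000 exceeds MAX / abs_b = 0x7FFFFFFF / 0x10000 = 0x7FFF, so
+ * *overflow is set to 1; the (possibly wrapped) product is still returned
+ * to the caller.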
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a * b */ + +/* Effects: sets *overflow to 1 if a * b overflows */ + +COMPILER_RT_ABI si_int +__mulosi4(si_int a, si_int b, int* overflow) +{ + const int N = (int)(sizeof(si_int) * CHAR_BIT); + const si_int MIN = (si_int)1 << (N-1); + const si_int MAX = ~MIN; + *overflow = 0; + si_int result = a * b; + if (a == MIN) + { + if (b != 0 && b != 1) + *overflow = 1; + return result; + } + if (b == MIN) + { + if (a != 0 && a != 1) + *overflow = 1; + return result; + } + si_int sa = a >> (N - 1); + si_int abs_a = (a ^ sa) - sa; + si_int sb = b >> (N - 1); + si_int abs_b = (b ^ sb) - sb; + if (abs_a < 2 || abs_b < 2) + return result; + if (sa == sb) + { + if (abs_a > MAX / abs_b) + *overflow = 1; + } + else + { + if (abs_a > MIN / -abs_b) + *overflow = 1; + } + return result; +} diff --git a/contrib/compiler-rt/lib/builtins/muloti4.c b/contrib/compiler-rt/lib/builtins/muloti4.c new file mode 100644 index 000000000000..16b218920321 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/muloti4.c @@ -0,0 +1,62 @@ +/*===-- muloti4.c - Implement __muloti4 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __muloti4 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: a * b */ + +/* Effects: sets *overflow to 1 if a * b overflows */ + +COMPILER_RT_ABI ti_int +__muloti4(ti_int a, ti_int b, int* overflow) +{ + const int N = (int)(sizeof(ti_int) * CHAR_BIT); + const ti_int MIN = (ti_int)1 << (N-1); + const ti_int MAX = ~MIN; + *overflow = 0; + ti_int result = a * b; + if (a == MIN) + { + if (b != 0 && b != 1) + *overflow = 1; + return result; + } + if (b == MIN) + { + if (a != 0 && a != 1) + *overflow = 1; + return result; + } + ti_int sa = a >> (N - 1); + ti_int abs_a = (a ^ sa) - sa; + ti_int sb = b >> (N - 1); + ti_int abs_b = (b ^ sb) - sb; + if (abs_a < 2 || abs_b < 2) + return result; + if (sa == sb) + { + if (abs_a > MAX / abs_b) + *overflow = 1; + } + else + { + if (abs_a > MIN / -abs_b) + *overflow = 1; + } + return result; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/mulsc3.c b/contrib/compiler-rt/lib/builtins/mulsc3.c new file mode 100644 index 000000000000..c89cfd247a15 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/mulsc3.c @@ -0,0 +1,73 @@ +/* ===-- mulsc3.c - Implement __mulsc3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __mulsc3 for the compiler_rt library. 
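+ *
+ * The core computation is the textbook complex product
+ * (a + ib)(c + id) = (ac - bd) + i(ad + bc); the rest of the routine
+ * recovers a correctly signed infinite result when that formula produces
+ * NaN from infinite operands, in the spirit of C's Annex G.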
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" +#include "int_math.h" + +/* Returns: the product of a + ib and c + id */ + +COMPILER_RT_ABI Fcomplex +__mulsc3(float __a, float __b, float __c, float __d) +{ + float __ac = __a * __c; + float __bd = __b * __d; + float __ad = __a * __d; + float __bc = __b * __c; + Fcomplex z; + COMPLEX_REAL(z) = __ac - __bd; + COMPLEX_IMAGINARY(z) = __ad + __bc; + if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) + { + int __recalc = 0; + if (crt_isinf(__a) || crt_isinf(__b)) + { + __a = crt_copysignf(crt_isinf(__a) ? 1 : 0, __a); + __b = crt_copysignf(crt_isinf(__b) ? 1 : 0, __b); + if (crt_isnan(__c)) + __c = crt_copysignf(0, __c); + if (crt_isnan(__d)) + __d = crt_copysignf(0, __d); + __recalc = 1; + } + if (crt_isinf(__c) || crt_isinf(__d)) + { + __c = crt_copysignf(crt_isinf(__c) ? 1 : 0, __c); + __d = crt_copysignf(crt_isinf(__d) ? 1 : 0, __d); + if (crt_isnan(__a)) + __a = crt_copysignf(0, __a); + if (crt_isnan(__b)) + __b = crt_copysignf(0, __b); + __recalc = 1; + } + if (!__recalc && (crt_isinf(__ac) || crt_isinf(__bd) || + crt_isinf(__ad) || crt_isinf(__bc))) + { + if (crt_isnan(__a)) + __a = crt_copysignf(0, __a); + if (crt_isnan(__b)) + __b = crt_copysignf(0, __b); + if (crt_isnan(__c)) + __c = crt_copysignf(0, __c); + if (crt_isnan(__d)) + __d = crt_copysignf(0, __d); + __recalc = 1; + } + if (__recalc) + { + COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d); + COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c); + } + } + return z; +} diff --git a/contrib/compiler-rt/lib/builtins/mulsf3.c b/contrib/compiler-rt/lib/builtins/mulsf3.c new file mode 100644 index 000000000000..1e2cf3e717c9 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/mulsf3.c @@ -0,0 +1,30 @@ +//===-- lib/mulsf3.c - Single-precision multiplication ------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements single-precision soft-float multiplication +// with the IEEE-754 default rounding (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_mul_impl.inc" + +COMPILER_RT_ABI fp_t __mulsf3(fp_t a, fp_t b) { + return __mulXf3__(a, b); +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI fp_t __aeabi_fmul(fp_t a, fp_t b) { + return __mulsf3(a, b); +} +#else +AEABI_RTABI fp_t __aeabi_fmul(fp_t a, fp_t b) COMPILER_RT_ALIAS(__mulsf3); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/multc3.c b/contrib/compiler-rt/lib/builtins/multc3.c new file mode 100644 index 000000000000..0518bc2569f1 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/multc3.c @@ -0,0 +1,68 @@ +/* ===-- multc3.c - Implement __multc3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __multc3 for the compiler_rt library. 
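+ *
+ * The recovery step follows the same pattern as __mulsc3 and __muldc3: if
+ * the straightforward product comes out as NaN + i NaN, infinite operands
+ * are collapsed to copysign(1, x), NaN operands to copysign(0, x), and the
+ * products are rescaled by CRT_INFINITY to yield a correctly signed
+ * infinity.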
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" +#include "int_math.h" + +/* Returns: the product of a + ib and c + id */ + +COMPILER_RT_ABI long double _Complex +__multc3(long double a, long double b, long double c, long double d) +{ + long double ac = a * c; + long double bd = b * d; + long double ad = a * d; + long double bc = b * c; + long double _Complex z; + __real__ z = ac - bd; + __imag__ z = ad + bc; + if (crt_isnan(__real__ z) && crt_isnan(__imag__ z)) { + int recalc = 0; + if (crt_isinf(a) || crt_isinf(b)) { + a = crt_copysignl(crt_isinf(a) ? 1 : 0, a); + b = crt_copysignl(crt_isinf(b) ? 1 : 0, b); + if (crt_isnan(c)) + c = crt_copysignl(0, c); + if (crt_isnan(d)) + d = crt_copysignl(0, d); + recalc = 1; + } + if (crt_isinf(c) || crt_isinf(d)) { + c = crt_copysignl(crt_isinf(c) ? 1 : 0, c); + d = crt_copysignl(crt_isinf(d) ? 1 : 0, d); + if (crt_isnan(a)) + a = crt_copysignl(0, a); + if (crt_isnan(b)) + b = crt_copysignl(0, b); + recalc = 1; + } + if (!recalc && (crt_isinf(ac) || crt_isinf(bd) || + crt_isinf(ad) || crt_isinf(bc))) { + if (crt_isnan(a)) + a = crt_copysignl(0, a); + if (crt_isnan(b)) + b = crt_copysignl(0, b); + if (crt_isnan(c)) + c = crt_copysignl(0, c); + if (crt_isnan(d)) + d = crt_copysignl(0, d); + recalc = 1; + } + if (recalc) { + __real__ z = CRT_INFINITY * (a * c - b * d); + __imag__ z = CRT_INFINITY * (a * d + b * c); + } + } + return z; +} diff --git a/contrib/compiler-rt/lib/builtins/multf3.c b/contrib/compiler-rt/lib/builtins/multf3.c new file mode 100644 index 000000000000..0b915923ea09 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/multf3.c @@ -0,0 +1,25 @@ +//===-- lib/multf3.c - Quad-precision multiplication --------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements quad-precision soft-float multiplication +// with the IEEE-754 default rounding (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +#include "fp_mul_impl.inc" + +COMPILER_RT_ABI fp_t __multf3(fp_t a, fp_t b) { + return __mulXf3__(a, b); +} + +#endif diff --git a/contrib/compiler-rt/lib/builtins/multi3.c b/contrib/compiler-rt/lib/builtins/multi3.c new file mode 100644 index 000000000000..e0d52d430b66 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/multi3.c @@ -0,0 +1,58 @@ +/* ===-- multi3.c - Implement __multi3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + + * This file implements __multi3 for the compiler_rt library. 
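+ *
+ * Same structure as __muldi3, one level up: __mulddi3 forms the full
+ * 64 x 64 -> 128 product of the low halves, then the cross terms
+ * x.high * y.low + x.low * y.high are added into the high word. The
+ * high * high term is omitted because it only contributes to bits 128 and
+ * above, which are discarded anyway.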
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: a * b */ + +static +ti_int +__mulddi3(du_int a, du_int b) +{ + twords r; + const int bits_in_dword_2 = (int)(sizeof(di_int) * CHAR_BIT) / 2; + const du_int lower_mask = (du_int)~0 >> bits_in_dword_2; + r.s.low = (a & lower_mask) * (b & lower_mask); + du_int t = r.s.low >> bits_in_dword_2; + r.s.low &= lower_mask; + t += (a >> bits_in_dword_2) * (b & lower_mask); + r.s.low += (t & lower_mask) << bits_in_dword_2; + r.s.high = t >> bits_in_dword_2; + t = r.s.low >> bits_in_dword_2; + r.s.low &= lower_mask; + t += (b >> bits_in_dword_2) * (a & lower_mask); + r.s.low += (t & lower_mask) << bits_in_dword_2; + r.s.high += t >> bits_in_dword_2; + r.s.high += (a >> bits_in_dword_2) * (b >> bits_in_dword_2); + return r.all; +} + +/* Returns: a * b */ + +COMPILER_RT_ABI ti_int +__multi3(ti_int a, ti_int b) +{ + twords x; + x.all = a; + twords y; + y.all = b; + twords r; + r.all = __mulddi3(x.s.low, y.s.low); + r.s.high += x.s.high * y.s.low + x.s.low * y.s.high; + return r.all; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/mulvdi3.c b/contrib/compiler-rt/lib/builtins/mulvdi3.c new file mode 100644 index 000000000000..e63249e0a04c --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/mulvdi3.c @@ -0,0 +1,56 @@ +/*===-- mulvdi3.c - Implement __mulvdi3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __mulvdi3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a * b */ + +/* Effects: aborts if a * b overflows */ + +COMPILER_RT_ABI di_int +__mulvdi3(di_int a, di_int b) +{ + const int N = (int)(sizeof(di_int) * CHAR_BIT); + const di_int MIN = (di_int)1 << (N-1); + const di_int MAX = ~MIN; + if (a == MIN) + { + if (b == 0 || b == 1) + return a * b; + compilerrt_abort(); + } + if (b == MIN) + { + if (a == 0 || a == 1) + return a * b; + compilerrt_abort(); + } + di_int sa = a >> (N - 1); + di_int abs_a = (a ^ sa) - sa; + di_int sb = b >> (N - 1); + di_int abs_b = (b ^ sb) - sb; + if (abs_a < 2 || abs_b < 2) + return a * b; + if (sa == sb) + { + if (abs_a > MAX / abs_b) + compilerrt_abort(); + } + else + { + if (abs_a > MIN / -abs_b) + compilerrt_abort(); + } + return a * b; +} diff --git a/contrib/compiler-rt/lib/builtins/mulvsi3.c b/contrib/compiler-rt/lib/builtins/mulvsi3.c new file mode 100644 index 000000000000..74ea4f2da226 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/mulvsi3.c @@ -0,0 +1,56 @@ +/* ===-- mulvsi3.c - Implement __mulvsi3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __mulvsi3 for the compiler_rt library. 
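+ *
+ * This is the trapping counterpart of __mulosi4: the same MIN/MAX division
+ * checks are used, but instead of reporting through an out parameter the
+ * routine calls compilerrt_abort() on overflow. For example,
+ * __mulvsi3(0x10000, 0x10000) aborts, while __mulvsi3(0x7FFF, 0x10000)
+ * returns 0x7FFF0000.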
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a * b */ + +/* Effects: aborts if a * b overflows */ + +COMPILER_RT_ABI si_int +__mulvsi3(si_int a, si_int b) +{ + const int N = (int)(sizeof(si_int) * CHAR_BIT); + const si_int MIN = (si_int)1 << (N-1); + const si_int MAX = ~MIN; + if (a == MIN) + { + if (b == 0 || b == 1) + return a * b; + compilerrt_abort(); + } + if (b == MIN) + { + if (a == 0 || a == 1) + return a * b; + compilerrt_abort(); + } + si_int sa = a >> (N - 1); + si_int abs_a = (a ^ sa) - sa; + si_int sb = b >> (N - 1); + si_int abs_b = (b ^ sb) - sb; + if (abs_a < 2 || abs_b < 2) + return a * b; + if (sa == sb) + { + if (abs_a > MAX / abs_b) + compilerrt_abort(); + } + else + { + if (abs_a > MIN / -abs_b) + compilerrt_abort(); + } + return a * b; +} diff --git a/contrib/compiler-rt/lib/builtins/mulvti3.c b/contrib/compiler-rt/lib/builtins/mulvti3.c new file mode 100644 index 000000000000..f4c7d1612ba9 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/mulvti3.c @@ -0,0 +1,60 @@ +/* ===-- mulvti3.c - Implement __mulvti3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __mulvti3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: a * b */ + +/* Effects: aborts if a * b overflows */ + +COMPILER_RT_ABI ti_int +__mulvti3(ti_int a, ti_int b) +{ + const int N = (int)(sizeof(ti_int) * CHAR_BIT); + const ti_int MIN = (ti_int)1 << (N-1); + const ti_int MAX = ~MIN; + if (a == MIN) + { + if (b == 0 || b == 1) + return a * b; + compilerrt_abort(); + } + if (b == MIN) + { + if (a == 0 || a == 1) + return a * b; + compilerrt_abort(); + } + ti_int sa = a >> (N - 1); + ti_int abs_a = (a ^ sa) - sa; + ti_int sb = b >> (N - 1); + ti_int abs_b = (b ^ sb) - sb; + if (abs_a < 2 || abs_b < 2) + return a * b; + if (sa == sb) + { + if (abs_a > MAX / abs_b) + compilerrt_abort(); + } + else + { + if (abs_a > MIN / -abs_b) + compilerrt_abort(); + } + return a * b; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/mulxc3.c b/contrib/compiler-rt/lib/builtins/mulxc3.c new file mode 100644 index 000000000000..ba3221691821 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/mulxc3.c @@ -0,0 +1,77 @@ +/* ===-- mulxc3.c - Implement __mulxc3 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __mulxc3 for the compiler_rt library. 
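+ *
+ * This is the long double (extended-precision) variant of the complex
+ * multiply; it is compiled out on PowerPC, where long double is the IBM
+ * double-double format and is covered by the separate routines under ppc/.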
+ * + * ===----------------------------------------------------------------------=== + */ + +#if !_ARCH_PPC + +#include "int_lib.h" +#include "int_math.h" + +/* Returns: the product of a + ib and c + id */ + +COMPILER_RT_ABI Lcomplex +__mulxc3(long double __a, long double __b, long double __c, long double __d) +{ + long double __ac = __a * __c; + long double __bd = __b * __d; + long double __ad = __a * __d; + long double __bc = __b * __c; + Lcomplex z; + COMPLEX_REAL(z) = __ac - __bd; + COMPLEX_IMAGINARY(z) = __ad + __bc; + if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) + { + int __recalc = 0; + if (crt_isinf(__a) || crt_isinf(__b)) + { + __a = crt_copysignl(crt_isinf(__a) ? 1 : 0, __a); + __b = crt_copysignl(crt_isinf(__b) ? 1 : 0, __b); + if (crt_isnan(__c)) + __c = crt_copysignl(0, __c); + if (crt_isnan(__d)) + __d = crt_copysignl(0, __d); + __recalc = 1; + } + if (crt_isinf(__c) || crt_isinf(__d)) + { + __c = crt_copysignl(crt_isinf(__c) ? 1 : 0, __c); + __d = crt_copysignl(crt_isinf(__d) ? 1 : 0, __d); + if (crt_isnan(__a)) + __a = crt_copysignl(0, __a); + if (crt_isnan(__b)) + __b = crt_copysignl(0, __b); + __recalc = 1; + } + if (!__recalc && (crt_isinf(__ac) || crt_isinf(__bd) || + crt_isinf(__ad) || crt_isinf(__bc))) + { + if (crt_isnan(__a)) + __a = crt_copysignl(0, __a); + if (crt_isnan(__b)) + __b = crt_copysignl(0, __b); + if (crt_isnan(__c)) + __c = crt_copysignl(0, __c); + if (crt_isnan(__d)) + __d = crt_copysignl(0, __d); + __recalc = 1; + } + if (__recalc) + { + COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d); + COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c); + } + } + return z; +} + +#endif diff --git a/contrib/compiler-rt/lib/builtins/negdf2.c b/contrib/compiler-rt/lib/builtins/negdf2.c new file mode 100644 index 000000000000..f0bfaad24743 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/negdf2.c @@ -0,0 +1,30 @@ +//===-- lib/negdf2.c - double-precision negation ------------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements double-precision soft-float negation. +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_lib.h" + +COMPILER_RT_ABI fp_t +__negdf2(fp_t a) { + return fromRep(toRep(a) ^ signBit); +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI fp_t __aeabi_dneg(fp_t a) { + return __negdf2(a); +} +#else +AEABI_RTABI fp_t __aeabi_dneg(fp_t a) COMPILER_RT_ALIAS(__negdf2); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/negdi2.c b/contrib/compiler-rt/lib/builtins/negdi2.c new file mode 100644 index 000000000000..3d49ba2899d2 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/negdi2.c @@ -0,0 +1,26 @@ +/* ===-- negdi2.c - Implement __negdi2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __negdi2 for the compiler_rt library. 
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: -a */ + +COMPILER_RT_ABI di_int +__negdi2(di_int a) +{ + /* Note: this routine is here for API compatibility; any sane compiler + * should expand it inline. + */ + return -a; +} diff --git a/contrib/compiler-rt/lib/builtins/negsf2.c b/contrib/compiler-rt/lib/builtins/negsf2.c new file mode 100644 index 000000000000..05c97d4d5a11 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/negsf2.c @@ -0,0 +1,30 @@ +//===-- lib/negsf2.c - single-precision negation ------------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements single-precision soft-float negation. +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_lib.h" + +COMPILER_RT_ABI fp_t +__negsf2(fp_t a) { + return fromRep(toRep(a) ^ signBit); +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI fp_t __aeabi_fneg(fp_t a) { + return __negsf2(a); +} +#else +AEABI_RTABI fp_t __aeabi_fneg(fp_t a) COMPILER_RT_ALIAS(__negsf2); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/negti2.c b/contrib/compiler-rt/lib/builtins/negti2.c new file mode 100644 index 000000000000..9b00b303f856 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/negti2.c @@ -0,0 +1,30 @@ +/* ===-- negti2.c - Implement __negti2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __negti2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: -a */ + +COMPILER_RT_ABI ti_int +__negti2(ti_int a) +{ + /* Note: this routine is here for API compatibility; any sane compiler + * should expand it inline. + */ + return -a; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/negvdi2.c b/contrib/compiler-rt/lib/builtins/negvdi2.c new file mode 100644 index 000000000000..e336ecf28f0d --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/negvdi2.c @@ -0,0 +1,28 @@ +/* ===-- negvdi2.c - Implement __negvdi2 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __negvdi2 for the compiler_rt library. 
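+ *
+ * The only argument whose negation can overflow is the most negative value:
+ * __negvdi2(INT64_MIN) has no representable result in 64 bits and therefore
+ * aborts; every other argument is simply negated.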
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: -a */ + +/* Effects: aborts if -a overflows */ + +COMPILER_RT_ABI di_int +__negvdi2(di_int a) +{ + const di_int MIN = (di_int)1 << ((int)(sizeof(di_int) * CHAR_BIT)-1); + if (a == MIN) + compilerrt_abort(); + return -a; +} diff --git a/contrib/compiler-rt/lib/builtins/negvsi2.c b/contrib/compiler-rt/lib/builtins/negvsi2.c new file mode 100644 index 000000000000..b9e93fef06c5 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/negvsi2.c @@ -0,0 +1,28 @@ +/* ===-- negvsi2.c - Implement __negvsi2 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __negvsi2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: -a */ + +/* Effects: aborts if -a overflows */ + +COMPILER_RT_ABI si_int +__negvsi2(si_int a) +{ + const si_int MIN = (si_int)1 << ((int)(sizeof(si_int) * CHAR_BIT)-1); + if (a == MIN) + compilerrt_abort(); + return -a; +} diff --git a/contrib/compiler-rt/lib/builtins/negvti2.c b/contrib/compiler-rt/lib/builtins/negvti2.c new file mode 100644 index 000000000000..85f9f7d19d9d --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/negvti2.c @@ -0,0 +1,32 @@ +/*===-- negvti2.c - Implement __negvti2 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------=== + * + *This file implements __negvti2 for the compiler_rt library. + * + *===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: -a */ + +/* Effects: aborts if -a overflows */ + +COMPILER_RT_ABI ti_int +__negvti2(ti_int a) +{ + const ti_int MIN = (ti_int)1 << ((int)(sizeof(ti_int) * CHAR_BIT)-1); + if (a == MIN) + compilerrt_abort(); + return -a; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/os_version_check.c b/contrib/compiler-rt/lib/builtins/os_version_check.c new file mode 100644 index 000000000000..e0d40edc7e38 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/os_version_check.c @@ -0,0 +1,226 @@ +/* ===-- os_version_check.c - OS version checking -------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements the function __isOSVersionAtLeast, used by + * Objective-C's @available + * + * ===----------------------------------------------------------------------=== + */ + +#ifdef __APPLE__ + +#include <TargetConditionals.h> +#include <dispatch/dispatch.h> +#include <dlfcn.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/* These three variables hold the host's OS version. 
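+ * E.g. a ProductVersion string of "10.14.3" is stored as GlobalMajor = 10,
+ * GlobalMinor = 14, GlobalSubminor = 3 by parseSystemVersionPList below.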
*/ +static int32_t GlobalMajor, GlobalMinor, GlobalSubminor; +static dispatch_once_t DispatchOnceCounter; + +/* We can't include <CoreFoundation/CoreFoundation.h> directly from here, so + * just forward declare everything that we need from it. */ + +typedef const void *CFDataRef, *CFAllocatorRef, *CFPropertyListRef, + *CFStringRef, *CFDictionaryRef, *CFTypeRef, *CFErrorRef; + +#if __LLP64__ +typedef unsigned long long CFTypeID; +typedef unsigned long long CFOptionFlags; +typedef signed long long CFIndex; +#else +typedef unsigned long CFTypeID; +typedef unsigned long CFOptionFlags; +typedef signed long CFIndex; +#endif + +typedef unsigned char UInt8; +typedef _Bool Boolean; +typedef CFIndex CFPropertyListFormat; +typedef uint32_t CFStringEncoding; + +/* kCFStringEncodingASCII analog. */ +#define CF_STRING_ENCODING_ASCII 0x0600 +/* kCFStringEncodingUTF8 analog. */ +#define CF_STRING_ENCODING_UTF8 0x08000100 +#define CF_PROPERTY_LIST_IMMUTABLE 0 + +typedef CFDataRef (*CFDataCreateWithBytesNoCopyFuncTy)(CFAllocatorRef, + const UInt8 *, CFIndex, + CFAllocatorRef); +typedef CFPropertyListRef (*CFPropertyListCreateWithDataFuncTy)( + CFAllocatorRef, CFDataRef, CFOptionFlags, CFPropertyListFormat *, + CFErrorRef *); +typedef CFPropertyListRef (*CFPropertyListCreateFromXMLDataFuncTy)( + CFAllocatorRef, CFDataRef, CFOptionFlags, CFStringRef *); +typedef CFStringRef (*CFStringCreateWithCStringNoCopyFuncTy)(CFAllocatorRef, + const char *, + CFStringEncoding, + CFAllocatorRef); +typedef const void *(*CFDictionaryGetValueFuncTy)(CFDictionaryRef, + const void *); +typedef CFTypeID (*CFGetTypeIDFuncTy)(CFTypeRef); +typedef CFTypeID (*CFStringGetTypeIDFuncTy)(void); +typedef Boolean (*CFStringGetCStringFuncTy)(CFStringRef, char *, CFIndex, + CFStringEncoding); +typedef void (*CFReleaseFuncTy)(CFTypeRef); + +/* Find and parse the SystemVersion.plist file. */ +static void parseSystemVersionPList(void *Unused) { + (void)Unused; + /* Load CoreFoundation dynamically */ + const void *NullAllocator = dlsym(RTLD_DEFAULT, "kCFAllocatorNull"); + if (!NullAllocator) + return; + const CFAllocatorRef AllocatorNull = *(const CFAllocatorRef *)NullAllocator; + CFDataCreateWithBytesNoCopyFuncTy CFDataCreateWithBytesNoCopyFunc = + (CFDataCreateWithBytesNoCopyFuncTy)dlsym(RTLD_DEFAULT, + "CFDataCreateWithBytesNoCopy"); + if (!CFDataCreateWithBytesNoCopyFunc) + return; + CFPropertyListCreateWithDataFuncTy CFPropertyListCreateWithDataFunc = + (CFPropertyListCreateWithDataFuncTy)dlsym( + RTLD_DEFAULT, "CFPropertyListCreateWithData"); +/* CFPropertyListCreateWithData was introduced only in macOS 10.6+, so it + * will be NULL on earlier OS versions. */ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-declarations" + CFPropertyListCreateFromXMLDataFuncTy CFPropertyListCreateFromXMLDataFunc = + (CFPropertyListCreateFromXMLDataFuncTy)dlsym( + RTLD_DEFAULT, "CFPropertyListCreateFromXMLData"); +#pragma clang diagnostic pop + /* CFPropertyListCreateFromXMLDataFunc is deprecated in macOS 10.10, so it + * might be NULL in future OS versions. 
*/ + if (!CFPropertyListCreateWithDataFunc && !CFPropertyListCreateFromXMLDataFunc) + return; + CFStringCreateWithCStringNoCopyFuncTy CFStringCreateWithCStringNoCopyFunc = + (CFStringCreateWithCStringNoCopyFuncTy)dlsym( + RTLD_DEFAULT, "CFStringCreateWithCStringNoCopy"); + if (!CFStringCreateWithCStringNoCopyFunc) + return; + CFDictionaryGetValueFuncTy CFDictionaryGetValueFunc = + (CFDictionaryGetValueFuncTy)dlsym(RTLD_DEFAULT, "CFDictionaryGetValue"); + if (!CFDictionaryGetValueFunc) + return; + CFGetTypeIDFuncTy CFGetTypeIDFunc = + (CFGetTypeIDFuncTy)dlsym(RTLD_DEFAULT, "CFGetTypeID"); + if (!CFGetTypeIDFunc) + return; + CFStringGetTypeIDFuncTy CFStringGetTypeIDFunc = + (CFStringGetTypeIDFuncTy)dlsym(RTLD_DEFAULT, "CFStringGetTypeID"); + if (!CFStringGetTypeIDFunc) + return; + CFStringGetCStringFuncTy CFStringGetCStringFunc = + (CFStringGetCStringFuncTy)dlsym(RTLD_DEFAULT, "CFStringGetCString"); + if (!CFStringGetCStringFunc) + return; + CFReleaseFuncTy CFReleaseFunc = + (CFReleaseFuncTy)dlsym(RTLD_DEFAULT, "CFRelease"); + if (!CFReleaseFunc) + return; + + char *PListPath = "/System/Library/CoreServices/SystemVersion.plist"; + +#if TARGET_OS_SIMULATOR + char *PListPathPrefix = getenv("IPHONE_SIMULATOR_ROOT"); + if (!PListPathPrefix) + return; + char FullPath[strlen(PListPathPrefix) + strlen(PListPath) + 1]; + strcpy(FullPath, PListPathPrefix); + strcat(FullPath, PListPath); + PListPath = FullPath; +#endif + FILE *PropertyList = fopen(PListPath, "r"); + if (!PropertyList) + return; + + /* Dynamically allocated stuff. */ + CFDictionaryRef PListRef = NULL; + CFDataRef FileContentsRef = NULL; + UInt8 *PListBuf = NULL; + + fseek(PropertyList, 0, SEEK_END); + long PListFileSize = ftell(PropertyList); + if (PListFileSize < 0) + goto Fail; + rewind(PropertyList); + + PListBuf = malloc((size_t)PListFileSize); + if (!PListBuf) + goto Fail; + + size_t NumRead = fread(PListBuf, 1, (size_t)PListFileSize, PropertyList); + if (NumRead != (size_t)PListFileSize) + goto Fail; + + /* Get the file buffer into CF's format. We pass in a null allocator here * + * because we free PListBuf ourselves */ + FileContentsRef = (*CFDataCreateWithBytesNoCopyFunc)( + NULL, PListBuf, (CFIndex)NumRead, AllocatorNull); + if (!FileContentsRef) + goto Fail; + + if (CFPropertyListCreateWithDataFunc) + PListRef = (*CFPropertyListCreateWithDataFunc)( + NULL, FileContentsRef, CF_PROPERTY_LIST_IMMUTABLE, NULL, NULL); + else + PListRef = (*CFPropertyListCreateFromXMLDataFunc)( + NULL, FileContentsRef, CF_PROPERTY_LIST_IMMUTABLE, NULL); + if (!PListRef) + goto Fail; + + CFStringRef ProductVersion = (*CFStringCreateWithCStringNoCopyFunc)( + NULL, "ProductVersion", CF_STRING_ENCODING_ASCII, AllocatorNull); + if (!ProductVersion) + goto Fail; + CFTypeRef OpaqueValue = (*CFDictionaryGetValueFunc)(PListRef, ProductVersion); + (*CFReleaseFunc)(ProductVersion); + if (!OpaqueValue || + (*CFGetTypeIDFunc)(OpaqueValue) != (*CFStringGetTypeIDFunc)()) + goto Fail; + + char VersionStr[32]; + if (!(*CFStringGetCStringFunc)((CFStringRef)OpaqueValue, VersionStr, + sizeof(VersionStr), CF_STRING_ENCODING_UTF8)) + goto Fail; + sscanf(VersionStr, "%d.%d.%d", &GlobalMajor, &GlobalMinor, &GlobalSubminor); + +Fail: + if (PListRef) + (*CFReleaseFunc)(PListRef); + if (FileContentsRef) + (*CFReleaseFunc)(FileContentsRef); + free(PListBuf); + fclose(PropertyList); +} + +int32_t __isOSVersionAtLeast(int32_t Major, int32_t Minor, int32_t Subminor) { + /* Populate the global version variables, if they haven't already. 
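+   * For example, on a host whose SystemVersion.plist reports 10.14.3, the
+   * call __isOSVersionAtLeast(10, 12, 0) finds Major == GlobalMajor and
+   * Minor < GlobalMinor and therefore returns 1.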
*/ + dispatch_once_f(&DispatchOnceCounter, NULL, parseSystemVersionPList); + + if (Major < GlobalMajor) + return 1; + if (Major > GlobalMajor) + return 0; + if (Minor < GlobalMinor) + return 1; + if (Minor > GlobalMinor) + return 0; + return Subminor <= GlobalSubminor; +} + +#else + +/* Silence an empty translation unit warning. */ +typedef int unused; + +#endif diff --git a/contrib/compiler-rt/lib/builtins/paritydi2.c b/contrib/compiler-rt/lib/builtins/paritydi2.c new file mode 100644 index 000000000000..8ea5ab4214e9 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/paritydi2.c @@ -0,0 +1,25 @@ +/* ===-- paritydi2.c - Implement __paritydi2 -------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __paritydi2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: 1 if number of bits is odd else returns 0 */ + +COMPILER_RT_ABI si_int +__paritydi2(di_int a) +{ + dwords x; + x.all = a; + return __paritysi2(x.s.high ^ x.s.low); +} diff --git a/contrib/compiler-rt/lib/builtins/paritysi2.c b/contrib/compiler-rt/lib/builtins/paritysi2.c new file mode 100644 index 000000000000..599984663849 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/paritysi2.c @@ -0,0 +1,27 @@ +/* ===-- paritysi2.c - Implement __paritysi2 -------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __paritysi2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: 1 if number of bits is odd else returns 0 */ + +COMPILER_RT_ABI si_int +__paritysi2(si_int a) +{ + su_int x = (su_int)a; + x ^= x >> 16; + x ^= x >> 8; + x ^= x >> 4; + return (0x6996 >> (x & 0xF)) & 1; +} diff --git a/contrib/compiler-rt/lib/builtins/parityti2.c b/contrib/compiler-rt/lib/builtins/parityti2.c new file mode 100644 index 000000000000..5a4fe492486f --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/parityti2.c @@ -0,0 +1,29 @@ +/* ===-- parityti2.c - Implement __parityti2 -------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __parityti2 for the compiler_rt library. 
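+ *
+ * Parity is the XOR of all bits, so the 128-bit case simply folds the two
+ * 64-bit halves together and defers to __paritydi2, which in turn folds
+ * down to the 0x6996 nibble-parity lookup in __paritysi2.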
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: 1 if number of bits is odd else returns 0 */ + +COMPILER_RT_ABI si_int +__parityti2(ti_int a) +{ + twords x; + x.all = a; + return __paritydi2(x.s.high ^ x.s.low); +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/popcountdi2.c b/contrib/compiler-rt/lib/builtins/popcountdi2.c new file mode 100644 index 000000000000..5e8a62f075eb --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/popcountdi2.c @@ -0,0 +1,36 @@ +/* ===-- popcountdi2.c - Implement __popcountdi2 ----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __popcountdi2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: count of 1 bits */ + +COMPILER_RT_ABI si_int +__popcountdi2(di_int a) +{ + du_int x2 = (du_int)a; + x2 = x2 - ((x2 >> 1) & 0x5555555555555555uLL); + /* Every 2 bits holds the sum of every pair of bits (32) */ + x2 = ((x2 >> 2) & 0x3333333333333333uLL) + (x2 & 0x3333333333333333uLL); + /* Every 4 bits holds the sum of every 4-set of bits (3 significant bits) (16) */ + x2 = (x2 + (x2 >> 4)) & 0x0F0F0F0F0F0F0F0FuLL; + /* Every 8 bits holds the sum of every 8-set of bits (4 significant bits) (8) */ + su_int x = (su_int)(x2 + (x2 >> 32)); + /* The lower 32 bits hold four 16 bit sums (5 significant bits). */ + /* Upper 32 bits are garbage */ + x = x + (x >> 16); + /* The lower 16 bits hold two 32 bit sums (6 significant bits). */ + /* Upper 16 bits are garbage */ + return (x + (x >> 8)) & 0x0000007F; /* (7 significant bits) */ +} diff --git a/contrib/compiler-rt/lib/builtins/popcountsi2.c b/contrib/compiler-rt/lib/builtins/popcountsi2.c new file mode 100644 index 000000000000..44544ff49890 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/popcountsi2.c @@ -0,0 +1,33 @@ +/* ===-- popcountsi2.c - Implement __popcountsi2 ---------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __popcountsi2 for the compiler_rt library. 
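+ *
+ * This is the classic SWAR population count: x - ((x >> 1) & 0x55555555)
+ * leaves each 2-bit field holding the count of set bits in that pair, the
+ * next two steps merge those counts into 4-bit and then 8-bit fields, and
+ * the final folds add up the per-byte counts. For example,
+ * __popcountsi2(0x0F0F0F0F) == 16.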
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: count of 1 bits */ + +COMPILER_RT_ABI si_int +__popcountsi2(si_int a) +{ + su_int x = (su_int)a; + x = x - ((x >> 1) & 0x55555555); + /* Every 2 bits holds the sum of every pair of bits */ + x = ((x >> 2) & 0x33333333) + (x & 0x33333333); + /* Every 4 bits holds the sum of every 4-set of bits (3 significant bits) */ + x = (x + (x >> 4)) & 0x0F0F0F0F; + /* Every 8 bits holds the sum of every 8-set of bits (4 significant bits) */ + x = (x + (x >> 16)); + /* The lower 16 bits hold two 8 bit sums (5 significant bits).*/ + /* Upper 16 bits are garbage */ + return (x + (x >> 8)) & 0x0000003F; /* (6 significant bits) */ +} diff --git a/contrib/compiler-rt/lib/builtins/popcountti2.c b/contrib/compiler-rt/lib/builtins/popcountti2.c new file mode 100644 index 000000000000..7451bbb286b7 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/popcountti2.c @@ -0,0 +1,44 @@ +/* ===-- popcountti2.c - Implement __popcountti2 ----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __popcountti2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: count of 1 bits */ + +COMPILER_RT_ABI si_int +__popcountti2(ti_int a) +{ + tu_int x3 = (tu_int)a; + x3 = x3 - ((x3 >> 1) & (((tu_int)0x5555555555555555uLL << 64) | + 0x5555555555555555uLL)); + /* Every 2 bits holds the sum of every pair of bits (64) */ + x3 = ((x3 >> 2) & (((tu_int)0x3333333333333333uLL << 64) | 0x3333333333333333uLL)) + + (x3 & (((tu_int)0x3333333333333333uLL << 64) | 0x3333333333333333uLL)); + /* Every 4 bits holds the sum of every 4-set of bits (3 significant bits) (32) */ + x3 = (x3 + (x3 >> 4)) + & (((tu_int)0x0F0F0F0F0F0F0F0FuLL << 64) | 0x0F0F0F0F0F0F0F0FuLL); + /* Every 8 bits holds the sum of every 8-set of bits (4 significant bits) (16) */ + du_int x2 = (du_int)(x3 + (x3 >> 64)); + /* Every 8 bits holds the sum of every 8-set of bits (5 significant bits) (8) */ + su_int x = (su_int)(x2 + (x2 >> 32)); + /* Every 8 bits holds the sum of every 8-set of bits (6 significant bits) (4) */ + x = x + (x >> 16); + /* Every 8 bits holds the sum of every 8-set of bits (7 significant bits) (2) */ + /* Upper 16 bits are garbage */ + return (x + (x >> 8)) & 0xFF; /* (8 significant bits) */ +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/powidf2.c b/contrib/compiler-rt/lib/builtins/powidf2.c new file mode 100644 index 000000000000..ac13b172b043 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/powidf2.c @@ -0,0 +1,34 @@ +/* ===-- powidf2.cpp - Implement __powidf2 ---------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __powidf2 for the compiler_rt library. 
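+ *
+ * Exponentiation by squaring: the loop repeatedly halves b (truncating
+ * toward zero), squaring a at each step and multiplying it into r whenever
+ * the low bit of b is set; a negative exponent is handled by the final
+ * reciprocal, e.g. __powidf2(2.0, -3) == 0.125.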
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a ^ b */ + +COMPILER_RT_ABI double +__powidf2(double a, si_int b) +{ + const int recip = b < 0; + double r = 1; + while (1) + { + if (b & 1) + r *= a; + b /= 2; + if (b == 0) + break; + a *= a; + } + return recip ? 1/r : r; +} diff --git a/contrib/compiler-rt/lib/builtins/powisf2.c b/contrib/compiler-rt/lib/builtins/powisf2.c new file mode 100644 index 000000000000..0c400ec6dd6a --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/powisf2.c @@ -0,0 +1,34 @@ +/*===-- powisf2.cpp - Implement __powisf2 ---------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __powisf2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a ^ b */ + +COMPILER_RT_ABI float +__powisf2(float a, si_int b) +{ + const int recip = b < 0; + float r = 1; + while (1) + { + if (b & 1) + r *= a; + b /= 2; + if (b == 0) + break; + a *= a; + } + return recip ? 1/r : r; +} diff --git a/contrib/compiler-rt/lib/builtins/powitf2.c b/contrib/compiler-rt/lib/builtins/powitf2.c new file mode 100644 index 000000000000..172f29f58f25 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/powitf2.c @@ -0,0 +1,38 @@ +/* ===-- powitf2.cpp - Implement __powitf2 ---------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __powitf2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#if _ARCH_PPC + +/* Returns: a ^ b */ + +COMPILER_RT_ABI long double +__powitf2(long double a, si_int b) +{ + const int recip = b < 0; + long double r = 1; + while (1) + { + if (b & 1) + r *= a; + b /= 2; + if (b == 0) + break; + a *= a; + } + return recip ? 1/r : r; +} + +#endif diff --git a/contrib/compiler-rt/lib/builtins/powixf2.c b/contrib/compiler-rt/lib/builtins/powixf2.c new file mode 100644 index 000000000000..0fd96e503e7b --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/powixf2.c @@ -0,0 +1,38 @@ +/* ===-- powixf2.cpp - Implement __powixf2 ---------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __powixf2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#if !_ARCH_PPC + +#include "int_lib.h" + +/* Returns: a ^ b */ + +COMPILER_RT_ABI long double +__powixf2(long double a, si_int b) +{ + const int recip = b < 0; + long double r = 1; + while (1) + { + if (b & 1) + r *= a; + b /= 2; + if (b == 0) + break; + a *= a; + } + return recip ? 
1/r : r; +} + +#endif diff --git a/contrib/compiler-rt/lib/builtins/ppc/DD.h b/contrib/compiler-rt/lib/builtins/ppc/DD.h new file mode 100644 index 000000000000..3e5f9e58c138 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/ppc/DD.h @@ -0,0 +1,45 @@ +#ifndef COMPILERRT_DD_HEADER +#define COMPILERRT_DD_HEADER + +#include "../int_lib.h" + +typedef union { + long double ld; + struct { + double hi; + double lo; + }s; +} DD; + +typedef union { + double d; + uint64_t x; +} doublebits; + +#define LOWORDER(xy,xHi,xLo,yHi,yLo) \ + (((((xHi)*(yHi) - (xy)) + (xHi)*(yLo)) + (xLo)*(yHi)) + (xLo)*(yLo)) + +static __inline ALWAYS_INLINE double local_fabs(double x) { + doublebits result = {.d = x}; + result.x &= UINT64_C(0x7fffffffffffffff); + return result.d; +} + +static __inline ALWAYS_INLINE double high26bits(double x) { + doublebits result = {.d = x}; + result.x &= UINT64_C(0xfffffffff8000000); + return result.d; +} + +static __inline ALWAYS_INLINE int different_sign(double x, double y) { + doublebits xsignbit = {.d = x}, ysignbit = {.d = y}; + int result = (int)(xsignbit.x >> 63) ^ (int)(ysignbit.x >> 63); + return result; +} + +long double __gcc_qadd(long double, long double); +long double __gcc_qsub(long double, long double); +long double __gcc_qmul(long double, long double); +long double __gcc_qdiv(long double, long double); + +#endif /* COMPILERRT_DD_HEADER */ diff --git a/contrib/compiler-rt/lib/builtins/ppc/divtc3.c b/contrib/compiler-rt/lib/builtins/ppc/divtc3.c new file mode 100644 index 000000000000..ef532b841145 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/ppc/divtc3.c @@ -0,0 +1,97 @@ +/* This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + */ + +#include "DD.h" +#include "../int_math.h" +// Use DOUBLE_PRECISION because the soft-fp method we use is logb (on the upper +// half of the long doubles), even though this file defines complex division for +// 128-bit floats. +#define DOUBLE_PRECISION +#include "../fp_lib.h" + +#if !defined(CRT_INFINITY) && defined(HUGE_VAL) +#define CRT_INFINITY HUGE_VAL +#endif /* CRT_INFINITY */ + +#define makeFinite(x) { \ + (x).s.hi = crt_copysign(crt_isinf((x).s.hi) ? 
1.0 : 0.0, (x).s.hi); \ + (x).s.lo = 0.0; \ + } + +long double _Complex +__divtc3(long double a, long double b, long double c, long double d) +{ + DD cDD = { .ld = c }; + DD dDD = { .ld = d }; + + int ilogbw = 0; + const double logbw = __compiler_rt_logb( + crt_fmax(crt_fabs(cDD.s.hi), crt_fabs(dDD.s.hi))); + + if (crt_isfinite(logbw)) + { + ilogbw = (int)logbw; + + cDD.s.hi = crt_scalbn(cDD.s.hi, -ilogbw); + cDD.s.lo = crt_scalbn(cDD.s.lo, -ilogbw); + dDD.s.hi = crt_scalbn(dDD.s.hi, -ilogbw); + dDD.s.lo = crt_scalbn(dDD.s.lo, -ilogbw); + } + + const long double denom = __gcc_qadd(__gcc_qmul(cDD.ld, cDD.ld), __gcc_qmul(dDD.ld, dDD.ld)); + const long double realNumerator = __gcc_qadd(__gcc_qmul(a,cDD.ld), __gcc_qmul(b,dDD.ld)); + const long double imagNumerator = __gcc_qsub(__gcc_qmul(b,cDD.ld), __gcc_qmul(a,dDD.ld)); + + DD real = { .ld = __gcc_qdiv(realNumerator, denom) }; + DD imag = { .ld = __gcc_qdiv(imagNumerator, denom) }; + + real.s.hi = crt_scalbn(real.s.hi, -ilogbw); + real.s.lo = crt_scalbn(real.s.lo, -ilogbw); + imag.s.hi = crt_scalbn(imag.s.hi, -ilogbw); + imag.s.lo = crt_scalbn(imag.s.lo, -ilogbw); + + if (crt_isnan(real.s.hi) && crt_isnan(imag.s.hi)) + { + DD aDD = { .ld = a }; + DD bDD = { .ld = b }; + DD rDD = { .ld = denom }; + + if ((rDD.s.hi == 0.0) && (!crt_isnan(aDD.s.hi) || + !crt_isnan(bDD.s.hi))) + { + real.s.hi = crt_copysign(CRT_INFINITY,cDD.s.hi) * aDD.s.hi; + real.s.lo = 0.0; + imag.s.hi = crt_copysign(CRT_INFINITY,cDD.s.hi) * bDD.s.hi; + imag.s.lo = 0.0; + } + + else if ((crt_isinf(aDD.s.hi) || crt_isinf(bDD.s.hi)) && + crt_isfinite(cDD.s.hi) && crt_isfinite(dDD.s.hi)) + { + makeFinite(aDD); + makeFinite(bDD); + real.s.hi = CRT_INFINITY * (aDD.s.hi*cDD.s.hi + bDD.s.hi*dDD.s.hi); + real.s.lo = 0.0; + imag.s.hi = CRT_INFINITY * (bDD.s.hi*cDD.s.hi - aDD.s.hi*dDD.s.hi); + imag.s.lo = 0.0; + } + + else if ((crt_isinf(cDD.s.hi) || crt_isinf(dDD.s.hi)) && + crt_isfinite(aDD.s.hi) && crt_isfinite(bDD.s.hi)) + { + makeFinite(cDD); + makeFinite(dDD); + real.s.hi = crt_copysign(0.0,(aDD.s.hi*cDD.s.hi + bDD.s.hi*dDD.s.hi)); + real.s.lo = 0.0; + imag.s.hi = crt_copysign(0.0,(bDD.s.hi*cDD.s.hi - aDD.s.hi*dDD.s.hi)); + imag.s.lo = 0.0; + } + } + + long double _Complex z; + __real__ z = real.ld; + __imag__ z = imag.ld; + + return z; +} diff --git a/contrib/compiler-rt/lib/builtins/ppc/fixtfdi.c b/contrib/compiler-rt/lib/builtins/ppc/fixtfdi.c new file mode 100644 index 000000000000..2c7c0f8e279f --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/ppc/fixtfdi.c @@ -0,0 +1,104 @@ +/* This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + */ + +/* int64_t __fixunstfdi(long double x); + * This file implements the PowerPC 128-bit double-double -> int64_t conversion + */ + +#include "DD.h" +#include "../int_math.h" + +uint64_t __fixtfdi(long double input) +{ + const DD x = { .ld = input }; + const doublebits hibits = { .d = x.s.hi }; + + const uint32_t absHighWord = (uint32_t)(hibits.x >> 32) & UINT32_C(0x7fffffff); + const uint32_t absHighWordMinusOne = absHighWord - UINT32_C(0x3ff00000); + + /* If (1.0 - tiny) <= input < 0x1.0p63: */ + if (UINT32_C(0x03f00000) > absHighWordMinusOne) + { + /* Do an unsigned conversion of the absolute value, then restore the sign. 
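+ * Editor's note (illustrative, not in the upstream source): for input 3.5
+ * the high word is 0x400C0000, so absHighWordMinusOne is 0x001C0000 and
+ * the unbiased head exponent is 1; the mantissa with its implicit bit,
+ * shifted left by 10, is 0x7000000000000000, and the final right shift by
+ * (62 - 1) bits yields 3.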
*/ + const int unbiasedHeadExponent = absHighWordMinusOne >> 20; + + int64_t result = hibits.x & INT64_C(0x000fffffffffffff); /* mantissa(hi) */ + result |= INT64_C(0x0010000000000000); /* matissa(hi) with implicit bit */ + result <<= 10; /* mantissa(hi) with one zero preceding bit. */ + + const int64_t hiNegationMask = ((int64_t)(hibits.x)) >> 63; + + /* If the tail is non-zero, we need to patch in the tail bits. */ + if (0.0 != x.s.lo) + { + const doublebits lobits = { .d = x.s.lo }; + int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff); + tailMantissa |= INT64_C(0x0010000000000000); + + /* At this point we have the mantissa of |tail| */ + /* We need to negate it if head and tail have different signs. */ + const int64_t loNegationMask = ((int64_t)(lobits.x)) >> 63; + const int64_t negationMask = loNegationMask ^ hiNegationMask; + tailMantissa = (tailMantissa ^ negationMask) - negationMask; + + /* Now we have the mantissa of tail as a signed 2s-complement integer */ + + const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff; + + /* Shift the tail mantissa into the right position, accounting for the + * bias of 10 that we shifted the head mantissa by. + */ + tailMantissa >>= (unbiasedHeadExponent - (biasedTailExponent - (1023 - 10))); + + result += tailMantissa; + } + + result >>= (62 - unbiasedHeadExponent); + + /* Restore the sign of the result and return */ + result = (result ^ hiNegationMask) - hiNegationMask; + return result; + + } + + /* Edge cases handled here: */ + + /* |x| < 1, result is zero. */ + if (1.0 > crt_fabs(x.s.hi)) + return INT64_C(0); + + /* x very close to INT64_MIN, care must be taken to see which side we are on. */ + if (x.s.hi == -0x1.0p63) { + + int64_t result = INT64_MIN; + + if (0.0 < x.s.lo) + { + /* If the tail is positive, the correct result is something other than INT64_MIN. + * we'll need to figure out what it is. + */ + + const doublebits lobits = { .d = x.s.lo }; + int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff); + tailMantissa |= INT64_C(0x0010000000000000); + + /* Now we negate the tailMantissa */ + tailMantissa = (tailMantissa ^ INT64_C(-1)) + INT64_C(1); + + /* And shift it by the appropriate amount */ + const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff; + tailMantissa >>= 1075 - biasedTailExponent; + + result -= tailMantissa; + } + + return result; + } + + /* Signed overflows, infinities, and NaNs */ + if (x.s.hi > 0.0) + return INT64_MAX; + else + return INT64_MIN; +} diff --git a/contrib/compiler-rt/lib/builtins/ppc/fixunstfdi.c b/contrib/compiler-rt/lib/builtins/ppc/fixunstfdi.c new file mode 100644 index 000000000000..5e6e2cedf6ac --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/ppc/fixunstfdi.c @@ -0,0 +1,59 @@ +/* This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. 
+ */ + +/* uint64_t __fixunstfdi(long double x); */ +/* This file implements the PowerPC 128-bit double-double -> uint64_t conversion */ + +#include "DD.h" + +uint64_t __fixunstfdi(long double input) +{ + const DD x = { .ld = input }; + const doublebits hibits = { .d = x.s.hi }; + + const uint32_t highWordMinusOne = (uint32_t)(hibits.x >> 32) - UINT32_C(0x3ff00000); + + /* If (1.0 - tiny) <= input < 0x1.0p64: */ + if (UINT32_C(0x04000000) > highWordMinusOne) + { + const int unbiasedHeadExponent = highWordMinusOne >> 20; + + uint64_t result = hibits.x & UINT64_C(0x000fffffffffffff); /* mantissa(hi) */ + result |= UINT64_C(0x0010000000000000); /* matissa(hi) with implicit bit */ + result <<= 11; /* mantissa(hi) left aligned in the int64 field. */ + + /* If the tail is non-zero, we need to patch in the tail bits. */ + if (0.0 != x.s.lo) + { + const doublebits lobits = { .d = x.s.lo }; + int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff); + tailMantissa |= INT64_C(0x0010000000000000); + + /* At this point we have the mantissa of |tail| */ + + const int64_t negationMask = ((int64_t)(lobits.x)) >> 63; + tailMantissa = (tailMantissa ^ negationMask) - negationMask; + + /* Now we have the mantissa of tail as a signed 2s-complement integer */ + + const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff; + + /* Shift the tail mantissa into the right position, accounting for the + * bias of 11 that we shifted the head mantissa by. + */ + tailMantissa >>= (unbiasedHeadExponent - (biasedTailExponent - (1023 - 11))); + + result += tailMantissa; + } + + result >>= (63 - unbiasedHeadExponent); + return result; + } + + /* Edge cases are handled here, with saturation. */ + if (1.0 > x.s.hi) + return UINT64_C(0); + else + return UINT64_MAX; +} diff --git a/contrib/compiler-rt/lib/builtins/ppc/fixunstfti.c b/contrib/compiler-rt/lib/builtins/ppc/fixunstfti.c new file mode 100644 index 000000000000..fa21084cb576 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/ppc/fixunstfti.c @@ -0,0 +1,106 @@ +//===-- lib/builtins/ppc/fixunstfti.c - Convert long double->int128 *-C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements converting the 128bit IBM/PowerPC long double (double- +// double) data type to an unsigned 128 bit integer. +// +//===----------------------------------------------------------------------===// + +#include "../int_math.h" +#define BIAS 1023 + +/* Convert long double into an unsigned 128-bit integer. */ +__uint128_t __fixunstfti(long double input) { + + /* If we are trying to convert a NaN, return the NaN bit pattern. */ + if (crt_isnan(input)) { + return ((__uint128_t)0x7FF8000000000000ll) << 64 | + (__uint128_t)0x0000000000000000ll; + } + + __uint128_t result, hiResult, loResult; + int hiExponent, loExponent, shift; + /* The long double representation, with the high and low portions of + * the long double, and the corresponding bit patterns of each double. */ + union { + long double ld; + double d[2]; /* [0] is the high double, [1] is the low double. */ + unsigned long long ull[2]; /* High and low doubles as 64-bit integers. */ + } ldUnion; + + /* If the long double is less than 1.0 or negative, + * return 0.0. */ + if (input < 1.0) + return 0.0; + + /* Retrieve the 64-bit patterns of high and low doubles. 
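+ * (Editor's note, not in the upstream source: ldUnion below overlays the
+ * IBM double-double layout, in which d[0] holds the value rounded to
+ * nearest double and d[1] holds the remainder, so ull[0] and ull[1] give
+ * the corresponding bit patterns directly.)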
+ * Compute the unbiased exponent of both high and low doubles by + * removing the signs, isolating the exponent, and subtracting + * the bias from it. */ + ldUnion.ld = input; + hiExponent = ((ldUnion.ull[0] & 0x7FFFFFFFFFFFFFFFll) >> 52) - BIAS; + loExponent = ((ldUnion.ull[1] & 0x7FFFFFFFFFFFFFFFll) >> 52) - BIAS; + + /* Convert each double into int64; they will be added to the int128 result. + * CASE 1: High or low double fits in int64 + * - Convert the each double normally into int64. + * + * CASE 2: High or low double does not fit in int64 + * - Scale the double to fit within a 64-bit integer + * - Calculate the shift (amount to scale the double by in the int128) + * - Clear all the bits of the exponent (with 0x800FFFFFFFFFFFFF) + * - Add BIAS+53 (0x4350000000000000) to exponent to correct the value + * - Scale (move) the double to the correct place in the int128 + * (Move it by 2^53 places) + * + * Note: If the high double is assumed to be positive, an unsigned conversion + * from long double to 64-bit integer is needed. The low double can be either + * positive or negative, so a signed conversion is needed to retain the result + * of the low double and to ensure it does not simply get converted to 0. */ + + /* CASE 1 - High double fits in int64. */ + if (hiExponent < 63) { + hiResult = (unsigned long long)ldUnion.d[0]; + } else if (hiExponent < 128) { + /* CASE 2 - High double does not fit in int64, scale and convert it. */ + shift = hiExponent - 54; + ldUnion.ull[0] &= 0x800FFFFFFFFFFFFFll; + ldUnion.ull[0] |= 0x4350000000000000ll; + hiResult = (unsigned long long)ldUnion.d[0]; + hiResult <<= shift; + } else { + /* Detect cases for overflow. When the exponent of the high + * double is greater than 128 bits and when the long double + * input is positive, return the max 128-bit integer. + * For negative inputs with exponents > 128, return 1, like gcc. */ + if (ldUnion.d[0] > 0) { + return ((__uint128_t)0xFFFFFFFFFFFFFFFFll) << 64 | + (__uint128_t)0xFFFFFFFFFFFFFFFFll; + } else { + return ((__uint128_t)0x0000000000000000ll) << 64 | + (__uint128_t)0x0000000000000001ll; + } + } + + /* CASE 1 - Low double fits in int64. */ + if (loExponent < 63) { + loResult = (long long)ldUnion.d[1]; + } else { + /* CASE 2 - Low double does not fit in int64, scale and convert it. */ + shift = loExponent - 54; + ldUnion.ull[1] &= 0x800FFFFFFFFFFFFFll; + ldUnion.ull[1] |= 0x4350000000000000ll; + loResult = (long long)ldUnion.d[1]; + loResult <<= shift; + } + + /* Add the high and low doublewords together to form a 128 bit integer. */ + result = loResult + hiResult; + return result; +} diff --git a/contrib/compiler-rt/lib/builtins/ppc/floatditf.c b/contrib/compiler-rt/lib/builtins/ppc/floatditf.c new file mode 100644 index 000000000000..beabdd017422 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/ppc/floatditf.c @@ -0,0 +1,36 @@ +/* This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + */ + +/* long double __floatditf(long long x); */ +/* This file implements the PowerPC long long -> long double conversion */ + +#include "DD.h" + +long double __floatditf(int64_t a) { + + static const double twop32 = 0x1.0p32; + static const double twop52 = 0x1.0p52; + + doublebits low = { .d = twop52 }; + low.x |= a & UINT64_C(0x00000000ffffffff); /* 0x1.0p52 + low 32 bits of a. 
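+ * Editor's note (not in the upstream source): this works because doubles
+ * with unbiased exponent 52 represent every integer in [2^52, 2^53)
+ * exactly, so OR-ing the low 32 bits of a into the mantissa of 0x1.0p52
+ * yields exactly 2^52 + (a & 0xffffffff); the extra 2^52 is cancelled by
+ * the "- twop52" term in high_addend below.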
*/ + + const double high_addend = (double)((int32_t)(a >> 32))*twop32 - twop52; + + /* At this point, we have two double precision numbers + * high_addend and low.d, and we wish to return their sum + * as a canonicalized long double: + */ + + /* This implementation sets the inexact flag spuriously. + * This could be avoided, but at some substantial cost. + */ + + DD result; + + result.s.hi = high_addend + low.d; + result.s.lo = (high_addend - result.s.hi) + low.d; + + return result.ld; + +} diff --git a/contrib/compiler-rt/lib/builtins/ppc/floattitf.c b/contrib/compiler-rt/lib/builtins/ppc/floattitf.c new file mode 100644 index 000000000000..b8e297b6b8d9 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/ppc/floattitf.c @@ -0,0 +1,48 @@ +//===-- lib/builtins/ppc/floattitf.c - Convert int128->long double -*-C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements converting a signed 128 bit integer to a 128bit IBM / +// PowerPC long double (double-double) value. +// +//===----------------------------------------------------------------------===// + +#include <stdint.h> + +/* Conversions from signed and unsigned 64-bit int to long double. */ +long double __floatditf(int64_t); +long double __floatunditf(uint64_t); + +/* Convert a signed 128-bit integer to long double. + * This uses the following property: Let hi and lo be 64-bits each, + * and let signed_val_k() and unsigned_val_k() be the value of the + * argument interpreted as a signed or unsigned k-bit integer. Then, + * + * signed_val_128(hi,lo) = signed_val_64(hi) * 2^64 + unsigned_val_64(lo) + * = (long double)hi * 2^64 + (long double)lo, + * + * where (long double)hi and (long double)lo are signed and + * unsigned 64-bit integer to long double conversions, respectively. + */ +long double __floattitf(__int128_t arg) { + /* Split the int128 argument into 64-bit high and low int64 parts. */ + int64_t ArgHiPart = (int64_t)(arg >> 64); + uint64_t ArgLoPart = (uint64_t)arg; + + /* Convert each 64-bit part into long double. The high part + * must be a signed conversion and the low part an unsigned conversion + * to ensure the correct result. */ + long double ConvertedHiPart = __floatditf(ArgHiPart); + long double ConvertedLoPart = __floatunditf(ArgLoPart); + + /* The low bit of ArgHiPart corresponds to the 2^64 bit in arg. + * Multiply the high part by 2^64 to undo the right shift by 64-bits + * done in the splitting. Then, add to the low part to obtain the + * final result. */ + return ((ConvertedHiPart * 0x1.0p64) + ConvertedLoPart); +} diff --git a/contrib/compiler-rt/lib/builtins/ppc/floatunditf.c b/contrib/compiler-rt/lib/builtins/ppc/floatunditf.c new file mode 100644 index 000000000000..b12e1e738fd0 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/ppc/floatunditf.c @@ -0,0 +1,41 @@ +/* This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. 
+ */ + +/* long double __floatunditf(unsigned long long x); */ +/* This file implements the PowerPC unsigned long long -> long double conversion */ + +#include "DD.h" + +long double __floatunditf(uint64_t a) { + + /* Begins with an exact copy of the code from __floatundidf */ + + static const double twop52 = 0x1.0p52; + static const double twop84 = 0x1.0p84; + static const double twop84_plus_twop52 = 0x1.00000001p84; + + doublebits high = { .d = twop84 }; + doublebits low = { .d = twop52 }; + + high.x |= a >> 32; /* 0x1.0p84 + high 32 bits of a */ + low.x |= a & UINT64_C(0x00000000ffffffff); /* 0x1.0p52 + low 32 bits of a */ + + const double high_addend = high.d - twop84_plus_twop52; + + /* At this point, we have two double precision numbers + * high_addend and low.d, and we wish to return their sum + * as a canonicalized long double: + */ + + /* This implementation sets the inexact flag spuriously. */ + /* This could be avoided, but at some substantial cost. */ + + DD result; + + result.s.hi = high_addend + low.d; + result.s.lo = (high_addend - result.s.hi) + low.d; + + return result.ld; + +} diff --git a/contrib/compiler-rt/lib/builtins/ppc/gcc_qadd.c b/contrib/compiler-rt/lib/builtins/ppc/gcc_qadd.c new file mode 100644 index 000000000000..32e16e9d1d11 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/ppc/gcc_qadd.c @@ -0,0 +1,76 @@ +/* This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + */ + +/* long double __gcc_qadd(long double x, long double y); + * This file implements the PowerPC 128-bit double-double add operation. + * This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!) + */ + +#include "DD.h" + +long double __gcc_qadd(long double x, long double y) +{ + static const uint32_t infinityHi = UINT32_C(0x7ff00000); + + DD dst = { .ld = x }, src = { .ld = y }; + + register double A = dst.s.hi, a = dst.s.lo, + B = src.s.hi, b = src.s.lo; + + /* If both operands are zero: */ + if ((A == 0.0) && (B == 0.0)) { + dst.s.hi = A + B; + dst.s.lo = 0.0; + return dst.ld; + } + + /* If either operand is NaN or infinity: */ + const doublebits abits = { .d = A }; + const doublebits bbits = { .d = B }; + if ((((uint32_t)(abits.x >> 32) & infinityHi) == infinityHi) || + (((uint32_t)(bbits.x >> 32) & infinityHi) == infinityHi)) { + dst.s.hi = A + B; + dst.s.lo = 0.0; + return dst.ld; + } + + /* If the computation overflows: */ + /* This may be playing things a little bit fast and loose, but it will do for a start. 
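+ *
+ * Editor's note (not in the upstream source): the H/h and T/t values
+ * computed further down are the classic two-sum error terms. With
+ * s = A + B, h = A + (B - s) recovers the rounding error of s exactly
+ * when |B| >= |A|, and H = B + (A - s) does so when |A| >= |B|; t and T
+ * play the same roles for the low parts a and b. The branches on
+ * local_fabs() pick the valid term and fold it back into the result.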
*/ + const double testForOverflow = A + (B + (a + b)); + const doublebits testbits = { .d = testForOverflow }; + if (((uint32_t)(testbits.x >> 32) & infinityHi) == infinityHi) { + dst.s.hi = testForOverflow; + dst.s.lo = 0.0; + return dst.ld; + } + + double H, h; + double T, t; + double W, w; + double Y; + + H = B + (A - (A + B)); + T = b + (a - (a + b)); + h = A + (B - (A + B)); + t = a + (b - (a + b)); + + if (local_fabs(A) <= local_fabs(B)) + w = (a + b) + h; + else + w = (a + b) + H; + + W = (A + B) + w; + Y = (A + B) - W; + Y += w; + + if (local_fabs(a) <= local_fabs(b)) + w = t + Y; + else + w = T + Y; + + dst.s.hi = Y = W + w; + dst.s.lo = (W - Y) + w; + + return dst.ld; +} diff --git a/contrib/compiler-rt/lib/builtins/ppc/gcc_qdiv.c b/contrib/compiler-rt/lib/builtins/ppc/gcc_qdiv.c new file mode 100644 index 000000000000..70aa00b64400 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/ppc/gcc_qdiv.c @@ -0,0 +1,55 @@ +/* This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + */ + +/* long double __gcc_qdiv(long double x, long double y); + * This file implements the PowerPC 128-bit double-double division operation. + * This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!) + */ + +#include "DD.h" + +long double __gcc_qdiv(long double a, long double b) +{ + static const uint32_t infinityHi = UINT32_C(0x7ff00000); + DD dst = { .ld = a }, src = { .ld = b }; + + register double x = dst.s.hi, x1 = dst.s.lo, + y = src.s.hi, y1 = src.s.lo; + + double yHi, yLo, qHi, qLo; + double yq, tmp, q; + + q = x / y; + + /* Detect special cases */ + if (q == 0.0) { + dst.s.hi = q; + dst.s.lo = 0.0; + return dst.ld; + } + + const doublebits qBits = { .d = q }; + if (((uint32_t)(qBits.x >> 32) & infinityHi) == infinityHi) { + dst.s.hi = q; + dst.s.lo = 0.0; + return dst.ld; + } + + yHi = high26bits(y); + qHi = high26bits(q); + + yq = y * q; + yLo = y - yHi; + qLo = q - qHi; + + tmp = LOWORDER(yq, yHi, yLo, qHi, qLo); + tmp = (x - yq) - tmp; + tmp = ((tmp + x1) - y1 * q) / y; + x = q + tmp; + + dst.s.lo = (q - x) + tmp; + dst.s.hi = x; + + return dst.ld; +} diff --git a/contrib/compiler-rt/lib/builtins/ppc/gcc_qmul.c b/contrib/compiler-rt/lib/builtins/ppc/gcc_qmul.c new file mode 100644 index 000000000000..fb4c5164ccb5 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/ppc/gcc_qmul.c @@ -0,0 +1,53 @@ +/* This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + */ + +/* long double __gcc_qmul(long double x, long double y); + * This file implements the PowerPC 128-bit double-double multiply operation. + * This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!) + */ + +#include "DD.h" + +long double __gcc_qmul(long double x, long double y) +{ + static const uint32_t infinityHi = UINT32_C(0x7ff00000); + DD dst = { .ld = x }, src = { .ld = y }; + + register double A = dst.s.hi, a = dst.s.lo, + B = src.s.hi, b = src.s.lo; + + double aHi, aLo, bHi, bLo; + double ab, tmp, tau; + + ab = A * B; + + /* Detect special cases */ + if (ab == 0.0) { + dst.s.hi = ab; + dst.s.lo = 0.0; + return dst.ld; + } + + const doublebits abBits = { .d = ab }; + if (((uint32_t)(abBits.x >> 32) & infinityHi) == infinityHi) { + dst.s.hi = ab; + dst.s.lo = 0.0; + return dst.ld; + } + + /* Generic cases handled here. 
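+ * Editor's note (not in the upstream source): high26bits() drops the low
+ * 27 mantissa bits, splitting A and B into head and tail halves whose
+ * head products fit a double exactly; LOWORDER() then estimates the part
+ * of A*B that was rounded away (a Dekker-style product), and that
+ * correction, together with the cross terms A*b + a*B, is folded into
+ * the low double of the result.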
*/ + aHi = high26bits(A); + bHi = high26bits(B); + aLo = A - aHi; + bLo = B - bHi; + + tmp = LOWORDER(ab, aHi, aLo, bHi, bLo); + tmp += (A * b + a * B); + tau = ab + tmp; + + dst.s.lo = (ab - tau) + tmp; + dst.s.hi = tau; + + return dst.ld; +} diff --git a/contrib/compiler-rt/lib/builtins/ppc/gcc_qsub.c b/contrib/compiler-rt/lib/builtins/ppc/gcc_qsub.c new file mode 100644 index 000000000000..c092e24dbda1 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/ppc/gcc_qsub.c @@ -0,0 +1,76 @@ +/* This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + */ + +/* long double __gcc_qsub(long double x, long double y); + * This file implements the PowerPC 128-bit double-double add operation. + * This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!) + */ + +#include "DD.h" + +long double __gcc_qsub(long double x, long double y) +{ + static const uint32_t infinityHi = UINT32_C(0x7ff00000); + + DD dst = { .ld = x }, src = { .ld = y }; + + register double A = dst.s.hi, a = dst.s.lo, + B = -src.s.hi, b = -src.s.lo; + + /* If both operands are zero: */ + if ((A == 0.0) && (B == 0.0)) { + dst.s.hi = A + B; + dst.s.lo = 0.0; + return dst.ld; + } + + /* If either operand is NaN or infinity: */ + const doublebits abits = { .d = A }; + const doublebits bbits = { .d = B }; + if ((((uint32_t)(abits.x >> 32) & infinityHi) == infinityHi) || + (((uint32_t)(bbits.x >> 32) & infinityHi) == infinityHi)) { + dst.s.hi = A + B; + dst.s.lo = 0.0; + return dst.ld; + } + + /* If the computation overflows: */ + /* This may be playing things a little bit fast and loose, but it will do for a start. */ + const double testForOverflow = A + (B + (a + b)); + const doublebits testbits = { .d = testForOverflow }; + if (((uint32_t)(testbits.x >> 32) & infinityHi) == infinityHi) { + dst.s.hi = testForOverflow; + dst.s.lo = 0.0; + return dst.ld; + } + + double H, h; + double T, t; + double W, w; + double Y; + + H = B + (A - (A + B)); + T = b + (a - (a + b)); + h = A + (B - (A + B)); + t = a + (b - (a + b)); + + if (local_fabs(A) <= local_fabs(B)) + w = (a + b) + h; + else + w = (a + b) + H; + + W = (A + B) + w; + Y = (A + B) - W; + Y += w; + + if (local_fabs(a) <= local_fabs(b)) + w = t + Y; + else + w = T + Y; + + dst.s.hi = Y = W + w; + dst.s.lo = (W - Y) + w; + + return dst.ld; +} diff --git a/contrib/compiler-rt/lib/builtins/ppc/multc3.c b/contrib/compiler-rt/lib/builtins/ppc/multc3.c new file mode 100644 index 000000000000..9dd79c975dde --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/ppc/multc3.c @@ -0,0 +1,90 @@ +/* This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + */ + +#include "DD.h" +#include "../int_math.h" + +#define makeFinite(x) { \ + (x).s.hi = crt_copysign(crt_isinf((x).s.hi) ? 
1.0 : 0.0, (x).s.hi); \ + (x).s.lo = 0.0; \ + } + +#define zeroNaN(x) { \ + if (crt_isnan((x).s.hi)) { \ + (x).s.hi = crt_copysign(0.0, (x).s.hi); \ + (x).s.lo = 0.0; \ + } \ + } + +long double _Complex +__multc3(long double a, long double b, long double c, long double d) +{ + long double ac = __gcc_qmul(a,c); + long double bd = __gcc_qmul(b,d); + long double ad = __gcc_qmul(a,d); + long double bc = __gcc_qmul(b,c); + + DD real = { .ld = __gcc_qsub(ac,bd) }; + DD imag = { .ld = __gcc_qadd(ad,bc) }; + + if (crt_isnan(real.s.hi) && crt_isnan(imag.s.hi)) + { + int recalc = 0; + + DD aDD = { .ld = a }; + DD bDD = { .ld = b }; + DD cDD = { .ld = c }; + DD dDD = { .ld = d }; + + if (crt_isinf(aDD.s.hi) || crt_isinf(bDD.s.hi)) + { + makeFinite(aDD); + makeFinite(bDD); + zeroNaN(cDD); + zeroNaN(dDD); + recalc = 1; + } + + if (crt_isinf(cDD.s.hi) || crt_isinf(dDD.s.hi)) + { + makeFinite(cDD); + makeFinite(dDD); + zeroNaN(aDD); + zeroNaN(bDD); + recalc = 1; + } + + if (!recalc) + { + DD acDD = { .ld = ac }; + DD bdDD = { .ld = bd }; + DD adDD = { .ld = ad }; + DD bcDD = { .ld = bc }; + + if (crt_isinf(acDD.s.hi) || crt_isinf(bdDD.s.hi) || + crt_isinf(adDD.s.hi) || crt_isinf(bcDD.s.hi)) + { + zeroNaN(aDD); + zeroNaN(bDD); + zeroNaN(cDD); + zeroNaN(dDD); + recalc = 1; + } + } + + if (recalc) + { + real.s.hi = CRT_INFINITY * (aDD.s.hi*cDD.s.hi - bDD.s.hi*dDD.s.hi); + real.s.lo = 0.0; + imag.s.hi = CRT_INFINITY * (aDD.s.hi*dDD.s.hi + bDD.s.hi*cDD.s.hi); + imag.s.lo = 0.0; + } + } + + long double _Complex z; + __real__ z = real.ld; + __imag__ z = imag.ld; + + return z; +} diff --git a/contrib/compiler-rt/lib/builtins/ppc/restFP.S b/contrib/compiler-rt/lib/builtins/ppc/restFP.S new file mode 100644 index 000000000000..507e756e18b1 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/ppc/restFP.S @@ -0,0 +1,46 @@ +//===-- restFP.S - Implement restFP ---------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// Helper function used by compiler to restore ppc floating point registers at +// the end of the function epilog. This function returns to the address +// in the LR slot. So a function epilog must branch (b) not branch and link +// (bl) to this function. +// If the compiler wants to restore f27..f31, it does a "b restFP+52" +// +// This function should never be exported by a shared library. Each linkage +// unit carries its own copy of this function. 
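+//
+// Editor's note (not in the upstream source): each lfd below is 4 bytes,
+// so branching to restFP+52 skips the first thirteen loads (f14..f26)
+// and runs only the restores of f27..f31 plus the LR reload, matching
+// the "restFP+52" convention described above.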
+// +DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(restFP) + lfd f14,-144(r1) + lfd f15,-136(r1) + lfd f16,-128(r1) + lfd f17,-120(r1) + lfd f18,-112(r1) + lfd f19,-104(r1) + lfd f20,-96(r1) + lfd f21,-88(r1) + lfd f22,-80(r1) + lfd f23,-72(r1) + lfd f24,-64(r1) + lfd f25,-56(r1) + lfd f26,-48(r1) + lfd f27,-40(r1) + lfd f28,-32(r1) + lfd f29,-24(r1) + lfd f30,-16(r1) + lfd f31,-8(r1) + lwz r0,8(r1) + mtlr r0 + blr + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/ppc/saveFP.S b/contrib/compiler-rt/lib/builtins/ppc/saveFP.S new file mode 100644 index 000000000000..20b06fff53d9 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/ppc/saveFP.S @@ -0,0 +1,43 @@ +//===-- saveFP.S - Implement saveFP ---------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// Helper function used by compiler to save ppc floating point registers in +// function prologs. This routines also saves r0 in the LR slot. +// If the compiler wants to save f27..f31, it does a "bl saveFP+52" +// +// This function should never be exported by a shared library. Each linkage +// unit carries its own copy of this function. +// +DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(saveFP) + stfd f14,-144(r1) + stfd f15,-136(r1) + stfd f16,-128(r1) + stfd f17,-120(r1) + stfd f18,-112(r1) + stfd f19,-104(r1) + stfd f20,-96(r1) + stfd f21,-88(r1) + stfd f22,-80(r1) + stfd f23,-72(r1) + stfd f24,-64(r1) + stfd f25,-56(r1) + stfd f26,-48(r1) + stfd f27,-40(r1) + stfd f28,-32(r1) + stfd f29,-24(r1) + stfd f30,-16(r1) + stfd f31,-8(r1) + stw r0,8(r1) + blr + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/riscv/mulsi3.S b/contrib/compiler-rt/lib/builtins/riscv/mulsi3.S new file mode 100644 index 000000000000..a58d237040b6 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/riscv/mulsi3.S @@ -0,0 +1,28 @@ +//===--- mulsi3.S - Integer multiplication routines routines ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#if !defined(__riscv_mul) && __riscv_xlen == 32 + .text + .align 2 + + .globl __mulsi3 + .type __mulsi3, @function +__mulsi3: + mv a2, a0 + mv a0, zero +.L1: + andi a3, a1, 1 + beqz a3, .L2 + add a0, a0, a2 +.L2: + srli a1, a1, 1 + slli a2, a2, 1 + bnez a1, .L1 + ret +#endif diff --git a/contrib/compiler-rt/lib/builtins/sparc64/divmod.m4 b/contrib/compiler-rt/lib/builtins/sparc64/divmod.m4 new file mode 100644 index 000000000000..9150a2ed8263 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/sparc64/divmod.m4 @@ -0,0 +1,248 @@ +/* + * This m4 code has been taken from The SPARC Architecture Manual Version 8. 
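+ *
+ * Editor's note (not in the upstream source): generate.sh in this
+ * directory expands this file twice with m4 -- once with
+ * -DANSWER=quotient to produce divsi3.S and once without to produce
+ * modsi3.S -- which is why the ifelse(ANSWER, `quotient', ...) blocks
+ * below select between the quotient and remainder entry points.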
+ */ + +/* + * Division/Remainder + * + * Input is: + * dividend -- the thing being divided + * divisor -- how many ways to divide it + * Important parameters: + * N -- how many bits per iteration we try to get + * as our current guess: define(N, 4) define(TWOSUPN, 16) + * WORDSIZE -- how many bits altogether we're talking about: + * obviously: define(WORDSIZE, 32) + * A derived constant: + * TOPBITS -- how many bits are in the top "decade" of a number: + * define(TOPBITS, eval( WORDSIZE - N*((WORDSIZE-1)/N) ) ) + * Important variables are: + * Q -- the partial quotient under development -- initially 0 + * R -- the remainder so far -- initially == the dividend + * ITER -- number of iterations of the main division loop which will + * be required. Equal to CEIL( lg2(quotient)/N ) + * Note that this is log_base_(2ˆN) of the quotient. + * V -- the current comparand -- initially divisor*2ˆ(ITER*N-1) + * Cost: + * current estimate for non-large dividend is + * CEIL( lg2(quotient) / N ) x ( 10 + 7N/2 ) + C + * a large dividend is one greater than 2ˆ(31-TOPBITS) and takes a + * different path, as the upper bits of the quotient must be developed + * one bit at a time. + * This uses the m4 and cpp macro preprocessors. + */ + +define(dividend, `%o0') +define(divisor,`%o1') +define(Q, `%o2') +define(R, `%o3') +define(ITER, `%o4') +define(V, `%o5') +define(SIGN, `%g3') +define(T, `%g1') +define(SC,`%g2') +/* + * This is the recursive definition of how we develop quotient digits. + * It takes three important parameters: + * $1 -- the current depth, 1<=$1<=N + * $2 -- the current accumulation of quotient bits + * N -- max depth + * We add a new bit to $2 and either recurse or insert the bits in the quotient. + * Dynamic input: + * R -- current remainder + * Q -- current quotient + * V -- current comparand + * cc -- set on current value of R + * Dynamic output: + * R', Q', V', cc' + */ + +#include "../assembly.h" + +define(DEVELOP_QUOTIENT_BITS, +` !depth $1, accumulated bits $2 + bl L.$1.eval(TWOSUPN+$2) + srl V,1,V + ! remainder is nonnegative + subcc R,V,R + ifelse( $1, N, + ` b 9f + add Q, ($2*2+1), Q + ',` DEVELOP_QUOTIENT_BITS( incr($1), `eval(2*$2+1)') + ') +L.$1.eval(TWOSUPN+$2): + ! remainder is negative + addcc R,V,R + ifelse( $1, N, + ` b 9f + add Q, ($2*2-1), Q + ',` DEVELOP_QUOTIENT_BITS( incr($1), `eval(2*$2-1)') + ') + ifelse( $1, 1, `9:') +') +ifelse( ANSWER, `quotient', ` +.text + .align 32 +DEFINE_COMPILERRT_FUNCTION(__udivsi3) + b divide + mov 0,SIGN ! result always nonnegative +.text + .align 32 +DEFINE_COMPILERRT_FUNCTION(__divsi3) + orcc divisor,dividend,%g0 ! are either dividend or divisor negative + bge divide ! if not, skip this junk + xor divisor,dividend,SIGN ! record sign of result in sign of SIGN + tst divisor + bge 2f + tst dividend + ! divisor < 0 + bge divide + neg divisor + 2: + ! dividend < 0 + neg dividend + ! FALL THROUGH +',` +.text + .align 32 +DEFINE_COMPILERRT_FUNCTION(__umodsi3) + b divide + mov 0,SIGN ! result always nonnegative +.text + .align 32 +DEFINE_COMPILERRT_FUNCTION(__modsi3) + orcc divisor,dividend,%g0 ! are either dividend or divisor negative + bge divide ! if not, skip this junk + mov dividend,SIGN ! record sign of result in sign of SIGN + tst divisor + bge 2f + tst dividend + ! divisor < 0 + bge divide + neg divisor + 2: + ! dividend < 0 + neg dividend + ! FALL THROUGH +') + +divide: + ! Compute size of quotient, scale comparand. + orcc divisor,%g0,V ! movcc divisor,V + te 2 ! 
if divisor = 0 + mov dividend,R + mov 0,Q + sethi %hi(1<<(WORDSIZE-TOPBITS-1)),T + cmp R,T + blu not_really_big + mov 0,ITER + ! + ! Here, the dividend is >= 2ˆ(31-N) or so. We must be careful here, + ! as our usual N-at-a-shot divide step will cause overflow and havoc. + ! The total number of bits in the result here is N*ITER+SC, where + ! SC <= N. + ! Compute ITER in an unorthodox manner: know we need to Shift V into +! the top decade: so don't even bother to compare to R. +1: + cmp V,T + bgeu 3f + mov 1,SC + sll V,N,V + b 1b + inc ITER +! Now compute SC +2: addcc V,V,V + bcc not_too_big + add SC,1,SC + ! We're here if the divisor overflowed when Shifting. + ! This means that R has the high-order bit set. + ! Restore V and subtract from R. + sll T,TOPBITS,T ! high order bit + srl V,1,V ! rest of V + add V,T,V + b do_single_div + dec SC +not_too_big: +3: cmp V,R + blu 2b + nop + be do_single_div + nop +! V > R: went too far: back up 1 step +! srl V,1,V +! dec SC +! do single-bit divide steps +! +! We have to be careful here. We know that R >= V, so we can do the +! first divide step without thinking. BUT, the others are conditional, +! and are only done if R >= 0. Because both R and V may have the high- +! order bit set in the first step, just falling into the regular +! division loop will mess up the first time around. +! So we unroll slightly... +do_single_div: + deccc SC + bl end_regular_divide + nop + sub R,V,R + mov 1,Q + b,a end_single_divloop + ! EMPTY +single_divloop: + sll Q,1,Q + bl 1f + srl V,1,V + ! R >= 0 + sub R,V,R + b 2f + inc Q + 1: ! R < 0 + add R,V,R + dec Q + 2: + end_single_divloop: + deccc SC + bge single_divloop + tst R + b,a end_regular_divide + ! EMPTY + +not_really_big: +1: + sll V,N,V + cmp V,R + bleu 1b + inccc ITER + be got_result + dec ITER +do_regular_divide: + ! Do the main division iteration + tst R + ! Fall through into divide loop +divloop: + sll Q,N,Q + DEVELOP_QUOTIENT_BITS( 1, 0 ) +end_regular_divide: + deccc ITER + bge divloop + tst R + bl,a got_result + ! non-restoring fixup if remainder < 0, otherwise annulled +ifelse( ANSWER, `quotient', +` dec Q +',` add R,divisor,R +') + +got_result: + tst SIGN + bl,a 1f + ! negate for answer < 0, otherwise annulled +ifelse( ANSWER, `quotient', +` neg %o2,%o2 ! Q <- -Q +',` neg %o3,%o3 ! R <- -R +') +1: + retl ! leaf-routine return +ifelse( ANSWER, `quotient', +` mov %o2,%o0 ! quotient <- Q +',` mov %o3,%o0 ! remainder <- R +') diff --git a/contrib/compiler-rt/lib/builtins/sparc64/divsi3.S b/contrib/compiler-rt/lib/builtins/sparc64/divsi3.S new file mode 100644 index 000000000000..70fc1f407f48 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/sparc64/divsi3.S @@ -0,0 +1,330 @@ +/* + * This m4 code has been taken from The SPARC Architecture Manual Version 8. + */ +/* + * Division/Remainder + * + * Input is: + * dividend -- the thing being divided + * divisor -- how many ways to divide it + * Important parameters: + * N -- how many bits per iteration we try to get + * as our current guess: + * WORDSIZE -- how many bits altogether we're talking about: + * obviously: + * A derived constant: + * TOPBITS -- how many bits are in the top "decade" of a number: + * + * Important variables are: + * Q -- the partial quotient under development -- initially 0 + * R -- the remainder so far -- initially == the dividend + * ITER -- number of iterations of the main division loop which will + * be required. Equal to CEIL( lg2(quotient)/4 ) + * Note that this is log_base_(2ˆ4) of the quotient. 
+ * V -- the current comparand -- initially divisor*2ˆ(ITER*4-1) + * Cost: + * current estimate for non-large dividend is + * CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C + * a large dividend is one greater than 2ˆ(31-4 ) and takes a + * different path, as the upper bits of the quotient must be developed + * one bit at a time. + * This uses the m4 and cpp macro preprocessors. + */ +/* + * This is the recursive definition of how we develop quotient digits. + * It takes three important parameters: + * $1 -- the current depth, 1<=$1<=4 + * $2 -- the current accumulation of quotient bits + * 4 -- max depth + * We add a new bit to $2 and either recurse or insert the bits in the quotient. + * Dynamic input: + * %o3 -- current remainder + * %o2 -- current quotient + * %o5 -- current comparand + * cc -- set on current value of %o3 + * Dynamic output: + * %o3', %o2', %o5', cc' + */ +#include "../assembly.h" +.text + .align 32 +DEFINE_COMPILERRT_FUNCTION(__udivsi3) + b divide + mov 0,%g3 ! result always nonnegative +.text + .align 32 +DEFINE_COMPILERRT_FUNCTION(__divsi3) + orcc %o1,%o0,%g0 ! are either %o0 or %o1 negative + bge divide ! if not, skip this junk + xor %o1,%o0,%g3 ! record sign of result in sign of %g3 + tst %o1 + bge 2f + tst %o0 + ! %o1 < 0 + bge divide + neg %o1 + 2: + ! %o0 < 0 + neg %o0 + ! FALL THROUGH +divide: + ! Compute size of quotient, scale comparand. + orcc %o1,%g0,%o5 ! movcc %o1,%o5 + te 2 ! if %o1 = 0 + mov %o0,%o3 + mov 0,%o2 + sethi %hi(1<<(32-4 -1)),%g1 + cmp %o3,%g1 + blu not_really_big + mov 0,%o4 + ! + ! Here, the %o0 is >= 2ˆ(31-4) or so. We must be careful here, + ! as our usual 4-at-a-shot divide step will cause overflow and havoc. + ! The total number of bits in the result here is 4*%o4+%g2, where + ! %g2 <= 4. + ! Compute %o4 in an unorthodox manner: know we need to Shift %o5 into +! the top decade: so don't even bother to compare to %o3. +1: + cmp %o5,%g1 + bgeu 3f + mov 1,%g2 + sll %o5,4,%o5 + b 1b + inc %o4 +! Now compute %g2 +2: addcc %o5,%o5,%o5 + bcc not_too_big + add %g2,1,%g2 + ! We're here if the %o1 overflowed when Shifting. + ! This means that %o3 has the high-order bit set. + ! Restore %o5 and subtract from %o3. + sll %g1,4 ,%g1 ! high order bit + srl %o5,1,%o5 ! rest of %o5 + add %o5,%g1,%o5 + b do_single_div + dec %g2 +not_too_big: +3: cmp %o5,%o3 + blu 2b + nop + be do_single_div + nop +! %o5 > %o3: went too far: back up 1 step +! srl %o5,1,%o5 +! dec %g2 +! do single-bit divide steps +! +! We have to be careful here. We know that %o3 >= %o5, so we can do the +! first divide step without thinking. BUT, the others are conditional, +! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- +! order bit set in the first step, just falling into the regular +! division loop will mess up the first time around. +! So we unroll slightly... +do_single_div: + deccc %g2 + bl end_regular_divide + nop + sub %o3,%o5,%o3 + mov 1,%o2 + b,a end_single_divloop + ! EMPTY +single_divloop: + sll %o2,1,%o2 + bl 1f + srl %o5,1,%o5 + ! %o3 >= 0 + sub %o3,%o5,%o3 + b 2f + inc %o2 + 1: ! %o3 < 0 + add %o3,%o5,%o3 + dec %o2 + 2: + end_single_divloop: + deccc %g2 + bge single_divloop + tst %o3 + b,a end_regular_divide + ! EMPTY +not_really_big: +1: + sll %o5,4,%o5 + cmp %o5,%o3 + bleu 1b + inccc %o4 + be got_result + dec %o4 +do_regular_divide: + ! Do the main division iteration + tst %o3 + ! Fall through into divide loop +divloop: + sll %o2,4,%o2 + !depth 1, accumulated bits 0 + bl L.1.16 + srl %o5,1,%o5 + ! 
remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 2, accumulated bits 1 + bl L.2.17 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 3, accumulated bits 3 + bl L.3.19 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 4, accumulated bits 7 + bl L.4.23 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (7*2+1), %o2 +L.4.23: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (7*2-1), %o2 +L.3.19: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 4, accumulated bits 5 + bl L.4.21 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (5*2+1), %o2 +L.4.21: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (5*2-1), %o2 +L.2.17: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 3, accumulated bits 1 + bl L.3.17 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 4, accumulated bits 3 + bl L.4.19 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (3*2+1), %o2 +L.4.19: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (3*2-1), %o2 +L.3.17: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 4, accumulated bits 1 + bl L.4.17 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (1*2+1), %o2 +L.4.17: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (1*2-1), %o2 +L.1.16: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 2, accumulated bits -1 + bl L.2.15 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 3, accumulated bits -1 + bl L.3.15 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 4, accumulated bits -1 + bl L.4.15 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2+1), %o2 +L.4.15: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2-1), %o2 +L.3.15: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 4, accumulated bits -3 + bl L.4.13 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2+1), %o2 +L.4.13: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2-1), %o2 +L.2.15: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 3, accumulated bits -3 + bl L.3.13 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 4, accumulated bits -5 + bl L.4.11 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2+1), %o2 +L.4.11: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2-1), %o2 +L.3.13: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 4, accumulated bits -7 + bl L.4.9 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2+1), %o2 +L.4.9: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2-1), %o2 + 9: +end_regular_divide: + deccc %o4 + bge divloop + tst %o3 + bl,a got_result + ! non-restoring fixup if remainder < 0, otherwise annulled + dec %o2 +got_result: + tst %g3 + bl,a 1f + ! negate for answer < 0, otherwise annulled + neg %o2,%o2 ! %o2 <- -%o2 +1: + retl ! leaf-routine return + mov %o2,%o0 ! 
quotient <- %o2 diff --git a/contrib/compiler-rt/lib/builtins/sparc64/generate.sh b/contrib/compiler-rt/lib/builtins/sparc64/generate.sh new file mode 100644 index 000000000000..17c1106f9900 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/sparc64/generate.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +m4 divmod.m4 | sed -e 's/[[:space:]]*$//' | grep -v '^$' > modsi3.S +m4 -DANSWER=quotient divmod.m4 | sed -e 's/[[:space:]]*$//' | grep -v '^$' > divsi3.S +echo '! This file intentionally left blank' > umodsi3.S +echo '! This file intentionally left blank' > udivsi3.S diff --git a/contrib/compiler-rt/lib/builtins/sparc64/modsi3.S b/contrib/compiler-rt/lib/builtins/sparc64/modsi3.S new file mode 100644 index 000000000000..e126e3d3d45e --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/sparc64/modsi3.S @@ -0,0 +1,330 @@ +/* + * This m4 code has been taken from The SPARC Architecture Manual Version 8. + */ +/* + * Division/Remainder + * + * Input is: + * dividend -- the thing being divided + * divisor -- how many ways to divide it + * Important parameters: + * N -- how many bits per iteration we try to get + * as our current guess: + * WORDSIZE -- how many bits altogether we're talking about: + * obviously: + * A derived constant: + * TOPBITS -- how many bits are in the top "decade" of a number: + * + * Important variables are: + * Q -- the partial quotient under development -- initially 0 + * R -- the remainder so far -- initially == the dividend + * ITER -- number of iterations of the main division loop which will + * be required. Equal to CEIL( lg2(quotient)/4 ) + * Note that this is log_base_(2ˆ4) of the quotient. + * V -- the current comparand -- initially divisor*2ˆ(ITER*4-1) + * Cost: + * current estimate for non-large dividend is + * CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C + * a large dividend is one greater than 2ˆ(31-4 ) and takes a + * different path, as the upper bits of the quotient must be developed + * one bit at a time. + * This uses the m4 and cpp macro preprocessors. + */ +/* + * This is the recursive definition of how we develop quotient digits. + * It takes three important parameters: + * $1 -- the current depth, 1<=$1<=4 + * $2 -- the current accumulation of quotient bits + * 4 -- max depth + * We add a new bit to $2 and either recurse or insert the bits in the quotient. + * Dynamic input: + * %o3 -- current remainder + * %o2 -- current quotient + * %o5 -- current comparand + * cc -- set on current value of %o3 + * Dynamic output: + * %o3', %o2', %o5', cc' + */ +#include "../assembly.h" +.text + .align 32 +DEFINE_COMPILERRT_FUNCTION(__umodsi3) + b divide + mov 0,%g3 ! result always nonnegative +.text + .align 32 +DEFINE_COMPILERRT_FUNCTION(__modsi3) + orcc %o1,%o0,%g0 ! are either %o0 or %o1 negative + bge divide ! if not, skip this junk + mov %o0,%g3 ! record sign of result in sign of %g3 + tst %o1 + bge 2f + tst %o0 + ! %o1 < 0 + bge divide + neg %o1 + 2: + ! %o0 < 0 + neg %o0 + ! FALL THROUGH +divide: + ! Compute size of quotient, scale comparand. + orcc %o1,%g0,%o5 ! movcc %o1,%o5 + te 2 ! if %o1 = 0 + mov %o0,%o3 + mov 0,%o2 + sethi %hi(1<<(32-4 -1)),%g1 + cmp %o3,%g1 + blu not_really_big + mov 0,%o4 + ! + ! Here, the %o0 is >= 2ˆ(31-4) or so. We must be careful here, + ! as our usual 4-at-a-shot divide step will cause overflow and havoc. + ! The total number of bits in the result here is 4*%o4+%g2, where + ! %g2 <= 4. + ! Compute %o4 in an unorthodox manner: know we need to Shift %o5 into +! the top decade: so don't even bother to compare to %o3. 
+1: + cmp %o5,%g1 + bgeu 3f + mov 1,%g2 + sll %o5,4,%o5 + b 1b + inc %o4 +! Now compute %g2 +2: addcc %o5,%o5,%o5 + bcc not_too_big + add %g2,1,%g2 + ! We're here if the %o1 overflowed when Shifting. + ! This means that %o3 has the high-order bit set. + ! Restore %o5 and subtract from %o3. + sll %g1,4 ,%g1 ! high order bit + srl %o5,1,%o5 ! rest of %o5 + add %o5,%g1,%o5 + b do_single_div + dec %g2 +not_too_big: +3: cmp %o5,%o3 + blu 2b + nop + be do_single_div + nop +! %o5 > %o3: went too far: back up 1 step +! srl %o5,1,%o5 +! dec %g2 +! do single-bit divide steps +! +! We have to be careful here. We know that %o3 >= %o5, so we can do the +! first divide step without thinking. BUT, the others are conditional, +! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- +! order bit set in the first step, just falling into the regular +! division loop will mess up the first time around. +! So we unroll slightly... +do_single_div: + deccc %g2 + bl end_regular_divide + nop + sub %o3,%o5,%o3 + mov 1,%o2 + b,a end_single_divloop + ! EMPTY +single_divloop: + sll %o2,1,%o2 + bl 1f + srl %o5,1,%o5 + ! %o3 >= 0 + sub %o3,%o5,%o3 + b 2f + inc %o2 + 1: ! %o3 < 0 + add %o3,%o5,%o3 + dec %o2 + 2: + end_single_divloop: + deccc %g2 + bge single_divloop + tst %o3 + b,a end_regular_divide + ! EMPTY +not_really_big: +1: + sll %o5,4,%o5 + cmp %o5,%o3 + bleu 1b + inccc %o4 + be got_result + dec %o4 +do_regular_divide: + ! Do the main division iteration + tst %o3 + ! Fall through into divide loop +divloop: + sll %o2,4,%o2 + !depth 1, accumulated bits 0 + bl L.1.16 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 2, accumulated bits 1 + bl L.2.17 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 3, accumulated bits 3 + bl L.3.19 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 4, accumulated bits 7 + bl L.4.23 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (7*2+1), %o2 +L.4.23: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (7*2-1), %o2 +L.3.19: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 4, accumulated bits 5 + bl L.4.21 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (5*2+1), %o2 +L.4.21: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (5*2-1), %o2 +L.2.17: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 3, accumulated bits 1 + bl L.3.17 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 4, accumulated bits 3 + bl L.4.19 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (3*2+1), %o2 +L.4.19: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (3*2-1), %o2 +L.3.17: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 4, accumulated bits 1 + bl L.4.17 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (1*2+1), %o2 +L.4.17: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (1*2-1), %o2 +L.1.16: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 2, accumulated bits -1 + bl L.2.15 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 3, accumulated bits -1 + bl L.3.15 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 4, accumulated bits -1 + bl L.4.15 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2+1), %o2 +L.4.15: + ! 
remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2-1), %o2 +L.3.15: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 4, accumulated bits -3 + bl L.4.13 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2+1), %o2 +L.4.13: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2-1), %o2 +L.2.15: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 3, accumulated bits -3 + bl L.3.13 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + !depth 4, accumulated bits -5 + bl L.4.11 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2+1), %o2 +L.4.11: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2-1), %o2 +L.3.13: + ! remainder is negative + addcc %o3,%o5,%o3 + !depth 4, accumulated bits -7 + bl L.4.9 + srl %o5,1,%o5 + ! remainder is nonnegative + subcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2+1), %o2 +L.4.9: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2-1), %o2 + 9: +end_regular_divide: + deccc %o4 + bge divloop + tst %o3 + bl,a got_result + ! non-restoring fixup if remainder < 0, otherwise annulled + add %o3,%o1,%o3 +got_result: + tst %g3 + bl,a 1f + ! negate for answer < 0, otherwise annulled + neg %o3,%o3 ! %o3 <- -%o3 +1: + retl ! leaf-routine return + mov %o3,%o0 ! remainder <- %o3 diff --git a/contrib/compiler-rt/lib/builtins/sparc64/udivsi3.S b/contrib/compiler-rt/lib/builtins/sparc64/udivsi3.S new file mode 100644 index 000000000000..a41885276dfd --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/sparc64/udivsi3.S @@ -0,0 +1 @@ +! This file intentionally left blank diff --git a/contrib/compiler-rt/lib/builtins/sparc64/umodsi3.S b/contrib/compiler-rt/lib/builtins/sparc64/umodsi3.S new file mode 100644 index 000000000000..a41885276dfd --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/sparc64/umodsi3.S @@ -0,0 +1 @@ +! This file intentionally left blank diff --git a/contrib/compiler-rt/lib/builtins/subdf3.c b/contrib/compiler-rt/lib/builtins/subdf3.c new file mode 100644 index 000000000000..a892fa603cf2 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/subdf3.c @@ -0,0 +1,32 @@ +//===-- lib/adddf3.c - Double-precision subtraction ---------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements double-precision soft-float subtraction with the +// IEEE-754 default rounding (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_lib.h" + +// Subtraction; flip the sign bit of b and add. 
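+// Editor's note (not in the upstream source): fp_t, toRep, fromRep and
+// signBit come from fp_lib.h under DOUBLE_PRECISION, so toRep(b) ^ signBit
+// toggles only the IEEE-754 sign bit of b; the subtraction is then just
+// __adddf3(a, -b), reusing the rounding and special-case handling there.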
+COMPILER_RT_ABI fp_t +__subdf3(fp_t a, fp_t b) { + return __adddf3(a, fromRep(toRep(b) ^ signBit)); +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI fp_t __aeabi_dsub(fp_t a, fp_t b) { + return __subdf3(a, b); +} +#else +AEABI_RTABI fp_t __aeabi_dsub(fp_t a, fp_t b) COMPILER_RT_ALIAS(__subdf3); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/subsf3.c b/contrib/compiler-rt/lib/builtins/subsf3.c new file mode 100644 index 000000000000..4b2786177dcc --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/subsf3.c @@ -0,0 +1,32 @@ +//===-- lib/subsf3.c - Single-precision subtraction ---------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements single-precision soft-float subtraction with the +// IEEE-754 default rounding (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_lib.h" + +// Subtraction; flip the sign bit of b and add. +COMPILER_RT_ABI fp_t +__subsf3(fp_t a, fp_t b) { + return __addsf3(a, fromRep(toRep(b) ^ signBit)); +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI fp_t __aeabi_fsub(fp_t a, fp_t b) { + return __subsf3(a, b); +} +#else +AEABI_RTABI fp_t __aeabi_fsub(fp_t a, fp_t b) COMPILER_RT_ALIAS(__subsf3); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/subtf3.c b/contrib/compiler-rt/lib/builtins/subtf3.c new file mode 100644 index 000000000000..609b816f41e3 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/subtf3.c @@ -0,0 +1,27 @@ +//===-- lib/subtf3.c - Quad-precision subtraction -----------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements quad-precision soft-float subtraction with the +// IEEE-754 default rounding (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +COMPILER_RT_ABI fp_t __addtf3(fp_t a, fp_t b); + +// Subtraction; flip the sign bit of b and add. +COMPILER_RT_ABI fp_t +__subtf3(fp_t a, fp_t b) { + return __addtf3(a, fromRep(toRep(b) ^ signBit)); +} + +#endif diff --git a/contrib/compiler-rt/lib/builtins/subvdi3.c b/contrib/compiler-rt/lib/builtins/subvdi3.c new file mode 100644 index 000000000000..71fc70ffa92d --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/subvdi3.c @@ -0,0 +1,36 @@ +/* ===-- subvdi3.c - Implement __subvdi3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __subvdi3 for the compiler_rt library. 
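+ *
+ * Editor's note (not in the upstream source): the difference below is
+ * computed in unsigned arithmetic so that wrap-around is well defined,
+ * and overflow is detected from the sign of b: when b >= 0 the true
+ * result cannot exceed a, so s > a signals overflow; when b < 0 it must
+ * exceed a, so s <= a signals overflow.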
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a - b */ + +/* Effects: aborts if a - b overflows */ + +COMPILER_RT_ABI di_int +__subvdi3(di_int a, di_int b) +{ + di_int s = (du_int) a - (du_int) b; + if (b >= 0) + { + if (s > a) + compilerrt_abort(); + } + else + { + if (s <= a) + compilerrt_abort(); + } + return s; +} diff --git a/contrib/compiler-rt/lib/builtins/subvsi3.c b/contrib/compiler-rt/lib/builtins/subvsi3.c new file mode 100644 index 000000000000..e6c0fb688c97 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/subvsi3.c @@ -0,0 +1,36 @@ +/* ===-- subvsi3.c - Implement __subvsi3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __subvsi3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a - b */ + +/* Effects: aborts if a - b overflows */ + +COMPILER_RT_ABI si_int +__subvsi3(si_int a, si_int b) +{ + si_int s = (su_int) a - (su_int) b; + if (b >= 0) + { + if (s > a) + compilerrt_abort(); + } + else + { + if (s <= a) + compilerrt_abort(); + } + return s; +} diff --git a/contrib/compiler-rt/lib/builtins/subvti3.c b/contrib/compiler-rt/lib/builtins/subvti3.c new file mode 100644 index 000000000000..a6804d2d7b95 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/subvti3.c @@ -0,0 +1,40 @@ +/* ===-- subvti3.c - Implement __subvti3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __subvti3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: a - b */ + +/* Effects: aborts if a - b overflows */ + +COMPILER_RT_ABI ti_int +__subvti3(ti_int a, ti_int b) +{ + ti_int s = (tu_int) a - (tu_int) b; + if (b >= 0) + { + if (s > a) + compilerrt_abort(); + } + else + { + if (s <= a) + compilerrt_abort(); + } + return s; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/trampoline_setup.c b/contrib/compiler-rt/lib/builtins/trampoline_setup.c new file mode 100644 index 000000000000..25b627ab7658 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/trampoline_setup.c @@ -0,0 +1,48 @@ +/* ===----- trampoline_setup.c - Implement __trampoline_setup -------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +extern void __clear_cache(void* start, void* end); + +/* + * The ppc compiler generates calls to __trampoline_setup() when creating + * trampoline functions on the stack for use with nested functions. 
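+ * (A trampoline here is a small thunk carrying the static chain, i.e. the
+ * pointer to the enclosing function's locals, so the nested function can be
+ * invoked through an ordinary function pointer.)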
+ * This function creates a custom 40-byte trampoline function on the stack + * which loads r11 with a pointer to the outer function's locals + * and then jumps to the target nested function. + */ + +#if __ppc__ && !defined(__powerpc64__) +COMPILER_RT_ABI void +__trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated, + const void* realFunc, void* localsPtr) +{ + /* should never happen, but if compiler did not allocate */ + /* enough space on stack for the trampoline, abort */ + if ( trampSizeAllocated < 40 ) + compilerrt_abort(); + + /* create trampoline */ + trampOnStack[0] = 0x7c0802a6; /* mflr r0 */ + trampOnStack[1] = 0x4800000d; /* bl Lbase */ + trampOnStack[2] = (uint32_t)realFunc; + trampOnStack[3] = (uint32_t)localsPtr; + trampOnStack[4] = 0x7d6802a6; /* Lbase: mflr r11 */ + trampOnStack[5] = 0x818b0000; /* lwz r12,0(r11) */ + trampOnStack[6] = 0x7c0803a6; /* mtlr r0 */ + trampOnStack[7] = 0x7d8903a6; /* mtctr r12 */ + trampOnStack[8] = 0x816b0004; /* lwz r11,4(r11) */ + trampOnStack[9] = 0x4e800420; /* bctr */ + + /* clear instruction cache */ + __clear_cache(trampOnStack, &trampOnStack[10]); +} +#endif /* __ppc__ && !defined(__powerpc64__) */ diff --git a/contrib/compiler-rt/lib/builtins/truncdfhf2.c b/contrib/compiler-rt/lib/builtins/truncdfhf2.c new file mode 100644 index 000000000000..8354a41b8b6f --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/truncdfhf2.c @@ -0,0 +1,26 @@ +//===-- lib/truncdfhf2.c - double -> half conversion --------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define SRC_DOUBLE +#define DST_HALF +#include "fp_trunc_impl.inc" + +COMPILER_RT_ABI uint16_t __truncdfhf2(double a) { + return __truncXfYf2__(a); +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI uint16_t __aeabi_d2h(double a) { + return __truncdfhf2(a); +} +#else +AEABI_RTABI uint16_t __aeabi_d2h(double a) COMPILER_RT_ALIAS(__truncdfhf2); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/truncdfsf2.c b/contrib/compiler-rt/lib/builtins/truncdfsf2.c new file mode 100644 index 000000000000..195d3e0656e7 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/truncdfsf2.c @@ -0,0 +1,26 @@ +//===-- lib/truncdfsf2.c - double -> single conversion ------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
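+//
+// This file implements the double -> single form of the shared
+// __truncXfYf2__ narrowing conversion defined in fp_trunc_impl.inc.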
+// +//===----------------------------------------------------------------------===// + +#define SRC_DOUBLE +#define DST_SINGLE +#include "fp_trunc_impl.inc" + +COMPILER_RT_ABI float __truncdfsf2(double a) { + return __truncXfYf2__(a); +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI float __aeabi_d2f(double a) { + return __truncdfsf2(a); +} +#else +AEABI_RTABI float __aeabi_d2f(double a) COMPILER_RT_ALIAS(__truncdfsf2); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/truncsfhf2.c b/contrib/compiler-rt/lib/builtins/truncsfhf2.c new file mode 100644 index 000000000000..9c84ab4f938a --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/truncsfhf2.c @@ -0,0 +1,32 @@ +//===-- lib/truncsfhf2.c - single -> half conversion --------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define SRC_SINGLE +#define DST_HALF +#include "fp_trunc_impl.inc" + +// Use a forwarding definition and noinline to implement a poor man's alias, +// as there isn't a good cross-platform way of defining one. +COMPILER_RT_ABI NOINLINE uint16_t __truncsfhf2(float a) { + return __truncXfYf2__(a); +} + +COMPILER_RT_ABI uint16_t __gnu_f2h_ieee(float a) { + return __truncsfhf2(a); +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI uint16_t __aeabi_f2h(float a) { + return __truncsfhf2(a); +} +#else +AEABI_RTABI uint16_t __aeabi_f2h(float a) COMPILER_RT_ALIAS(__truncsfhf2); +#endif +#endif diff --git a/contrib/compiler-rt/lib/builtins/trunctfdf2.c b/contrib/compiler-rt/lib/builtins/trunctfdf2.c new file mode 100644 index 000000000000..741a71b33c5a --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/trunctfdf2.c @@ -0,0 +1,22 @@ +//===-- lib/truncdfsf2.c - quad -> double conversion --------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +#define SRC_QUAD +#define DST_DOUBLE +#include "fp_trunc_impl.inc" + +COMPILER_RT_ABI double __trunctfdf2(long double a) { + return __truncXfYf2__(a); +} + +#endif diff --git a/contrib/compiler-rt/lib/builtins/trunctfsf2.c b/contrib/compiler-rt/lib/builtins/trunctfsf2.c new file mode 100644 index 000000000000..de96c1decf63 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/trunctfsf2.c @@ -0,0 +1,22 @@ +//===-- lib/trunctfsf2.c - quad -> single conversion --------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +#define SRC_QUAD +#define DST_SINGLE +#include "fp_trunc_impl.inc" + +COMPILER_RT_ABI float __trunctfsf2(long double a) { + return __truncXfYf2__(a); +} + +#endif diff --git a/contrib/compiler-rt/lib/builtins/ucmpdi2.c b/contrib/compiler-rt/lib/builtins/ucmpdi2.c new file mode 100644 index 000000000000..40af23613b1f --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/ucmpdi2.c @@ -0,0 +1,51 @@ +/* ===-- ucmpdi2.c - Implement __ucmpdi2 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __ucmpdi2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: if (a < b) returns 0 + * if (a == b) returns 1 + * if (a > b) returns 2 + */ + +COMPILER_RT_ABI si_int +__ucmpdi2(du_int a, du_int b) +{ + udwords x; + x.all = a; + udwords y; + y.all = b; + if (x.s.high < y.s.high) + return 0; + if (x.s.high > y.s.high) + return 2; + if (x.s.low < y.s.low) + return 0; + if (x.s.low > y.s.low) + return 2; + return 1; +} + +#ifdef __ARM_EABI__ +/* Returns: if (a < b) returns -1 +* if (a == b) returns 0 +* if (a > b) returns 1 +*/ +COMPILER_RT_ABI si_int +__aeabi_ulcmp(di_int a, di_int b) +{ + return __ucmpdi2(a, b) - 1; +} +#endif + diff --git a/contrib/compiler-rt/lib/builtins/ucmpti2.c b/contrib/compiler-rt/lib/builtins/ucmpti2.c new file mode 100644 index 000000000000..bda8083bb2a1 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/ucmpti2.c @@ -0,0 +1,42 @@ +/* ===-- ucmpti2.c - Implement __ucmpti2 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __ucmpti2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: if (a < b) returns 0 + * if (a == b) returns 1 + * if (a > b) returns 2 + */ + +COMPILER_RT_ABI si_int +__ucmpti2(tu_int a, tu_int b) +{ + utwords x; + x.all = a; + utwords y; + y.all = b; + if (x.s.high < y.s.high) + return 0; + if (x.s.high > y.s.high) + return 2; + if (x.s.low < y.s.low) + return 0; + if (x.s.low > y.s.low) + return 2; + return 1; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/udivdi3.c b/contrib/compiler-rt/lib/builtins/udivdi3.c new file mode 100644 index 000000000000..dc68e154b109 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/udivdi3.c @@ -0,0 +1,23 @@ +/* ===-- udivdi3.c - Implement __udivdi3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __udivdi3 for the compiler_rt library. 
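+ * It is a thin wrapper: the quotient is obtained from __udivmoddi4 with a
+ * null remainder pointer, so the actual division lives in udivmoddi4.c.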
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a / b */ + +COMPILER_RT_ABI du_int +__udivdi3(du_int a, du_int b) +{ + return __udivmoddi4(a, b, 0); +} diff --git a/contrib/compiler-rt/lib/builtins/udivmoddi4.c b/contrib/compiler-rt/lib/builtins/udivmoddi4.c new file mode 100644 index 000000000000..0c8b4ff46474 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/udivmoddi4.c @@ -0,0 +1,231 @@ +/* ===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __udivmoddi4 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Effects: if rem != 0, *rem = a % b + * Returns: a / b + */ + +/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */ + +COMPILER_RT_ABI du_int +__udivmoddi4(du_int a, du_int b, du_int* rem) +{ + const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT; + const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; + udwords n; + n.all = a; + udwords d; + d.all = b; + udwords q; + udwords r; + unsigned sr; + /* special cases, X is unknown, K != 0 */ + if (n.s.high == 0) + { + if (d.s.high == 0) + { + /* 0 X + * --- + * 0 X + */ + if (rem) + *rem = n.s.low % d.s.low; + return n.s.low / d.s.low; + } + /* 0 X + * --- + * K X + */ + if (rem) + *rem = n.s.low; + return 0; + } + /* n.s.high != 0 */ + if (d.s.low == 0) + { + if (d.s.high == 0) + { + /* K X + * --- + * 0 0 + */ + if (rem) + *rem = n.s.high % d.s.low; + return n.s.high / d.s.low; + } + /* d.s.high != 0 */ + if (n.s.low == 0) + { + /* K 0 + * --- + * K 0 + */ + if (rem) + { + r.s.high = n.s.high % d.s.high; + r.s.low = 0; + *rem = r.all; + } + return n.s.high / d.s.high; + } + /* K K + * --- + * K 0 + */ + if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ + { + if (rem) + { + r.s.low = n.s.low; + r.s.high = n.s.high & (d.s.high - 1); + *rem = r.all; + } + return n.s.high >> __builtin_ctz(d.s.high); + } + /* K K + * --- + * K 0 + */ + sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); + /* 0 <= sr <= n_uword_bits - 2 or sr large */ + if (sr > n_uword_bits - 2) + { + if (rem) + *rem = n.all; + return 0; + } + ++sr; + /* 1 <= sr <= n_uword_bits - 1 */ + /* q.all = n.all << (n_udword_bits - sr); */ + q.s.low = 0; + q.s.high = n.s.low << (n_uword_bits - sr); + /* r.all = n.all >> sr; */ + r.s.high = n.s.high >> sr; + r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); + } + else /* d.s.low != 0 */ + { + if (d.s.high == 0) + { + /* K X + * --- + * 0 K + */ + if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ + { + if (rem) + *rem = n.s.low & (d.s.low - 1); + if (d.s.low == 1) + return n.all; + sr = __builtin_ctz(d.s.low); + q.s.high = n.s.high >> sr; + q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); + return q.all; + } + /* K X + * --- + * 0 K + */ + sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high); + /* 2 <= sr <= n_udword_bits - 1 + * q.all = n.all << (n_udword_bits - sr); + * r.all = n.all >> sr; + */ + if (sr == n_uword_bits) + { + q.s.low = 0; + q.s.high = n.s.low; + r.s.high = 0; + r.s.low = n.s.high; + } + else if (sr < n_uword_bits) // 2 <= sr <= 
n_uword_bits - 1 + { + q.s.low = 0; + q.s.high = n.s.low << (n_uword_bits - sr); + r.s.high = n.s.high >> sr; + r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); + } + else // n_uword_bits + 1 <= sr <= n_udword_bits - 1 + { + q.s.low = n.s.low << (n_udword_bits - sr); + q.s.high = (n.s.high << (n_udword_bits - sr)) | + (n.s.low >> (sr - n_uword_bits)); + r.s.high = 0; + r.s.low = n.s.high >> (sr - n_uword_bits); + } + } + else + { + /* K X + * --- + * K K + */ + sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); + /* 0 <= sr <= n_uword_bits - 1 or sr large */ + if (sr > n_uword_bits - 1) + { + if (rem) + *rem = n.all; + return 0; + } + ++sr; + /* 1 <= sr <= n_uword_bits */ + /* q.all = n.all << (n_udword_bits - sr); */ + q.s.low = 0; + if (sr == n_uword_bits) + { + q.s.high = n.s.low; + r.s.high = 0; + r.s.low = n.s.high; + } + else + { + q.s.high = n.s.low << (n_uword_bits - sr); + r.s.high = n.s.high >> sr; + r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); + } + } + } + /* Not a special case + * q and r are initialized with: + * q.all = n.all << (n_udword_bits - sr); + * r.all = n.all >> sr; + * 1 <= sr <= n_udword_bits - 1 + */ + su_int carry = 0; + for (; sr > 0; --sr) + { + /* r:q = ((r:q) << 1) | carry */ + r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1)); + r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1)); + q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1)); + q.s.low = (q.s.low << 1) | carry; + /* carry = 0; + * if (r.all >= d.all) + * { + * r.all -= d.all; + * carry = 1; + * } + */ + const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1); + carry = s & 1; + r.all -= d.all & s; + } + q.all = (q.all << 1) | carry; + if (rem) + *rem = r.all; + return q.all; +} diff --git a/contrib/compiler-rt/lib/builtins/udivmodsi4.c b/contrib/compiler-rt/lib/builtins/udivmodsi4.c new file mode 100644 index 000000000000..789c4b5061e4 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/udivmodsi4.c @@ -0,0 +1,27 @@ +/*===-- udivmodsi4.c - Implement __udivmodsi4 ------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __udivmodsi4 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a / b, *rem = a % b */ + +COMPILER_RT_ABI su_int +__udivmodsi4(su_int a, su_int b, su_int* rem) +{ + si_int d = __udivsi3(a,b); + *rem = a - (d*b); + return d; +} + + diff --git a/contrib/compiler-rt/lib/builtins/udivmodti4.c b/contrib/compiler-rt/lib/builtins/udivmodti4.c new file mode 100644 index 000000000000..803168849c6c --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/udivmodti4.c @@ -0,0 +1,238 @@ +/* ===-- udivmodti4.c - Implement __udivmodti4 -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __udivmodti4 for the compiler_rt library. 
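+ * It is the 128-bit counterpart of __udivmoddi4: the same special cases and
+ * the same shift-and-subtract loop, operating on du_int halves instead of
+ * su_int halves.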
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Effects: if rem != 0, *rem = a % b + * Returns: a / b + */ + +/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */ + +COMPILER_RT_ABI tu_int +__udivmodti4(tu_int a, tu_int b, tu_int* rem) +{ + const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; + const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT; + utwords n; + n.all = a; + utwords d; + d.all = b; + utwords q; + utwords r; + unsigned sr; + /* special cases, X is unknown, K != 0 */ + if (n.s.high == 0) + { + if (d.s.high == 0) + { + /* 0 X + * --- + * 0 X + */ + if (rem) + *rem = n.s.low % d.s.low; + return n.s.low / d.s.low; + } + /* 0 X + * --- + * K X + */ + if (rem) + *rem = n.s.low; + return 0; + } + /* n.s.high != 0 */ + if (d.s.low == 0) + { + if (d.s.high == 0) + { + /* K X + * --- + * 0 0 + */ + if (rem) + *rem = n.s.high % d.s.low; + return n.s.high / d.s.low; + } + /* d.s.high != 0 */ + if (n.s.low == 0) + { + /* K 0 + * --- + * K 0 + */ + if (rem) + { + r.s.high = n.s.high % d.s.high; + r.s.low = 0; + *rem = r.all; + } + return n.s.high / d.s.high; + } + /* K K + * --- + * K 0 + */ + if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ + { + if (rem) + { + r.s.low = n.s.low; + r.s.high = n.s.high & (d.s.high - 1); + *rem = r.all; + } + return n.s.high >> __builtin_ctzll(d.s.high); + } + /* K K + * --- + * K 0 + */ + sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high); + /* 0 <= sr <= n_udword_bits - 2 or sr large */ + if (sr > n_udword_bits - 2) + { + if (rem) + *rem = n.all; + return 0; + } + ++sr; + /* 1 <= sr <= n_udword_bits - 1 */ + /* q.all = n.all << (n_utword_bits - sr); */ + q.s.low = 0; + q.s.high = n.s.low << (n_udword_bits - sr); + /* r.all = n.all >> sr; */ + r.s.high = n.s.high >> sr; + r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); + } + else /* d.s.low != 0 */ + { + if (d.s.high == 0) + { + /* K X + * --- + * 0 K + */ + if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ + { + if (rem) + *rem = n.s.low & (d.s.low - 1); + if (d.s.low == 1) + return n.all; + sr = __builtin_ctzll(d.s.low); + q.s.high = n.s.high >> sr; + q.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); + return q.all; + } + /* K X + * --- + * 0 K + */ + sr = 1 + n_udword_bits + __builtin_clzll(d.s.low) + - __builtin_clzll(n.s.high); + /* 2 <= sr <= n_utword_bits - 1 + * q.all = n.all << (n_utword_bits - sr); + * r.all = n.all >> sr; + */ + if (sr == n_udword_bits) + { + q.s.low = 0; + q.s.high = n.s.low; + r.s.high = 0; + r.s.low = n.s.high; + } + else if (sr < n_udword_bits) // 2 <= sr <= n_udword_bits - 1 + { + q.s.low = 0; + q.s.high = n.s.low << (n_udword_bits - sr); + r.s.high = n.s.high >> sr; + r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); + } + else // n_udword_bits + 1 <= sr <= n_utword_bits - 1 + { + q.s.low = n.s.low << (n_utword_bits - sr); + q.s.high = (n.s.high << (n_utword_bits - sr)) | + (n.s.low >> (sr - n_udword_bits)); + r.s.high = 0; + r.s.low = n.s.high >> (sr - n_udword_bits); + } + } + else + { + /* K X + * --- + * K K + */ + sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high); + /*0 <= sr <= n_udword_bits - 1 or sr large */ + if (sr > n_udword_bits - 1) + { + if (rem) + *rem = n.all; + return 0; + } + ++sr; + /* 1 <= sr <= n_udword_bits + * q.all = n.all << (n_utword_bits - sr); + * r.all = n.all >> sr; + */ + q.s.low = 0; + if (sr == n_udword_bits) + { + q.s.high = 
n.s.low; + r.s.high = 0; + r.s.low = n.s.high; + } + else + { + r.s.high = n.s.high >> sr; + r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); + q.s.high = n.s.low << (n_udword_bits - sr); + } + } + } + /* Not a special case + * q and r are initialized with: + * q.all = n.all << (n_utword_bits - sr); + * r.all = n.all >> sr; + * 1 <= sr <= n_utword_bits - 1 + */ + su_int carry = 0; + for (; sr > 0; --sr) + { + /* r:q = ((r:q) << 1) | carry */ + r.s.high = (r.s.high << 1) | (r.s.low >> (n_udword_bits - 1)); + r.s.low = (r.s.low << 1) | (q.s.high >> (n_udword_bits - 1)); + q.s.high = (q.s.high << 1) | (q.s.low >> (n_udword_bits - 1)); + q.s.low = (q.s.low << 1) | carry; + /* carry = 0; + * if (r.all >= d.all) + * { + * r.all -= d.all; + * carry = 1; + * } + */ + const ti_int s = (ti_int)(d.all - r.all - 1) >> (n_utword_bits - 1); + carry = s & 1; + r.all -= d.all & s; + } + q.all = (q.all << 1) | carry; + if (rem) + *rem = r.all; + return q.all; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/udivsi3.c b/contrib/compiler-rt/lib/builtins/udivsi3.c new file mode 100644 index 000000000000..bb720f8c382b --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/udivsi3.c @@ -0,0 +1,68 @@ +/* ===-- udivsi3.c - Implement __udivsi3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __udivsi3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a / b */ + +/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */ + +/* This function should not call __divsi3! */ +COMPILER_RT_ABI su_int +__udivsi3(su_int n, su_int d) +{ + const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT; + su_int q; + su_int r; + unsigned sr; + /* special cases */ + if (d == 0) + return 0; /* ?! */ + if (n == 0) + return 0; + sr = __builtin_clz(d) - __builtin_clz(n); + /* 0 <= sr <= n_uword_bits - 1 or sr large */ + if (sr > n_uword_bits - 1) /* d > r */ + return 0; + if (sr == n_uword_bits - 1) /* d == 1 */ + return n; + ++sr; + /* 1 <= sr <= n_uword_bits - 1 */ + /* Not a special case */ + q = n << (n_uword_bits - sr); + r = n >> sr; + su_int carry = 0; + for (; sr > 0; --sr) + { + /* r:q = ((r:q) << 1) | carry */ + r = (r << 1) | (q >> (n_uword_bits - 1)); + q = (q << 1) | carry; + /* carry = 0; + * if (r.all >= d.all) + * { + * r.all -= d.all; + * carry = 1; + * } + */ + const si_int s = (si_int)(d - r - 1) >> (n_uword_bits - 1); + carry = s & 1; + r -= d & s; + } + q = (q << 1) | carry; + return q; +} + +#if defined(__ARM_EABI__) +AEABI_RTABI su_int __aeabi_uidiv(su_int n, su_int d) COMPILER_RT_ALIAS(__udivsi3); +#endif diff --git a/contrib/compiler-rt/lib/builtins/udivti3.c b/contrib/compiler-rt/lib/builtins/udivti3.c new file mode 100644 index 000000000000..ec94673e25b0 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/udivti3.c @@ -0,0 +1,27 @@ +/* ===-- udivti3.c - Implement __udivti3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
+ * + * ===----------------------------------------------------------------------=== + * + * This file implements __udivti3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: a / b */ + +COMPILER_RT_ABI tu_int +__udivti3(tu_int a, tu_int b) +{ + return __udivmodti4(a, b, 0); +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/umoddi3.c b/contrib/compiler-rt/lib/builtins/umoddi3.c new file mode 100644 index 000000000000..d513f080a1e9 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/umoddi3.c @@ -0,0 +1,25 @@ +/* ===-- umoddi3.c - Implement __umoddi3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __umoddi3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a % b */ + +COMPILER_RT_ABI du_int +__umoddi3(du_int a, du_int b) +{ + du_int r; + __udivmoddi4(a, b, &r); + return r; +} diff --git a/contrib/compiler-rt/lib/builtins/umodsi3.c b/contrib/compiler-rt/lib/builtins/umodsi3.c new file mode 100644 index 000000000000..d5fda4a6af1f --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/umodsi3.c @@ -0,0 +1,23 @@ +/* ===-- umodsi3.c - Implement __umodsi3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __umodsi3 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: a % b */ + +COMPILER_RT_ABI su_int +__umodsi3(su_int a, su_int b) +{ + return a - __udivsi3(a, b) * b; +} diff --git a/contrib/compiler-rt/lib/builtins/umodti3.c b/contrib/compiler-rt/lib/builtins/umodti3.c new file mode 100644 index 000000000000..6d1ca7a8cf6f --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/umodti3.c @@ -0,0 +1,29 @@ +/* ===-- umodti3.c - Implement __umodti3 -----------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __umodti3 for the compiler_rt library. 
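+ * The remainder is taken from __udivmodti4 and the quotient is discarded,
+ * mirroring what umoddi3.c does for the 64-bit case.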
+ * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +/* Returns: a % b */ + +COMPILER_RT_ABI tu_int +__umodti3(tu_int a, tu_int b) +{ + tu_int r; + __udivmodti4(a, b, &r); + return r; +} + +#endif /* CRT_HAS_128BIT */ diff --git a/contrib/compiler-rt/lib/builtins/unwind-ehabi-helpers.h b/contrib/compiler-rt/lib/builtins/unwind-ehabi-helpers.h new file mode 100644 index 000000000000..ccb0765975a9 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/unwind-ehabi-helpers.h @@ -0,0 +1,55 @@ +/* ===-- arm-ehabi-helpers.h - Supplementary ARM EHABI declarations --------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===--------------------------------------------------------------------=== */ + +#ifndef UNWIND_EHABI_HELPERS_H +#define UNWIND_EHABI_HELPERS_H + +#include <stdint.h> +/* NOTE: see reasoning for this inclusion below */ +#include <unwind.h> + +#if !defined(__ARM_EABI_UNWINDER__) + +/* + * NOTE: _URC_OK, _URC_FAILURE must be present as preprocessor tokens. This + * allows for a substitution of a constant which can be cast into the + * appropriate enumerated type. This header is expected to always be included + * AFTER unwind.h (which is why it is forcefully included above). This ensures + * that we do not overwrite the token for the enumeration. Subsequent uses of + * the token would be clean to rewrite with constant values. + * + * The typedef redeclaration should be safe. Due to the protection granted to + * us by the `__ARM_EABI_UNWINDER__` above, we are guaranteed that we are in a + * header not vended by gcc. The HP unwinder (being an itanium unwinder) does + * not support EHABI, and the GNU unwinder, derived from the HP unwinder, also + * does not support EHABI as of the introduction of this header. As such, we + * are fairly certain that we are in the LLVM case. Here, _Unwind_State is a + * typedef, and so we can get away with a redeclaration. + * + * Guarded redefinitions of the needed unwind state prevent the redefinition of + * those states. + */ + +#define _URC_OK 0 +#define _URC_FAILURE 9 + +typedef uint32_t _Unwind_State; + +#if !defined(_US_UNWIND_FRAME_STARTING) +#define _US_UNWIND_FRAME_STARTING ((_Unwind_State)1) +#endif + +#if !defined(_US_ACTION_MASK) +#define _US_ACTION_MASK ((_Unwind_State)3) +#endif + +#endif + +#endif + diff --git a/contrib/compiler-rt/lib/builtins/x86_64/chkstk.S b/contrib/compiler-rt/lib/builtins/x86_64/chkstk.S new file mode 100644 index 000000000000..4149ac63d9d0 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/x86_64/chkstk.S @@ -0,0 +1,39 @@ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// _chkstk routine +// This routine is windows specific +// http://msdn.microsoft.com/en-us/library/ms648426.aspx + +// Notes from r227519 +// MSVC x64s __chkstk and cygmings ___chkstk_ms do not adjust %rsp +// themselves. It also does not clobber %rax so we can reuse it when +// adjusting %rsp. 
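+//
+// Rough sketch: while more than one page (0x1000 bytes) remains, %rcx walks
+// down a page at a time and a dummy `test` load touches it so the guard
+// page is hit in order, with one final probe after the loop; a 0x3000-byte
+// request therefore touches three consecutive pages.  %rsp itself is left
+// for the caller to adjust, as noted above.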
+ +#ifdef __x86_64__ + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(___chkstk_ms) + push %rcx + push %rax + cmp $0x1000,%rax + lea 24(%rsp),%rcx + jb 1f +2: + sub $0x1000,%rcx + test %rcx,(%rcx) + sub $0x1000,%rax + cmp $0x1000,%rax + ja 2b +1: + sub %rax,%rcx + test %rcx,(%rcx) + pop %rax + pop %rcx + ret +END_COMPILERRT_FUNCTION(___chkstk_ms) + +#endif // __x86_64__ diff --git a/contrib/compiler-rt/lib/builtins/x86_64/chkstk2.S b/contrib/compiler-rt/lib/builtins/x86_64/chkstk2.S new file mode 100644 index 000000000000..ac1eb920e0e8 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/x86_64/chkstk2.S @@ -0,0 +1,42 @@ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +#ifdef __x86_64__ + +// _chkstk (_alloca) routine - probe stack between %rsp and (%rsp-%rax) in 4k increments, +// then decrement %rsp by %rax. Preserves all registers except %rsp and flags. +// This routine is windows specific +// http://msdn.microsoft.com/en-us/library/ms648426.aspx + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__alloca) + mov %rcx,%rax // x64 _alloca is a normal function with parameter in rcx + // fallthrough +DEFINE_COMPILERRT_FUNCTION(___chkstk) + push %rcx + cmp $0x1000,%rax + lea 16(%rsp),%rcx // rsp before calling this routine -> rcx + jb 1f +2: + sub $0x1000,%rcx + test %rcx,(%rcx) + sub $0x1000,%rax + cmp $0x1000,%rax + ja 2b +1: + sub %rax,%rcx + test %rcx,(%rcx) + + lea 8(%rsp),%rax // load pointer to the return address into rax + mov %rcx,%rsp // install the new top of stack pointer into rsp + mov -8(%rax),%rcx // restore rcx + push (%rax) // push return address onto the stack + sub %rsp,%rax // restore the original value in rax + ret +END_COMPILERRT_FUNCTION(___chkstk) +END_COMPILERRT_FUNCTION(__alloca) + +#endif // __x86_64__ diff --git a/contrib/compiler-rt/lib/builtins/x86_64/floatdidf.c b/contrib/compiler-rt/lib/builtins/x86_64/floatdidf.c new file mode 100644 index 000000000000..dead0ed42c65 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/x86_64/floatdidf.c @@ -0,0 +1,16 @@ +/* This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + */ + +/* double __floatdidf(di_int a); */ + +#if defined(__x86_64__) || defined(_M_X64) + +#include "../int_lib.h" + +double __floatdidf(int64_t a) +{ + return (double)a; +} + +#endif /* __x86_64__ */ diff --git a/contrib/compiler-rt/lib/builtins/x86_64/floatdisf.c b/contrib/compiler-rt/lib/builtins/x86_64/floatdisf.c new file mode 100644 index 000000000000..99d5621c6327 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/x86_64/floatdisf.c @@ -0,0 +1,14 @@ +/* This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + */ + +#if defined(__x86_64__) || defined(_M_X64) + +#include "../int_lib.h" + +float __floatdisf(int64_t a) +{ + return (float)a; +} + +#endif /* __x86_64__ */ diff --git a/contrib/compiler-rt/lib/builtins/x86_64/floatdixf.c b/contrib/compiler-rt/lib/builtins/x86_64/floatdixf.c new file mode 100644 index 000000000000..c01193a82b5e --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/x86_64/floatdixf.c @@ -0,0 +1,16 @@ +/* This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. 
+ */ + +/* long double __floatdixf(di_int a); */ + +#ifdef __x86_64__ + +#include "../int_lib.h" + +long double __floatdixf(int64_t a) +{ + return (long double)a; +} + +#endif /* __i386__ */ diff --git a/contrib/compiler-rt/lib/builtins/x86_64/floatundidf.S b/contrib/compiler-rt/lib/builtins/x86_64/floatundidf.S new file mode 100644 index 000000000000..094a68dc3cd4 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/x86_64/floatundidf.S @@ -0,0 +1,52 @@ +//===-- floatundidf.S - Implement __floatundidf for x86_64 ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __floatundidf for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// double __floatundidf(du_int a); + +#ifdef __x86_64__ + +CONST_SECTION + + .balign 16 +twop52: + .quad 0x4330000000000000 + + .balign 16 +twop84_plus_twop52: + .quad 0x4530000000100000 + + .balign 16 +twop84: + .quad 0x4530000000000000 + +#define REL_ADDR(_a) (_a)(%rip) + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__floatundidf) + movd %edi, %xmm0 // low 32 bits of a + shrq $32, %rdi // high 32 bits of a + orq REL_ADDR(twop84), %rdi // 0x1p84 + a_hi (no rounding occurs) + orpd REL_ADDR(twop52), %xmm0 // 0x1p52 + a_lo (no rounding occurs) + movd %rdi, %xmm1 + subsd REL_ADDR(twop84_plus_twop52), %xmm1 // a_hi - 0x1p52 (no rounding occurs) + addsd %xmm1, %xmm0 // a_hi + a_lo (round happens here) + ret +END_COMPILERRT_FUNCTION(__floatundidf) + +#endif // __x86_64__ + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/x86_64/floatundisf.S b/contrib/compiler-rt/lib/builtins/x86_64/floatundisf.S new file mode 100644 index 000000000000..7c9f75e188eb --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/x86_64/floatundisf.S @@ -0,0 +1,38 @@ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. + +#include "../assembly.h" + +// float __floatundisf(du_int a); + +#ifdef __x86_64__ + +CONST_SECTION + + .balign 16 +two: + .single 2.0 + +#define REL_ADDR(_a) (_a)(%rip) + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__floatundisf) + movq $1, %rsi + testq %rdi, %rdi + js 1f + cvtsi2ssq %rdi, %xmm0 + ret + +1: andq %rdi, %rsi + shrq %rdi + orq %rsi, %rdi + cvtsi2ssq %rdi, %xmm0 + mulss REL_ADDR(two), %xmm0 + ret +END_COMPILERRT_FUNCTION(__floatundisf) + +#endif // __x86_64__ + +NO_EXEC_STACK_DIRECTIVE + diff --git a/contrib/compiler-rt/lib/builtins/x86_64/floatundixf.S b/contrib/compiler-rt/lib/builtins/x86_64/floatundixf.S new file mode 100644 index 000000000000..28a096b71373 --- /dev/null +++ b/contrib/compiler-rt/lib/builtins/x86_64/floatundixf.S @@ -0,0 +1,71 @@ +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
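+//
+// The x87 fild instruction only accepts signed operands, so the code below
+// loads the value as a signed 64-bit integer and, when the top bit was set,
+// corrects the result by adding 2^64 (the twop64 constant).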
+ +#include "../assembly.h" + +// long double __floatundixf(du_int a); + +#ifdef __x86_64__ + +CONST_SECTION + + .balign 16 +twop64: + .quad 0x43f0000000000000 + +#define REL_ADDR(_a) (_a)(%rip) + + .text + + .balign 4 +DEFINE_COMPILERRT_FUNCTION(__floatundixf) + movq %rdi, -8(%rsp) + fildq -8(%rsp) + test %rdi, %rdi + js 1f + ret +1: faddl REL_ADDR(twop64) + ret +END_COMPILERRT_FUNCTION(__floatundixf) + +#endif // __x86_64__ + + +/* Branch-free implementation is ever so slightly slower, but more beautiful. + It is likely superior for inlining, so I kept it around for future reference. + +#ifdef __x86_64__ + +CONST_SECTION + + .balign 4 +twop52: + .quad 0x4330000000000000 +twop84_plus_twop52_neg: + .quad 0xc530000000100000 +twop84: + .quad 0x4530000000000000 + +#define REL_ADDR(_a) (_a)(%rip) + +.text +.balign 4 +DEFINE_COMPILERRT_FUNCTION(__floatundixf) + movl %edi, %esi // low 32 bits of input + shrq $32, %rdi // hi 32 bits of input + orq REL_ADDR(twop84), %rdi // 2^84 + hi (as a double) + orq REL_ADDR(twop52), %rsi // 2^52 + lo (as a double) + movq %rdi, -8(%rsp) + movq %rsi, -16(%rsp) + fldl REL_ADDR(twop84_plus_twop52_neg) + faddl -8(%rsp) // hi - 2^52 (as double extended, no rounding occurs) + faddl -16(%rsp) // hi + lo (as double extended) + ret +END_COMPILERRT_FUNCTION(__floatundixf) + +#endif // __x86_64__ + +*/ + +NO_EXEC_STACK_DIRECTIVE + |