aboutsummaryrefslogtreecommitdiff
path: root/contrib/compiler-rt/lib/builtins
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/compiler-rt/lib/builtins')
-rw-r--r--contrib/compiler-rt/lib/builtins/README.txt346
-rw-r--r--contrib/compiler-rt/lib/builtins/aarch64/chkstk.S34
-rw-r--r--contrib/compiler-rt/lib/builtins/absvdi2.c29
-rw-r--r--contrib/compiler-rt/lib/builtins/absvsi2.c29
-rw-r--r--contrib/compiler-rt/lib/builtins/absvti2.c34
-rw-r--r--contrib/compiler-rt/lib/builtins/adddf3.c30
-rw-r--r--contrib/compiler-rt/lib/builtins/addsf3.c30
-rw-r--r--contrib/compiler-rt/lib/builtins/addtf3.c25
-rw-r--r--contrib/compiler-rt/lib/builtins/addvdi3.c36
-rw-r--r--contrib/compiler-rt/lib/builtins/addvsi3.c36
-rw-r--r--contrib/compiler-rt/lib/builtins/addvti3.c40
-rw-r--r--contrib/compiler-rt/lib/builtins/apple_versioning.c350
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/adddf3vfp.S33
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/addsf3.S277
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/addsf3vfp.S33
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmp.S145
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmpeq_check_nan.c16
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmp.S140
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmpeq_check_nan.c16
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/aeabi_dcmp.S52
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/aeabi_div0.c45
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/aeabi_drsub.c19
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/aeabi_fcmp.S52
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/aeabi_frsub.c19
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/aeabi_idivmod.S51
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/aeabi_ldivmod.S46
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/aeabi_memcmp.S30
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/aeabi_memcpy.S30
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/aeabi_memmove.S29
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/aeabi_memset.S50
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/aeabi_uidivmod.S58
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/aeabi_uldivmod.S46
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/bswapdi2.S44
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/bswapsi2.S36
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/chkstk.S34
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/clzdi2.S93
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/clzsi2.S73
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/comparesf2.S296
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/divdf3vfp.S33
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/divmodsi4.S71
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/divsf3vfp.S33
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/divsi3.S82
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/eqdf2vfp.S37
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/eqsf2vfp.S37
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/extendsfdf2vfp.S33
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/fixdfsivfp.S34
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/fixsfsivfp.S34
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/fixunsdfsivfp.S35
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/fixunssfsivfp.S35
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/floatsidfvfp.S34
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/floatsisfvfp.S34
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/floatunssidfvfp.S34
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/floatunssisfvfp.S34
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/gedf2vfp.S37
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/gesf2vfp.S37
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/gtdf2vfp.S37
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/gtsf2vfp.S37
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/ledf2vfp.S37
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/lesf2vfp.S37
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/ltdf2vfp.S37
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/ltsf2vfp.S37
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/modsi3.S60
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/muldf3vfp.S33
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/mulsf3vfp.S33
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/nedf2vfp.S37
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/negdf2vfp.S30
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/negsf2vfp.S30
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/nesf2vfp.S37
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/restore_vfp_d8_d15_regs.S35
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/save_vfp_d8_d15_regs.S35
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/softfloat-alias.list21
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/subdf3vfp.S33
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/subsf3vfp.S34
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/switch16.S46
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/switch32.S46
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/switch8.S44
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/switchu8.S44
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/sync-ops.h64
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_add_4.S23
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_add_8.S26
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_and_4.S22
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_and_8.S26
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_max_4.S22
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_max_8.S24
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_min_4.S22
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_min_8.S24
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_nand_4.S22
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_nand_8.S26
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_or_4.S22
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_or_8.S26
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_sub_4.S23
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_sub_8.S26
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umax_4.S22
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umax_8.S24
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umin_4.S22
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umin_8.S24
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_xor_4.S22
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_xor_8.S26
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/sync_synchronize.S38
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/truncdfsf2vfp.S33
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/udivmodsi4.S180
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/udivsi3.S264
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/umodsi3.S158
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/unorddf2vfp.S37
-rw-r--r--contrib/compiler-rt/lib/builtins/arm/unordsf2vfp.S37
-rw-r--r--contrib/compiler-rt/lib/builtins/ashldi3.c45
-rw-r--r--contrib/compiler-rt/lib/builtins/ashlti3.c45
-rw-r--r--contrib/compiler-rt/lib/builtins/ashrdi3.c46
-rw-r--r--contrib/compiler-rt/lib/builtins/ashrti3.c46
-rw-r--r--contrib/compiler-rt/lib/builtins/assembly.h204
-rw-r--r--contrib/compiler-rt/lib/builtins/atomic.c338
-rw-r--r--contrib/compiler-rt/lib/builtins/atomic_flag_clear.c27
-rw-r--r--contrib/compiler-rt/lib/builtins/atomic_flag_clear_explicit.c28
-rw-r--r--contrib/compiler-rt/lib/builtins/atomic_flag_test_and_set.c27
-rw-r--r--contrib/compiler-rt/lib/builtins/atomic_flag_test_and_set_explicit.c28
-rw-r--r--contrib/compiler-rt/lib/builtins/atomic_signal_fence.c27
-rw-r--r--contrib/compiler-rt/lib/builtins/atomic_thread_fence.c27
-rw-r--r--contrib/compiler-rt/lib/builtins/bswapdi2.c27
-rw-r--r--contrib/compiler-rt/lib/builtins/bswapsi2.c23
-rw-r--r--contrib/compiler-rt/lib/builtins/clear_cache.c199
-rw-r--r--contrib/compiler-rt/lib/builtins/clzdi2.c40
-rw-r--r--contrib/compiler-rt/lib/builtins/clzsi2.c53
-rw-r--r--contrib/compiler-rt/lib/builtins/clzti2.c33
-rw-r--r--contrib/compiler-rt/lib/builtins/cmpdi2.c51
-rw-r--r--contrib/compiler-rt/lib/builtins/cmpti2.c42
-rw-r--r--contrib/compiler-rt/lib/builtins/comparedf2.c153
-rw-r--r--contrib/compiler-rt/lib/builtins/comparesf2.c153
-rw-r--r--contrib/compiler-rt/lib/builtins/comparetf2.c138
-rw-r--r--contrib/compiler-rt/lib/builtins/cpu_model.c651
-rw-r--r--contrib/compiler-rt/lib/builtins/ctzdi2.c40
-rw-r--r--contrib/compiler-rt/lib/builtins/ctzsi2.c57
-rw-r--r--contrib/compiler-rt/lib/builtins/ctzti2.c33
-rw-r--r--contrib/compiler-rt/lib/builtins/divdc3.c62
-rw-r--r--contrib/compiler-rt/lib/builtins/divdf3.c193
-rw-r--r--contrib/compiler-rt/lib/builtins/divdi3.c29
-rw-r--r--contrib/compiler-rt/lib/builtins/divmoddi4.c25
-rw-r--r--contrib/compiler-rt/lib/builtins/divmodsi4.c27
-rw-r--r--contrib/compiler-rt/lib/builtins/divsc3.c63
-rw-r--r--contrib/compiler-rt/lib/builtins/divsf3.c177
-rw-r--r--contrib/compiler-rt/lib/builtins/divsi3.c39
-rw-r--r--contrib/compiler-rt/lib/builtins/divtc3.c63
-rw-r--r--contrib/compiler-rt/lib/builtins/divtf3.c203
-rw-r--r--contrib/compiler-rt/lib/builtins/divti3.c33
-rw-r--r--contrib/compiler-rt/lib/builtins/divxc3.c63
-rw-r--r--contrib/compiler-rt/lib/builtins/emutls.c405
-rw-r--r--contrib/compiler-rt/lib/builtins/enable_execute_stack.c72
-rw-r--r--contrib/compiler-rt/lib/builtins/eprintf.c35
-rw-r--r--contrib/compiler-rt/lib/builtins/extenddftf2.c23
-rw-r--r--contrib/compiler-rt/lib/builtins/extendhfsf2.c33
-rw-r--r--contrib/compiler-rt/lib/builtins/extendsfdf2.c27
-rw-r--r--contrib/compiler-rt/lib/builtins/extendsftf2.c23
-rw-r--r--contrib/compiler-rt/lib/builtins/ffsdi2.c33
-rw-r--r--contrib/compiler-rt/lib/builtins/ffssi2.c29
-rw-r--r--contrib/compiler-rt/lib/builtins/ffsti2.c37
-rw-r--r--contrib/compiler-rt/lib/builtins/fixdfdi.c55
-rw-r--r--contrib/compiler-rt/lib/builtins/fixdfsi.c30
-rw-r--r--contrib/compiler-rt/lib/builtins/fixdfti.c26
-rw-r--r--contrib/compiler-rt/lib/builtins/fixsfdi.c55
-rw-r--r--contrib/compiler-rt/lib/builtins/fixsfsi.c30
-rw-r--r--contrib/compiler-rt/lib/builtins/fixsfti.c26
-rw-r--r--contrib/compiler-rt/lib/builtins/fixtfdi.c23
-rw-r--r--contrib/compiler-rt/lib/builtins/fixtfsi.c23
-rw-r--r--contrib/compiler-rt/lib/builtins/fixtfti.c23
-rw-r--r--contrib/compiler-rt/lib/builtins/fixunsdfdi.c52
-rw-r--r--contrib/compiler-rt/lib/builtins/fixunsdfsi.c29
-rw-r--r--contrib/compiler-rt/lib/builtins/fixunsdfti.c23
-rw-r--r--contrib/compiler-rt/lib/builtins/fixunssfdi.c53
-rw-r--r--contrib/compiler-rt/lib/builtins/fixunssfsi.c33
-rw-r--r--contrib/compiler-rt/lib/builtins/fixunssfti.c26
-rw-r--r--contrib/compiler-rt/lib/builtins/fixunstfdi.c22
-rw-r--r--contrib/compiler-rt/lib/builtins/fixunstfsi.c22
-rw-r--r--contrib/compiler-rt/lib/builtins/fixunstfti.c22
-rw-r--r--contrib/compiler-rt/lib/builtins/fixunsxfdi.c46
-rw-r--r--contrib/compiler-rt/lib/builtins/fixunsxfsi.c45
-rw-r--r--contrib/compiler-rt/lib/builtins/fixunsxfti.c50
-rw-r--r--contrib/compiler-rt/lib/builtins/fixxfdi.c48
-rw-r--r--contrib/compiler-rt/lib/builtins/fixxfti.c51
-rw-r--r--contrib/compiler-rt/lib/builtins/floatdidf.c115
-rw-r--r--contrib/compiler-rt/lib/builtins/floatdisf.c88
-rw-r--r--contrib/compiler-rt/lib/builtins/floatditf.c50
-rw-r--r--contrib/compiler-rt/lib/builtins/floatdixf.c46
-rw-r--r--contrib/compiler-rt/lib/builtins/floatsidf.c61
-rw-r--r--contrib/compiler-rt/lib/builtins/floatsisf.c67
-rw-r--r--contrib/compiler-rt/lib/builtins/floatsitf.c50
-rw-r--r--contrib/compiler-rt/lib/builtins/floattidf.c83
-rw-r--r--contrib/compiler-rt/lib/builtins/floattisf.c82
-rw-r--r--contrib/compiler-rt/lib/builtins/floattitf.c82
-rw-r--r--contrib/compiler-rt/lib/builtins/floattixf.c84
-rw-r--r--contrib/compiler-rt/lib/builtins/floatundidf.c114
-rw-r--r--contrib/compiler-rt/lib/builtins/floatundisf.c85
-rw-r--r--contrib/compiler-rt/lib/builtins/floatunditf.c40
-rw-r--r--contrib/compiler-rt/lib/builtins/floatundixf.c42
-rw-r--r--contrib/compiler-rt/lib/builtins/floatunsidf.c50
-rw-r--r--contrib/compiler-rt/lib/builtins/floatunsisf.c58
-rw-r--r--contrib/compiler-rt/lib/builtins/floatunsitf.c40
-rw-r--r--contrib/compiler-rt/lib/builtins/floatuntidf.c80
-rw-r--r--contrib/compiler-rt/lib/builtins/floatuntisf.c79
-rw-r--r--contrib/compiler-rt/lib/builtins/floatuntitf.c79
-rw-r--r--contrib/compiler-rt/lib/builtins/floatuntixf.c81
-rw-r--r--contrib/compiler-rt/lib/builtins/fp_add_impl.inc144
-rw-r--r--contrib/compiler-rt/lib/builtins/fp_extend.h89
-rw-r--r--contrib/compiler-rt/lib/builtins/fp_extend_impl.inc108
-rw-r--r--contrib/compiler-rt/lib/builtins/fp_fixint_impl.inc41
-rw-r--r--contrib/compiler-rt/lib/builtins/fp_fixuint_impl.inc39
-rw-r--r--contrib/compiler-rt/lib/builtins/fp_lib.h327
-rw-r--r--contrib/compiler-rt/lib/builtins/fp_mul_impl.inc116
-rw-r--r--contrib/compiler-rt/lib/builtins/fp_trunc.h76
-rw-r--r--contrib/compiler-rt/lib/builtins/fp_trunc_impl.inc135
-rw-r--r--contrib/compiler-rt/lib/builtins/gcc_personality_v0.c251
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/common_entry_exit_abi1.S103
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/common_entry_exit_abi2.S268
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/common_entry_exit_legacy.S157
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/dfaddsub.S398
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/dfdiv.S492
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/dffma.S705
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/dfminmax.S79
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/dfmul.S418
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/dfsqrt.S406
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/divdi3.S85
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/divsi3.S84
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/fabs_opt.S37
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/fastmath2_dlib_asm.S491
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/fastmath2_ldlib_asm.S345
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/fastmath_dlib_asm.S400
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/fma_opt.S31
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/fmax_opt.S30
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/fmin_opt.S30
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/memcpy_forward_vp4cp4n2.S125
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/memcpy_likely_aligned.S64
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/moddi3.S83
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/modsi3.S66
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/sfdiv_opt.S66
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/sfsqrt_opt.S82
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/udivdi3.S71
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/udivmoddi4.S71
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/udivmodsi4.S60
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/udivsi3.S56
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/umoddi3.S74
-rw-r--r--contrib/compiler-rt/lib/builtins/hexagon/umodsi3.S55
-rw-r--r--contrib/compiler-rt/lib/builtins/i386/ashldi3.S61
-rw-r--r--contrib/compiler-rt/lib/builtins/i386/ashrdi3.S72
-rw-r--r--contrib/compiler-rt/lib/builtins/i386/chkstk.S34
-rw-r--r--contrib/compiler-rt/lib/builtins/i386/chkstk2.S40
-rw-r--r--contrib/compiler-rt/lib/builtins/i386/divdi3.S165
-rw-r--r--contrib/compiler-rt/lib/builtins/i386/floatdidf.S42
-rw-r--r--contrib/compiler-rt/lib/builtins/i386/floatdisf.S35
-rw-r--r--contrib/compiler-rt/lib/builtins/i386/floatdixf.S33
-rw-r--r--contrib/compiler-rt/lib/builtins/i386/floatundidf.S55
-rw-r--r--contrib/compiler-rt/lib/builtins/i386/floatundisf.S108
-rw-r--r--contrib/compiler-rt/lib/builtins/i386/floatundixf.S46
-rw-r--r--contrib/compiler-rt/lib/builtins/i386/lshrdi3.S62
-rw-r--r--contrib/compiler-rt/lib/builtins/i386/moddi3.S169
-rw-r--r--contrib/compiler-rt/lib/builtins/i386/muldi3.S33
-rw-r--r--contrib/compiler-rt/lib/builtins/i386/udivdi3.S118
-rw-r--r--contrib/compiler-rt/lib/builtins/i386/umoddi3.S129
-rw-r--r--contrib/compiler-rt/lib/builtins/int_endianness.h116
-rw-r--r--contrib/compiler-rt/lib/builtins/int_lib.h163
-rw-r--r--contrib/compiler-rt/lib/builtins/int_math.h110
-rw-r--r--contrib/compiler-rt/lib/builtins/int_types.h189
-rw-r--r--contrib/compiler-rt/lib/builtins/int_util.c71
-rw-r--r--contrib/compiler-rt/lib/builtins/int_util.h33
-rw-r--r--contrib/compiler-rt/lib/builtins/lshrdi3.c45
-rw-r--r--contrib/compiler-rt/lib/builtins/lshrti3.c45
-rw-r--r--contrib/compiler-rt/lib/builtins/mingw_fixfloat.c36
-rw-r--r--contrib/compiler-rt/lib/builtins/moddi3.c30
-rw-r--r--contrib/compiler-rt/lib/builtins/modsi3.c23
-rw-r--r--contrib/compiler-rt/lib/builtins/modti3.c34
-rw-r--r--contrib/compiler-rt/lib/builtins/muldc3.c73
-rw-r--r--contrib/compiler-rt/lib/builtins/muldf3.c30
-rw-r--r--contrib/compiler-rt/lib/builtins/muldi3.c58
-rw-r--r--contrib/compiler-rt/lib/builtins/mulodi4.c58
-rw-r--r--contrib/compiler-rt/lib/builtins/mulosi4.c58
-rw-r--r--contrib/compiler-rt/lib/builtins/muloti4.c62
-rw-r--r--contrib/compiler-rt/lib/builtins/mulsc3.c73
-rw-r--r--contrib/compiler-rt/lib/builtins/mulsf3.c30
-rw-r--r--contrib/compiler-rt/lib/builtins/multc3.c68
-rw-r--r--contrib/compiler-rt/lib/builtins/multf3.c25
-rw-r--r--contrib/compiler-rt/lib/builtins/multi3.c58
-rw-r--r--contrib/compiler-rt/lib/builtins/mulvdi3.c56
-rw-r--r--contrib/compiler-rt/lib/builtins/mulvsi3.c56
-rw-r--r--contrib/compiler-rt/lib/builtins/mulvti3.c60
-rw-r--r--contrib/compiler-rt/lib/builtins/mulxc3.c77
-rw-r--r--contrib/compiler-rt/lib/builtins/negdf2.c30
-rw-r--r--contrib/compiler-rt/lib/builtins/negdi2.c26
-rw-r--r--contrib/compiler-rt/lib/builtins/negsf2.c30
-rw-r--r--contrib/compiler-rt/lib/builtins/negti2.c30
-rw-r--r--contrib/compiler-rt/lib/builtins/negvdi2.c28
-rw-r--r--contrib/compiler-rt/lib/builtins/negvsi2.c28
-rw-r--r--contrib/compiler-rt/lib/builtins/negvti2.c32
-rw-r--r--contrib/compiler-rt/lib/builtins/os_version_check.c226
-rw-r--r--contrib/compiler-rt/lib/builtins/paritydi2.c25
-rw-r--r--contrib/compiler-rt/lib/builtins/paritysi2.c27
-rw-r--r--contrib/compiler-rt/lib/builtins/parityti2.c29
-rw-r--r--contrib/compiler-rt/lib/builtins/popcountdi2.c36
-rw-r--r--contrib/compiler-rt/lib/builtins/popcountsi2.c33
-rw-r--r--contrib/compiler-rt/lib/builtins/popcountti2.c44
-rw-r--r--contrib/compiler-rt/lib/builtins/powidf2.c34
-rw-r--r--contrib/compiler-rt/lib/builtins/powisf2.c34
-rw-r--r--contrib/compiler-rt/lib/builtins/powitf2.c38
-rw-r--r--contrib/compiler-rt/lib/builtins/powixf2.c38
-rw-r--r--contrib/compiler-rt/lib/builtins/ppc/DD.h45
-rw-r--r--contrib/compiler-rt/lib/builtins/ppc/divtc3.c97
-rw-r--r--contrib/compiler-rt/lib/builtins/ppc/fixtfdi.c104
-rw-r--r--contrib/compiler-rt/lib/builtins/ppc/fixunstfdi.c59
-rw-r--r--contrib/compiler-rt/lib/builtins/ppc/fixunstfti.c106
-rw-r--r--contrib/compiler-rt/lib/builtins/ppc/floatditf.c36
-rw-r--r--contrib/compiler-rt/lib/builtins/ppc/floattitf.c48
-rw-r--r--contrib/compiler-rt/lib/builtins/ppc/floatunditf.c41
-rw-r--r--contrib/compiler-rt/lib/builtins/ppc/gcc_qadd.c76
-rw-r--r--contrib/compiler-rt/lib/builtins/ppc/gcc_qdiv.c55
-rw-r--r--contrib/compiler-rt/lib/builtins/ppc/gcc_qmul.c53
-rw-r--r--contrib/compiler-rt/lib/builtins/ppc/gcc_qsub.c76
-rw-r--r--contrib/compiler-rt/lib/builtins/ppc/multc3.c90
-rw-r--r--contrib/compiler-rt/lib/builtins/ppc/restFP.S46
-rw-r--r--contrib/compiler-rt/lib/builtins/ppc/saveFP.S43
-rw-r--r--contrib/compiler-rt/lib/builtins/riscv/mulsi3.S28
-rw-r--r--contrib/compiler-rt/lib/builtins/sparc64/divmod.m4248
-rw-r--r--contrib/compiler-rt/lib/builtins/sparc64/divsi3.S330
-rw-r--r--contrib/compiler-rt/lib/builtins/sparc64/generate.sh6
-rw-r--r--contrib/compiler-rt/lib/builtins/sparc64/modsi3.S330
-rw-r--r--contrib/compiler-rt/lib/builtins/sparc64/udivsi3.S1
-rw-r--r--contrib/compiler-rt/lib/builtins/sparc64/umodsi3.S1
-rw-r--r--contrib/compiler-rt/lib/builtins/subdf3.c32
-rw-r--r--contrib/compiler-rt/lib/builtins/subsf3.c32
-rw-r--r--contrib/compiler-rt/lib/builtins/subtf3.c27
-rw-r--r--contrib/compiler-rt/lib/builtins/subvdi3.c36
-rw-r--r--contrib/compiler-rt/lib/builtins/subvsi3.c36
-rw-r--r--contrib/compiler-rt/lib/builtins/subvti3.c40
-rw-r--r--contrib/compiler-rt/lib/builtins/trampoline_setup.c48
-rw-r--r--contrib/compiler-rt/lib/builtins/truncdfhf2.c26
-rw-r--r--contrib/compiler-rt/lib/builtins/truncdfsf2.c26
-rw-r--r--contrib/compiler-rt/lib/builtins/truncsfhf2.c32
-rw-r--r--contrib/compiler-rt/lib/builtins/trunctfdf2.c22
-rw-r--r--contrib/compiler-rt/lib/builtins/trunctfsf2.c22
-rw-r--r--contrib/compiler-rt/lib/builtins/ucmpdi2.c51
-rw-r--r--contrib/compiler-rt/lib/builtins/ucmpti2.c42
-rw-r--r--contrib/compiler-rt/lib/builtins/udivdi3.c23
-rw-r--r--contrib/compiler-rt/lib/builtins/udivmoddi4.c231
-rw-r--r--contrib/compiler-rt/lib/builtins/udivmodsi4.c27
-rw-r--r--contrib/compiler-rt/lib/builtins/udivmodti4.c238
-rw-r--r--contrib/compiler-rt/lib/builtins/udivsi3.c68
-rw-r--r--contrib/compiler-rt/lib/builtins/udivti3.c27
-rw-r--r--contrib/compiler-rt/lib/builtins/umoddi3.c25
-rw-r--r--contrib/compiler-rt/lib/builtins/umodsi3.c23
-rw-r--r--contrib/compiler-rt/lib/builtins/umodti3.c29
-rw-r--r--contrib/compiler-rt/lib/builtins/unwind-ehabi-helpers.h55
-rw-r--r--contrib/compiler-rt/lib/builtins/x86_64/chkstk.S39
-rw-r--r--contrib/compiler-rt/lib/builtins/x86_64/chkstk2.S42
-rw-r--r--contrib/compiler-rt/lib/builtins/x86_64/floatdidf.c16
-rw-r--r--contrib/compiler-rt/lib/builtins/x86_64/floatdisf.c14
-rw-r--r--contrib/compiler-rt/lib/builtins/x86_64/floatdixf.c16
-rw-r--r--contrib/compiler-rt/lib/builtins/x86_64/floatundidf.S52
-rw-r--r--contrib/compiler-rt/lib/builtins/x86_64/floatundisf.S38
-rw-r--r--contrib/compiler-rt/lib/builtins/x86_64/floatundixf.S71
354 files changed, 26095 insertions, 0 deletions
diff --git a/contrib/compiler-rt/lib/builtins/README.txt b/contrib/compiler-rt/lib/builtins/README.txt
new file mode 100644
index 000000000000..e603dfa05356
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/README.txt
@@ -0,0 +1,346 @@
+Compiler-RT
+================================
+
+This directory and its subdirectories contain source code for the compiler
+support routines.
+
+Compiler-RT is open source software. You may freely distribute it under the
+terms of the license agreement found in LICENSE.txt.
+
+================================
+
+This is a replacement library for libgcc. Each function is contained
+in its own file. Each function has a corresponding unit test under
+test/Unit.
+
+A rudimentary script to test each file is in the file called
+test/Unit/test.
+
+Here is the specification for this library:
+
+http://gcc.gnu.org/onlinedocs/gccint/Libgcc.html#Libgcc
+
+Here is a synopsis of the contents of this library:
+
+typedef int si_int;
+typedef unsigned su_int;
+
+typedef long long di_int;
+typedef unsigned long long du_int;
+
+// Integral bit manipulation
+
+di_int __ashldi3(di_int a, si_int b); // a << b
+ti_int __ashlti3(ti_int a, si_int b); // a << b
+
+di_int __ashrdi3(di_int a, si_int b); // a >> b arithmetic (sign fill)
+ti_int __ashrti3(ti_int a, si_int b); // a >> b arithmetic (sign fill)
+di_int __lshrdi3(di_int a, si_int b); // a >> b logical (zero fill)
+ti_int __lshrti3(ti_int a, si_int b); // a >> b logical (zero fill)
+
+si_int __clzsi2(si_int a); // count leading zeros
+si_int __clzdi2(di_int a); // count leading zeros
+si_int __clzti2(ti_int a); // count leading zeros
+si_int __ctzsi2(si_int a); // count trailing zeros
+si_int __ctzdi2(di_int a); // count trailing zeros
+si_int __ctzti2(ti_int a); // count trailing zeros
+
+si_int __ffssi2(si_int a); // find least significant 1 bit
+si_int __ffsdi2(di_int a); // find least significant 1 bit
+si_int __ffsti2(ti_int a); // find least significant 1 bit
+
+si_int __paritysi2(si_int a); // bit parity
+si_int __paritydi2(di_int a); // bit parity
+si_int __parityti2(ti_int a); // bit parity
+
+si_int __popcountsi2(si_int a); // bit population
+si_int __popcountdi2(di_int a); // bit population
+si_int __popcountti2(ti_int a); // bit population
+
+uint32_t __bswapsi2(uint32_t a); // a byteswapped
+uint64_t __bswapdi2(uint64_t a); // a byteswapped
+
+// Integral arithmetic
+
+di_int __negdi2 (di_int a); // -a
+ti_int __negti2 (ti_int a); // -a
+di_int __muldi3 (di_int a, di_int b); // a * b
+ti_int __multi3 (ti_int a, ti_int b); // a * b
+si_int __divsi3 (si_int a, si_int b); // a / b signed
+di_int __divdi3 (di_int a, di_int b); // a / b signed
+ti_int __divti3 (ti_int a, ti_int b); // a / b signed
+su_int __udivsi3 (su_int n, su_int d); // a / b unsigned
+du_int __udivdi3 (du_int a, du_int b); // a / b unsigned
+tu_int __udivti3 (tu_int a, tu_int b); // a / b unsigned
+si_int __modsi3 (si_int a, si_int b); // a % b signed
+di_int __moddi3 (di_int a, di_int b); // a % b signed
+ti_int __modti3 (ti_int a, ti_int b); // a % b signed
+su_int __umodsi3 (su_int a, su_int b); // a % b unsigned
+du_int __umoddi3 (du_int a, du_int b); // a % b unsigned
+tu_int __umodti3 (tu_int a, tu_int b); // a % b unsigned
+du_int __udivmoddi4(du_int a, du_int b, du_int* rem); // a / b, *rem = a % b unsigned
+tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem); // a / b, *rem = a % b unsigned
+su_int __udivmodsi4(su_int a, su_int b, su_int* rem); // a / b, *rem = a % b unsigned
+si_int __divmodsi4(si_int a, si_int b, si_int* rem); // a / b, *rem = a % b signed
+
+
+
+// Integral arithmetic with trapping overflow
+
+si_int __absvsi2(si_int a); // abs(a)
+di_int __absvdi2(di_int a); // abs(a)
+ti_int __absvti2(ti_int a); // abs(a)
+
+si_int __negvsi2(si_int a); // -a
+di_int __negvdi2(di_int a); // -a
+ti_int __negvti2(ti_int a); // -a
+
+si_int __addvsi3(si_int a, si_int b); // a + b
+di_int __addvdi3(di_int a, di_int b); // a + b
+ti_int __addvti3(ti_int a, ti_int b); // a + b
+
+si_int __subvsi3(si_int a, si_int b); // a - b
+di_int __subvdi3(di_int a, di_int b); // a - b
+ti_int __subvti3(ti_int a, ti_int b); // a - b
+
+si_int __mulvsi3(si_int a, si_int b); // a * b
+di_int __mulvdi3(di_int a, di_int b); // a * b
+ti_int __mulvti3(ti_int a, ti_int b); // a * b
+
+
+// Integral arithmetic which returns if overflow
+
+si_int __mulosi4(si_int a, si_int b, int* overflow); // a * b, overflow set to one if result not in signed range
+di_int __mulodi4(di_int a, di_int b, int* overflow); // a * b, overflow set to one if result not in signed range
+ti_int __muloti4(ti_int a, ti_int b, int* overflow); // a * b, overflow set to
+ one if result not in signed range
+
+
+// Integral comparison: a < b -> 0
+// a == b -> 1
+// a > b -> 2
+
+si_int __cmpdi2 (di_int a, di_int b);
+si_int __cmpti2 (ti_int a, ti_int b);
+si_int __ucmpdi2(du_int a, du_int b);
+si_int __ucmpti2(tu_int a, tu_int b);
+
+// Integral / floating point conversion
+
+di_int __fixsfdi( float a);
+di_int __fixdfdi( double a);
+di_int __fixxfdi(long double a);
+
+ti_int __fixsfti( float a);
+ti_int __fixdfti( double a);
+ti_int __fixxfti(long double a);
+uint64_t __fixtfdi(long double input); // ppc only, doesn't match documentation
+
+su_int __fixunssfsi( float a);
+su_int __fixunsdfsi( double a);
+su_int __fixunsxfsi(long double a);
+
+du_int __fixunssfdi( float a);
+du_int __fixunsdfdi( double a);
+du_int __fixunsxfdi(long double a);
+
+tu_int __fixunssfti( float a);
+tu_int __fixunsdfti( double a);
+tu_int __fixunsxfti(long double a);
+uint64_t __fixunstfdi(long double input); // ppc only
+
+float __floatdisf(di_int a);
+double __floatdidf(di_int a);
+long double __floatdixf(di_int a);
+long double __floatditf(int64_t a); // ppc only
+
+float __floattisf(ti_int a);
+double __floattidf(ti_int a);
+long double __floattixf(ti_int a);
+
+float __floatundisf(du_int a);
+double __floatundidf(du_int a);
+long double __floatundixf(du_int a);
+long double __floatunditf(uint64_t a); // ppc only
+
+float __floatuntisf(tu_int a);
+double __floatuntidf(tu_int a);
+long double __floatuntixf(tu_int a);
+
+// Floating point raised to integer power
+
+float __powisf2( float a, si_int b); // a ^ b
+double __powidf2( double a, si_int b); // a ^ b
+long double __powixf2(long double a, si_int b); // a ^ b
+long double __powitf2(long double a, si_int b); // ppc only, a ^ b
+
+// Complex arithmetic
+
+// (a + ib) * (c + id)
+
+ float _Complex __mulsc3( float a, float b, float c, float d);
+ double _Complex __muldc3(double a, double b, double c, double d);
+long double _Complex __mulxc3(long double a, long double b,
+ long double c, long double d);
+long double _Complex __multc3(long double a, long double b,
+ long double c, long double d); // ppc only
+
+// (a + ib) / (c + id)
+
+ float _Complex __divsc3( float a, float b, float c, float d);
+ double _Complex __divdc3(double a, double b, double c, double d);
+long double _Complex __divxc3(long double a, long double b,
+ long double c, long double d);
+long double _Complex __divtc3(long double a, long double b,
+ long double c, long double d); // ppc only
+
+
+// Runtime support
+
+// __clear_cache() is used to tell process that new instructions have been
+// written to an address range. Necessary on processors that do not have
+// a unified instruction and data cache.
+void __clear_cache(void* start, void* end);
+
+// __enable_execute_stack() is used with nested functions when a trampoline
+// function is written onto the stack and that page range needs to be made
+// executable.
+void __enable_execute_stack(void* addr);
+
+// __gcc_personality_v0() is normally only called by the system unwinder.
+// C code (as opposed to C++) normally does not need a personality function
+// because there are no catch clauses or destructors to be run. But there
+// is a C language extension __attribute__((cleanup(func))) which marks local
+// variables as needing the cleanup function "func" to be run when the
+// variable goes out of scope. That includes when an exception is thrown,
+// so a personality handler is needed.
+_Unwind_Reason_Code __gcc_personality_v0(int version, _Unwind_Action actions,
+ uint64_t exceptionClass, struct _Unwind_Exception* exceptionObject,
+ _Unwind_Context_t context);
+
+// for use with some implementations of assert() in <assert.h>
+void __eprintf(const char* format, const char* assertion_expression,
+ const char* line, const char* file);
+
+// for systems with emulated thread local storage
+void* __emutls_get_address(struct __emutls_control*);
+
+
+// Power PC specific functions
+
+// There is no C interface to the saveFP/restFP functions. They are helper
+// functions called by the prolog and epilog of functions that need to save
+// a number of non-volatile float point registers.
+saveFP
+restFP
+
+// PowerPC has a standard template for trampoline functions. This function
+// generates a custom trampoline function with the specific realFunc
+// and localsPtr values.
+void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated,
+ const void* realFunc, void* localsPtr);
+
+// adds two 128-bit double-double precision values ( x + y )
+long double __gcc_qadd(long double x, long double y);
+
+// subtracts two 128-bit double-double precision values ( x - y )
+long double __gcc_qsub(long double x, long double y);
+
+// multiples two 128-bit double-double precision values ( x * y )
+long double __gcc_qmul(long double x, long double y);
+
+// divides two 128-bit double-double precision values ( x / y )
+long double __gcc_qdiv(long double a, long double b);
+
+
+// ARM specific functions
+
+// There is no C interface to the switch* functions. These helper functions
+// are only needed by Thumb1 code for efficient switch table generation.
+switch16
+switch32
+switch8
+switchu8
+
+// There is no C interface to the *_vfp_d8_d15_regs functions. There are
+// called in the prolog and epilog of Thumb1 functions. When the C++ ABI use
+// SJLJ for exceptions, each function with a catch clause or destuctors needs
+// to save and restore all registers in it prolog and epliog. But there is
+// no way to access vector and high float registers from thumb1 code, so the
+// compiler must add call outs to these helper functions in the prolog and
+// epilog.
+restore_vfp_d8_d15_regs
+save_vfp_d8_d15_regs
+
+
+// Note: long ago ARM processors did not have floating point hardware support.
+// Floating point was done in software and floating point parameters were
+// passed in integer registers. When hardware support was added for floating
+// point, new *vfp functions were added to do the same operations but with
+// floating point parameters in floating point registers.
+
+// Undocumented functions
+
+float __addsf3vfp(float a, float b); // Appears to return a + b
+double __adddf3vfp(double a, double b); // Appears to return a + b
+float __divsf3vfp(float a, float b); // Appears to return a / b
+double __divdf3vfp(double a, double b); // Appears to return a / b
+int __eqsf2vfp(float a, float b); // Appears to return one
+ // iff a == b and neither is NaN.
+int __eqdf2vfp(double a, double b); // Appears to return one
+ // iff a == b and neither is NaN.
+double __extendsfdf2vfp(float a); // Appears to convert from
+ // float to double.
+int __fixdfsivfp(double a); // Appears to convert from
+ // double to int.
+int __fixsfsivfp(float a); // Appears to convert from
+ // float to int.
+unsigned int __fixunssfsivfp(float a); // Appears to convert from
+ // float to unsigned int.
+unsigned int __fixunsdfsivfp(double a); // Appears to convert from
+ // double to unsigned int.
+double __floatsidfvfp(int a); // Appears to convert from
+ // int to double.
+float __floatsisfvfp(int a); // Appears to convert from
+ // int to float.
+double __floatunssidfvfp(unsigned int a); // Appears to convert from
+ // unisgned int to double.
+float __floatunssisfvfp(unsigned int a); // Appears to convert from
+ // unisgned int to float.
+int __gedf2vfp(double a, double b); // Appears to return __gedf2
+ // (a >= b)
+int __gesf2vfp(float a, float b); // Appears to return __gesf2
+ // (a >= b)
+int __gtdf2vfp(double a, double b); // Appears to return __gtdf2
+ // (a > b)
+int __gtsf2vfp(float a, float b); // Appears to return __gtsf2
+ // (a > b)
+int __ledf2vfp(double a, double b); // Appears to return __ledf2
+ // (a <= b)
+int __lesf2vfp(float a, float b); // Appears to return __lesf2
+ // (a <= b)
+int __ltdf2vfp(double a, double b); // Appears to return __ltdf2
+ // (a < b)
+int __ltsf2vfp(float a, float b); // Appears to return __ltsf2
+ // (a < b)
+double __muldf3vfp(double a, double b); // Appears to return a * b
+float __mulsf3vfp(float a, float b); // Appears to return a * b
+int __nedf2vfp(double a, double b); // Appears to return __nedf2
+ // (a != b)
+double __negdf2vfp(double a); // Appears to return -a
+float __negsf2vfp(float a); // Appears to return -a
+float __negsf2vfp(float a); // Appears to return -a
+double __subdf3vfp(double a, double b); // Appears to return a - b
+float __subsf3vfp(float a, float b); // Appears to return a - b
+float __truncdfsf2vfp(double a); // Appears to convert from
+ // double to float.
+int __unorddf2vfp(double a, double b); // Appears to return __unorddf2
+int __unordsf2vfp(float a, float b); // Appears to return __unordsf2
+
+
+Preconditions are listed for each function at the definition when there are any.
+Any preconditions reflect the specification at
+http://gcc.gnu.org/onlinedocs/gccint/Libgcc.html#Libgcc.
+
+Assumptions are listed in "int_lib.h", and in individual files. Where possible
+assumptions are checked at compile time.
diff --git a/contrib/compiler-rt/lib/builtins/aarch64/chkstk.S b/contrib/compiler-rt/lib/builtins/aarch64/chkstk.S
new file mode 100644
index 000000000000..89ec90b08a13
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/aarch64/chkstk.S
@@ -0,0 +1,34 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// __chkstk routine
+// This routine is windows specific.
+// http://msdn.microsoft.com/en-us/library/ms648426.aspx
+
+// This clobbers registers x16 and x17.
+// Does not modify any memory or the stack pointer.
+
+// mov x15, #256 // Number of bytes of stack, in units of 16 byte
+// bl __chkstk
+// sub sp, sp, x15, lsl #4
+
+#ifdef __aarch64__
+
+#define PAGE_SIZE 4096
+
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__chkstk)
+ lsl x16, x15, #4
+ mov x17, sp
+1:
+ sub x17, x17, #PAGE_SIZE
+ subs x16, x16, #PAGE_SIZE
+ ldr xzr, [x17]
+ b.gt 1b
+
+ ret
+END_COMPILERRT_FUNCTION(__chkstk)
+
+#endif // __aarch64__
diff --git a/contrib/compiler-rt/lib/builtins/absvdi2.c b/contrib/compiler-rt/lib/builtins/absvdi2.c
new file mode 100644
index 000000000000..682c2355d2ac
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/absvdi2.c
@@ -0,0 +1,29 @@
+/*===-- absvdi2.c - Implement __absvdi2 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===
+ *
+ * This file implements __absvdi2 for the compiler_rt library.
+ *
+ *===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: absolute value */
+
+/* Effects: aborts if abs(x) < 0 */
+
+COMPILER_RT_ABI di_int
+__absvdi2(di_int a)
+{
+ const int N = (int)(sizeof(di_int) * CHAR_BIT);
+ if (a == ((di_int)1 << (N-1)))
+ compilerrt_abort();
+ const di_int t = a >> (N - 1);
+ return (a ^ t) - t;
+}
diff --git a/contrib/compiler-rt/lib/builtins/absvsi2.c b/contrib/compiler-rt/lib/builtins/absvsi2.c
new file mode 100644
index 000000000000..4812af815982
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/absvsi2.c
@@ -0,0 +1,29 @@
+/* ===-- absvsi2.c - Implement __absvsi2 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __absvsi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: absolute value */
+
+/* Effects: aborts if abs(x) < 0 */
+
+COMPILER_RT_ABI si_int
+__absvsi2(si_int a)
+{
+ const int N = (int)(sizeof(si_int) * CHAR_BIT);
+ if (a == (1 << (N-1)))
+ compilerrt_abort();
+ const si_int t = a >> (N - 1);
+ return (a ^ t) - t;
+}
diff --git a/contrib/compiler-rt/lib/builtins/absvti2.c b/contrib/compiler-rt/lib/builtins/absvti2.c
new file mode 100644
index 000000000000..7927770c9ab3
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/absvti2.c
@@ -0,0 +1,34 @@
+/* ===-- absvti2.c - Implement __absvdi2 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __absvti2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: absolute value */
+
+/* Effects: aborts if abs(x) < 0 */
+
+COMPILER_RT_ABI ti_int
+__absvti2(ti_int a)
+{
+ const int N = (int)(sizeof(ti_int) * CHAR_BIT);
+ if (a == ((ti_int)1 << (N-1)))
+ compilerrt_abort();
+ const ti_int s = a >> (N - 1);
+ return (a ^ s) - s;
+}
+
+#endif /* CRT_HAS_128BIT */
+
diff --git a/contrib/compiler-rt/lib/builtins/adddf3.c b/contrib/compiler-rt/lib/builtins/adddf3.c
new file mode 100644
index 000000000000..9a3901312e51
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/adddf3.c
@@ -0,0 +1,30 @@
+//===-- lib/adddf3.c - Double-precision addition ------------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements double-precision soft-float addition with the IEEE-754
+// default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_add_impl.inc"
+
+COMPILER_RT_ABI double __adddf3(double a, double b){
+ return __addXf3__(a, b);
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI double __aeabi_dadd(double a, double b) {
+ return __adddf3(a, b);
+}
+#else
+AEABI_RTABI double __aeabi_dadd(double a, double b) COMPILER_RT_ALIAS(__adddf3);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/addsf3.c b/contrib/compiler-rt/lib/builtins/addsf3.c
new file mode 100644
index 000000000000..c5c1a41c3611
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/addsf3.c
@@ -0,0 +1,30 @@
+//===-- lib/addsf3.c - Single-precision addition ------------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements single-precision soft-float addition with the IEEE-754
+// default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_add_impl.inc"
+
+COMPILER_RT_ABI float __addsf3(float a, float b) {
+ return __addXf3__(a, b);
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI float __aeabi_fadd(float a, float b) {
+ return __addsf3(a, b);
+}
+#else
+AEABI_RTABI float __aeabi_fadd(float a, float b) COMPILER_RT_ALIAS(__addsf3);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/addtf3.c b/contrib/compiler-rt/lib/builtins/addtf3.c
new file mode 100644
index 000000000000..e4bbe0227ae6
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/addtf3.c
@@ -0,0 +1,25 @@
+//===-- lib/addtf3.c - Quad-precision addition --------------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements quad-precision soft-float addition with the IEEE-754
+// default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+#include "fp_add_impl.inc"
+
+COMPILER_RT_ABI long double __addtf3(long double a, long double b){
+ return __addXf3__(a, b);
+}
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/addvdi3.c b/contrib/compiler-rt/lib/builtins/addvdi3.c
new file mode 100644
index 000000000000..0da38945679d
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/addvdi3.c
@@ -0,0 +1,36 @@
+/* ===-- addvdi3.c - Implement __addvdi3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __addvdi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a + b */
+
+/* Effects: aborts if a + b overflows */
+
+COMPILER_RT_ABI di_int
+__addvdi3(di_int a, di_int b)
+{
+ di_int s = (du_int) a + (du_int) b;
+ if (b >= 0)
+ {
+ if (s < a)
+ compilerrt_abort();
+ }
+ else
+ {
+ if (s >= a)
+ compilerrt_abort();
+ }
+ return s;
+}
diff --git a/contrib/compiler-rt/lib/builtins/addvsi3.c b/contrib/compiler-rt/lib/builtins/addvsi3.c
new file mode 100644
index 000000000000..94ca726f42b9
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/addvsi3.c
@@ -0,0 +1,36 @@
+/* ===-- addvsi3.c - Implement __addvsi3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __addvsi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a + b */
+
+/* Effects: aborts if a + b overflows */
+
+COMPILER_RT_ABI si_int
+__addvsi3(si_int a, si_int b)
+{
+ si_int s = (su_int) a + (su_int) b;
+ if (b >= 0)
+ {
+ if (s < a)
+ compilerrt_abort();
+ }
+ else
+ {
+ if (s >= a)
+ compilerrt_abort();
+ }
+ return s;
+}
diff --git a/contrib/compiler-rt/lib/builtins/addvti3.c b/contrib/compiler-rt/lib/builtins/addvti3.c
new file mode 100644
index 000000000000..c224de60aab0
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/addvti3.c
@@ -0,0 +1,40 @@
+/* ===-- addvti3.c - Implement __addvti3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __addvti3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: a + b */
+
+/* Effects: aborts if a + b overflows */
+
+COMPILER_RT_ABI ti_int
+__addvti3(ti_int a, ti_int b)
+{
+ ti_int s = (tu_int) a + (tu_int) b;
+ if (b >= 0)
+ {
+ if (s < a)
+ compilerrt_abort();
+ }
+ else
+ {
+ if (s >= a)
+ compilerrt_abort();
+ }
+ return s;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/apple_versioning.c b/contrib/compiler-rt/lib/builtins/apple_versioning.c
new file mode 100644
index 000000000000..3797a1ab02da
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/apple_versioning.c
@@ -0,0 +1,350 @@
+/* ===-- apple_versioning.c - Adds versioning symbols for ld ---------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+
+#if __APPLE__
+ #include <Availability.h>
+
+ #if __IPHONE_OS_VERSION_MIN_REQUIRED
+ #define NOT_HERE_BEFORE_10_6(sym)
+ #define NOT_HERE_IN_10_8_AND_EARLIER(sym) \
+ extern const char sym##_tmp61 __asm("$ld$hide$os6.1$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp61 = 0; \
+ extern const char sym##_tmp60 __asm("$ld$hide$os6.0$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp60 = 0; \
+ extern const char sym##_tmp51 __asm("$ld$hide$os5.1$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp51 = 0; \
+ extern const char sym##_tmp50 __asm("$ld$hide$os5.0$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp50 = 0;
+ #else
+ #define NOT_HERE_BEFORE_10_6(sym) \
+ extern const char sym##_tmp4 __asm("$ld$hide$os10.4$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \
+ extern const char sym##_tmp5 __asm("$ld$hide$os10.5$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp5 = 0;
+ #define NOT_HERE_IN_10_8_AND_EARLIER(sym) \
+ extern const char sym##_tmp8 __asm("$ld$hide$os10.8$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp8 = 0; \
+ extern const char sym##_tmp7 __asm("$ld$hide$os10.7$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp7 = 0; \
+ extern const char sym##_tmp6 __asm("$ld$hide$os10.6$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp6 = 0;
+ #endif
+
+
+/* Symbols in libSystem.dylib in 10.6 and later,
+ * but are in libgcc_s.dylib in earlier versions
+ */
+
+NOT_HERE_BEFORE_10_6(__absvdi2)
+NOT_HERE_BEFORE_10_6(__absvsi2)
+NOT_HERE_BEFORE_10_6(__absvti2)
+NOT_HERE_BEFORE_10_6(__addvdi3)
+NOT_HERE_BEFORE_10_6(__addvsi3)
+NOT_HERE_BEFORE_10_6(__addvti3)
+NOT_HERE_BEFORE_10_6(__ashldi3)
+NOT_HERE_BEFORE_10_6(__ashlti3)
+NOT_HERE_BEFORE_10_6(__ashrdi3)
+NOT_HERE_BEFORE_10_6(__ashrti3)
+NOT_HERE_BEFORE_10_6(__clear_cache)
+NOT_HERE_BEFORE_10_6(__clzdi2)
+NOT_HERE_BEFORE_10_6(__clzsi2)
+NOT_HERE_BEFORE_10_6(__clzti2)
+NOT_HERE_BEFORE_10_6(__cmpdi2)
+NOT_HERE_BEFORE_10_6(__cmpti2)
+NOT_HERE_BEFORE_10_6(__ctzdi2)
+NOT_HERE_BEFORE_10_6(__ctzsi2)
+NOT_HERE_BEFORE_10_6(__ctzti2)
+NOT_HERE_BEFORE_10_6(__divdc3)
+NOT_HERE_BEFORE_10_6(__divdi3)
+NOT_HERE_BEFORE_10_6(__divsc3)
+NOT_HERE_BEFORE_10_6(__divtc3)
+NOT_HERE_BEFORE_10_6(__divti3)
+NOT_HERE_BEFORE_10_6(__divxc3)
+NOT_HERE_BEFORE_10_6(__enable_execute_stack)
+NOT_HERE_BEFORE_10_6(__ffsdi2)
+NOT_HERE_BEFORE_10_6(__ffsti2)
+NOT_HERE_BEFORE_10_6(__fixdfdi)
+NOT_HERE_BEFORE_10_6(__fixdfti)
+NOT_HERE_BEFORE_10_6(__fixsfdi)
+NOT_HERE_BEFORE_10_6(__fixsfti)
+NOT_HERE_BEFORE_10_6(__fixtfdi)
+NOT_HERE_BEFORE_10_6(__fixunsdfdi)
+NOT_HERE_BEFORE_10_6(__fixunsdfsi)
+NOT_HERE_BEFORE_10_6(__fixunsdfti)
+NOT_HERE_BEFORE_10_6(__fixunssfdi)
+NOT_HERE_BEFORE_10_6(__fixunssfsi)
+NOT_HERE_BEFORE_10_6(__fixunssfti)
+NOT_HERE_BEFORE_10_6(__fixunstfdi)
+NOT_HERE_BEFORE_10_6(__fixunsxfdi)
+NOT_HERE_BEFORE_10_6(__fixunsxfsi)
+NOT_HERE_BEFORE_10_6(__fixunsxfti)
+NOT_HERE_BEFORE_10_6(__fixxfdi)
+NOT_HERE_BEFORE_10_6(__fixxfti)
+NOT_HERE_BEFORE_10_6(__floatdidf)
+NOT_HERE_BEFORE_10_6(__floatdisf)
+NOT_HERE_BEFORE_10_6(__floatditf)
+NOT_HERE_BEFORE_10_6(__floatdixf)
+NOT_HERE_BEFORE_10_6(__floattidf)
+NOT_HERE_BEFORE_10_6(__floattisf)
+NOT_HERE_BEFORE_10_6(__floattixf)
+NOT_HERE_BEFORE_10_6(__floatundidf)
+NOT_HERE_BEFORE_10_6(__floatundisf)
+NOT_HERE_BEFORE_10_6(__floatunditf)
+NOT_HERE_BEFORE_10_6(__floatundixf)
+NOT_HERE_BEFORE_10_6(__floatuntidf)
+NOT_HERE_BEFORE_10_6(__floatuntisf)
+NOT_HERE_BEFORE_10_6(__floatuntixf)
+NOT_HERE_BEFORE_10_6(__gcc_personality_v0)
+NOT_HERE_BEFORE_10_6(__lshrdi3)
+NOT_HERE_BEFORE_10_6(__lshrti3)
+NOT_HERE_BEFORE_10_6(__moddi3)
+NOT_HERE_BEFORE_10_6(__modti3)
+NOT_HERE_BEFORE_10_6(__muldc3)
+NOT_HERE_BEFORE_10_6(__muldi3)
+NOT_HERE_BEFORE_10_6(__mulsc3)
+NOT_HERE_BEFORE_10_6(__multc3)
+NOT_HERE_BEFORE_10_6(__multi3)
+NOT_HERE_BEFORE_10_6(__mulvdi3)
+NOT_HERE_BEFORE_10_6(__mulvsi3)
+NOT_HERE_BEFORE_10_6(__mulvti3)
+NOT_HERE_BEFORE_10_6(__mulxc3)
+NOT_HERE_BEFORE_10_6(__negdi2)
+NOT_HERE_BEFORE_10_6(__negti2)
+NOT_HERE_BEFORE_10_6(__negvdi2)
+NOT_HERE_BEFORE_10_6(__negvsi2)
+NOT_HERE_BEFORE_10_6(__negvti2)
+NOT_HERE_BEFORE_10_6(__paritydi2)
+NOT_HERE_BEFORE_10_6(__paritysi2)
+NOT_HERE_BEFORE_10_6(__parityti2)
+NOT_HERE_BEFORE_10_6(__popcountdi2)
+NOT_HERE_BEFORE_10_6(__popcountsi2)
+NOT_HERE_BEFORE_10_6(__popcountti2)
+NOT_HERE_BEFORE_10_6(__powidf2)
+NOT_HERE_BEFORE_10_6(__powisf2)
+NOT_HERE_BEFORE_10_6(__powitf2)
+NOT_HERE_BEFORE_10_6(__powixf2)
+NOT_HERE_BEFORE_10_6(__subvdi3)
+NOT_HERE_BEFORE_10_6(__subvsi3)
+NOT_HERE_BEFORE_10_6(__subvti3)
+NOT_HERE_BEFORE_10_6(__ucmpdi2)
+NOT_HERE_BEFORE_10_6(__ucmpti2)
+NOT_HERE_BEFORE_10_6(__udivdi3)
+NOT_HERE_BEFORE_10_6(__udivmoddi4)
+NOT_HERE_BEFORE_10_6(__udivmodti4)
+NOT_HERE_BEFORE_10_6(__udivti3)
+NOT_HERE_BEFORE_10_6(__umoddi3)
+NOT_HERE_BEFORE_10_6(__umodti3)
+
+
+#if __ppc__
+NOT_HERE_BEFORE_10_6(__gcc_qadd)
+NOT_HERE_BEFORE_10_6(__gcc_qdiv)
+NOT_HERE_BEFORE_10_6(__gcc_qmul)
+NOT_HERE_BEFORE_10_6(__gcc_qsub)
+NOT_HERE_BEFORE_10_6(__trampoline_setup)
+#endif /* __ppc__ */
+
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_1)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_2)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_4)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_8)
+
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange_1)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange_2)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange_4)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange_8)
+
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_add_1)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_add_2)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_add_4)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_add_8)
+
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_and_1)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_and_2)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_and_4)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_and_8)
+
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_or_1)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_or_2)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_or_4)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_or_8)
+
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_sub_1)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_sub_2)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_sub_4)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_sub_8)
+
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_xor_1)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_xor_2)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_xor_4)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_xor_8)
+
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load_1)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load_2)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load_4)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load_8)
+
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_1)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_2)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_4)
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_8)
+
+
+#if __arm__ && __DYNAMIC__
+ #define NOT_HERE_UNTIL_AFTER_4_3(sym) \
+ extern const char sym##_tmp1 __asm("$ld$hide$os3.0$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp1 = 0; \
+ extern const char sym##_tmp2 __asm("$ld$hide$os3.1$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp2 = 0; \
+ extern const char sym##_tmp3 __asm("$ld$hide$os3.2$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp3 = 0; \
+ extern const char sym##_tmp4 __asm("$ld$hide$os4.0$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \
+ extern const char sym##_tmp5 __asm("$ld$hide$os4.1$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp5 = 0; \
+ extern const char sym##_tmp6 __asm("$ld$hide$os4.2$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp6 = 0; \
+ extern const char sym##_tmp7 __asm("$ld$hide$os4.3$_" #sym ); \
+ __attribute__((visibility("default"))) const char sym##_tmp7 = 0;
+
+NOT_HERE_UNTIL_AFTER_4_3(__absvdi2)
+NOT_HERE_UNTIL_AFTER_4_3(__absvsi2)
+NOT_HERE_UNTIL_AFTER_4_3(__adddf3)
+NOT_HERE_UNTIL_AFTER_4_3(__adddf3vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__addsf3)
+NOT_HERE_UNTIL_AFTER_4_3(__addsf3vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__addvdi3)
+NOT_HERE_UNTIL_AFTER_4_3(__addvsi3)
+NOT_HERE_UNTIL_AFTER_4_3(__ashldi3)
+NOT_HERE_UNTIL_AFTER_4_3(__ashrdi3)
+NOT_HERE_UNTIL_AFTER_4_3(__bswapdi2)
+NOT_HERE_UNTIL_AFTER_4_3(__bswapsi2)
+NOT_HERE_UNTIL_AFTER_4_3(__clzdi2)
+NOT_HERE_UNTIL_AFTER_4_3(__clzsi2)
+NOT_HERE_UNTIL_AFTER_4_3(__cmpdi2)
+NOT_HERE_UNTIL_AFTER_4_3(__ctzdi2)
+NOT_HERE_UNTIL_AFTER_4_3(__ctzsi2)
+NOT_HERE_UNTIL_AFTER_4_3(__divdc3)
+NOT_HERE_UNTIL_AFTER_4_3(__divdf3)
+NOT_HERE_UNTIL_AFTER_4_3(__divdf3vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__divdi3)
+NOT_HERE_UNTIL_AFTER_4_3(__divsc3)
+NOT_HERE_UNTIL_AFTER_4_3(__divsf3)
+NOT_HERE_UNTIL_AFTER_4_3(__divsf3vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__divsi3)
+NOT_HERE_UNTIL_AFTER_4_3(__eqdf2)
+NOT_HERE_UNTIL_AFTER_4_3(__eqdf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__eqsf2)
+NOT_HERE_UNTIL_AFTER_4_3(__eqsf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__extendsfdf2)
+NOT_HERE_UNTIL_AFTER_4_3(__extendsfdf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__ffsdi2)
+NOT_HERE_UNTIL_AFTER_4_3(__fixdfdi)
+NOT_HERE_UNTIL_AFTER_4_3(__fixdfsi)
+NOT_HERE_UNTIL_AFTER_4_3(__fixdfsivfp)
+NOT_HERE_UNTIL_AFTER_4_3(__fixsfdi)
+NOT_HERE_UNTIL_AFTER_4_3(__fixsfsi)
+NOT_HERE_UNTIL_AFTER_4_3(__fixsfsivfp)
+NOT_HERE_UNTIL_AFTER_4_3(__fixunsdfdi)
+NOT_HERE_UNTIL_AFTER_4_3(__fixunsdfsi)
+NOT_HERE_UNTIL_AFTER_4_3(__fixunsdfsivfp)
+NOT_HERE_UNTIL_AFTER_4_3(__fixunssfdi)
+NOT_HERE_UNTIL_AFTER_4_3(__fixunssfsi)
+NOT_HERE_UNTIL_AFTER_4_3(__fixunssfsivfp)
+NOT_HERE_UNTIL_AFTER_4_3(__floatdidf)
+NOT_HERE_UNTIL_AFTER_4_3(__floatdisf)
+NOT_HERE_UNTIL_AFTER_4_3(__floatsidf)
+NOT_HERE_UNTIL_AFTER_4_3(__floatsidfvfp)
+NOT_HERE_UNTIL_AFTER_4_3(__floatsisf)
+NOT_HERE_UNTIL_AFTER_4_3(__floatsisfvfp)
+NOT_HERE_UNTIL_AFTER_4_3(__floatundidf)
+NOT_HERE_UNTIL_AFTER_4_3(__floatundisf)
+NOT_HERE_UNTIL_AFTER_4_3(__floatunsidf)
+NOT_HERE_UNTIL_AFTER_4_3(__floatunsisf)
+NOT_HERE_UNTIL_AFTER_4_3(__floatunssidfvfp)
+NOT_HERE_UNTIL_AFTER_4_3(__floatunssisfvfp)
+NOT_HERE_UNTIL_AFTER_4_3(__gedf2)
+NOT_HERE_UNTIL_AFTER_4_3(__gedf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__gesf2)
+NOT_HERE_UNTIL_AFTER_4_3(__gesf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__gtdf2)
+NOT_HERE_UNTIL_AFTER_4_3(__gtdf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__gtsf2)
+NOT_HERE_UNTIL_AFTER_4_3(__gtsf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__ledf2)
+NOT_HERE_UNTIL_AFTER_4_3(__ledf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__lesf2)
+NOT_HERE_UNTIL_AFTER_4_3(__lesf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__lshrdi3)
+NOT_HERE_UNTIL_AFTER_4_3(__ltdf2)
+NOT_HERE_UNTIL_AFTER_4_3(__ltdf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__ltsf2)
+NOT_HERE_UNTIL_AFTER_4_3(__ltsf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__moddi3)
+NOT_HERE_UNTIL_AFTER_4_3(__modsi3)
+NOT_HERE_UNTIL_AFTER_4_3(__muldc3)
+NOT_HERE_UNTIL_AFTER_4_3(__muldf3)
+NOT_HERE_UNTIL_AFTER_4_3(__muldf3vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__muldi3)
+NOT_HERE_UNTIL_AFTER_4_3(__mulsc3)
+NOT_HERE_UNTIL_AFTER_4_3(__mulsf3)
+NOT_HERE_UNTIL_AFTER_4_3(__mulsf3vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__mulvdi3)
+NOT_HERE_UNTIL_AFTER_4_3(__mulvsi3)
+NOT_HERE_UNTIL_AFTER_4_3(__nedf2)
+NOT_HERE_UNTIL_AFTER_4_3(__nedf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__negdi2)
+NOT_HERE_UNTIL_AFTER_4_3(__negvdi2)
+NOT_HERE_UNTIL_AFTER_4_3(__negvsi2)
+NOT_HERE_UNTIL_AFTER_4_3(__nesf2)
+NOT_HERE_UNTIL_AFTER_4_3(__nesf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__paritydi2)
+NOT_HERE_UNTIL_AFTER_4_3(__paritysi2)
+NOT_HERE_UNTIL_AFTER_4_3(__popcountdi2)
+NOT_HERE_UNTIL_AFTER_4_3(__popcountsi2)
+NOT_HERE_UNTIL_AFTER_4_3(__powidf2)
+NOT_HERE_UNTIL_AFTER_4_3(__powisf2)
+NOT_HERE_UNTIL_AFTER_4_3(__subdf3)
+NOT_HERE_UNTIL_AFTER_4_3(__subdf3vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__subsf3)
+NOT_HERE_UNTIL_AFTER_4_3(__subsf3vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__subvdi3)
+NOT_HERE_UNTIL_AFTER_4_3(__subvsi3)
+NOT_HERE_UNTIL_AFTER_4_3(__truncdfsf2)
+NOT_HERE_UNTIL_AFTER_4_3(__truncdfsf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__ucmpdi2)
+NOT_HERE_UNTIL_AFTER_4_3(__udivdi3)
+NOT_HERE_UNTIL_AFTER_4_3(__udivmoddi4)
+NOT_HERE_UNTIL_AFTER_4_3(__udivsi3)
+NOT_HERE_UNTIL_AFTER_4_3(__umoddi3)
+NOT_HERE_UNTIL_AFTER_4_3(__umodsi3)
+NOT_HERE_UNTIL_AFTER_4_3(__unorddf2)
+NOT_HERE_UNTIL_AFTER_4_3(__unorddf2vfp)
+NOT_HERE_UNTIL_AFTER_4_3(__unordsf2)
+NOT_HERE_UNTIL_AFTER_4_3(__unordsf2vfp)
+
+NOT_HERE_UNTIL_AFTER_4_3(__divmodsi4)
+NOT_HERE_UNTIL_AFTER_4_3(__udivmodsi4)
+#endif // __arm__ && __DYNAMIC__
+
+
+
+
+
+#else /* !__APPLE__ */
+
+extern int avoid_empty_file;
+
+#endif /* !__APPLE__*/
diff --git a/contrib/compiler-rt/lib/builtins/arm/adddf3vfp.S b/contrib/compiler-rt/lib/builtins/arm/adddf3vfp.S
new file mode 100644
index 000000000000..8e476cad1624
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/adddf3vfp.S
@@ -0,0 +1,33 @@
+//===-- adddf3vfp.S - Implement adddf3vfp ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// double __adddf3vfp(double a, double b) { return a + b; }
+//
+// Adds two double precision floating point numbers using the Darwin
+// calling convention where double arguments are passsed in GPR pairs
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__adddf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vadd.f64 d0, d0, d1
+#else
+ vmov d6, r0, r1 // move first param from r0/r1 pair into d6
+ vmov d7, r2, r3 // move second param from r2/r3 pair into d7
+ vadd.f64 d6, d6, d7
+ vmov r0, r1, d6 // move result back to r0/r1 pair
+#endif
+ bx lr
+END_COMPILERRT_FUNCTION(__adddf3vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/addsf3.S b/contrib/compiler-rt/lib/builtins/arm/addsf3.S
new file mode 100644
index 000000000000..74723cbeff74
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/addsf3.S
@@ -0,0 +1,277 @@
+/*===-- addsf3.S - Adds two single precision floating pointer numbers-----===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __addsf3 (single precision floating pointer number
+ * addition with the IEEE-754 default rounding (to nearest, ties to even)
+ * function for the ARM Thumb1 ISA.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+#define significandBits 23
+#define typeWidth 32
+
+ .syntax unified
+ .text
+ .thumb
+ .p2align 2
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fadd, __addsf3)
+
+DEFINE_COMPILERRT_THUMB_FUNCTION(__addsf3)
+ push {r4, r5, r6, r7, lr}
+ // Get the absolute value of a and b.
+ lsls r2, r0, #1
+ lsls r3, r1, #1
+ lsrs r2, r2, #1 /* aAbs */
+ beq LOCAL_LABEL(a_zero_nan_inf)
+ lsrs r3, r3, #1 /* bAbs */
+ beq LOCAL_LABEL(zero_nan_inf)
+
+ // Detect if a or b is infinity or Nan.
+ lsrs r6, r2, #(significandBits)
+ lsrs r7, r3, #(significandBits)
+ cmp r6, #0xFF
+ beq LOCAL_LABEL(zero_nan_inf)
+ cmp r7, #0xFF
+ beq LOCAL_LABEL(zero_nan_inf)
+
+ // Swap Rep and Abs so that a and aAbs has the larger absolute value.
+ cmp r2, r3
+ bhs LOCAL_LABEL(no_swap)
+ movs r4, r0
+ movs r5, r2
+ movs r0, r1
+ movs r2, r3
+ movs r1, r4
+ movs r3, r5
+LOCAL_LABEL(no_swap):
+
+ // Get the significands and shift them to give us round, guard and sticky.
+ lsls r4, r0, #(typeWidth - significandBits)
+ lsrs r4, r4, #(typeWidth - significandBits - 3) /* aSignificand << 3 */
+ lsls r5, r1, #(typeWidth - significandBits)
+ lsrs r5, r5, #(typeWidth - significandBits - 3) /* bSignificand << 3 */
+
+ // Get the implicitBit.
+ movs r6, #1
+ lsls r6, r6, #(significandBits + 3)
+
+ // Get aExponent and set implicit bit if necessary.
+ lsrs r2, r2, #(significandBits)
+ beq LOCAL_LABEL(a_done_implicit_bit)
+ orrs r4, r6
+LOCAL_LABEL(a_done_implicit_bit):
+
+ // Get bExponent and set implicit bit if necessary.
+ lsrs r3, r3, #(significandBits)
+ beq LOCAL_LABEL(b_done_implicit_bit)
+ orrs r5, r6
+LOCAL_LABEL(b_done_implicit_bit):
+
+ // Get the difference in exponents.
+ subs r6, r2, r3
+ beq LOCAL_LABEL(done_align)
+
+ // If b is denormal, then a must be normal as align > 0, and we only need to
+ // right shift bSignificand by (align - 1) bits.
+ cmp r3, #0
+ bne 1f
+ subs r6, r6, #1
+1:
+
+ // No longer needs bExponent. r3 is dead here.
+ // Set sticky bits of b: sticky = bSignificand << (typeWidth - align).
+ movs r3, #(typeWidth)
+ subs r3, r3, r6
+ movs r7, r5
+ lsls r7, r3
+ beq 1f
+ movs r7, #1
+1:
+
+ // bSignificand = bSignificand >> align | sticky;
+ lsrs r5, r6
+ orrs r5, r7
+ bne LOCAL_LABEL(done_align)
+ movs r5, #1 // sticky; b is known to be non-zero.
+
+LOCAL_LABEL(done_align):
+ // isSubtraction = (aRep ^ bRep) >> 31;
+ movs r7, r0
+ eors r7, r1
+ lsrs r7, #31
+ bne LOCAL_LABEL(do_substraction)
+
+ // Same sign, do Addition.
+
+ // aSignificand += bSignificand;
+ adds r4, r4, r5
+
+ // Check carry bit.
+ movs r6, #1
+ lsls r6, r6, #(significandBits + 3 + 1)
+ movs r7, r4
+ ands r7, r6
+ beq LOCAL_LABEL(form_result)
+ // If the addition carried up, we need to right-shift the result and
+ // adjust the exponent.
+ movs r7, r4
+ movs r6, #1
+ ands r7, r6 // sticky = aSignificand & 1;
+ lsrs r4, #1
+ orrs r4, r7 // result Significand
+ adds r2, #1 // result Exponent
+ // If we have overflowed the type, return +/- infinity.
+ cmp r2, 0xFF
+ beq LOCAL_LABEL(ret_inf)
+
+LOCAL_LABEL(form_result):
+ // Shift the sign, exponent and significand into place.
+ lsrs r0, #(typeWidth - 1)
+ lsls r0, #(typeWidth - 1) // Get Sign.
+ lsls r2, #(significandBits)
+ orrs r0, r2
+ movs r1, r4
+ lsls r4, #(typeWidth - significandBits - 3)
+ lsrs r4, #(typeWidth - significandBits)
+ orrs r0, r4
+
+ // Final rounding. The result may overflow to infinity, but that is the
+ // correct result in that case.
+ // roundGuardSticky = aSignificand & 0x7;
+ movs r2, #0x7
+ ands r1, r2
+ // if (roundGuardSticky > 0x4) result++;
+
+ cmp r1, #0x4
+ blt LOCAL_LABEL(done_round)
+ beq 1f
+ adds r0, #1
+ pop {r4, r5, r6, r7, pc}
+1:
+
+ // if (roundGuardSticky == 0x4) result += result & 1;
+ movs r1, r0
+ lsrs r1, #1
+ bcc LOCAL_LABEL(done_round)
+ adds r0, r0, #1
+LOCAL_LABEL(done_round):
+ pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(do_substraction):
+ subs r4, r4, r5 // aSignificand -= bSignificand;
+ beq LOCAL_LABEL(ret_zero)
+ movs r6, r4
+ cmp r2, 0
+ beq LOCAL_LABEL(form_result) // if a's exp is 0, no need to normalize.
+ // If partial cancellation occured, we need to left-shift the result
+ // and adjust the exponent:
+ lsrs r6, r6, #(significandBits + 3)
+ bne LOCAL_LABEL(form_result)
+
+ push {r0, r1, r2, r3}
+ movs r0, r4
+ bl SYMBOL_NAME(__clzsi2)
+ movs r5, r0
+ pop {r0, r1, r2, r3}
+ // shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3);
+ subs r5, r5, #(typeWidth - significandBits - 3 - 1)
+ // aSignificand <<= shift; aExponent -= shift;
+ lsls r4, r5
+ subs r2, r2, r5
+ bgt LOCAL_LABEL(form_result)
+
+ // Do normalization if aExponent <= 0.
+ movs r6, #1
+ subs r6, r6, r2 // 1 - aExponent;
+ movs r2, #0 // aExponent = 0;
+ movs r3, #(typeWidth) // bExponent is dead.
+ subs r3, r3, r6
+ movs r7, r4
+ lsls r7, r3 // stickyBit = (bool)(aSignificant << (typeWidth - align))
+ beq 1f
+ movs r7, #1
+1:
+ lsrs r4, r6 /* aSignificand >> shift */
+ orrs r4, r7
+ b LOCAL_LABEL(form_result)
+
+LOCAL_LABEL(ret_zero):
+ movs r0, #0
+ pop {r4, r5, r6, r7, pc}
+
+
+LOCAL_LABEL(a_zero_nan_inf):
+ lsrs r3, r3, #1
+
+LOCAL_LABEL(zero_nan_inf):
+ // Here r2 has aAbs, r3 has bAbs
+ movs r4, #0xFF
+ lsls r4, r4, #(significandBits) // Make +inf.
+
+ cmp r2, r4
+ bhi LOCAL_LABEL(a_is_nan)
+ cmp r3, r4
+ bhi LOCAL_LABEL(b_is_nan)
+
+ cmp r2, r4
+ bne LOCAL_LABEL(a_is_rational)
+ // aAbs is INF.
+ eors r1, r0 // aRep ^ bRep.
+ movs r6, #1
+ lsls r6, r6, #(typeWidth - 1) // get sign mask.
+ cmp r1, r6 // if they only differ on sign bit, it's -INF + INF
+ beq LOCAL_LABEL(a_is_nan)
+ pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(a_is_rational):
+ cmp r3, r4
+ bne LOCAL_LABEL(b_is_rational)
+ movs r0, r1
+ pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(b_is_rational):
+ // either a or b or both are zero.
+ adds r4, r2, r3
+ beq LOCAL_LABEL(both_zero)
+ cmp r2, #0 // is absA 0 ?
+ beq LOCAL_LABEL(ret_b)
+ pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(both_zero):
+ ands r0, r1 // +0 + -0 = +0
+ pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(ret_b):
+ movs r0, r1
+
+LOCAL_LABEL(ret):
+ pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(b_is_nan):
+ movs r0, r1
+LOCAL_LABEL(a_is_nan):
+ movs r1, #1
+ lsls r1, r1, #(significandBits -1) // r1 is quiet bit.
+ orrs r0, r1
+ pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(ret_inf):
+ movs r4, #0xFF
+ lsls r4, r4, #(significandBits)
+ orrs r0, r4
+ lsrs r0, r0, #(significandBits)
+ lsls r0, r0, #(significandBits)
+ pop {r4, r5, r6, r7, pc}
+
+
+END_COMPILERRT_FUNCTION(__addsf3)
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/contrib/compiler-rt/lib/builtins/arm/addsf3vfp.S b/contrib/compiler-rt/lib/builtins/arm/addsf3vfp.S
new file mode 100644
index 000000000000..8871efdcc5d1
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/addsf3vfp.S
@@ -0,0 +1,33 @@
+//===-- addsf3vfp.S - Implement addsf3vfp ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern float __addsf3vfp(float a, float b);
+//
+// Adds two single precision floating point numbers using the Darwin
+// calling convention where single arguments are passsed in GPRs
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__addsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vadd.f32 s0, s0, s1
+#else
+ vmov s14, r0 // move first param from r0 into float register
+ vmov s15, r1 // move second param from r1 into float register
+ vadd.f32 s14, s14, s15
+ vmov r0, s14 // move result back to r0
+#endif
+ bx lr
+END_COMPILERRT_FUNCTION(__addsf3vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmp.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmp.S
new file mode 100644
index 000000000000..adc2d55d90f5
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmp.S
@@ -0,0 +1,145 @@
+//===-- aeabi_cdcmp.S - EABI cdcmp* implementation ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+#error big endian support not implemented
+#endif
+
+#define APSR_Z (1 << 30)
+#define APSR_C (1 << 29)
+
+// void __aeabi_cdcmpeq(double a, double b) {
+// if (isnan(a) || isnan(b)) {
+// Z = 0; C = 1;
+// } else {
+// __aeabi_cdcmple(a, b);
+// }
+// }
+
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmpeq)
+ push {r0-r3, lr}
+ bl __aeabi_cdcmpeq_check_nan
+ cmp r0, #1
+#if defined(USE_THUMB_1)
+ beq 1f
+ // NaN has been ruled out, so __aeabi_cdcmple can't trap
+ mov r0, sp
+ ldm r0, {r0-r3}
+ bl __aeabi_cdcmple
+ pop {r0-r3, pc}
+1:
+ // Z = 0, C = 1
+ movs r0, #0xF
+ lsls r0, r0, #31
+ pop {r0-r3, pc}
+#else
+ pop {r0-r3, lr}
+
+ // NaN has been ruled out, so __aeabi_cdcmple can't trap
+ // Use "it ne" + unconditional branch to guarantee a supported relocation if
+ // __aeabi_cdcmple is in a different section for some builds.
+ IT(ne)
+ bne __aeabi_cdcmple
+
+#if defined(USE_THUMB_2)
+ mov ip, #APSR_C
+ msr APSR_nzcvq, ip
+#else
+ msr APSR_nzcvq, #APSR_C
+#endif
+ JMP(lr)
+#endif
+END_COMPILERRT_FUNCTION(__aeabi_cdcmpeq)
+
+
+// void __aeabi_cdcmple(double a, double b) {
+// if (__aeabi_dcmplt(a, b)) {
+// Z = 0; C = 0;
+// } else if (__aeabi_dcmpeq(a, b)) {
+// Z = 1; C = 1;
+// } else {
+// Z = 0; C = 1;
+// }
+// }
+
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmple)
+ // Per the RTABI, this function must preserve r0-r11.
+ // Save lr in the same instruction for compactness
+ push {r0-r3, lr}
+
+ bl __aeabi_dcmplt
+ cmp r0, #1
+#if defined(USE_THUMB_1)
+ bne 1f
+ // Z = 0, C = 0
+ movs r0, #1
+ lsls r0, r0, #1
+ pop {r0-r3, pc}
+1:
+ mov r0, sp
+ ldm r0, {r0-r3}
+ bl __aeabi_dcmpeq
+ cmp r0, #1
+ bne 2f
+ // Z = 1, C = 1
+ movs r0, #2
+ lsls r0, r0, #31
+ pop {r0-r3, pc}
+2:
+ // Z = 0, C = 1
+ movs r0, #0xF
+ lsls r0, r0, #31
+ pop {r0-r3, pc}
+#else
+ ITT(eq)
+ moveq ip, #0
+ beq 1f
+
+ ldm sp, {r0-r3}
+ bl __aeabi_dcmpeq
+ cmp r0, #1
+ ITE(eq)
+ moveq ip, #(APSR_C | APSR_Z)
+ movne ip, #(APSR_C)
+
+1:
+ msr APSR_nzcvq, ip
+ pop {r0-r3}
+ POP_PC()
+#endif
+END_COMPILERRT_FUNCTION(__aeabi_cdcmple)
+
+// int __aeabi_cdrcmple(double a, double b) {
+// return __aeabi_cdcmple(b, a);
+// }
+
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cdrcmple)
+ // Swap r0 and r2
+ mov ip, r0
+ mov r0, r2
+ mov r2, ip
+
+ // Swap r1 and r3
+ mov ip, r1
+ mov r1, r3
+ mov r3, ip
+
+ b __aeabi_cdcmple
+END_COMPILERRT_FUNCTION(__aeabi_cdrcmple)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmpeq_check_nan.c b/contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmpeq_check_nan.c
new file mode 100644
index 000000000000..7578433a1df7
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmpeq_check_nan.c
@@ -0,0 +1,16 @@
+//===-- lib/arm/aeabi_cdcmpeq_helper.c - Helper for cdcmpeq ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdint.h>
+#include "../int_lib.h"
+
+AEABI_RTABI __attribute__((visibility("hidden")))
+int __aeabi_cdcmpeq_check_nan(double a, double b) {
+ return __builtin_isnan(a) || __builtin_isnan(b);
+}
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmp.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmp.S
new file mode 100644
index 000000000000..4b1de997687f
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmp.S
@@ -0,0 +1,140 @@
+//===-- aeabi_cfcmp.S - EABI cfcmp* implementation ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+#error big endian support not implemented
+#endif
+
+#define APSR_Z (1 << 30)
+#define APSR_C (1 << 29)
+
+// void __aeabi_cfcmpeq(float a, float b) {
+// if (isnan(a) || isnan(b)) {
+// Z = 0; C = 1;
+// } else {
+// __aeabi_cfcmple(a, b);
+// }
+// }
+
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmpeq)
+ push {r0-r3, lr}
+ bl __aeabi_cfcmpeq_check_nan
+ cmp r0, #1
+#if defined(USE_THUMB_1)
+ beq 1f
+ // NaN has been ruled out, so __aeabi_cfcmple can't trap
+ mov r0, sp
+ ldm r0, {r0-r3}
+ bl __aeabi_cfcmple
+ pop {r0-r3, pc}
+1:
+ // Z = 0, C = 1
+ movs r0, #0xF
+ lsls r0, r0, #31
+ pop {r0-r3, pc}
+#else
+ pop {r0-r3, lr}
+
+ // NaN has been ruled out, so __aeabi_cfcmple can't trap
+ // Use "it ne" + unconditional branch to guarantee a supported relocation if
+ // __aeabi_cfcmple is in a different section for some builds.
+ IT(ne)
+ bne __aeabi_cfcmple
+
+#if defined(USE_THUMB_2)
+ mov ip, #APSR_C
+ msr APSR_nzcvq, ip
+#else
+ msr APSR_nzcvq, #APSR_C
+#endif
+ JMP(lr)
+#endif
+END_COMPILERRT_FUNCTION(__aeabi_cfcmpeq)
+
+
+// void __aeabi_cfcmple(float a, float b) {
+// if (__aeabi_fcmplt(a, b)) {
+// Z = 0; C = 0;
+// } else if (__aeabi_fcmpeq(a, b)) {
+// Z = 1; C = 1;
+// } else {
+// Z = 0; C = 1;
+// }
+// }
+
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmple)
+ // Per the RTABI, this function must preserve r0-r11.
+ // Save lr in the same instruction for compactness
+ push {r0-r3, lr}
+
+ bl __aeabi_fcmplt
+ cmp r0, #1
+#if defined(USE_THUMB_1)
+ bne 1f
+ // Z = 0, C = 0
+ movs r0, #1
+ lsls r0, r0, #1
+ pop {r0-r3, pc}
+1:
+ mov r0, sp
+ ldm r0, {r0-r3}
+ bl __aeabi_fcmpeq
+ cmp r0, #1
+ bne 2f
+ // Z = 1, C = 1
+ movs r0, #2
+ lsls r0, r0, #31
+ pop {r0-r3, pc}
+2:
+ // Z = 0, C = 1
+ movs r0, #0xF
+ lsls r0, r0, #31
+ pop {r0-r3, pc}
+#else
+ ITT(eq)
+ moveq ip, #0
+ beq 1f
+
+ ldm sp, {r0-r3}
+ bl __aeabi_fcmpeq
+ cmp r0, #1
+ ITE(eq)
+ moveq ip, #(APSR_C | APSR_Z)
+ movne ip, #(APSR_C)
+
+1:
+ msr APSR_nzcvq, ip
+ pop {r0-r3}
+ POP_PC()
+#endif
+END_COMPILERRT_FUNCTION(__aeabi_cfcmple)
+
+// int __aeabi_cfrcmple(float a, float b) {
+// return __aeabi_cfcmple(b, a);
+// }
+
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cfrcmple)
+ // Swap r0 and r1
+ mov ip, r0
+ mov r0, r1
+ mov r1, ip
+
+ b __aeabi_cfcmple
+END_COMPILERRT_FUNCTION(__aeabi_cfrcmple)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmpeq_check_nan.c b/contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmpeq_check_nan.c
new file mode 100644
index 000000000000..43dde9a49597
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmpeq_check_nan.c
@@ -0,0 +1,16 @@
+//===-- lib/arm/aeabi_cfcmpeq_helper.c - Helper for cdcmpeq ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdint.h>
+#include "../int_lib.h"
+
+AEABI_RTABI __attribute__((visibility("hidden")))
+int __aeabi_cfcmpeq_check_nan(float a, float b) {
+ return __builtin_isnan(a) || __builtin_isnan(b);
+}
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_dcmp.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_dcmp.S
new file mode 100644
index 000000000000..9fa78b461248
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_dcmp.S
@@ -0,0 +1,52 @@
+//===-- aeabi_dcmp.S - EABI dcmp* implementation ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// int __aeabi_dcmp{eq,lt,le,ge,gt}(double a, double b) {
+// int result = __{eq,lt,le,ge,gt}df2(a, b);
+// if (result {==,<,<=,>=,>} 0) {
+// return 1;
+// } else {
+// return 0;
+// }
+// }
+
+#if defined(COMPILER_RT_ARMHF_TARGET)
+# define CONVERT_DCMP_ARGS_TO_DF2_ARGS \
+ vmov d0, r0, r1 SEPARATOR \
+ vmov d1, r2, r3
+#else
+# define CONVERT_DCMP_ARGS_TO_DF2_ARGS
+#endif
+
+#define DEFINE_AEABI_DCMP(cond) \
+ .syntax unified SEPARATOR \
+ .p2align 2 SEPARATOR \
+DEFINE_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond) \
+ push { r4, lr } SEPARATOR \
+ CONVERT_DCMP_ARGS_TO_DF2_ARGS SEPARATOR \
+ bl SYMBOL_NAME(__ ## cond ## df2) SEPARATOR \
+ cmp r0, #0 SEPARATOR \
+ b ## cond 1f SEPARATOR \
+ movs r0, #0 SEPARATOR \
+ pop { r4, pc } SEPARATOR \
+1: SEPARATOR \
+ movs r0, #1 SEPARATOR \
+ pop { r4, pc } SEPARATOR \
+END_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond)
+
+DEFINE_AEABI_DCMP(eq)
+DEFINE_AEABI_DCMP(lt)
+DEFINE_AEABI_DCMP(le)
+DEFINE_AEABI_DCMP(ge)
+DEFINE_AEABI_DCMP(gt)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_div0.c b/contrib/compiler-rt/lib/builtins/arm/aeabi_div0.c
new file mode 100644
index 000000000000..dc3031326e37
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_div0.c
@@ -0,0 +1,45 @@
+/* ===-- aeabi_div0.c - ARM Runtime ABI support routines for compiler-rt ---===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements the division by zero helper routines as specified by the
+ * Run-time ABI for the ARM Architecture.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+/*
+ * RTABI 4.3.2 - Division by zero
+ *
+ * The *div0 functions:
+ * - Return the value passed to them as a parameter
+ * - Or, return a fixed value defined by the execution environment (such as 0)
+ * - Or, raise a signal (often SIGFPE) or throw an exception, and do not return
+ *
+ * An application may provide its own implementations of the *div0 functions to
+ * for a particular behaviour from the *div and *divmod functions called out of
+ * line.
+ */
+
+#include "../int_lib.h"
+
+/* provide an unused declaration to pacify pendantic compilation */
+extern unsigned char declaration;
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI int __attribute__((weak)) __attribute__((visibility("hidden")))
+__aeabi_idiv0(int return_value) {
+ return return_value;
+}
+
+AEABI_RTABI long long __attribute__((weak)) __attribute__((visibility("hidden")))
+__aeabi_ldiv0(long long return_value) {
+ return return_value;
+}
+#endif
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_drsub.c b/contrib/compiler-rt/lib/builtins/arm/aeabi_drsub.c
new file mode 100644
index 000000000000..1254886086fb
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_drsub.c
@@ -0,0 +1,19 @@
+//===-- lib/arm/aeabi_drsub.c - Double-precision subtraction --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "../fp_lib.h"
+
+AEABI_RTABI fp_t
+__aeabi_dsub(fp_t, fp_t);
+
+AEABI_RTABI fp_t
+__aeabi_drsub(fp_t a, fp_t b) {
+ return __aeabi_dsub(b, a);
+}
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_fcmp.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_fcmp.S
new file mode 100644
index 000000000000..ea5b96c21d57
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_fcmp.S
@@ -0,0 +1,52 @@
+//===-- aeabi_fcmp.S - EABI fcmp* implementation ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// int __aeabi_fcmp{eq,lt,le,ge,gt}(float a, float b) {
+// int result = __{eq,lt,le,ge,gt}sf2(a, b);
+// if (result {==,<,<=,>=,>} 0) {
+// return 1;
+// } else {
+// return 0;
+// }
+// }
+
+#if defined(COMPILER_RT_ARMHF_TARGET)
+# define CONVERT_FCMP_ARGS_TO_SF2_ARGS \
+ vmov s0, r0 SEPARATOR \
+ vmov s1, r1
+#else
+# define CONVERT_FCMP_ARGS_TO_SF2_ARGS
+#endif
+
+#define DEFINE_AEABI_FCMP(cond) \
+ .syntax unified SEPARATOR \
+ .p2align 2 SEPARATOR \
+DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond) \
+ push { r4, lr } SEPARATOR \
+ CONVERT_FCMP_ARGS_TO_SF2_ARGS SEPARATOR \
+ bl SYMBOL_NAME(__ ## cond ## sf2) SEPARATOR \
+ cmp r0, #0 SEPARATOR \
+ b ## cond 1f SEPARATOR \
+ movs r0, #0 SEPARATOR \
+ pop { r4, pc } SEPARATOR \
+1: SEPARATOR \
+ movs r0, #1 SEPARATOR \
+ pop { r4, pc } SEPARATOR \
+END_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond)
+
+DEFINE_AEABI_FCMP(eq)
+DEFINE_AEABI_FCMP(lt)
+DEFINE_AEABI_FCMP(le)
+DEFINE_AEABI_FCMP(ge)
+DEFINE_AEABI_FCMP(gt)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_frsub.c b/contrib/compiler-rt/lib/builtins/arm/aeabi_frsub.c
new file mode 100644
index 000000000000..34f2303745bc
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_frsub.c
@@ -0,0 +1,19 @@
+//===-- lib/arm/aeabi_frsub.c - Single-precision subtraction --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "../fp_lib.h"
+
+AEABI_RTABI fp_t
+__aeabi_fsub(fp_t, fp_t);
+
+AEABI_RTABI fp_t
+__aeabi_frsub(fp_t a, fp_t b) {
+ return __aeabi_fsub(b, a);
+}
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_idivmod.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_idivmod.S
new file mode 100644
index 000000000000..9c9c80ab5a7b
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_idivmod.S
@@ -0,0 +1,51 @@
+//===-- aeabi_idivmod.S - EABI idivmod implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// struct { int quot, int rem} __aeabi_idivmod(int numerator, int denominator) {
+// int rem, quot;
+// quot = __divmodsi4(numerator, denominator, &rem);
+// return {quot, rem};
+// }
+
+#if defined(__MINGW32__)
+#define __aeabi_idivmod __rt_sdiv
+#endif
+
+ .syntax unified
+ .text
+ DEFINE_CODE_STATE
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod)
+#if defined(USE_THUMB_1)
+ push {r0, r1, lr}
+ bl SYMBOL_NAME(__divsi3)
+ pop {r1, r2, r3} // now r0 = quot, r1 = num, r2 = denom
+ muls r2, r0, r2 // r2 = quot * denom
+ subs r1, r1, r2
+ JMP (r3)
+#else // defined(USE_THUMB_1)
+ push { lr }
+ sub sp, sp, #4
+ mov r2, sp
+#if defined(__MINGW32__)
+ mov r3, r0
+ mov r0, r1
+ mov r1, r3
+#endif
+ bl SYMBOL_NAME(__divmodsi4)
+ ldr r1, [sp]
+ add sp, sp, #4
+ pop { pc }
+#endif // defined(USE_THUMB_1)
+END_COMPILERRT_FUNCTION(__aeabi_idivmod)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_ldivmod.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_ldivmod.S
new file mode 100644
index 000000000000..038ae5d723a3
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_ldivmod.S
@@ -0,0 +1,46 @@
+//===-- aeabi_ldivmod.S - EABI ldivmod implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// struct { int64_t quot, int64_t rem}
+// __aeabi_ldivmod(int64_t numerator, int64_t denominator) {
+// int64_t rem, quot;
+// quot = __divmoddi4(numerator, denominator, &rem);
+// return {quot, rem};
+// }
+
+#if defined(__MINGW32__)
+#define __aeabi_ldivmod __rt_sdiv64
+#endif
+
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_ldivmod)
+ push {r6, lr}
+ sub sp, sp, #16
+ add r6, sp, #8
+ str r6, [sp]
+#if defined(__MINGW32__)
+ movs r6, r0
+ movs r0, r2
+ movs r2, r6
+ movs r6, r1
+ movs r1, r3
+ movs r3, r6
+#endif
+ bl SYMBOL_NAME(__divmoddi4)
+ ldr r2, [sp, #8]
+ ldr r3, [sp, #12]
+ add sp, sp, #16
+ pop {r6, pc}
+END_COMPILERRT_FUNCTION(__aeabi_ldivmod)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_memcmp.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_memcmp.S
new file mode 100644
index 000000000000..e86d6113760e
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_memcmp.S
@@ -0,0 +1,30 @@
+//===-- aeabi_memcmp.S - EABI memcmp implementation -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// void __aeabi_memcmp(void *dest, void *src, size_t n) { memcmp(dest, src, n); }
+
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memcmp)
+#ifdef USE_THUMB_1
+ push {r7, lr}
+ bl memcmp
+ pop {r7, pc}
+#else
+ b memcmp
+#endif
+END_COMPILERRT_FUNCTION(__aeabi_memcmp)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp4, __aeabi_memcmp)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp8, __aeabi_memcmp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_memcpy.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_memcpy.S
new file mode 100644
index 000000000000..e83c5fd4dbb3
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_memcpy.S
@@ -0,0 +1,30 @@
+//===-- aeabi_memcpy.S - EABI memcpy implementation -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// void __aeabi_memcpy(void *dest, void *src, size_t n) { memcpy(dest, src, n); }
+
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memcpy)
+#ifdef USE_THUMB_1
+ push {r7, lr}
+ bl memcpy
+ pop {r7, pc}
+#else
+ b memcpy
+#endif
+END_COMPILERRT_FUNCTION(__aeabi_memcpy)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy4, __aeabi_memcpy)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy8, __aeabi_memcpy)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_memmove.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_memmove.S
new file mode 100644
index 000000000000..ee28300e46f2
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_memmove.S
@@ -0,0 +1,29 @@
+//===-- aeabi_memmove.S - EABI memmove implementation --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// void __aeabi_memmove(void *dest, void *src, size_t n) { memmove(dest, src, n); }
+
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memmove)
+#ifdef USE_THUMB_1
+ push {r7, lr}
+ bl memmove
+ pop {r7, pc}
+#else
+ b memmove
+#endif
+END_COMPILERRT_FUNCTION(__aeabi_memmove)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove4, __aeabi_memmove)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove8, __aeabi_memmove)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_memset.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_memset.S
new file mode 100644
index 000000000000..0a678d7627e7
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_memset.S
@@ -0,0 +1,50 @@
+//===-- aeabi_memset.S - EABI memset implementation -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// void __aeabi_memset(void *dest, size_t n, int c) { memset(dest, c, n); }
+// void __aeabi_memclr(void *dest, size_t n) { __aeabi_memset(dest, n, 0); }
+
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memset)
+ mov r3, r1
+ mov r1, r2
+ mov r2, r3
+#ifdef USE_THUMB_1
+ push {r7, lr}
+ bl memset
+ pop {r7, pc}
+#else
+ b memset
+#endif
+END_COMPILERRT_FUNCTION(__aeabi_memset)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset4, __aeabi_memset)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset8, __aeabi_memset)
+
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memclr)
+ mov r2, r1
+ movs r1, #0
+#ifdef USE_THUMB_1
+ push {r7, lr}
+ bl memset
+ pop {r7, pc}
+#else
+ b memset
+#endif
+END_COMPILERRT_FUNCTION(__aeabi_memclr)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr4, __aeabi_memclr)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr8, __aeabi_memclr)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_uidivmod.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_uidivmod.S
new file mode 100644
index 000000000000..88a4a6d8bc12
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_uidivmod.S
@@ -0,0 +1,58 @@
+//===-- aeabi_uidivmod.S - EABI uidivmod implementation -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// struct { unsigned quot, unsigned rem}
+// __aeabi_uidivmod(unsigned numerator, unsigned denominator) {
+// unsigned rem, quot;
+// quot = __udivmodsi4(numerator, denominator, &rem);
+// return {quot, rem};
+// }
+
+#if defined(__MINGW32__)
+#define __aeabi_uidivmod __rt_udiv
+#endif
+
+ .syntax unified
+ .text
+ DEFINE_CODE_STATE
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod)
+#if defined(USE_THUMB_1)
+ cmp r0, r1
+ bcc LOCAL_LABEL(case_denom_larger)
+ push {r0, r1, lr}
+ bl SYMBOL_NAME(__aeabi_uidiv)
+ pop {r1, r2, r3}
+ muls r2, r0, r2 // r2 = quot * denom
+ subs r1, r1, r2
+ JMP (r3)
+LOCAL_LABEL(case_denom_larger):
+ movs r1, r0
+ movs r0, #0
+ JMP (lr)
+#else // defined(USE_THUMB_1)
+ push { lr }
+ sub sp, sp, #4
+ mov r2, sp
+#if defined(__MINGW32__)
+ mov r3, r0
+ mov r0, r1
+ mov r1, r3
+#endif
+ bl SYMBOL_NAME(__udivmodsi4)
+ ldr r1, [sp]
+ add sp, sp, #4
+ pop { pc }
+#endif
+END_COMPILERRT_FUNCTION(__aeabi_uidivmod)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_uldivmod.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_uldivmod.S
new file mode 100644
index 000000000000..be343b6bc826
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_uldivmod.S
@@ -0,0 +1,46 @@
+//===-- aeabi_uldivmod.S - EABI uldivmod implementation -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// struct { uint64_t quot, uint64_t rem}
+// __aeabi_uldivmod(uint64_t numerator, uint64_t denominator) {
+// uint64_t rem, quot;
+// quot = __udivmoddi4(numerator, denominator, &rem);
+// return {quot, rem};
+// }
+
+#if defined(__MINGW32__)
+#define __aeabi_uldivmod __rt_udiv64
+#endif
+
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_uldivmod)
+ push {r6, lr}
+ sub sp, sp, #16
+ add r6, sp, #8
+ str r6, [sp]
+#if defined(__MINGW32__)
+ movs r6, r0
+ movs r0, r2
+ movs r2, r6
+ movs r6, r1
+ movs r1, r3
+ movs r3, r6
+#endif
+ bl SYMBOL_NAME(__udivmoddi4)
+ ldr r2, [sp, #8]
+ ldr r3, [sp, #12]
+ add sp, sp, #16
+ pop {r6, pc}
+END_COMPILERRT_FUNCTION(__aeabi_uldivmod)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/bswapdi2.S b/contrib/compiler-rt/lib/builtins/arm/bswapdi2.S
new file mode 100644
index 000000000000..e9db8bac7994
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/bswapdi2.S
@@ -0,0 +1,44 @@
+//===------- bswapdi2 - Implement bswapdi2 --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+ .syntax unified
+ .text
+ DEFINE_CODE_STATE
+
+//
+// extern uint64_t __bswapdi2(uint64_t);
+//
+// Reverse all the bytes in a 64-bit integer.
+//
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__bswapdi2)
+#if __ARM_ARCH < 6
+ // before armv6 does not have "rev" instruction
+ // r2 = rev(r0)
+ eor r2, r0, r0, ror #16
+ bic r2, r2, #0xff0000
+ mov r2, r2, lsr #8
+ eor r2, r2, r0, ror #8
+ // r0 = rev(r1)
+ eor r0, r1, r1, ror #16
+ bic r0, r0, #0xff0000
+ mov r0, r0, lsr #8
+ eor r0, r0, r1, ror #8
+#else
+ rev r2, r0 // r2 = rev(r0)
+ rev r0, r1 // r0 = rev(r1)
+#endif
+ mov r1, r2 // r1 = r2 = rev(r0)
+ JMP(lr)
+END_COMPILERRT_FUNCTION(__bswapdi2)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/bswapsi2.S b/contrib/compiler-rt/lib/builtins/arm/bswapsi2.S
new file mode 100644
index 000000000000..1f6eed5c1bbf
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/bswapsi2.S
@@ -0,0 +1,36 @@
+//===------- bswapsi2 - Implement bswapsi2 --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+ .syntax unified
+ .text
+ DEFINE_CODE_STATE
+
+//
+// extern uint32_t __bswapsi2(uint32_t);
+//
+// Reverse all the bytes in a 32-bit integer.
+//
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__bswapsi2)
+#if __ARM_ARCH < 6
+ // before armv6 does not have "rev" instruction
+ eor r1, r0, r0, ror #16
+ bic r1, r1, #0xff0000
+ mov r1, r1, lsr #8
+ eor r0, r1, r0, ror #8
+#else
+ rev r0, r0
+#endif
+ JMP(lr)
+END_COMPILERRT_FUNCTION(__bswapsi2)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/chkstk.S b/contrib/compiler-rt/lib/builtins/arm/chkstk.S
new file mode 100644
index 000000000000..e3002105897e
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/chkstk.S
@@ -0,0 +1,34 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// __chkstk routine
+// This routine is windows specific.
+// http://msdn.microsoft.com/en-us/library/ms648426.aspx
+
+// This clobbers the register r12, and the condition codes, and uses r5 and r6
+// as temporaries by backing them up and restoring them afterwards.
+// Does not modify any memory or the stack pointer.
+
+// movw r4, #256 // Number of bytes of stack, in units of 4 byte
+// bl __chkstk
+// sub.w sp, sp, r4
+
+#define PAGE_SIZE 4096
+
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__chkstk)
+ lsl r4, r4, #2
+ mov r12, sp
+ push {r5, r6}
+ mov r5, r4
+1:
+ sub r12, r12, #PAGE_SIZE
+ subs r5, r5, #PAGE_SIZE
+ ldr r6, [r12]
+ bgt 1b
+
+ pop {r5, r6}
+ bx lr
+END_COMPILERRT_FUNCTION(__chkstk)
diff --git a/contrib/compiler-rt/lib/builtins/arm/clzdi2.S b/contrib/compiler-rt/lib/builtins/arm/clzdi2.S
new file mode 100644
index 000000000000..fc03b385cdfa
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/clzdi2.S
@@ -0,0 +1,93 @@
+/* ===-- clzdi2.c - Implement __clzdi2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements count leading zeros for 64bit arguments.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+#include "../assembly.h"
+
+ .syntax unified
+ .text
+ DEFINE_CODE_STATE
+
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__clzdi2)
+#ifdef __ARM_FEATURE_CLZ
+#ifdef __ARMEB__
+ cmp r0, 0
+ itee ne
+ clzne r0, r0
+ clzeq r0, r1
+ addeq r0, r0, 32
+#else
+ cmp r1, 0
+ itee ne
+ clzne r0, r1
+ clzeq r0, r0
+ addeq r0, r0, 32
+#endif
+ JMP(lr)
+#else
+ /* Assumption: n != 0 */
+
+ /*
+ * r0: n
+ * r1: upper half of n, overwritten after check
+ * r1: count of leading zeros in n + 1
+ * r2: scratch register for shifted r0
+ */
+#ifdef __ARMEB__
+ cmp r0, 0
+ moveq r0, r1
+#else
+ cmp r1, 0
+ movne r0, r1
+#endif
+ movne r1, 1
+ moveq r1, 33
+
+ /*
+ * Basic block:
+ * if ((r0 >> SHIFT) == 0)
+ * r1 += SHIFT;
+ * else
+ * r0 >>= SHIFT;
+ * for descending powers of two as SHIFT.
+ */
+#define BLOCK(shift) \
+ lsrs r2, r0, shift; \
+ movne r0, r2; \
+ addeq r1, shift \
+
+ BLOCK(16)
+ BLOCK(8)
+ BLOCK(4)
+ BLOCK(2)
+
+ /*
+ * The basic block invariants at this point are (r0 >> 2) == 0 and
+ * r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1.
+ *
+ * r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1)
+ * ---+----------------+----------------+------------+--------------
+ * 1 | 1 | 0 | 0 | 1
+ * 2 | 0 | 1 | -1 | 0
+ * 3 | 0 | 1 | -1 | 0
+ *
+ * The r1's initial value of 1 compensates for the 1 here.
+ */
+ sub r0, r1, r0, lsr #1
+
+ JMP(lr)
+#endif // __ARM_FEATURE_CLZ
+END_COMPILERRT_FUNCTION(__clzdi2)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/clzsi2.S b/contrib/compiler-rt/lib/builtins/arm/clzsi2.S
new file mode 100644
index 000000000000..f2ce59c90119
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/clzsi2.S
@@ -0,0 +1,73 @@
+/* ===-- clzsi2.c - Implement __clzsi2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements count leading zeros for 32bit arguments.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+#include "../assembly.h"
+
+ .syntax unified
+ .text
+ DEFINE_CODE_STATE
+
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__clzsi2)
+#ifdef __ARM_FEATURE_CLZ
+ clz r0, r0
+ JMP(lr)
+#else
+ /* Assumption: n != 0 */
+
+ /*
+ * r0: n
+ * r1: count of leading zeros in n + 1
+ * r2: scratch register for shifted r0
+ */
+ mov r1, 1
+
+ /*
+ * Basic block:
+ * if ((r0 >> SHIFT) == 0)
+ * r1 += SHIFT;
+ * else
+ * r0 >>= SHIFT;
+ * for descending powers of two as SHIFT.
+ */
+
+#define BLOCK(shift) \
+ lsrs r2, r0, shift; \
+ movne r0, r2; \
+ addeq r1, shift \
+
+ BLOCK(16)
+ BLOCK(8)
+ BLOCK(4)
+ BLOCK(2)
+
+ /*
+ * The basic block invariants at this point are (r0 >> 2) == 0 and
+ * r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1.
+ *
+ * r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1)
+ * ---+----------------+----------------+------------+--------------
+ * 1 | 1 | 0 | 0 | 1
+ * 2 | 0 | 1 | -1 | 0
+ * 3 | 0 | 1 | -1 | 0
+ *
+ * The r1's initial value of 1 compensates for the 1 here.
+ */
+ sub r0, r1, r0, lsr #1
+
+ JMP(lr)
+#endif // __ARM_FEATURE_CLZ
+END_COMPILERRT_FUNCTION(__clzsi2)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/comparesf2.S b/contrib/compiler-rt/lib/builtins/arm/comparesf2.S
new file mode 100644
index 000000000000..c6c4cc067f07
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/comparesf2.S
@@ -0,0 +1,296 @@
+//===-- comparesf2.S - Implement single-precision soft-float comparisons --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the following soft-fp_t comparison routines:
+//
+// __eqsf2 __gesf2 __unordsf2
+// __lesf2 __gtsf2
+// __ltsf2
+// __nesf2
+//
+// The semantics of the routines grouped in each column are identical, so there
+// is a single implementation for each, with multiple names.
+//
+// The routines behave as follows:
+//
+// __lesf2(a,b) returns -1 if a < b
+// 0 if a == b
+// 1 if a > b
+// 1 if either a or b is NaN
+//
+// __gesf2(a,b) returns -1 if a < b
+// 0 if a == b
+// 1 if a > b
+// -1 if either a or b is NaN
+//
+// __unordsf2(a,b) returns 0 if both a and b are numbers
+// 1 if either a or b is NaN
+//
+// Note that __lesf2( ) and __gesf2( ) are identical except in their handling of
+// NaN values.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+ .syntax unified
+ .text
+ DEFINE_CODE_STATE
+
+@ int __eqsf2(float a, float b)
+
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__eqsf2)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vmov r0, s0
+ vmov r1, s1
+#endif
+ // Make copies of a and b with the sign bit shifted off the top. These will
+ // be used to detect zeros and NaNs.
+#if defined(USE_THUMB_1)
+ push {r6, lr}
+ lsls r2, r0, #1
+ lsls r3, r1, #1
+#else
+ mov r2, r0, lsl #1
+ mov r3, r1, lsl #1
+#endif
+
+ // We do the comparison in three stages (ignoring NaN values for the time
+ // being). First, we orr the absolute values of a and b; this sets the Z
+ // flag if both a and b are zero (of either sign). The shift of r3 doesn't
+ // effect this at all, but it *does* make sure that the C flag is clear for
+ // the subsequent operations.
+#if defined(USE_THUMB_1)
+ lsrs r6, r3, #1
+ orrs r6, r2
+#else
+ orrs r12, r2, r3, lsr #1
+#endif
+ // Next, we check if a and b have the same or different signs. If they have
+ // opposite signs, this eor will set the N flag.
+#if defined(USE_THUMB_1)
+ beq 1f
+ movs r6, r0
+ eors r6, r1
+1:
+#else
+ it ne
+ eorsne r12, r0, r1
+#endif
+
+ // If a and b are equal (either both zeros or bit identical; again, we're
+ // ignoring NaNs for now), this subtract will zero out r0. If they have the
+ // same sign, the flags are updated as they would be for a comparison of the
+ // absolute values of a and b.
+#if defined(USE_THUMB_1)
+ bmi 1f
+ subs r0, r2, r3
+1:
+#else
+ it pl
+ subspl r0, r2, r3
+#endif
+
+ // If a is smaller in magnitude than b and both have the same sign, place
+ // the negation of the sign of b in r0. Thus, if both are negative and
+ // a > b, this sets r0 to 0; if both are positive and a < b, this sets
+ // r0 to -1.
+ //
+ // This is also done if a and b have opposite signs and are not both zero,
+ // because in that case the subtract was not performed and the C flag is
+ // still clear from the shift argument in orrs; if a is positive and b
+ // negative, this places 0 in r0; if a is negative and b positive, -1 is
+ // placed in r0.
+#if defined(USE_THUMB_1)
+ bhs 1f
+ // Here if a and b have the same sign and absA < absB, the result is thus
+ // b < 0 ? 1 : -1. Same if a and b have the opposite sign (ignoring Nan).
+ movs r0, #1
+ lsrs r1, #31
+ bne LOCAL_LABEL(CHECK_NAN)
+ negs r0, r0
+ b LOCAL_LABEL(CHECK_NAN)
+1:
+#else
+ it lo
+ mvnlo r0, r1, asr #31
+#endif
+
+ // If a is greater in magnitude than b and both have the same sign, place
+ // the sign of b in r0. Thus, if both are negative and a < b, -1 is placed
+ // in r0, which is the desired result. Conversely, if both are positive
+ // and a > b, zero is placed in r0.
+#if defined(USE_THUMB_1)
+ bls 1f
+ // Here both have the same sign and absA > absB.
+ movs r0, #1
+ lsrs r1, #31
+ beq LOCAL_LABEL(CHECK_NAN)
+ negs r0, r0
+1:
+#else
+ it hi
+ movhi r0, r1, asr #31
+#endif
+
+ // If you've been keeping track, at this point r0 contains -1 if a < b and
+ // 0 if a >= b. All that remains to be done is to set it to 1 if a > b.
+ // If a == b, then the Z flag is set, so we can get the correct final value
+ // into r0 by simply or'ing with 1 if Z is clear.
+ // For Thumb-1, r0 contains -1 if a < b, 0 if a > b and 0 if a == b.
+#if !defined(USE_THUMB_1)
+ it ne
+ orrne r0, r0, #1
+#endif
+
+ // Finally, we need to deal with NaNs. If either argument is NaN, replace
+ // the value in r0 with 1.
+#if defined(USE_THUMB_1)
+LOCAL_LABEL(CHECK_NAN):
+ movs r6, #0xff
+ lsls r6, #24
+ cmp r2, r6
+ bhi 1f
+ cmp r3, r6
+1:
+ bls 2f
+ movs r0, #1
+2:
+ pop {r6, pc}
+#else
+ cmp r2, #0xff000000
+ ite ls
+ cmpls r3, #0xff000000
+ movhi r0, #1
+ JMP(lr)
+#endif
+END_COMPILERRT_FUNCTION(__eqsf2)
+
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __eqsf2)
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __eqsf2)
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2, __eqsf2)
+
+@ int __gtsf2(float a, float b)
+
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__gtsf2)
+ // Identical to the preceding except in that we return -1 for NaN values.
+ // Given that the two paths share so much code, one might be tempted to
+ // unify them; however, the extra code needed to do so makes the code size
+ // to performance tradeoff very hard to justify for such small functions.
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vmov r0, s0
+ vmov r1, s1
+#endif
+#if defined(USE_THUMB_1)
+ push {r6, lr}
+ lsls r2, r0, #1
+ lsls r3, r1, #1
+ lsrs r6, r3, #1
+ orrs r6, r2
+ beq 1f
+ movs r6, r0
+ eors r6, r1
+1:
+ bmi 2f
+ subs r0, r2, r3
+2:
+ bhs 3f
+ movs r0, #1
+ lsrs r1, #31
+ bne LOCAL_LABEL(CHECK_NAN_2)
+ negs r0, r0
+ b LOCAL_LABEL(CHECK_NAN_2)
+3:
+ bls 4f
+ movs r0, #1
+ lsrs r1, #31
+ beq LOCAL_LABEL(CHECK_NAN_2)
+ negs r0, r0
+4:
+LOCAL_LABEL(CHECK_NAN_2):
+ movs r6, #0xff
+ lsls r6, #24
+ cmp r2, r6
+ bhi 5f
+ cmp r3, r6
+5:
+ bls 6f
+ movs r0, #1
+ negs r0, r0
+6:
+ pop {r6, pc}
+#else
+ mov r2, r0, lsl #1
+ mov r3, r1, lsl #1
+ orrs r12, r2, r3, lsr #1
+ it ne
+ eorsne r12, r0, r1
+ it pl
+ subspl r0, r2, r3
+ it lo
+ mvnlo r0, r1, asr #31
+ it hi
+ movhi r0, r1, asr #31
+ it ne
+ orrne r0, r0, #1
+ cmp r2, #0xff000000
+ ite ls
+ cmpls r3, #0xff000000
+ movhi r0, #-1
+ JMP(lr)
+#endif
+END_COMPILERRT_FUNCTION(__gtsf2)
+
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __gtsf2)
+
+@ int __unordsf2(float a, float b)
+
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__unordsf2)
+
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vmov r0, s0
+ vmov r1, s1
+#endif
+ // Return 1 for NaN values, 0 otherwise.
+ lsls r2, r0, #1
+ lsls r3, r1, #1
+ movs r0, #0
+#if defined(USE_THUMB_1)
+ movs r1, #0xff
+ lsls r1, #24
+ cmp r2, r1
+ bhi 1f
+ cmp r3, r1
+1:
+ bls 2f
+ movs r0, #1
+2:
+#else
+ cmp r2, #0xff000000
+ ite ls
+ cmpls r3, #0xff000000
+ movhi r0, #1
+#endif
+ JMP(lr)
+END_COMPILERRT_FUNCTION(__unordsf2)
+
+#if defined(COMPILER_RT_ARMHF_TARGET)
+DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmpum)
+ vmov s0, r0
+ vmov s1, r1
+ b SYMBOL_NAME(__unordsf2)
+END_COMPILERRT_FUNCTION(__aeabi_fcmpum)
+#else
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fcmpun, __unordsf2)
+#endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/divdf3vfp.S b/contrib/compiler-rt/lib/builtins/arm/divdf3vfp.S
new file mode 100644
index 000000000000..776ba4f24b47
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/divdf3vfp.S
@@ -0,0 +1,33 @@
+//===-- divdf3vfp.S - Implement divdf3vfp ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __divdf3vfp(double a, double b);
+//
+// Divides two double precision floating point numbers using the Darwin
+// calling convention where double arguments are passsed in GPR pairs
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__divdf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vdiv.f64 d0, d0, d1
+#else
+ vmov d6, r0, r1 // move first param from r0/r1 pair into d6
+ vmov d7, r2, r3 // move second param from r2/r3 pair into d7
+ vdiv.f64 d5, d6, d7
+ vmov r0, r1, d5 // move result back to r0/r1 pair
+#endif
+ bx lr
+END_COMPILERRT_FUNCTION(__divdf3vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/divmodsi4.S b/contrib/compiler-rt/lib/builtins/arm/divmodsi4.S
new file mode 100644
index 000000000000..8a027b741efe
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/divmodsi4.S
@@ -0,0 +1,71 @@
+/*===-- divmodsi4.S - 32-bit signed integer divide and modulus ------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __divmodsi4 (32-bit signed integer divide and
+ * modulus) function for the ARM architecture. A naive digit-by-digit
+ * computation is employed for simplicity.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+#define ESTABLISH_FRAME \
+ push {r4-r7, lr} ;\
+ add r7, sp, #12
+#define CLEAR_FRAME_AND_RETURN \
+ pop {r4-r7, pc}
+
+ .syntax unified
+ .text
+ DEFINE_CODE_STATE
+
+@ int __divmodsi4(int divident, int divisor, int *remainder)
+@ Calculate the quotient and remainder of the (signed) division. The return
+@ value is the quotient, the remainder is placed in the variable.
+
+ .p2align 3
+DEFINE_COMPILERRT_FUNCTION(__divmodsi4)
+#if __ARM_ARCH_EXT_IDIV__
+ tst r1, r1
+ beq LOCAL_LABEL(divzero)
+ mov r3, r0
+ sdiv r0, r3, r1
+ mls r1, r0, r1, r3
+ str r1, [r2]
+ bx lr
+LOCAL_LABEL(divzero):
+ mov r0, #0
+ bx lr
+#else
+ ESTABLISH_FRAME
+// Set aside the sign of the quotient and modulus, and the address for the
+// modulus.
+ eor r4, r0, r1
+ mov r5, r0
+ mov r6, r2
+// Take the absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31).
+ eor ip, r0, r0, asr #31
+ eor lr, r1, r1, asr #31
+ sub r0, ip, r0, asr #31
+ sub r1, lr, r1, asr #31
+// Unsigned divmod:
+ bl SYMBOL_NAME(__udivmodsi4)
+// Apply the sign of quotient and modulus
+ ldr r1, [r6]
+ eor r0, r0, r4, asr #31
+ eor r1, r1, r5, asr #31
+ sub r0, r0, r4, asr #31
+ sub r1, r1, r5, asr #31
+ str r1, [r6]
+ CLEAR_FRAME_AND_RETURN
+#endif
+END_COMPILERRT_FUNCTION(__divmodsi4)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/divsf3vfp.S b/contrib/compiler-rt/lib/builtins/arm/divsf3vfp.S
new file mode 100644
index 000000000000..130318f0c37b
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/divsf3vfp.S
@@ -0,0 +1,33 @@
+//===-- divsf3vfp.S - Implement divsf3vfp ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern float __divsf3vfp(float a, float b);
+//
+// Divides two single precision floating point numbers using the Darwin
+// calling convention where single arguments are passsed like 32-bit ints.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__divsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vdiv.f32 s0, s0, s1
+#else
+ vmov s14, r0 // move first param from r0 into float register
+ vmov s15, r1 // move second param from r1 into float register
+ vdiv.f32 s13, s14, s15
+ vmov r0, s13 // move result back to r0
+#endif
+ bx lr
+END_COMPILERRT_FUNCTION(__divsf3vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/divsi3.S b/contrib/compiler-rt/lib/builtins/arm/divsi3.S
new file mode 100644
index 000000000000..19757af177eb
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/divsi3.S
@@ -0,0 +1,82 @@
+/*===-- divsi3.S - 32-bit signed integer divide ---------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __divsi3 (32-bit signed integer divide) function
+ * for the ARM architecture as a wrapper around the unsigned routine.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+#define ESTABLISH_FRAME \
+ push {r4, r7, lr} ;\
+ add r7, sp, #4
+#define CLEAR_FRAME_AND_RETURN \
+ pop {r4, r7, pc}
+
+ .syntax unified
+ .text
+ DEFINE_CODE_STATE
+
+ .p2align 3
+// Ok, APCS and AAPCS agree on 32 bit args, so it's safe to use the same routine.
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_idiv, __divsi3)
+
+@ int __divsi3(int divident, int divisor)
+@ Calculate and return the quotient of the (signed) division.
+
+DEFINE_COMPILERRT_FUNCTION(__divsi3)
+#if __ARM_ARCH_EXT_IDIV__
+ tst r1,r1
+ beq LOCAL_LABEL(divzero)
+ sdiv r0, r0, r1
+ bx lr
+LOCAL_LABEL(divzero):
+ mov r0,#0
+ bx lr
+#else
+ESTABLISH_FRAME
+// Set aside the sign of the quotient.
+# if defined(USE_THUMB_1)
+ movs r4, r0
+ eors r4, r1
+# else
+ eor r4, r0, r1
+# endif
+// Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31).
+# if defined(USE_THUMB_1)
+ asrs r2, r0, #31
+ asrs r3, r1, #31
+ eors r0, r2
+ eors r1, r3
+ subs r0, r0, r2
+ subs r1, r1, r3
+# else
+ eor r2, r0, r0, asr #31
+ eor r3, r1, r1, asr #31
+ sub r0, r2, r0, asr #31
+ sub r1, r3, r1, asr #31
+# endif
+// abs(a) / abs(b)
+ bl SYMBOL_NAME(__udivsi3)
+// Apply sign of quotient to result and return.
+# if defined(USE_THUMB_1)
+ asrs r4, #31
+ eors r0, r4
+ subs r0, r0, r4
+# else
+ eor r0, r0, r4, asr #31
+ sub r0, r0, r4, asr #31
+# endif
+ CLEAR_FRAME_AND_RETURN
+#endif
+END_COMPILERRT_FUNCTION(__divsi3)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/eqdf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/eqdf2vfp.S
new file mode 100644
index 000000000000..d50706570916
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/eqdf2vfp.S
@@ -0,0 +1,37 @@
+//===-- eqdf2vfp.S - Implement eqdf2vfp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __eqdf2vfp(double a, double b);
+//
+// Returns one iff a == b and neither is NaN.
+// Uses Darwin calling convention where double precision arguments are passsed
+// like in GPR pairs.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__eqdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f64 d0, d1
+#else
+ vmov d6, r0, r1 // load r0/r1 pair in double register
+ vmov d7, r2, r3 // load r2/r3 pair in double register
+ vcmp.f64 d6, d7
+#endif
+ vmrs apsr_nzcv, fpscr
+ ITE(eq)
+ moveq r0, #1 // set result register to 1 if equal
+ movne r0, #0
+ bx lr
+END_COMPILERRT_FUNCTION(__eqdf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/eqsf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/eqsf2vfp.S
new file mode 100644
index 000000000000..fd72b2fdbdee
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/eqsf2vfp.S
@@ -0,0 +1,37 @@
+//===-- eqsf2vfp.S - Implement eqsf2vfp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __eqsf2vfp(float a, float b);
+//
+// Returns one iff a == b and neither is NaN.
+// Uses Darwin calling convention where single precision arguments are passsed
+// like 32-bit ints
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__eqsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f32 s0, s1
+#else
+ vmov s14, r0 // move from GPR 0 to float register
+ vmov s15, r1 // move from GPR 1 to float register
+ vcmp.f32 s14, s15
+#endif
+ vmrs apsr_nzcv, fpscr
+ ITE(eq)
+ moveq r0, #1 // set result register to 1 if equal
+ movne r0, #0
+ bx lr
+END_COMPILERRT_FUNCTION(__eqsf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/extendsfdf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/extendsfdf2vfp.S
new file mode 100644
index 000000000000..1079f977bae6
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/extendsfdf2vfp.S
@@ -0,0 +1,33 @@
+//===-- extendsfdf2vfp.S - Implement extendsfdf2vfp -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __extendsfdf2vfp(float a);
+//
+// Converts single precision float to double precision result.
+// Uses Darwin calling convention where a single precision parameter is
+// passed in a GPR and a double precision result is returned in R0/R1 pair.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__extendsfdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcvt.f64.f32 d0, s0
+#else
+ vmov s15, r0 // load float register from R0
+ vcvt.f64.f32 d7, s15 // convert single to double
+ vmov r0, r1, d7 // return result in r0/r1 pair
+#endif
+ bx lr
+END_COMPILERRT_FUNCTION(__extendsfdf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/fixdfsivfp.S b/contrib/compiler-rt/lib/builtins/arm/fixdfsivfp.S
new file mode 100644
index 000000000000..5d7b0f856549
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/fixdfsivfp.S
@@ -0,0 +1,34 @@
+//===-- fixdfsivfp.S - Implement fixdfsivfp -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __fixdfsivfp(double a);
+//
+// Converts double precision float to a 32-bit int rounding towards zero.
+// Uses Darwin calling convention where a double precision parameter is
+// passed in GPR register pair.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__fixdfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcvt.s32.f64 s0, d0
+ vmov r0, s0
+#else
+ vmov d7, r0, r1 // load double register from R0/R1
+ vcvt.s32.f64 s15, d7 // convert double to 32-bit int into s15
+ vmov r0, s15 // move s15 to result register
+#endif
+ bx lr
+END_COMPILERRT_FUNCTION(__fixdfsivfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/fixsfsivfp.S b/contrib/compiler-rt/lib/builtins/arm/fixsfsivfp.S
new file mode 100644
index 000000000000..805a277afa34
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/fixsfsivfp.S
@@ -0,0 +1,34 @@
+//===-- fixsfsivfp.S - Implement fixsfsivfp -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __fixsfsivfp(float a);
+//
+// Converts single precision float to a 32-bit int rounding towards zero.
+// Uses Darwin calling convention where a single precision parameter is
+// passed in a GPR..
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__fixsfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcvt.s32.f32 s0, s0
+ vmov r0, s0
+#else
+ vmov s15, r0 // load float register from R0
+ vcvt.s32.f32 s15, s15 // convert single to 32-bit int into s15
+ vmov r0, s15 // move s15 to result register
+#endif
+ bx lr
+END_COMPILERRT_FUNCTION(__fixsfsivfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/fixunsdfsivfp.S b/contrib/compiler-rt/lib/builtins/arm/fixunsdfsivfp.S
new file mode 100644
index 000000000000..4f1b2c8cefdc
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/fixunsdfsivfp.S
@@ -0,0 +1,35 @@
+//===-- fixunsdfsivfp.S - Implement fixunsdfsivfp -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern unsigned int __fixunsdfsivfp(double a);
+//
+// Converts double precision float to a 32-bit unsigned int rounding towards
+// zero. All negative values become zero.
+// Uses Darwin calling convention where a double precision parameter is
+// passed in GPR register pair.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__fixunsdfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcvt.u32.f64 s0, d0
+ vmov r0, s0
+#else
+ vmov d7, r0, r1 // load double register from R0/R1
+ vcvt.u32.f64 s15, d7 // convert double to 32-bit int into s15
+ vmov r0, s15 // move s15 to result register
+#endif
+ bx lr
+END_COMPILERRT_FUNCTION(__fixunsdfsivfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/fixunssfsivfp.S b/contrib/compiler-rt/lib/builtins/arm/fixunssfsivfp.S
new file mode 100644
index 000000000000..e5d778236879
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/fixunssfsivfp.S
@@ -0,0 +1,35 @@
+//===-- fixunssfsivfp.S - Implement fixunssfsivfp -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern unsigned int __fixunssfsivfp(float a);
+//
+// Converts single precision float to a 32-bit unsigned int rounding towards
+// zero. All negative values become zero.
+// Uses Darwin calling convention where a single precision parameter is
+// passed in a GPR..
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__fixunssfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcvt.u32.f32 s0, s0
+ vmov r0, s0
+#else
+ vmov s15, r0 // load float register from R0
+ vcvt.u32.f32 s15, s15 // convert single to 32-bit unsigned into s15
+ vmov r0, s15 // move s15 to result register
+#endif
+ bx lr
+END_COMPILERRT_FUNCTION(__fixunssfsivfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/floatsidfvfp.S b/contrib/compiler-rt/lib/builtins/arm/floatsidfvfp.S
new file mode 100644
index 000000000000..3297ad44d8cd
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/floatsidfvfp.S
@@ -0,0 +1,34 @@
+//===-- floatsidfvfp.S - Implement floatsidfvfp ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __floatsidfvfp(int a);
+//
+// Converts a 32-bit int to a double precision float.
+// Uses Darwin calling convention where a double precision result is
+// return in GPR register pair.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__floatsidfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vmov s0, r0
+ vcvt.f64.s32 d0, s0
+#else
+ vmov s15, r0 // move int to float register s15
+ vcvt.f64.s32 d7, s15 // convert 32-bit int in s15 to double in d7
+ vmov r0, r1, d7 // move d7 to result register pair r0/r1
+#endif
+ bx lr
+END_COMPILERRT_FUNCTION(__floatsidfvfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/floatsisfvfp.S b/contrib/compiler-rt/lib/builtins/arm/floatsisfvfp.S
new file mode 100644
index 000000000000..65408b54b8d4
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/floatsisfvfp.S
@@ -0,0 +1,34 @@
+//===-- floatsisfvfp.S - Implement floatsisfvfp ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern float __floatsisfvfp(int a);
+//
+// Converts single precision float to a 32-bit int rounding towards zero.
+// Uses Darwin calling convention where a single precision result is
+// return in a GPR..
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__floatsisfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vmov s0, r0
+ vcvt.f32.s32 s0, s0
+#else
+ vmov s15, r0 // move int to float register s15
+ vcvt.f32.s32 s15, s15 // convert 32-bit int in s15 to float in s15
+ vmov r0, s15 // move s15 to result register
+#endif
+ bx lr
+END_COMPILERRT_FUNCTION(__floatsisfvfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/floatunssidfvfp.S b/contrib/compiler-rt/lib/builtins/arm/floatunssidfvfp.S
new file mode 100644
index 000000000000..d7a7024a25b8
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/floatunssidfvfp.S
@@ -0,0 +1,34 @@
+//===-- floatunssidfvfp.S - Implement floatunssidfvfp ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __floatunssidfvfp(unsigned int a);
+//
+// Converts a 32-bit int to a double precision float.
+// Uses Darwin calling convention where a double precision result is
+// return in GPR register pair.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__floatunssidfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vmov s0, r0
+ vcvt.f64.u32 d0, s0
+#else
+ vmov s15, r0 // move int to float register s15
+ vcvt.f64.u32 d7, s15 // convert 32-bit int in s15 to double in d7
+ vmov r0, r1, d7 // move d7 to result register pair r0/r1
+#endif
+ bx lr
+END_COMPILERRT_FUNCTION(__floatunssidfvfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/floatunssisfvfp.S b/contrib/compiler-rt/lib/builtins/arm/floatunssisfvfp.S
new file mode 100644
index 000000000000..1ca856519a92
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/floatunssisfvfp.S
@@ -0,0 +1,34 @@
+//===-- floatunssisfvfp.S - Implement floatunssisfvfp ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern float __floatunssisfvfp(unsigned int a);
+//
+// Converts single precision float to a 32-bit int rounding towards zero.
+// Uses Darwin calling convention where a single precision result is
+// return in a GPR..
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__floatunssisfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vmov s0, r0
+ vcvt.f32.u32 s0, s0
+#else
+ vmov s15, r0 // move int to float register s15
+ vcvt.f32.u32 s15, s15 // convert 32-bit int in s15 to float in s15
+ vmov r0, s15 // move s15 to result register
+#endif
+ bx lr
+END_COMPILERRT_FUNCTION(__floatunssisfvfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/gedf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/gedf2vfp.S
new file mode 100644
index 000000000000..364fc5b24cd1
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/gedf2vfp.S
@@ -0,0 +1,37 @@
+//===-- gedf2vfp.S - Implement gedf2vfp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __gedf2vfp(double a, double b);
+//
+// Returns one iff a >= b and neither is NaN.
+// Uses Darwin calling convention where double precision arguments are passsed
+// like in GPR pairs.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__gedf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f64 d0, d1
+#else
+ vmov d6, r0, r1 // load r0/r1 pair in double register
+ vmov d7, r2, r3 // load r2/r3 pair in double register
+ vcmp.f64 d6, d7
+#endif
+ vmrs apsr_nzcv, fpscr
+ ITE(ge)
+ movge r0, #1 // set result register to 1 if greater than or equal
+ movlt r0, #0
+ bx lr
+END_COMPILERRT_FUNCTION(__gedf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/gesf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/gesf2vfp.S
new file mode 100644
index 000000000000..346c3473ae4c
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/gesf2vfp.S
@@ -0,0 +1,37 @@
+//===-- gesf2vfp.S - Implement gesf2vfp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __gesf2vfp(float a, float b);
+//
+// Returns one iff a >= b and neither is NaN.
+// Uses Darwin calling convention where single precision arguments are passsed
+// like 32-bit ints
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__gesf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f32 s0, s1
+#else
+ vmov s14, r0 // move from GPR 0 to float register
+ vmov s15, r1 // move from GPR 1 to float register
+ vcmp.f32 s14, s15
+#endif
+ vmrs apsr_nzcv, fpscr
+ ITE(ge)
+ movge r0, #1 // set result register to 1 if greater than or equal
+ movlt r0, #0
+ bx lr
+END_COMPILERRT_FUNCTION(__gesf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/gtdf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/gtdf2vfp.S
new file mode 100644
index 000000000000..3389c3ad9737
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/gtdf2vfp.S
@@ -0,0 +1,37 @@
+//===-- gtdf2vfp.S - Implement gtdf2vfp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __gtdf2vfp(double a, double b);
+//
+// Returns one iff a > b and neither is NaN.
+// Uses Darwin calling convention where double precision arguments are passsed
+// like in GPR pairs.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__gtdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f64 d0, d1
+#else
+ vmov d6, r0, r1 // load r0/r1 pair in double register
+ vmov d7, r2, r3 // load r2/r3 pair in double register
+ vcmp.f64 d6, d7
+#endif
+ vmrs apsr_nzcv, fpscr
+ ITE(gt)
+ movgt r0, #1 // set result register to 1 if equal
+ movle r0, #0
+ bx lr
+END_COMPILERRT_FUNCTION(__gtdf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/gtsf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/gtsf2vfp.S
new file mode 100644
index 000000000000..afdba8b018e2
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/gtsf2vfp.S
@@ -0,0 +1,37 @@
+//===-- gtsf2vfp.S - Implement gtsf2vfp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __gtsf2vfp(float a, float b);
+//
+// Returns one iff a > b and neither is NaN.
+// Uses Darwin calling convention where single precision arguments are passsed
+// like 32-bit ints
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__gtsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f32 s0, s1
+#else
+ vmov s14, r0 // move from GPR 0 to float register
+ vmov s15, r1 // move from GPR 1 to float register
+ vcmp.f32 s14, s15
+#endif
+ vmrs apsr_nzcv, fpscr
+ ITE(gt)
+ movgt r0, #1 // set result register to 1 if equal
+ movle r0, #0
+ bx lr
+END_COMPILERRT_FUNCTION(__gtsf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/ledf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/ledf2vfp.S
new file mode 100644
index 000000000000..4bbe4c86837c
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/ledf2vfp.S
@@ -0,0 +1,37 @@
+//===-- ledf2vfp.S - Implement ledf2vfp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __ledf2vfp(double a, double b);
+//
+// Returns one iff a <= b and neither is NaN.
+// Uses Darwin calling convention where double precision arguments are passsed
+// like in GPR pairs.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__ledf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f64 d0, d1
+#else
+ vmov d6, r0, r1 // load r0/r1 pair in double register
+ vmov d7, r2, r3 // load r2/r3 pair in double register
+ vcmp.f64 d6, d7
+#endif
+ vmrs apsr_nzcv, fpscr
+ ITE(ls)
+ movls r0, #1 // set result register to 1 if equal
+ movhi r0, #0
+ bx lr
+END_COMPILERRT_FUNCTION(__ledf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/lesf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/lesf2vfp.S
new file mode 100644
index 000000000000..51232bd8cedc
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/lesf2vfp.S
@@ -0,0 +1,37 @@
+//===-- lesf2vfp.S - Implement lesf2vfp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __lesf2vfp(float a, float b);
+//
+// Returns one iff a <= b and neither is NaN.
+// Uses Darwin calling convention where single precision arguments are passsed
+// like 32-bit ints
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__lesf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f32 s0, s1
+#else
+ vmov s14, r0 // move from GPR 0 to float register
+ vmov s15, r1 // move from GPR 1 to float register
+ vcmp.f32 s14, s15
+#endif
+ vmrs apsr_nzcv, fpscr
+ ITE(ls)
+ movls r0, #1 // set result register to 1 if equal
+ movhi r0, #0
+ bx lr
+END_COMPILERRT_FUNCTION(__lesf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/ltdf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/ltdf2vfp.S
new file mode 100644
index 000000000000..8e2928c813d2
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/ltdf2vfp.S
@@ -0,0 +1,37 @@
+//===-- ltdf2vfp.S - Implement ltdf2vfp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __ltdf2vfp(double a, double b);
+//
+// Returns one iff a < b and neither is NaN.
+// Uses Darwin calling convention where double precision arguments are passsed
+// like in GPR pairs.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__ltdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f64 d0, d1
+#else
+ vmov d6, r0, r1 // load r0/r1 pair in double register
+ vmov d7, r2, r3 // load r2/r3 pair in double register
+ vcmp.f64 d6, d7
+#endif
+ vmrs apsr_nzcv, fpscr
+ ITE(mi)
+ movmi r0, #1 // set result register to 1 if equal
+ movpl r0, #0
+ bx lr
+END_COMPILERRT_FUNCTION(__ltdf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/ltsf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/ltsf2vfp.S
new file mode 100644
index 000000000000..59c00c6bab67
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/ltsf2vfp.S
@@ -0,0 +1,37 @@
+//===-- ltsf2vfp.S - Implement ltsf2vfp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __ltsf2vfp(float a, float b);
+//
+// Returns one iff a < b and neither is NaN.
+// Uses Darwin calling convention where single precision arguments are passsed
+// like 32-bit ints
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__ltsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f32 s0, s1
+#else
+ vmov s14, r0 // move from GPR 0 to float register
+ vmov s15, r1 // move from GPR 1 to float register
+ vcmp.f32 s14, s15
+#endif
+ vmrs apsr_nzcv, fpscr
+ ITE(mi)
+ movmi r0, #1 // set result register to 1 if equal
+ movpl r0, #0
+ bx lr
+END_COMPILERRT_FUNCTION(__ltsf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/modsi3.S b/contrib/compiler-rt/lib/builtins/arm/modsi3.S
new file mode 100644
index 000000000000..be263834d7f1
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/modsi3.S
@@ -0,0 +1,60 @@
+/*===-- modsi3.S - 32-bit signed integer modulus --------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __modsi3 (32-bit signed integer modulus) function
+ * for the ARM architecture as a wrapper around the unsigned routine.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+#define ESTABLISH_FRAME \
+ push {r4, r7, lr} ;\
+ add r7, sp, #4
+#define CLEAR_FRAME_AND_RETURN \
+ pop {r4, r7, pc}
+
+ .syntax unified
+ .text
+ DEFINE_CODE_STATE
+
+@ int __modsi3(int divident, int divisor)
+@ Calculate and return the remainder of the (signed) division.
+
+ .p2align 3
+DEFINE_COMPILERRT_FUNCTION(__modsi3)
+#if __ARM_ARCH_EXT_IDIV__
+ tst r1, r1
+ beq LOCAL_LABEL(divzero)
+ sdiv r2, r0, r1
+ mls r0, r2, r1, r0
+ bx lr
+LOCAL_LABEL(divzero):
+ mov r0, #0
+ bx lr
+#else
+ ESTABLISH_FRAME
+ // Set aside the sign of the dividend.
+ mov r4, r0
+ // Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31).
+ eor r2, r0, r0, asr #31
+ eor r3, r1, r1, asr #31
+ sub r0, r2, r0, asr #31
+ sub r1, r3, r1, asr #31
+ // abs(a) % abs(b)
+ bl SYMBOL_NAME(__umodsi3)
+ // Apply sign of dividend to result and return.
+ eor r0, r0, r4, asr #31
+ sub r0, r0, r4, asr #31
+ CLEAR_FRAME_AND_RETURN
+#endif
+END_COMPILERRT_FUNCTION(__modsi3)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/muldf3vfp.S b/contrib/compiler-rt/lib/builtins/arm/muldf3vfp.S
new file mode 100644
index 000000000000..aa7b23495034
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/muldf3vfp.S
@@ -0,0 +1,33 @@
+//===-- muldf3vfp.S - Implement muldf3vfp ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __muldf3vfp(double a, double b);
+//
+// Multiplies two double precision floating point numbers using the Darwin
+// calling convention where double arguments are passsed in GPR pairs
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__muldf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vmul.f64 d0, d0, d1
+#else
+ vmov d6, r0, r1 // move first param from r0/r1 pair into d6
+ vmov d7, r2, r3 // move second param from r2/r3 pair into d7
+ vmul.f64 d6, d6, d7
+ vmov r0, r1, d6 // move result back to r0/r1 pair
+#endif
+ bx lr
+END_COMPILERRT_FUNCTION(__muldf3vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/mulsf3vfp.S b/contrib/compiler-rt/lib/builtins/arm/mulsf3vfp.S
new file mode 100644
index 000000000000..a1da789dcade
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/mulsf3vfp.S
@@ -0,0 +1,33 @@
+//===-- mulsf3vfp.S - Implement mulsf3vfp ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern float __mulsf3vfp(float a, float b);
+//
+// Multiplies two single precision floating point numbers using the Darwin
+// calling convention where single arguments are passsed like 32-bit ints.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__mulsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vmul.f32 s0, s0, s1
+#else
+ vmov s14, r0 // move first param from r0 into float register
+ vmov s15, r1 // move second param from r1 into float register
+ vmul.f32 s13, s14, s15
+#endif
+ vmov r0, s13 // move result back to r0
+ bx lr
+END_COMPILERRT_FUNCTION(__mulsf3vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/nedf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/nedf2vfp.S
new file mode 100644
index 000000000000..aef72eb00974
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/nedf2vfp.S
@@ -0,0 +1,37 @@
+//===-- nedf2vfp.S - Implement nedf2vfp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __nedf2vfp(double a, double b);
+//
+// Returns zero if a and b are unequal and neither is NaN.
+// Uses Darwin calling convention where double precision arguments are passsed
+// like in GPR pairs.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__nedf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f64 d0, d1
+#else
+ vmov d6, r0, r1 // load r0/r1 pair in double register
+ vmov d7, r2, r3 // load r2/r3 pair in double register
+ vcmp.f64 d6, d7
+#endif
+ vmrs apsr_nzcv, fpscr
+ ITE(ne)
+ movne r0, #1 // set result register to 0 if unequal
+ moveq r0, #0
+ bx lr
+END_COMPILERRT_FUNCTION(__nedf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/negdf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/negdf2vfp.S
new file mode 100644
index 000000000000..81f0ab8eec1d
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/negdf2vfp.S
@@ -0,0 +1,30 @@
+//===-- negdf2vfp.S - Implement negdf2vfp ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __negdf2vfp(double a, double b);
+//
+// Returns the negation a double precision floating point numbers using the
+// Darwin calling convention where double arguments are passsed in GPR pairs.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__negdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vneg.f64 d0, d0
+#else
+ eor r1, r1, #-2147483648 // flip sign bit on double in r0/r1 pair
+#endif
+ bx lr
+END_COMPILERRT_FUNCTION(__negdf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/negsf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/negsf2vfp.S
new file mode 100644
index 000000000000..46ab4a9cf164
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/negsf2vfp.S
@@ -0,0 +1,30 @@
+//===-- negsf2vfp.S - Implement negsf2vfp ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern float __negsf2vfp(float a);
+//
+// Returns the negation of a single precision floating point numbers using the
+// Darwin calling convention where single arguments are passsed like 32-bit ints
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__negsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vneg.f32 s0, s0
+#else
+ eor r0, r0, #-2147483648 // flip sign bit on float in r0
+#endif
+ bx lr
+END_COMPILERRT_FUNCTION(__negsf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/nesf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/nesf2vfp.S
new file mode 100644
index 000000000000..50d60f493005
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/nesf2vfp.S
@@ -0,0 +1,37 @@
+//===-- nesf2vfp.S - Implement nesf2vfp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __nesf2vfp(float a, float b);
+//
+// Returns one iff a != b and neither is NaN.
+// Uses Darwin calling convention where single precision arguments are passsed
+// like 32-bit ints
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__nesf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f32 s0, s1
+#else
+ vmov s14, r0 // move from GPR 0 to float register
+ vmov s15, r1 // move from GPR 1 to float register
+ vcmp.f32 s14, s15
+#endif
+ vmrs apsr_nzcv, fpscr
+ ITE(ne)
+ movne r0, #1 // set result register to 1 if unequal
+ moveq r0, #0
+ bx lr
+END_COMPILERRT_FUNCTION(__nesf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/restore_vfp_d8_d15_regs.S b/contrib/compiler-rt/lib/builtins/arm/restore_vfp_d8_d15_regs.S
new file mode 100644
index 000000000000..0692cf3e1b77
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/restore_vfp_d8_d15_regs.S
@@ -0,0 +1,35 @@
+//===-- save_restore_regs.S - Implement save/restore* ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// When compiling C++ functions that need to handle thrown exceptions the
+// compiler is required to save all registers and call __Unwind_SjLj_Register
+// in the function prolog. But when compiling for thumb1, there are
+// no instructions to access the floating point registers, so the
+// compiler needs to add a call to the helper function _save_vfp_d8_d15_regs
+// written in ARM to save the float registers. In the epilog, the compiler
+// must also add a call to __restore_vfp_d8_d15_regs to restore those registers.
+//
+
+ .text
+ .syntax unified
+
+//
+// Restore registers d8-d15 from stack
+//
+ .p2align 2
+DEFINE_COMPILERRT_PRIVATE_FUNCTION(__restore_vfp_d8_d15_regs)
+ vldmia sp!, {d8-d15} // pop registers d8-d15 off stack
+ bx lr // return to prolog
+END_COMPILERRT_FUNCTION(__restore_vfp_d8_d15_regs)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/save_vfp_d8_d15_regs.S b/contrib/compiler-rt/lib/builtins/arm/save_vfp_d8_d15_regs.S
new file mode 100644
index 000000000000..544dd5467a4d
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/save_vfp_d8_d15_regs.S
@@ -0,0 +1,35 @@
+//===-- save_restore_regs.S - Implement save/restore* ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// When compiling C++ functions that need to handle thrown exceptions the
+// compiler is required to save all registers and call __Unwind_SjLj_Register
+// in the function prolog. But when compiling for thumb1, there are
+// no instructions to access the floating point registers, so the
+// compiler needs to add a call to the helper function _save_vfp_d8_d15_regs
+// written in ARM to save the float registers. In the epilog, the compiler
+// must also add a call to __restore_vfp_d8_d15_regs to restore those registers.
+//
+
+ .text
+ .syntax unified
+
+//
+// Save registers d8-d15 onto stack
+//
+ .p2align 2
+DEFINE_COMPILERRT_PRIVATE_FUNCTION(__save_vfp_d8_d15_regs)
+ vstmdb sp!, {d8-d15} // push registers d8-d15 onto stack
+ bx lr // return to prolog
+END_COMPILERRT_FUNCTION(__save_vfp_d8_d15_regs)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/softfloat-alias.list b/contrib/compiler-rt/lib/builtins/arm/softfloat-alias.list
new file mode 100644
index 000000000000..cc6a4b3cdd2e
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/softfloat-alias.list
@@ -0,0 +1,21 @@
+#
+# These are soft float functions which can be
+# aliased to the *vfp functions on arm processors
+# that support floating point instructions.
+#
+___adddf3vfp ___adddf3
+___addsf3vfp ___addsf3
+___divdf3vfp ___divdf3
+___divsf3vfp ___divsf3
+___extendsfdf2vfp ___extendsfdf2
+___fixdfsivfp ___fixdfsi
+___fixsfsivfp ___fixsfsi
+___floatsidfvfp ___floatsidf
+___floatsisfvfp ___floatsisf
+___muldf3vfp ___muldf3
+___mulsf3vfp ___mulsf3
+___subdf3vfp ___subdf3
+___subsf3vfp ___subsf3
+___truncdfsf2vfp ___truncdfsf2
+___floatunssidfvfp ___floatunsidf
+___floatunssisfvfp ___floatunsisf
diff --git a/contrib/compiler-rt/lib/builtins/arm/subdf3vfp.S b/contrib/compiler-rt/lib/builtins/arm/subdf3vfp.S
new file mode 100644
index 000000000000..2b6f2bdbfdd5
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/subdf3vfp.S
@@ -0,0 +1,33 @@
+//===-- subdf3vfp.S - Implement subdf3vfp ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __subdf3vfp(double a, double b);
+//
+// Returns difference between two double precision floating point numbers using
+// the Darwin calling convention where double arguments are passsed in GPR pairs
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__subdf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vsub.f64 d0, d0, d1
+#else
+ vmov d6, r0, r1 // move first param from r0/r1 pair into d6
+ vmov d7, r2, r3 // move second param from r2/r3 pair into d7
+ vsub.f64 d6, d6, d7
+ vmov r0, r1, d6 // move result back to r0/r1 pair
+#endif
+ bx lr
+END_COMPILERRT_FUNCTION(__subdf3vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/subsf3vfp.S b/contrib/compiler-rt/lib/builtins/arm/subsf3vfp.S
new file mode 100644
index 000000000000..3e83ea26507d
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/subsf3vfp.S
@@ -0,0 +1,34 @@
+//===-- subsf3vfp.S - Implement subsf3vfp ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern float __subsf3vfp(float a, float b);
+//
+// Returns the difference between two single precision floating point numbers
+// using the Darwin calling convention where single arguments are passsed
+// like 32-bit ints.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__subsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vsub.f32 s0, s0, s1
+#else
+ vmov s14, r0 // move first param from r0 into float register
+ vmov s15, r1 // move second param from r1 into float register
+ vsub.f32 s14, s14, s15
+ vmov r0, s14 // move result back to r0
+#endif
+ bx lr
+END_COMPILERRT_FUNCTION(__subsf3vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/switch16.S b/contrib/compiler-rt/lib/builtins/arm/switch16.S
new file mode 100644
index 000000000000..df9e38e176ce
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/switch16.S
@@ -0,0 +1,46 @@
+//===-- switch.S - Implement switch* --------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// When compiling switch statements in thumb mode, the compiler
+// can use these __switch* helper functions The compiler emits a blx to
+// the __switch* function followed by a table of displacements for each
+// case statement. On entry, R0 is the index into the table. The __switch*
+// function uses the return address in lr to find the start of the table.
+// The first entry in the table is the count of the entries in the table.
+// It then uses R0 to index into the table and get the displacement of the
+// address to jump to. If R0 is greater than the size of the table, it jumps
+// to the last entry in the table. Each displacement in the table is actually
+// the distance from lr to the label, thus making the tables PIC.
+
+
+ .text
+ .syntax unified
+
+//
+// The table contains signed 2-byte sized elements which are 1/2 the distance
+// from lr to the target label.
+//
+ .p2align 2
+DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch16)
+ ldrh ip, [lr, #-1] // get first 16-bit word in table
+ cmp r0, ip // compare with index
+ add r0, lr, r0, lsl #1 // compute address of element in table
+ add ip, lr, ip, lsl #1 // compute address of last element in table
+ ite lo
+ ldrshlo r0, [r0, #1] // load 16-bit element if r0 is in range
+ ldrshhs r0, [ip, #1] // load 16-bit element if r0 out of range
+ add ip, lr, r0, lsl #1 // compute label = lr + element*2
+ bx ip // jump to computed label
+END_COMPILERRT_FUNCTION(__switch16)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/switch32.S b/contrib/compiler-rt/lib/builtins/arm/switch32.S
new file mode 100644
index 000000000000..d97b5361436d
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/switch32.S
@@ -0,0 +1,46 @@
+//===-- switch.S - Implement switch* --------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// When compiling switch statements in thumb mode, the compiler
+// can use these __switch* helper functions The compiler emits a blx to
+// the __switch* function followed by a table of displacements for each
+// case statement. On entry, R0 is the index into the table. The __switch*
+// function uses the return address in lr to find the start of the table.
+// The first entry in the table is the count of the entries in the table.
+// It then uses R0 to index into the table and get the displacement of the
+// address to jump to. If R0 is greater than the size of the table, it jumps
+// to the last entry in the table. Each displacement in the table is actually
+// the distance from lr to the label, thus making the tables PIC.
+
+
+ .text
+ .syntax unified
+
+//
+// The table contains signed 4-byte sized elements which are the distance
+// from lr to the target label.
+//
+ .p2align 2
+DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch32)
+ ldr ip, [lr, #-1] // get first 32-bit word in table
+ cmp r0, ip // compare with index
+ add r0, lr, r0, lsl #2 // compute address of element in table
+ add ip, lr, ip, lsl #2 // compute address of last element in table
+ ite lo
+ ldrlo r0, [r0, #3] // load 32-bit element if r0 is in range
+ ldrhs r0, [ip, #3] // load 32-bit element if r0 out of range
+ add ip, lr, r0 // compute label = lr + element
+ bx ip // jump to computed label
+END_COMPILERRT_FUNCTION(__switch32)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/switch8.S b/contrib/compiler-rt/lib/builtins/arm/switch8.S
new file mode 100644
index 000000000000..4d9e0eaff845
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/switch8.S
@@ -0,0 +1,44 @@
+//===-- switch.S - Implement switch* --------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// When compiling switch statements in thumb mode, the compiler
+// can use these __switch* helper functions The compiler emits a blx to
+// the __switch* function followed by a table of displacements for each
+// case statement. On entry, R0 is the index into the table. The __switch*
+// function uses the return address in lr to find the start of the table.
+// The first entry in the table is the count of the entries in the table.
+// It then uses R0 to index into the table and get the displacement of the
+// address to jump to. If R0 is greater than the size of the table, it jumps
+// to the last entry in the table. Each displacement in the table is actually
+// the distance from lr to the label, thus making the tables PIC.
+
+
+ .text
+ .syntax unified
+
+//
+// The table contains signed byte sized elements which are 1/2 the distance
+// from lr to the target label.
+//
+ .p2align 2
+DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch8)
+ ldrb ip, [lr, #-1] // get first byte in table
+ cmp r0, ip // signed compare with index
+ ite lo
+ ldrsblo r0, [lr, r0] // get indexed byte out of table
+ ldrsbhs r0, [lr, ip] // if out of range, use last entry in table
+ add ip, lr, r0, lsl #1 // compute label = lr + element*2
+ bx ip // jump to computed label
+END_COMPILERRT_FUNCTION(__switch8)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/switchu8.S b/contrib/compiler-rt/lib/builtins/arm/switchu8.S
new file mode 100644
index 000000000000..4ffe35f0549b
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/switchu8.S
@@ -0,0 +1,44 @@
+//===-- switch.S - Implement switch* --------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// When compiling switch statements in thumb mode, the compiler
+// can use these __switch* helper functions The compiler emits a blx to
+// the __switch* function followed by a table of displacements for each
+// case statement. On entry, R0 is the index into the table. The __switch*
+// function uses the return address in lr to find the start of the table.
+// The first entry in the table is the count of the entries in the table.
+// It then uses R0 to index into the table and get the displacement of the
+// address to jump to. If R0 is greater than the size of the table, it jumps
+// to the last entry in the table. Each displacement in the table is actually
+// the distance from lr to the label, thus making the tables PIC.
+
+
+ .text
+ .syntax unified
+
+//
+// The table contains unsigned byte sized elements which are 1/2 the distance
+// from lr to the target label.
+//
+ .p2align 2
+DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switchu8)
+ ldrb ip, [lr, #-1] // get first byte in table
+ cmp r0, ip // compare with index
+ ite lo
+ ldrblo r0, [lr, r0] // get indexed byte out of table
+ ldrbhs r0, [lr, ip] // if out of range, use last entry in table
+ add ip, lr, r0, lsl #1 // compute label = lr + element*2
+ bx ip // jump to computed label
+END_COMPILERRT_FUNCTION(__switchu8)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync-ops.h b/contrib/compiler-rt/lib/builtins/arm/sync-ops.h
new file mode 100644
index 000000000000..ee02c30c6eaa
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync-ops.h
@@ -0,0 +1,64 @@
+/*===-- sync-ops.h - --===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements outline macros for the __sync_fetch_and_*
+ * operations. Different instantiations will generate appropriate assembly for
+ * ARM and Thumb-2 versions of the functions.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+#define SYNC_OP_4(op) \
+ .p2align 2 ; \
+ .thumb ; \
+ .syntax unified ; \
+ DEFINE_COMPILERRT_THUMB_FUNCTION(__sync_fetch_and_ ## op) \
+ dmb ; \
+ mov r12, r0 ; \
+ LOCAL_LABEL(tryatomic_ ## op): \
+ ldrex r0, [r12] ; \
+ op(r2, r0, r1) ; \
+ strex r3, r2, [r12] ; \
+ cmp r3, #0 ; \
+ bne LOCAL_LABEL(tryatomic_ ## op) ; \
+ dmb ; \
+ bx lr
+
+#define SYNC_OP_8(op) \
+ .p2align 2 ; \
+ .thumb ; \
+ .syntax unified ; \
+ DEFINE_COMPILERRT_THUMB_FUNCTION(__sync_fetch_and_ ## op) \
+ push {r4, r5, r6, lr} ; \
+ dmb ; \
+ mov r12, r0 ; \
+ LOCAL_LABEL(tryatomic_ ## op): \
+ ldrexd r0, r1, [r12] ; \
+ op(r4, r5, r0, r1, r2, r3) ; \
+ strexd r6, r4, r5, [r12] ; \
+ cmp r6, #0 ; \
+ bne LOCAL_LABEL(tryatomic_ ## op) ; \
+ dmb ; \
+ pop {r4, r5, r6, pc}
+
+#define MINMAX_4(rD, rN, rM, cmp_kind) \
+ cmp rN, rM ; \
+ mov rD, rM ; \
+ it cmp_kind ; \
+ mov##cmp_kind rD, rN
+
+#define MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, cmp_kind) \
+ cmp rN_LO, rM_LO ; \
+ sbcs rN_HI, rM_HI ; \
+ mov rD_LO, rM_LO ; \
+ mov rD_HI, rM_HI ; \
+ itt cmp_kind ; \
+ mov##cmp_kind rD_LO, rN_LO ; \
+ mov##cmp_kind rD_HI, rN_HI
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_add_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_add_4.S
new file mode 100644
index 000000000000..7877d6c46c11
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_add_4.S
@@ -0,0 +1,23 @@
+/*===-- sync_fetch_and_add_4.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_add_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+/* "adds" is 2 bytes shorter than "add". */
+#define add_4(rD, rN, rM) add rD, rN, rM
+
+SYNC_OP_4(add_4)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_add_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_add_8.S
new file mode 100644
index 000000000000..1df07a342a1b
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_add_8.S
@@ -0,0 +1,26 @@
+/*===-- sync_fetch_and_add_8.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_add_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'
+#define add_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
+ adds rD_LO, rN_LO, rM_LO ; \
+ adc rD_HI, rN_HI, rM_HI
+
+SYNC_OP_8(add_8)
+#endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_and_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_and_4.S
new file mode 100644
index 000000000000..720ff02279cd
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_and_4.S
@@ -0,0 +1,22 @@
+/*===-- sync_fetch_and_and_4.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_and_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#define and_4(rD, rN, rM) and rD, rN, rM
+
+SYNC_OP_4(and_4)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_and_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_and_8.S
new file mode 100644
index 000000000000..4f7b5ca7ab29
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_and_8.S
@@ -0,0 +1,26 @@
+/*===-- sync_fetch_and_and_8.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_and_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'
+#define and_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
+ and rD_LO, rN_LO, rM_LO ; \
+ and rD_HI, rN_HI, rM_HI
+
+SYNC_OP_8(and_8)
+#endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_max_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_max_4.S
new file mode 100644
index 000000000000..43da9c7d4067
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_max_4.S
@@ -0,0 +1,22 @@
+/*===-- sync_fetch_and_max_4.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_max_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#define max_4(rD, rN, rM) MINMAX_4(rD, rN, rM, gt)
+
+SYNC_OP_4(max_4)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_max_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_max_8.S
new file mode 100644
index 000000000000..898fc6202ac8
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_max_8.S
@@ -0,0 +1,24 @@
+/*===-- sync_fetch_and_max_8.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_max_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'
+#define max_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, gt)
+
+SYNC_OP_8(max_8)
+#endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_min_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_min_4.S
new file mode 100644
index 000000000000..bba31a03aace
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_min_4.S
@@ -0,0 +1,22 @@
+/*===-- sync_fetch_and_min_4.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_min_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#define min_4(rD, rN, rM) MINMAX_4(rD, rN, rM, lt)
+
+SYNC_OP_4(min_4)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_min_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_min_8.S
new file mode 100644
index 000000000000..e7ccf9fb60ef
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_min_8.S
@@ -0,0 +1,24 @@
+/*===-- sync_fetch_and_min_8.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_min_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'
+#define min_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, lt)
+
+SYNC_OP_8(min_8)
+#endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_nand_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_nand_4.S
new file mode 100644
index 000000000000..c13dd394588f
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_nand_4.S
@@ -0,0 +1,22 @@
+/*===-- sync_fetch_and_nand_4.S - -----------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_nand_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#define nand_4(rD, rN, rM) bic rD, rN, rM
+
+SYNC_OP_4(nand_4)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_nand_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_nand_8.S
new file mode 100644
index 000000000000..e8107ab3a33c
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_nand_8.S
@@ -0,0 +1,26 @@
+/*===-- sync_fetch_and_nand_8.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_nand_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'
+#define nand_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
+ bic rD_LO, rN_LO, rM_LO ; \
+ bic rD_HI, rN_HI, rM_HI
+
+SYNC_OP_8(nand_8)
+#endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_or_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_or_4.S
new file mode 100644
index 000000000000..6726571a944f
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_or_4.S
@@ -0,0 +1,22 @@
+/*===-- sync_fetch_and_or_4.S - -------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_or_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#define or_4(rD, rN, rM) orr rD, rN, rM
+
+SYNC_OP_4(or_4)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_or_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_or_8.S
new file mode 100644
index 000000000000..f7f162c7c3b3
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_or_8.S
@@ -0,0 +1,26 @@
+/*===-- sync_fetch_and_or_8.S - -------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_or_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'
+#define or_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
+ orr rD_LO, rN_LO, rM_LO ; \
+ orr rD_HI, rN_HI, rM_HI
+
+SYNC_OP_8(or_8)
+#endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_sub_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_sub_4.S
new file mode 100644
index 000000000000..b9326b14cdd5
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_sub_4.S
@@ -0,0 +1,23 @@
+/*===-- sync_fetch_and_sub_4.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_sub_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+/* "subs" is 2 bytes shorter than "sub". */
+#define sub_4(rD, rN, rM) sub rD, rN, rM
+
+SYNC_OP_4(sub_4)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_sub_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_sub_8.S
new file mode 100644
index 000000000000..6ce743e5ee9f
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_sub_8.S
@@ -0,0 +1,26 @@
+/*===-- sync_fetch_and_sub_8.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_sub_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'
+#define sub_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
+ subs rD_LO, rN_LO, rM_LO ; \
+ sbc rD_HI, rN_HI, rM_HI
+
+SYNC_OP_8(sub_8)
+#endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umax_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umax_4.S
new file mode 100644
index 000000000000..b8d19ff35057
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umax_4.S
@@ -0,0 +1,22 @@
+/*===-- sync_fetch_and_umax_4.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_umax_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#define umax_4(rD, rN, rM) MINMAX_4(rD, rN, rM, hi)
+
+SYNC_OP_4(umax_4)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umax_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umax_8.S
new file mode 100644
index 000000000000..34442fd77454
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umax_8.S
@@ -0,0 +1,24 @@
+/*===-- sync_fetch_and_umax_8.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_umax_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'
+#define umax_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, hi)
+
+SYNC_OP_8(umax_8)
+#endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umin_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umin_4.S
new file mode 100644
index 000000000000..0998e3e10f58
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umin_4.S
@@ -0,0 +1,22 @@
+/*===-- sync_fetch_and_umin_4.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_umin_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#define umin_4(rD, rN, rM) MINMAX_4(rD, rN, rM, lo)
+
+SYNC_OP_4(umin_4)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umin_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umin_8.S
new file mode 100644
index 000000000000..558f91390512
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umin_8.S
@@ -0,0 +1,24 @@
+/*===-- sync_fetch_and_umin_8.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_umin_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'
+#define umin_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, lo)
+
+SYNC_OP_8(umin_8)
+#endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_xor_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_xor_4.S
new file mode 100644
index 000000000000..824f49146880
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_xor_4.S
@@ -0,0 +1,22 @@
+/*===-- sync_fetch_and_xor_4.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_xor_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#define xor_4(rD, rN, rM) eor rD, rN, rM
+
+SYNC_OP_4(xor_4)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_xor_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_xor_8.S
new file mode 100644
index 000000000000..073fb9c20f25
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_xor_8.S
@@ -0,0 +1,26 @@
+/*===-- sync_fetch_and_xor_8.S - ------------------------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_xor_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'
+#define xor_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
+ eor rD_LO, rN_LO, rM_LO ; \
+ eor rD_HI, rN_HI, rM_HI
+
+SYNC_OP_8(xor_8)
+#endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_synchronize.S b/contrib/compiler-rt/lib/builtins/arm/sync_synchronize.S
new file mode 100644
index 000000000000..61d1db910f0d
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_synchronize.S
@@ -0,0 +1,38 @@
+//===-- sync_synchronize - Implement memory barrier * ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// When compiling a use of the gcc built-in __sync_synchronize() in thumb1 mode
+// the compiler may emit a call to __sync_synchronize.
+// On Darwin the implementation jumps to an OS supplied function named
+// OSMemoryBarrier
+//
+
+ .text
+ .syntax unified
+
+#if __APPLE__
+
+ .p2align 2
+DEFINE_COMPILERRT_PRIVATE_FUNCTION(__sync_synchronize)
+ stmfd sp!, {r7, lr}
+ add r7, sp, #0
+ bl _OSMemoryBarrier
+ ldmfd sp!, {r7, pc}
+END_COMPILERRT_FUNCTION(__sync_synchronize)
+
+ // tell linker it can break up file at label boundaries
+ .subsections_via_symbols
+
+#endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/truncdfsf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/truncdfsf2vfp.S
new file mode 100644
index 000000000000..682e54d3d294
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/truncdfsf2vfp.S
@@ -0,0 +1,33 @@
+//===-- truncdfsf2vfp.S - Implement truncdfsf2vfp -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern float __truncdfsf2vfp(double a);
+//
+// Converts double precision float to signle precision result.
+// Uses Darwin calling convention where a double precision parameter is
+// passed in a R0/R1 pair and a signle precision result is returned in R0.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__truncdfsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcvt.f32.f64 s0, d0
+#else
+ vmov d7, r0, r1 // load double from r0/r1 pair
+ vcvt.f32.f64 s15, d7 // convert double to single (trucate precision)
+ vmov r0, s15 // return result in r0
+#endif
+ bx lr
+END_COMPILERRT_FUNCTION(__truncdfsf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/udivmodsi4.S b/contrib/compiler-rt/lib/builtins/arm/udivmodsi4.S
new file mode 100644
index 000000000000..ee3950c9b0eb
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/udivmodsi4.S
@@ -0,0 +1,180 @@
+/*===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __udivmodsi4 (32-bit unsigned integer divide and
+ * modulus) function for the ARM 32-bit architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+ .syntax unified
+ .text
+ DEFINE_CODE_STATE
+
+@ unsigned int __udivmodsi4(unsigned int divident, unsigned int divisor,
+@ unsigned int *remainder)
+@ Calculate the quotient and remainder of the (unsigned) division. The return
+@ value is the quotient, the remainder is placed in the variable.
+
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
+#if __ARM_ARCH_EXT_IDIV__
+ tst r1, r1
+ beq LOCAL_LABEL(divby0)
+ mov r3, r0
+ udiv r0, r3, r1
+ mls r1, r0, r1, r3
+ str r1, [r2]
+ bx lr
+#else
+ cmp r1, #1
+ bcc LOCAL_LABEL(divby0)
+ beq LOCAL_LABEL(divby1)
+ cmp r0, r1
+ bcc LOCAL_LABEL(quotient0)
+ /*
+ * Implement division using binary long division algorithm.
+ *
+ * r0 is the numerator, r1 the denominator.
+ *
+ * The code before JMP computes the correct shift I, so that
+ * r0 and (r1 << I) have the highest bit set in the same position.
+ * At the time of JMP, ip := .Ldiv0block - 12 * I.
+ * This depends on the fixed instruction size of block.
+ * For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes.
+ *
+ * block(shift) implements the test-and-update-quotient core.
+ * It assumes (r0 << shift) can be computed without overflow and
+ * that (r0 << shift) < 2 * r1. The quotient is stored in r3.
+ */
+
+# ifdef __ARM_FEATURE_CLZ
+ clz ip, r0
+ clz r3, r1
+ /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
+ sub r3, r3, ip
+# if defined(USE_THUMB_2)
+ adr ip, LOCAL_LABEL(div0block) + 1
+ sub ip, ip, r3, lsl #1
+# else
+ adr ip, LOCAL_LABEL(div0block)
+# endif
+ sub ip, ip, r3, lsl #2
+ sub ip, ip, r3, lsl #3
+ mov r3, #0
+ bx ip
+# else
+# if defined(USE_THUMB_2)
+# error THUMB mode requires CLZ or UDIV
+# endif
+ str r4, [sp, #-8]!
+
+ mov r4, r0
+ adr ip, LOCAL_LABEL(div0block)
+
+ lsr r3, r4, #16
+ cmp r3, r1
+ movhs r4, r3
+ subhs ip, ip, #(16 * 12)
+
+ lsr r3, r4, #8
+ cmp r3, r1
+ movhs r4, r3
+ subhs ip, ip, #(8 * 12)
+
+ lsr r3, r4, #4
+ cmp r3, r1
+ movhs r4, r3
+ subhs ip, #(4 * 12)
+
+ lsr r3, r4, #2
+ cmp r3, r1
+ movhs r4, r3
+ subhs ip, ip, #(2 * 12)
+
+ /* Last block, no need to update r3 or r4. */
+ cmp r1, r4, lsr #1
+ subls ip, ip, #(1 * 12)
+
+ ldr r4, [sp], #8 /* restore r4, we are done with it. */
+ mov r3, #0
+
+ JMP(ip)
+# endif
+
+#define IMM #
+
+#define block(shift) \
+ cmp r0, r1, lsl IMM shift; \
+ ITT(hs); \
+ WIDE(addhs) r3, r3, IMM (1 << shift); \
+ WIDE(subhs) r0, r0, r1, lsl IMM shift
+
+ block(31)
+ block(30)
+ block(29)
+ block(28)
+ block(27)
+ block(26)
+ block(25)
+ block(24)
+ block(23)
+ block(22)
+ block(21)
+ block(20)
+ block(19)
+ block(18)
+ block(17)
+ block(16)
+ block(15)
+ block(14)
+ block(13)
+ block(12)
+ block(11)
+ block(10)
+ block(9)
+ block(8)
+ block(7)
+ block(6)
+ block(5)
+ block(4)
+ block(3)
+ block(2)
+ block(1)
+LOCAL_LABEL(div0block):
+ block(0)
+
+ str r0, [r2]
+ mov r0, r3
+ JMP(lr)
+
+LOCAL_LABEL(quotient0):
+ str r0, [r2]
+ mov r0, #0
+ JMP(lr)
+
+LOCAL_LABEL(divby1):
+ mov r3, #0
+ str r3, [r2]
+ JMP(lr)
+#endif /* __ARM_ARCH_EXT_IDIV__ */
+
+LOCAL_LABEL(divby0):
+ mov r0, #0
+#ifdef __ARM_EABI__
+ b __aeabi_idiv0
+#else
+ JMP(lr)
+#endif
+
+END_COMPILERRT_FUNCTION(__udivmodsi4)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/udivsi3.S b/contrib/compiler-rt/lib/builtins/arm/udivsi3.S
new file mode 100644
index 000000000000..6dea27d404ff
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/udivsi3.S
@@ -0,0 +1,264 @@
+/*===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __udivsi3 (32-bit unsigned integer divide)
+ * function for the ARM 32-bit architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+ .syntax unified
+ .text
+
+DEFINE_CODE_STATE
+
+ .p2align 2
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3)
+
+@ unsigned int __udivsi3(unsigned int divident, unsigned int divisor)
+@ Calculate and return the quotient of the (unsigned) division.
+
+DEFINE_COMPILERRT_FUNCTION(__udivsi3)
+#if __ARM_ARCH_EXT_IDIV__
+ tst r1, r1
+ beq LOCAL_LABEL(divby0)
+ udiv r0, r0, r1
+ bx lr
+
+LOCAL_LABEL(divby0):
+ mov r0, #0
+# ifdef __ARM_EABI__
+ b __aeabi_idiv0
+# else
+ JMP(lr)
+# endif
+
+#else /* ! __ARM_ARCH_EXT_IDIV__ */
+ cmp r1, #1
+ bcc LOCAL_LABEL(divby0)
+#if defined(USE_THUMB_1)
+ bne LOCAL_LABEL(num_neq_denom)
+ JMP(lr)
+LOCAL_LABEL(num_neq_denom):
+#else
+ IT(eq)
+ JMPc(lr, eq)
+#endif
+ cmp r0, r1
+#if defined(USE_THUMB_1)
+ bhs LOCAL_LABEL(num_ge_denom)
+ movs r0, #0
+ JMP(lr)
+LOCAL_LABEL(num_ge_denom):
+#else
+ ITT(cc)
+ movcc r0, #0
+ JMPc(lr, cc)
+#endif
+
+ /*
+ * Implement division using binary long division algorithm.
+ *
+ * r0 is the numerator, r1 the denominator.
+ *
+ * The code before JMP computes the correct shift I, so that
+ * r0 and (r1 << I) have the highest bit set in the same position.
+ * At the time of JMP, ip := .Ldiv0block - 12 * I.
+ * This depends on the fixed instruction size of block.
+ * For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes.
+ *
+ * block(shift) implements the test-and-update-quotient core.
+ * It assumes (r0 << shift) can be computed without overflow and
+ * that (r0 << shift) < 2 * r1. The quotient is stored in r3.
+ */
+
+# if defined(__ARM_FEATURE_CLZ)
+ clz ip, r0
+ clz r3, r1
+ /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
+ sub r3, r3, ip
+# if defined(USE_THUMB_2)
+ adr ip, LOCAL_LABEL(div0block) + 1
+ sub ip, ip, r3, lsl #1
+# else
+ adr ip, LOCAL_LABEL(div0block)
+# endif
+ sub ip, ip, r3, lsl #2
+ sub ip, ip, r3, lsl #3
+ mov r3, #0
+ bx ip
+# else /* No CLZ Feature */
+# if defined(USE_THUMB_2)
+# error THUMB mode requires CLZ or UDIV
+# endif
+# if defined(USE_THUMB_1)
+# define BLOCK_SIZE 10
+# else
+# define BLOCK_SIZE 12
+# endif
+
+ mov r2, r0
+# if defined(USE_THUMB_1)
+ mov ip, r0
+ adr r0, LOCAL_LABEL(div0block)
+ adds r0, #1
+# else
+ adr ip, LOCAL_LABEL(div0block)
+# endif
+ lsrs r3, r2, #16
+ cmp r3, r1
+# if defined(USE_THUMB_1)
+ blo LOCAL_LABEL(skip_16)
+ movs r2, r3
+ subs r0, r0, #(16 * BLOCK_SIZE)
+LOCAL_LABEL(skip_16):
+# else
+ movhs r2, r3
+ subhs ip, ip, #(16 * BLOCK_SIZE)
+# endif
+
+ lsrs r3, r2, #8
+ cmp r3, r1
+# if defined(USE_THUMB_1)
+ blo LOCAL_LABEL(skip_8)
+ movs r2, r3
+ subs r0, r0, #(8 * BLOCK_SIZE)
+LOCAL_LABEL(skip_8):
+# else
+ movhs r2, r3
+ subhs ip, ip, #(8 * BLOCK_SIZE)
+# endif
+
+ lsrs r3, r2, #4
+ cmp r3, r1
+# if defined(USE_THUMB_1)
+ blo LOCAL_LABEL(skip_4)
+ movs r2, r3
+ subs r0, r0, #(4 * BLOCK_SIZE)
+LOCAL_LABEL(skip_4):
+# else
+ movhs r2, r3
+ subhs ip, #(4 * BLOCK_SIZE)
+# endif
+
+ lsrs r3, r2, #2
+ cmp r3, r1
+# if defined(USE_THUMB_1)
+ blo LOCAL_LABEL(skip_2)
+ movs r2, r3
+ subs r0, r0, #(2 * BLOCK_SIZE)
+LOCAL_LABEL(skip_2):
+# else
+ movhs r2, r3
+ subhs ip, ip, #(2 * BLOCK_SIZE)
+# endif
+
+ /* Last block, no need to update r2 or r3. */
+# if defined(USE_THUMB_1)
+ lsrs r3, r2, #1
+ cmp r3, r1
+ blo LOCAL_LABEL(skip_1)
+ subs r0, r0, #(1 * BLOCK_SIZE)
+LOCAL_LABEL(skip_1):
+ movs r2, r0
+ mov r0, ip
+ movs r3, #0
+ JMP (r2)
+
+# else
+ cmp r1, r2, lsr #1
+ subls ip, ip, #(1 * BLOCK_SIZE)
+
+ movs r3, #0
+
+ JMP(ip)
+# endif
+# endif /* __ARM_FEATURE_CLZ */
+
+
+#define IMM #
+ /* due to the range limit of branch in Thumb1, we have to place the
+ block closer */
+LOCAL_LABEL(divby0):
+ movs r0, #0
+# if defined(__ARM_EABI__)
+ push {r7, lr}
+ bl __aeabi_idiv0 // due to relocation limit, can't use b.
+ pop {r7, pc}
+# else
+ JMP(lr)
+# endif
+
+
+#if defined(USE_THUMB_1)
+#define block(shift) \
+ lsls r2, r1, IMM shift; \
+ cmp r0, r2; \
+ blo LOCAL_LABEL(block_skip_##shift); \
+ subs r0, r0, r2; \
+ LOCAL_LABEL(block_skip_##shift) :; \
+ adcs r3, r3 /* same as ((r3 << 1) | Carry). Carry is set if r0 >= r2. */
+
+ /* TODO: if current location counter is not not word aligned, we don't
+ need the .p2align and nop */
+ /* Label div0block must be word-aligned. First align block 31 */
+ .p2align 2
+ nop /* Padding to align div0block as 31 blocks = 310 bytes */
+
+#else
+#define block(shift) \
+ cmp r0, r1, lsl IMM shift; \
+ ITT(hs); \
+ WIDE(addhs) r3, r3, IMM (1 << shift); \
+ WIDE(subhs) r0, r0, r1, lsl IMM shift
+#endif
+
+ block(31)
+ block(30)
+ block(29)
+ block(28)
+ block(27)
+ block(26)
+ block(25)
+ block(24)
+ block(23)
+ block(22)
+ block(21)
+ block(20)
+ block(19)
+ block(18)
+ block(17)
+ block(16)
+ block(15)
+ block(14)
+ block(13)
+ block(12)
+ block(11)
+ block(10)
+ block(9)
+ block(8)
+ block(7)
+ block(6)
+ block(5)
+ block(4)
+ block(3)
+ block(2)
+ block(1)
+LOCAL_LABEL(div0block):
+ block(0)
+
+ mov r0, r3
+ JMP(lr)
+#endif /* __ARM_ARCH_EXT_IDIV__ */
+
+END_COMPILERRT_FUNCTION(__udivsi3)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/umodsi3.S b/contrib/compiler-rt/lib/builtins/arm/umodsi3.S
new file mode 100644
index 000000000000..069fad34cb9c
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/umodsi3.S
@@ -0,0 +1,158 @@
+/*===-- umodsi3.S - 32-bit unsigned integer modulus -----------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __umodsi3 (32-bit unsigned integer modulus)
+ * function for the ARM 32-bit architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+ .syntax unified
+ .text
+ DEFINE_CODE_STATE
+
+@ unsigned int __umodsi3(unsigned int divident, unsigned int divisor)
+@ Calculate and return the remainder of the (unsigned) division.
+
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__umodsi3)
+#if __ARM_ARCH_EXT_IDIV__
+ tst r1, r1
+ beq LOCAL_LABEL(divby0)
+ udiv r2, r0, r1
+ mls r0, r2, r1, r0
+ bx lr
+#else
+ cmp r1, #1
+ bcc LOCAL_LABEL(divby0)
+ ITT(eq)
+ moveq r0, #0
+ JMPc(lr, eq)
+ cmp r0, r1
+ IT(cc)
+ JMPc(lr, cc)
+ /*
+ * Implement division using binary long division algorithm.
+ *
+ * r0 is the numerator, r1 the denominator.
+ *
+ * The code before JMP computes the correct shift I, so that
+ * r0 and (r1 << I) have the highest bit set in the same position.
+ * At the time of JMP, ip := .Ldiv0block - 8 * I.
+ * This depends on the fixed instruction size of block.
+ * For ARM mode, this is 8 Bytes, for THUMB mode 10 Bytes.
+ *
+ * block(shift) implements the test-and-update-quotient core.
+ * It assumes (r0 << shift) can be computed without overflow and
+ * that (r0 << shift) < 2 * r1. The quotient is stored in r3.
+ */
+
+# ifdef __ARM_FEATURE_CLZ
+ clz ip, r0
+ clz r3, r1
+ /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
+ sub r3, r3, ip
+# if defined(USE_THUMB_2)
+ adr ip, LOCAL_LABEL(div0block) + 1
+ sub ip, ip, r3, lsl #1
+# else
+ adr ip, LOCAL_LABEL(div0block)
+# endif
+ sub ip, ip, r3, lsl #3
+ bx ip
+# else
+# if defined(USE_THUMB_2)
+# error THUMB mode requires CLZ or UDIV
+# endif
+ mov r2, r0
+ adr ip, LOCAL_LABEL(div0block)
+
+ lsr r3, r2, #16
+ cmp r3, r1
+ movhs r2, r3
+ subhs ip, ip, #(16 * 8)
+
+ lsr r3, r2, #8
+ cmp r3, r1
+ movhs r2, r3
+ subhs ip, ip, #(8 * 8)
+
+ lsr r3, r2, #4
+ cmp r3, r1
+ movhs r2, r3
+ subhs ip, #(4 * 8)
+
+ lsr r3, r2, #2
+ cmp r3, r1
+ movhs r2, r3
+ subhs ip, ip, #(2 * 8)
+
+ /* Last block, no need to update r2 or r3. */
+ cmp r1, r2, lsr #1
+ subls ip, ip, #(1 * 8)
+
+ JMP(ip)
+# endif
+
+#define IMM #
+
+#define block(shift) \
+ cmp r0, r1, lsl IMM shift; \
+ IT(hs); \
+ WIDE(subhs) r0, r0, r1, lsl IMM shift
+
+ block(31)
+ block(30)
+ block(29)
+ block(28)
+ block(27)
+ block(26)
+ block(25)
+ block(24)
+ block(23)
+ block(22)
+ block(21)
+ block(20)
+ block(19)
+ block(18)
+ block(17)
+ block(16)
+ block(15)
+ block(14)
+ block(13)
+ block(12)
+ block(11)
+ block(10)
+ block(9)
+ block(8)
+ block(7)
+ block(6)
+ block(5)
+ block(4)
+ block(3)
+ block(2)
+ block(1)
+LOCAL_LABEL(div0block):
+ block(0)
+ JMP(lr)
+#endif /* __ARM_ARCH_EXT_IDIV__ */
+
+LOCAL_LABEL(divby0):
+ mov r0, #0
+#ifdef __ARM_EABI__
+ b __aeabi_idiv0
+#else
+ JMP(lr)
+#endif
+
+END_COMPILERRT_FUNCTION(__umodsi3)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/unorddf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/unorddf2vfp.S
new file mode 100644
index 000000000000..6625fa8a3119
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/unorddf2vfp.S
@@ -0,0 +1,37 @@
+//===-- unorddf2vfp.S - Implement unorddf2vfp ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __unorddf2vfp(double a, double b);
+//
+// Returns one iff a or b is NaN
+// Uses Darwin calling convention where double precision arguments are passsed
+// like in GPR pairs.
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__unorddf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f64 d0, d1
+#else
+ vmov d6, r0, r1 // load r0/r1 pair in double register
+ vmov d7, r2, r3 // load r2/r3 pair in double register
+ vcmp.f64 d6, d7
+#endif
+ vmrs apsr_nzcv, fpscr
+ ITE(vs)
+ movvs r0, #1 // set result register to 1 if "overflow" (any NaNs)
+ movvc r0, #0
+ bx lr
+END_COMPILERRT_FUNCTION(__unorddf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/unordsf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/unordsf2vfp.S
new file mode 100644
index 000000000000..0b5da2ba3e17
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/unordsf2vfp.S
@@ -0,0 +1,37 @@
+//===-- unordsf2vfp.S - Implement unordsf2vfp -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __unordsf2vfp(float a, float b);
+//
+// Returns one iff a or b is NaN
+// Uses Darwin calling convention where single precision arguments are passsed
+// like 32-bit ints
+//
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__unordsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f32 s0, s1
+#else
+ vmov s14, r0 // move from GPR 0 to float register
+ vmov s15, r1 // move from GPR 1 to float register
+ vcmp.f32 s14, s15
+#endif
+ vmrs apsr_nzcv, fpscr
+ ITE(vs)
+ movvs r0, #1 // set result register to 1 if "overflow" (any NaNs)
+ movvc r0, #0
+ bx lr
+END_COMPILERRT_FUNCTION(__unordsf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/ashldi3.c b/contrib/compiler-rt/lib/builtins/ashldi3.c
new file mode 100644
index 000000000000..a5c1836006b9
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/ashldi3.c
@@ -0,0 +1,45 @@
+/* ====-- ashldi3.c - Implement __ashldi3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __ashldi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a << b */
+
+/* Precondition: 0 <= b < bits_in_dword */
+
+COMPILER_RT_ABI di_int
+__ashldi3(di_int a, si_int b)
+{
+ const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT);
+ dwords input;
+ dwords result;
+ input.all = a;
+ if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */
+ {
+ result.s.low = 0;
+ result.s.high = input.s.low << (b - bits_in_word);
+ }
+ else /* 0 <= b < bits_in_word */
+ {
+ if (b == 0)
+ return a;
+ result.s.low = input.s.low << b;
+ result.s.high = (input.s.high << b) | (input.s.low >> (bits_in_word - b));
+ }
+ return result.all;
+}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI di_int __aeabi_llsl(di_int a, si_int b) COMPILER_RT_ALIAS(__ashldi3);
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/ashlti3.c b/contrib/compiler-rt/lib/builtins/ashlti3.c
new file mode 100644
index 000000000000..638ae845ff04
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/ashlti3.c
@@ -0,0 +1,45 @@
+/* ===-- ashlti3.c - Implement __ashlti3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __ashlti3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: a << b */
+
+/* Precondition: 0 <= b < bits_in_tword */
+
+COMPILER_RT_ABI ti_int
+__ashlti3(ti_int a, si_int b)
+{
+ const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT);
+ twords input;
+ twords result;
+ input.all = a;
+ if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */
+ {
+ result.s.low = 0;
+ result.s.high = input.s.low << (b - bits_in_dword);
+ }
+ else /* 0 <= b < bits_in_dword */
+ {
+ if (b == 0)
+ return a;
+ result.s.low = input.s.low << b;
+ result.s.high = (input.s.high << b) | (input.s.low >> (bits_in_dword - b));
+ }
+ return result.all;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/ashrdi3.c b/contrib/compiler-rt/lib/builtins/ashrdi3.c
new file mode 100644
index 000000000000..84619965eca0
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/ashrdi3.c
@@ -0,0 +1,46 @@
+/*===-- ashrdi3.c - Implement __ashrdi3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __ashrdi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: arithmetic a >> b */
+
+/* Precondition: 0 <= b < bits_in_dword */
+
+COMPILER_RT_ABI di_int
+__ashrdi3(di_int a, si_int b)
+{
+ const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT);
+ dwords input;
+ dwords result;
+ input.all = a;
+ if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */
+ {
+ /* result.s.high = input.s.high < 0 ? -1 : 0 */
+ result.s.high = input.s.high >> (bits_in_word - 1);
+ result.s.low = input.s.high >> (b - bits_in_word);
+ }
+ else /* 0 <= b < bits_in_word */
+ {
+ if (b == 0)
+ return a;
+ result.s.high = input.s.high >> b;
+ result.s.low = (input.s.high << (bits_in_word - b)) | (input.s.low >> b);
+ }
+ return result.all;
+}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI di_int __aeabi_lasr(di_int a, si_int b) COMPILER_RT_ALIAS(__ashrdi3);
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/ashrti3.c b/contrib/compiler-rt/lib/builtins/ashrti3.c
new file mode 100644
index 000000000000..f78205d961e4
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/ashrti3.c
@@ -0,0 +1,46 @@
+/* ===-- ashrti3.c - Implement __ashrti3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __ashrti3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: arithmetic a >> b */
+
+/* Precondition: 0 <= b < bits_in_tword */
+
+COMPILER_RT_ABI ti_int
+__ashrti3(ti_int a, si_int b)
+{
+ const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT);
+ twords input;
+ twords result;
+ input.all = a;
+ if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */
+ {
+ /* result.s.high = input.s.high < 0 ? -1 : 0 */
+ result.s.high = input.s.high >> (bits_in_dword - 1);
+ result.s.low = input.s.high >> (b - bits_in_dword);
+ }
+ else /* 0 <= b < bits_in_dword */
+ {
+ if (b == 0)
+ return a;
+ result.s.high = input.s.high >> b;
+ result.s.low = (input.s.high << (bits_in_dword - b)) | (input.s.low >> b);
+ }
+ return result.all;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/assembly.h b/contrib/compiler-rt/lib/builtins/assembly.h
new file mode 100644
index 000000000000..3f5e59b25442
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/assembly.h
@@ -0,0 +1,204 @@
+/* ===-- assembly.h - compiler-rt assembler support macros -----------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file defines macros for use in compiler-rt assembler source.
+ * This file is not part of the interface of this library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#ifndef COMPILERRT_ASSEMBLY_H
+#define COMPILERRT_ASSEMBLY_H
+
+#if defined(__POWERPC__) || defined(__powerpc__) || defined(__ppc__)
+#define SEPARATOR @
+#else
+#define SEPARATOR ;
+#endif
+
+#if defined(__APPLE__)
+#define HIDDEN(name) .private_extern name
+#define LOCAL_LABEL(name) L_##name
+// tell linker it can break up file at label boundaries
+#define FILE_LEVEL_DIRECTIVE .subsections_via_symbols
+#define SYMBOL_IS_FUNC(name)
+#define CONST_SECTION .const
+
+#define NO_EXEC_STACK_DIRECTIVE
+
+#elif defined(__ELF__)
+
+#define HIDDEN(name) .hidden name
+#define LOCAL_LABEL(name) .L_##name
+#define FILE_LEVEL_DIRECTIVE
+#if defined(__arm__)
+#define SYMBOL_IS_FUNC(name) .type name,%function
+#else
+#define SYMBOL_IS_FUNC(name) .type name,@function
+#endif
+#define CONST_SECTION .section .rodata
+
+#if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \
+ defined(__linux__)
+#define NO_EXEC_STACK_DIRECTIVE .section .note.GNU-stack,"",%progbits
+#else
+#define NO_EXEC_STACK_DIRECTIVE
+#endif
+
+#else // !__APPLE__ && !__ELF__
+
+#define HIDDEN(name)
+#define LOCAL_LABEL(name) .L ## name
+#define FILE_LEVEL_DIRECTIVE
+#define SYMBOL_IS_FUNC(name) \
+ .def name SEPARATOR \
+ .scl 2 SEPARATOR \
+ .type 32 SEPARATOR \
+ .endef
+#define CONST_SECTION .section .rdata,"rd"
+
+#define NO_EXEC_STACK_DIRECTIVE
+
+#endif
+
+#if defined(__arm__)
+
+/*
+ * Determine actual [ARM][THUMB[1][2]] ISA using compiler predefined macros:
+ * - for '-mthumb -march=armv6' compiler defines '__thumb__'
+ * - for '-mthumb -march=armv7' compiler defines '__thumb__' and '__thumb2__'
+ */
+#if defined(__thumb2__) || defined(__thumb__)
+#define DEFINE_CODE_STATE .thumb SEPARATOR
+#define DECLARE_FUNC_ENCODING .thumb_func SEPARATOR
+#if defined(__thumb2__)
+#define USE_THUMB_2
+#define IT(cond) it cond
+#define ITT(cond) itt cond
+#define ITE(cond) ite cond
+#else
+#define USE_THUMB_1
+#define IT(cond)
+#define ITT(cond)
+#define ITE(cond)
+#endif // defined(__thumb__2)
+#else // !defined(__thumb2__) && !defined(__thumb__)
+#define DEFINE_CODE_STATE .arm SEPARATOR
+#define DECLARE_FUNC_ENCODING
+#define IT(cond)
+#define ITT(cond)
+#define ITE(cond)
+#endif
+
+#if defined(USE_THUMB_1) && defined(USE_THUMB_2)
+#error "USE_THUMB_1 and USE_THUMB_2 can't be defined together."
+#endif
+
+#if defined(__ARM_ARCH_4T__) || __ARM_ARCH >= 5
+#define ARM_HAS_BX
+#endif
+#if !defined(__ARM_FEATURE_CLZ) && !defined(USE_THUMB_1) && \
+ (__ARM_ARCH >= 6 || (__ARM_ARCH == 5 && !defined(__ARM_ARCH_5__)))
+#define __ARM_FEATURE_CLZ
+#endif
+
+#ifdef ARM_HAS_BX
+#define JMP(r) bx r
+#define JMPc(r, c) bx##c r
+#else
+#define JMP(r) mov pc, r
+#define JMPc(r, c) mov##c pc, r
+#endif
+
+// pop {pc} can't switch Thumb mode on ARMv4T
+#if __ARM_ARCH >= 5
+#define POP_PC() pop {pc}
+#else
+#define POP_PC() \
+ pop {ip}; \
+ JMP(ip)
+#endif
+
+#if defined(USE_THUMB_2)
+#define WIDE(op) op.w
+#else
+#define WIDE(op) op
+#endif
+#else // !defined(__arm)
+#define DECLARE_FUNC_ENCODING
+#define DEFINE_CODE_STATE
+#endif
+
+#define GLUE2(a, b) a##b
+#define GLUE(a, b) GLUE2(a, b)
+#define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name)
+
+#ifdef VISIBILITY_HIDDEN
+#define DECLARE_SYMBOL_VISIBILITY(name) \
+ HIDDEN(SYMBOL_NAME(name)) SEPARATOR
+#else
+#define DECLARE_SYMBOL_VISIBILITY(name)
+#endif
+
+#define DEFINE_COMPILERRT_FUNCTION(name) \
+ DEFINE_CODE_STATE \
+ FILE_LEVEL_DIRECTIVE SEPARATOR \
+ .globl SYMBOL_NAME(name) SEPARATOR \
+ SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \
+ DECLARE_SYMBOL_VISIBILITY(name) \
+ DECLARE_FUNC_ENCODING \
+ SYMBOL_NAME(name):
+
+#define DEFINE_COMPILERRT_THUMB_FUNCTION(name) \
+ DEFINE_CODE_STATE \
+ FILE_LEVEL_DIRECTIVE SEPARATOR \
+ .globl SYMBOL_NAME(name) SEPARATOR \
+ SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \
+ DECLARE_SYMBOL_VISIBILITY(name) SEPARATOR \
+ .thumb_func SEPARATOR \
+ SYMBOL_NAME(name):
+
+#define DEFINE_COMPILERRT_PRIVATE_FUNCTION(name) \
+ DEFINE_CODE_STATE \
+ FILE_LEVEL_DIRECTIVE SEPARATOR \
+ .globl SYMBOL_NAME(name) SEPARATOR \
+ SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \
+ HIDDEN(SYMBOL_NAME(name)) SEPARATOR \
+ DECLARE_FUNC_ENCODING \
+ SYMBOL_NAME(name):
+
+#define DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(name) \
+ DEFINE_CODE_STATE \
+ .globl name SEPARATOR \
+ SYMBOL_IS_FUNC(name) SEPARATOR \
+ HIDDEN(name) SEPARATOR \
+ DECLARE_FUNC_ENCODING \
+ name:
+
+#define DEFINE_COMPILERRT_FUNCTION_ALIAS(name, target) \
+ .globl SYMBOL_NAME(name) SEPARATOR \
+ SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \
+ DECLARE_SYMBOL_VISIBILITY(SYMBOL_NAME(name)) SEPARATOR \
+ .set SYMBOL_NAME(name), SYMBOL_NAME(target) SEPARATOR
+
+#if defined(__ARM_EABI__)
+#define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name) \
+ DEFINE_COMPILERRT_FUNCTION_ALIAS(aeabi_name, name)
+#else
+#define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name)
+#endif
+
+#ifdef __ELF__
+#define END_COMPILERRT_FUNCTION(name) \
+ .size SYMBOL_NAME(name), . - SYMBOL_NAME(name)
+#else
+#define END_COMPILERRT_FUNCTION(name)
+#endif
+
+#endif /* COMPILERRT_ASSEMBLY_H */
diff --git a/contrib/compiler-rt/lib/builtins/atomic.c b/contrib/compiler-rt/lib/builtins/atomic.c
new file mode 100644
index 000000000000..ee35e342eda5
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/atomic.c
@@ -0,0 +1,338 @@
+/*===-- atomic.c - Implement support functions for atomic operations.------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===
+ *
+ * atomic.c defines a set of functions for performing atomic accesses on
+ * arbitrary-sized memory locations. This design uses locks that should
+ * be fast in the uncontended case, for two reasons:
+ *
+ * 1) This code must work with C programs that do not link to anything
+ * (including pthreads) and so it should not depend on any pthread
+ * functions.
+ * 2) Atomic operations, rather than explicit mutexes, are most commonly used
+ * on code where contended operations are rate.
+ *
+ * To avoid needing a per-object lock, this code allocates an array of
+ * locks and hashes the object pointers to find the one that it should use.
+ * For operations that must be atomic on two locations, the lower lock is
+ * always acquired first, to avoid deadlock.
+ *
+ *===----------------------------------------------------------------------===
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "assembly.h"
+
+// Clang objects if you redefine a builtin. This little hack allows us to
+// define a function with the same name as an intrinsic.
+#pragma redefine_extname __atomic_load_c SYMBOL_NAME(__atomic_load)
+#pragma redefine_extname __atomic_store_c SYMBOL_NAME(__atomic_store)
+#pragma redefine_extname __atomic_exchange_c SYMBOL_NAME(__atomic_exchange)
+#pragma redefine_extname __atomic_compare_exchange_c SYMBOL_NAME(__atomic_compare_exchange)
+
+/// Number of locks. This allocates one page on 32-bit platforms, two on
+/// 64-bit. This can be specified externally if a different trade between
+/// memory usage and contention probability is required for a given platform.
+#ifndef SPINLOCK_COUNT
+#define SPINLOCK_COUNT (1<<10)
+#endif
+static const long SPINLOCK_MASK = SPINLOCK_COUNT - 1;
+
+////////////////////////////////////////////////////////////////////////////////
+// Platform-specific lock implementation. Falls back to spinlocks if none is
+// defined. Each platform should define the Lock type, and corresponding
+// lock() and unlock() functions.
+////////////////////////////////////////////////////////////////////////////////
+#ifdef __FreeBSD__
+#include <errno.h>
+#include <sys/types.h>
+#include <machine/atomic.h>
+#include <sys/umtx.h>
+typedef struct _usem Lock;
+__inline static void unlock(Lock *l) {
+ __c11_atomic_store((_Atomic(uint32_t)*)&l->_count, 1, __ATOMIC_RELEASE);
+ __c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
+ if (l->_has_waiters)
+ _umtx_op(l, UMTX_OP_SEM_WAKE, 1, 0, 0);
+}
+__inline static void lock(Lock *l) {
+ uint32_t old = 1;
+ while (!__c11_atomic_compare_exchange_weak((_Atomic(uint32_t)*)&l->_count, &old,
+ 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
+ _umtx_op(l, UMTX_OP_SEM_WAIT, 0, 0, 0);
+ old = 1;
+ }
+}
+/// locks for atomic operations
+static Lock locks[SPINLOCK_COUNT] = { [0 ... SPINLOCK_COUNT-1] = {0,1,0} };
+
+#elif defined(__APPLE__)
+#include <libkern/OSAtomic.h>
+typedef OSSpinLock Lock;
+__inline static void unlock(Lock *l) {
+ OSSpinLockUnlock(l);
+}
+/// Locks a lock. In the current implementation, this is potentially
+/// unbounded in the contended case.
+__inline static void lock(Lock *l) {
+ OSSpinLockLock(l);
+}
+static Lock locks[SPINLOCK_COUNT]; // initialized to OS_SPINLOCK_INIT which is 0
+
+#else
+typedef _Atomic(uintptr_t) Lock;
+/// Unlock a lock. This is a release operation.
+__inline static void unlock(Lock *l) {
+ __c11_atomic_store(l, 0, __ATOMIC_RELEASE);
+}
+/// Locks a lock. In the current implementation, this is potentially
+/// unbounded in the contended case.
+__inline static void lock(Lock *l) {
+ uintptr_t old = 0;
+ while (!__c11_atomic_compare_exchange_weak(l, &old, 1, __ATOMIC_ACQUIRE,
+ __ATOMIC_RELAXED))
+ old = 0;
+}
+/// locks for atomic operations
+static Lock locks[SPINLOCK_COUNT];
+#endif
+
+
+/// Returns a lock to use for a given pointer.
+static __inline Lock *lock_for_pointer(void *ptr) {
+ intptr_t hash = (intptr_t)ptr;
+ // Disregard the lowest 4 bits. We want all values that may be part of the
+ // same memory operation to hash to the same value and therefore use the same
+ // lock.
+ hash >>= 4;
+ // Use the next bits as the basis for the hash
+ intptr_t low = hash & SPINLOCK_MASK;
+ // Now use the high(er) set of bits to perturb the hash, so that we don't
+ // get collisions from atomic fields in a single object
+ hash >>= 16;
+ hash ^= low;
+ // Return a pointer to the word to use
+ return locks + (hash & SPINLOCK_MASK);
+}
+
+/// Macros for determining whether a size is lock free. Clang can not yet
+/// codegen __atomic_is_lock_free(16), so for now we assume 16-byte values are
+/// not lock free.
+#define IS_LOCK_FREE_1 __c11_atomic_is_lock_free(1)
+#define IS_LOCK_FREE_2 __c11_atomic_is_lock_free(2)
+#define IS_LOCK_FREE_4 __c11_atomic_is_lock_free(4)
+#define IS_LOCK_FREE_8 __c11_atomic_is_lock_free(8)
+#define IS_LOCK_FREE_16 0
+
+/// Macro that calls the compiler-generated lock-free versions of functions
+/// when they exist.
+#define LOCK_FREE_CASES() \
+ do {\
+ switch (size) {\
+ case 2:\
+ if (IS_LOCK_FREE_2) {\
+ LOCK_FREE_ACTION(uint16_t);\
+ }\
+ case 4:\
+ if (IS_LOCK_FREE_4) {\
+ LOCK_FREE_ACTION(uint32_t);\
+ }\
+ case 8:\
+ if (IS_LOCK_FREE_8) {\
+ LOCK_FREE_ACTION(uint64_t);\
+ }\
+ case 16:\
+ if (IS_LOCK_FREE_16) {\
+ /* FIXME: __uint128_t isn't available on 32 bit platforms.
+ LOCK_FREE_ACTION(__uint128_t);*/\
+ }\
+ }\
+ } while (0)
+
+
+/// An atomic load operation. This is atomic with respect to the source
+/// pointer only.
+void __atomic_load_c(int size, void *src, void *dest, int model) {
+#define LOCK_FREE_ACTION(type) \
+ *((type*)dest) = __c11_atomic_load((_Atomic(type)*)src, model);\
+ return;
+ LOCK_FREE_CASES();
+#undef LOCK_FREE_ACTION
+ Lock *l = lock_for_pointer(src);
+ lock(l);
+ memcpy(dest, src, size);
+ unlock(l);
+}
+
+/// An atomic store operation. This is atomic with respect to the destination
+/// pointer only.
+void __atomic_store_c(int size, void *dest, void *src, int model) {
+#define LOCK_FREE_ACTION(type) \
+ __c11_atomic_store((_Atomic(type)*)dest, *(type*)dest, model);\
+ return;
+ LOCK_FREE_CASES();
+#undef LOCK_FREE_ACTION
+ Lock *l = lock_for_pointer(dest);
+ lock(l);
+ memcpy(dest, src, size);
+ unlock(l);
+}
+
+/// Atomic compare and exchange operation. If the value at *ptr is identical
+/// to the value at *expected, then this copies value at *desired to *ptr. If
+/// they are not, then this stores the current value from *ptr in *expected.
+///
+/// This function returns 1 if the exchange takes place or 0 if it fails.
+int __atomic_compare_exchange_c(int size, void *ptr, void *expected,
+ void *desired, int success, int failure) {
+#define LOCK_FREE_ACTION(type) \
+ return __c11_atomic_compare_exchange_strong((_Atomic(type)*)ptr, (type*)expected,\
+ *(type*)desired, success, failure)
+ LOCK_FREE_CASES();
+#undef LOCK_FREE_ACTION
+ Lock *l = lock_for_pointer(ptr);
+ lock(l);
+ if (memcmp(ptr, expected, size) == 0) {
+ memcpy(ptr, desired, size);
+ unlock(l);
+ return 1;
+ }
+ memcpy(expected, ptr, size);
+ unlock(l);
+ return 0;
+}
+
+/// Performs an atomic exchange operation between two pointers. This is atomic
+/// with respect to the target address.
+void __atomic_exchange_c(int size, void *ptr, void *val, void *old, int model) {
+#define LOCK_FREE_ACTION(type) \
+ *(type*)old = __c11_atomic_exchange((_Atomic(type)*)ptr, *(type*)val,\
+ model);\
+ return;
+ LOCK_FREE_CASES();
+#undef LOCK_FREE_ACTION
+ Lock *l = lock_for_pointer(ptr);
+ lock(l);
+ memcpy(old, ptr, size);
+ memcpy(ptr, val, size);
+ unlock(l);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Where the size is known at compile time, the compiler may emit calls to
+// specialised versions of the above functions.
+////////////////////////////////////////////////////////////////////////////////
+#ifdef __SIZEOF_INT128__
+#define OPTIMISED_CASES\
+ OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t)\
+ OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t)\
+ OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t)\
+ OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t)\
+ OPTIMISED_CASE(16, IS_LOCK_FREE_16, __uint128_t)
+#else
+#define OPTIMISED_CASES\
+ OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t)\
+ OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t)\
+ OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t)\
+ OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t)
+#endif
+
+#define OPTIMISED_CASE(n, lockfree, type)\
+type __atomic_load_##n(type *src, int model) {\
+ if (lockfree)\
+ return __c11_atomic_load((_Atomic(type)*)src, model);\
+ Lock *l = lock_for_pointer(src);\
+ lock(l);\
+ type val = *src;\
+ unlock(l);\
+ return val;\
+}
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+
+#define OPTIMISED_CASE(n, lockfree, type)\
+void __atomic_store_##n(type *dest, type val, int model) {\
+ if (lockfree) {\
+ __c11_atomic_store((_Atomic(type)*)dest, val, model);\
+ return;\
+ }\
+ Lock *l = lock_for_pointer(dest);\
+ lock(l);\
+ *dest = val;\
+ unlock(l);\
+ return;\
+}
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+
+#define OPTIMISED_CASE(n, lockfree, type)\
+type __atomic_exchange_##n(type *dest, type val, int model) {\
+ if (lockfree)\
+ return __c11_atomic_exchange((_Atomic(type)*)dest, val, model);\
+ Lock *l = lock_for_pointer(dest);\
+ lock(l);\
+ type tmp = *dest;\
+ *dest = val;\
+ unlock(l);\
+ return tmp;\
+}
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+
+#define OPTIMISED_CASE(n, lockfree, type)\
+int __atomic_compare_exchange_##n(type *ptr, type *expected, type desired,\
+ int success, int failure) {\
+ if (lockfree)\
+ return __c11_atomic_compare_exchange_strong((_Atomic(type)*)ptr, expected, desired,\
+ success, failure);\
+ Lock *l = lock_for_pointer(ptr);\
+ lock(l);\
+ if (*ptr == *expected) {\
+ *ptr = desired;\
+ unlock(l);\
+ return 1;\
+ }\
+ *expected = *ptr;\
+ unlock(l);\
+ return 0;\
+}
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+
+////////////////////////////////////////////////////////////////////////////////
+// Atomic read-modify-write operations for integers of various sizes.
+////////////////////////////////////////////////////////////////////////////////
+#define ATOMIC_RMW(n, lockfree, type, opname, op) \
+type __atomic_fetch_##opname##_##n(type *ptr, type val, int model) {\
+ if (lockfree) \
+ return __c11_atomic_fetch_##opname((_Atomic(type)*)ptr, val, model);\
+ Lock *l = lock_for_pointer(ptr);\
+ lock(l);\
+ type tmp = *ptr;\
+ *ptr = tmp op val;\
+ unlock(l);\
+ return tmp;\
+}
+
+#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, add, +)
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, sub, -)
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, and, &)
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, or, |)
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, xor, ^)
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
diff --git a/contrib/compiler-rt/lib/builtins/atomic_flag_clear.c b/contrib/compiler-rt/lib/builtins/atomic_flag_clear.c
new file mode 100644
index 000000000000..da912af64312
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/atomic_flag_clear.c
@@ -0,0 +1,27 @@
+/*===-- atomic_flag_clear.c -------------------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===------------------------------------------------------------------------===
+ *
+ * This file implements atomic_flag_clear from C11's stdatomic.h.
+ *
+ *===------------------------------------------------------------------------===
+ */
+
+#ifndef __has_include
+#define __has_include(inc) 0
+#endif
+
+#if __has_include(<stdatomic.h>)
+
+#include <stdatomic.h>
+#undef atomic_flag_clear
+void atomic_flag_clear(volatile atomic_flag *object) {
+ __c11_atomic_store(&(object)->_Value, 0, __ATOMIC_SEQ_CST);
+}
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/atomic_flag_clear_explicit.c b/contrib/compiler-rt/lib/builtins/atomic_flag_clear_explicit.c
new file mode 100644
index 000000000000..1059b787f169
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/atomic_flag_clear_explicit.c
@@ -0,0 +1,28 @@
+/*===-- atomic_flag_clear_explicit.c ----------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===------------------------------------------------------------------------===
+ *
+ * This file implements atomic_flag_clear_explicit from C11's stdatomic.h.
+ *
+ *===------------------------------------------------------------------------===
+ */
+
+#ifndef __has_include
+#define __has_include(inc) 0
+#endif
+
+#if __has_include(<stdatomic.h>)
+
+#include <stdatomic.h>
+#undef atomic_flag_clear_explicit
+void atomic_flag_clear_explicit(volatile atomic_flag *object,
+ memory_order order) {
+ __c11_atomic_store(&(object)->_Value, 0, order);
+}
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/atomic_flag_test_and_set.c b/contrib/compiler-rt/lib/builtins/atomic_flag_test_and_set.c
new file mode 100644
index 000000000000..e8811d39ef25
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/atomic_flag_test_and_set.c
@@ -0,0 +1,27 @@
+/*===-- atomic_flag_test_and_set.c ------------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===------------------------------------------------------------------------===
+ *
+ * This file implements atomic_flag_test_and_set from C11's stdatomic.h.
+ *
+ *===------------------------------------------------------------------------===
+ */
+
+#ifndef __has_include
+#define __has_include(inc) 0
+#endif
+
+#if __has_include(<stdatomic.h>)
+
+#include <stdatomic.h>
+#undef atomic_flag_test_and_set
+_Bool atomic_flag_test_and_set(volatile atomic_flag *object) {
+ return __c11_atomic_exchange(&(object)->_Value, 1, __ATOMIC_SEQ_CST);
+}
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/atomic_flag_test_and_set_explicit.c b/contrib/compiler-rt/lib/builtins/atomic_flag_test_and_set_explicit.c
new file mode 100644
index 000000000000..5c8c2df90543
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/atomic_flag_test_and_set_explicit.c
@@ -0,0 +1,28 @@
+/*===-- atomic_flag_test_and_set_explicit.c ---------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===------------------------------------------------------------------------===
+ *
+ * This file implements atomic_flag_test_and_set_explicit from C11's stdatomic.h
+ *
+ *===------------------------------------------------------------------------===
+ */
+
+#ifndef __has_include
+#define __has_include(inc) 0
+#endif
+
+#if __has_include(<stdatomic.h>)
+
+#include <stdatomic.h>
+#undef atomic_flag_test_and_set_explicit
+_Bool atomic_flag_test_and_set_explicit(volatile atomic_flag *object,
+ memory_order order) {
+ return __c11_atomic_exchange(&(object)->_Value, 1, order);
+}
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/atomic_signal_fence.c b/contrib/compiler-rt/lib/builtins/atomic_signal_fence.c
new file mode 100644
index 000000000000..9ccc2ae60ad8
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/atomic_signal_fence.c
@@ -0,0 +1,27 @@
+/*===-- atomic_signal_fence.c -----------------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===------------------------------------------------------------------------===
+ *
+ * This file implements atomic_signal_fence from C11's stdatomic.h.
+ *
+ *===------------------------------------------------------------------------===
+ */
+
+#ifndef __has_include
+#define __has_include(inc) 0
+#endif
+
+#if __has_include(<stdatomic.h>)
+
+#include <stdatomic.h>
+#undef atomic_signal_fence
+void atomic_signal_fence(memory_order order) {
+ __c11_atomic_signal_fence(order);
+}
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/atomic_thread_fence.c b/contrib/compiler-rt/lib/builtins/atomic_thread_fence.c
new file mode 100644
index 000000000000..d22560151bc8
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/atomic_thread_fence.c
@@ -0,0 +1,27 @@
+/*===-- atomic_thread_fence.c -----------------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===------------------------------------------------------------------------===
+ *
+ * This file implements atomic_thread_fence from C11's stdatomic.h.
+ *
+ *===------------------------------------------------------------------------===
+ */
+
+#ifndef __has_include
+#define __has_include(inc) 0
+#endif
+
+#if __has_include(<stdatomic.h>)
+
+#include <stdatomic.h>
+#undef atomic_thread_fence
+void atomic_thread_fence(memory_order order) {
+ __c11_atomic_thread_fence(order);
+}
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/bswapdi2.c b/contrib/compiler-rt/lib/builtins/bswapdi2.c
new file mode 100644
index 000000000000..eb220007bb10
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/bswapdi2.c
@@ -0,0 +1,27 @@
+/* ===-- bswapdi2.c - Implement __bswapdi2 ---------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __bswapdi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI uint64_t __bswapdi2(uint64_t u) {
+ return (
+ (((u)&0xff00000000000000ULL) >> 56) |
+ (((u)&0x00ff000000000000ULL) >> 40) |
+ (((u)&0x0000ff0000000000ULL) >> 24) |
+ (((u)&0x000000ff00000000ULL) >> 8) |
+ (((u)&0x00000000ff000000ULL) << 8) |
+ (((u)&0x0000000000ff0000ULL) << 24) |
+ (((u)&0x000000000000ff00ULL) << 40) |
+ (((u)&0x00000000000000ffULL) << 56));
+}
diff --git a/contrib/compiler-rt/lib/builtins/bswapsi2.c b/contrib/compiler-rt/lib/builtins/bswapsi2.c
new file mode 100644
index 000000000000..5d941e69f7c4
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/bswapsi2.c
@@ -0,0 +1,23 @@
+/* ===-- bswapsi2.c - Implement __bswapsi2 ---------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __bswapsi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI uint32_t __bswapsi2(uint32_t u) {
+ return (
+ (((u)&0xff000000) >> 24) |
+ (((u)&0x00ff0000) >> 8) |
+ (((u)&0x0000ff00) << 8) |
+ (((u)&0x000000ff) << 24));
+}
diff --git a/contrib/compiler-rt/lib/builtins/clear_cache.c b/contrib/compiler-rt/lib/builtins/clear_cache.c
new file mode 100644
index 000000000000..9dcab344ad1b
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/clear_cache.c
@@ -0,0 +1,199 @@
+/* ===-- clear_cache.c - Implement __clear_cache ---------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+#include <assert.h>
+#include <stddef.h>
+
+#if __APPLE__
+ #include <libkern/OSCacheControl.h>
+#endif
+
+#if defined(_WIN32)
+/* Forward declare Win32 APIs since the GCC mode driver does not handle the
+ newer SDKs as well as needed. */
+uint32_t FlushInstructionCache(uintptr_t hProcess, void *lpBaseAddress,
+ uintptr_t dwSize);
+uintptr_t GetCurrentProcess(void);
+#endif
+
+#if defined(__FreeBSD__) && defined(__arm__)
+ #include <sys/types.h>
+ #include <machine/sysarch.h>
+#endif
+
+#if defined(__NetBSD__) && defined(__arm__)
+ #include <machine/sysarch.h>
+#endif
+
+#if defined(__OpenBSD__) && defined(__mips__)
+ #include <sys/types.h>
+ #include <machine/sysarch.h>
+#endif
+
+#if defined(__linux__) && defined(__mips__)
+ #include <sys/cachectl.h>
+ #include <sys/syscall.h>
+ #include <unistd.h>
+ #if defined(__ANDROID__) && defined(__LP64__)
+ /*
+ * clear_mips_cache - Invalidates instruction cache for Mips.
+ */
+ static void clear_mips_cache(const void* Addr, size_t Size) {
+ __asm__ volatile (
+ ".set push\n"
+ ".set noreorder\n"
+ ".set noat\n"
+ "beq %[Size], $zero, 20f\n" /* If size == 0, branch around. */
+ "nop\n"
+ "daddu %[Size], %[Addr], %[Size]\n" /* Calculate end address + 1 */
+ "rdhwr $v0, $1\n" /* Get step size for SYNCI.
+ $1 is $HW_SYNCI_Step */
+ "beq $v0, $zero, 20f\n" /* If no caches require
+ synchronization, branch
+ around. */
+ "nop\n"
+ "10:\n"
+ "synci 0(%[Addr])\n" /* Synchronize all caches around
+ address. */
+ "daddu %[Addr], %[Addr], $v0\n" /* Add step size. */
+ "sltu $at, %[Addr], %[Size]\n" /* Compare current with end
+ address. */
+ "bne $at, $zero, 10b\n" /* Branch if more to do. */
+ "nop\n"
+ "sync\n" /* Clear memory hazards. */
+ "20:\n"
+ "bal 30f\n"
+ "nop\n"
+ "30:\n"
+ "daddiu $ra, $ra, 12\n" /* $ra has a value of $pc here.
+ Add offset of 12 to point to the
+ instruction after the last nop.
+ */
+ "jr.hb $ra\n" /* Return, clearing instruction
+ hazards. */
+ "nop\n"
+ ".set pop\n"
+ : [Addr] "+r"(Addr), [Size] "+r"(Size)
+ :: "at", "ra", "v0", "memory"
+ );
+ }
+ #endif
+#endif
+
+/*
+ * The compiler generates calls to __clear_cache() when creating
+ * trampoline functions on the stack for use with nested functions.
+ * It is expected to invalidate the instruction cache for the
+ * specified range.
+ */
+
+void __clear_cache(void *start, void *end) {
+#if __i386__ || __x86_64__ || defined(_M_IX86) || defined(_M_X64)
+/*
+ * Intel processors have a unified instruction and data cache
+ * so there is nothing to do
+ */
+#elif defined(_WIN32) && (defined(__arm__) || defined(__aarch64__))
+ FlushInstructionCache(GetCurrentProcess(), start, end - start);
+#elif defined(__arm__) && !defined(__APPLE__)
+ #if defined(__FreeBSD__) || defined(__NetBSD__)
+ struct arm_sync_icache_args arg;
+
+ arg.addr = (uintptr_t)start;
+ arg.len = (uintptr_t)end - (uintptr_t)start;
+
+ sysarch(ARM_SYNC_ICACHE, &arg);
+ #elif defined(__linux__)
+ /*
+ * We used to include asm/unistd.h for the __ARM_NR_cacheflush define, but
+ * it also brought many other unused defines, as well as a dependency on
+ * kernel headers to be installed.
+ *
+ * This value is stable at least since Linux 3.13 and should remain so for
+ * compatibility reasons, warranting it's re-definition here.
+ */
+ #define __ARM_NR_cacheflush 0x0f0002
+ register int start_reg __asm("r0") = (int) (intptr_t) start;
+ const register int end_reg __asm("r1") = (int) (intptr_t) end;
+ const register int flags __asm("r2") = 0;
+ const register int syscall_nr __asm("r7") = __ARM_NR_cacheflush;
+ __asm __volatile("svc 0x0"
+ : "=r"(start_reg)
+ : "r"(syscall_nr), "r"(start_reg), "r"(end_reg),
+ "r"(flags));
+ assert(start_reg == 0 && "Cache flush syscall failed.");
+ #else
+ compilerrt_abort();
+ #endif
+#elif defined(__linux__) && defined(__mips__)
+ const uintptr_t start_int = (uintptr_t) start;
+ const uintptr_t end_int = (uintptr_t) end;
+ #if defined(__ANDROID__) && defined(__LP64__)
+ // Call synci implementation for short address range.
+ const uintptr_t address_range_limit = 256;
+ if ((end_int - start_int) <= address_range_limit) {
+ clear_mips_cache(start, (end_int - start_int));
+ } else {
+ syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE);
+ }
+ #else
+ syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE);
+ #endif
+#elif defined(__mips__) && defined(__OpenBSD__)
+ cacheflush(start, (uintptr_t)end - (uintptr_t)start, BCACHE);
+#elif defined(__aarch64__) && !defined(__APPLE__)
+ uint64_t xstart = (uint64_t)(uintptr_t) start;
+ uint64_t xend = (uint64_t)(uintptr_t) end;
+ uint64_t addr;
+
+ // Get Cache Type Info
+ uint64_t ctr_el0;
+ __asm __volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
+
+ /*
+ * dc & ic instructions must use 64bit registers so we don't use
+ * uintptr_t in case this runs in an IPL32 environment.
+ */
+ const size_t dcache_line_size = 4 << ((ctr_el0 >> 16) & 15);
+ for (addr = xstart & ~(dcache_line_size - 1); addr < xend;
+ addr += dcache_line_size)
+ __asm __volatile("dc cvau, %0" :: "r"(addr));
+ __asm __volatile("dsb ish");
+
+ const size_t icache_line_size = 4 << ((ctr_el0 >> 0) & 15);
+ for (addr = xstart & ~(icache_line_size - 1); addr < xend;
+ addr += icache_line_size)
+ __asm __volatile("ic ivau, %0" :: "r"(addr));
+ __asm __volatile("isb sy");
+#elif defined (__powerpc64__)
+ const size_t line_size = 32;
+ const size_t len = (uintptr_t)end - (uintptr_t)start;
+
+ const uintptr_t mask = ~(line_size - 1);
+ const uintptr_t start_line = ((uintptr_t)start) & mask;
+ const uintptr_t end_line = ((uintptr_t)start + len + line_size - 1) & mask;
+
+ for (uintptr_t line = start_line; line < end_line; line += line_size)
+ __asm__ volatile("dcbf 0, %0" : : "r"(line));
+ __asm__ volatile("sync");
+
+ for (uintptr_t line = start_line; line < end_line; line += line_size)
+ __asm__ volatile("icbi 0, %0" : : "r"(line));
+ __asm__ volatile("isync");
+#else
+ #if __APPLE__
+ /* On Darwin, sys_icache_invalidate() provides this functionality */
+ sys_icache_invalidate(start, end-start);
+ #else
+ compilerrt_abort();
+ #endif
+#endif
+}
diff --git a/contrib/compiler-rt/lib/builtins/clzdi2.c b/contrib/compiler-rt/lib/builtins/clzdi2.c
new file mode 100644
index 000000000000..1819e6be436b
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/clzdi2.c
@@ -0,0 +1,40 @@
+/* ===-- clzdi2.c - Implement __clzdi2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __clzdi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: the number of leading 0-bits */
+
+#if !defined(__clang__) && \
+ ((defined(__sparc__) && defined(__arch64__)) || \
+ defined(__mips64) || \
+ (defined(__riscv) && __SIZEOF_POINTER__ >= 8))
+/* On 64-bit architectures with neither a native clz instruction nor a native
+ * ctz instruction, gcc resolves __builtin_clz to __clzdi2 rather than
+ * __clzsi2, leading to infinite recursion. */
+#define __builtin_clz(a) __clzsi2(a)
+extern si_int __clzsi2(si_int);
+#endif
+
+/* Precondition: a != 0 */
+
+COMPILER_RT_ABI si_int
+__clzdi2(di_int a)
+{
+ dwords x;
+ x.all = a;
+ const si_int f = -(x.s.high == 0);
+ return __builtin_clz((x.s.high & ~f) | (x.s.low & f)) +
+ (f & ((si_int)(sizeof(si_int) * CHAR_BIT)));
+}
diff --git a/contrib/compiler-rt/lib/builtins/clzsi2.c b/contrib/compiler-rt/lib/builtins/clzsi2.c
new file mode 100644
index 000000000000..25b8ed2c4c24
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/clzsi2.c
@@ -0,0 +1,53 @@
+/* ===-- clzsi2.c - Implement __clzsi2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __clzsi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: the number of leading 0-bits */
+
+/* Precondition: a != 0 */
+
+COMPILER_RT_ABI si_int
+__clzsi2(si_int a)
+{
+ su_int x = (su_int)a;
+ si_int t = ((x & 0xFFFF0000) == 0) << 4; /* if (x is small) t = 16 else 0 */
+ x >>= 16 - t; /* x = [0 - 0xFFFF] */
+ su_int r = t; /* r = [0, 16] */
+ /* return r + clz(x) */
+ t = ((x & 0xFF00) == 0) << 3;
+ x >>= 8 - t; /* x = [0 - 0xFF] */
+ r += t; /* r = [0, 8, 16, 24] */
+ /* return r + clz(x) */
+ t = ((x & 0xF0) == 0) << 2;
+ x >>= 4 - t; /* x = [0 - 0xF] */
+ r += t; /* r = [0, 4, 8, 12, 16, 20, 24, 28] */
+ /* return r + clz(x) */
+ t = ((x & 0xC) == 0) << 1;
+ x >>= 2 - t; /* x = [0 - 3] */
+ r += t; /* r = [0 - 30] and is even */
+ /* return r + clz(x) */
+/* switch (x)
+ * {
+ * case 0:
+ * return r + 2;
+ * case 1:
+ * return r + 1;
+ * case 2:
+ * case 3:
+ * return r;
+ * }
+ */
+ return r + ((2 - x) & -((x & 2) == 0));
+}
diff --git a/contrib/compiler-rt/lib/builtins/clzti2.c b/contrib/compiler-rt/lib/builtins/clzti2.c
new file mode 100644
index 000000000000..15a7b3c90004
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/clzti2.c
@@ -0,0 +1,33 @@
+/* ===-- clzti2.c - Implement __clzti2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __clzti2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: the number of leading 0-bits */
+
+/* Precondition: a != 0 */
+
+COMPILER_RT_ABI si_int
+__clzti2(ti_int a)
+{
+ twords x;
+ x.all = a;
+ const di_int f = -(x.s.high == 0);
+ return __builtin_clzll((x.s.high & ~f) | (x.s.low & f)) +
+ ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT)));
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/cmpdi2.c b/contrib/compiler-rt/lib/builtins/cmpdi2.c
new file mode 100644
index 000000000000..52634d9c3362
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/cmpdi2.c
@@ -0,0 +1,51 @@
+/* ===-- cmpdi2.c - Implement __cmpdi2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __cmpdi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: if (a < b) returns 0
+* if (a == b) returns 1
+* if (a > b) returns 2
+*/
+
+COMPILER_RT_ABI si_int
+__cmpdi2(di_int a, di_int b)
+{
+ dwords x;
+ x.all = a;
+ dwords y;
+ y.all = b;
+ if (x.s.high < y.s.high)
+ return 0;
+ if (x.s.high > y.s.high)
+ return 2;
+ if (x.s.low < y.s.low)
+ return 0;
+ if (x.s.low > y.s.low)
+ return 2;
+ return 1;
+}
+
+#ifdef __ARM_EABI__
+/* Returns: if (a < b) returns -1
+* if (a == b) returns 0
+* if (a > b) returns 1
+*/
+COMPILER_RT_ABI si_int
+__aeabi_lcmp(di_int a, di_int b)
+{
+ return __cmpdi2(a, b) - 1;
+}
+#endif
+
diff --git a/contrib/compiler-rt/lib/builtins/cmpti2.c b/contrib/compiler-rt/lib/builtins/cmpti2.c
new file mode 100644
index 000000000000..2c8b56e29a06
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/cmpti2.c
@@ -0,0 +1,42 @@
+/* ===-- cmpti2.c - Implement __cmpti2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __cmpti2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: if (a < b) returns 0
+ * if (a == b) returns 1
+ * if (a > b) returns 2
+ */
+
+COMPILER_RT_ABI si_int
+__cmpti2(ti_int a, ti_int b)
+{
+ twords x;
+ x.all = a;
+ twords y;
+ y.all = b;
+ if (x.s.high < y.s.high)
+ return 0;
+ if (x.s.high > y.s.high)
+ return 2;
+ if (x.s.low < y.s.low)
+ return 0;
+ if (x.s.low > y.s.low)
+ return 2;
+ return 1;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/comparedf2.c b/contrib/compiler-rt/lib/builtins/comparedf2.c
new file mode 100644
index 000000000000..44e5d2b288a6
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/comparedf2.c
@@ -0,0 +1,153 @@
+//===-- lib/comparedf2.c - Double-precision comparisons -----------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// // This file implements the following soft-float comparison routines:
+//
+// __eqdf2 __gedf2 __unorddf2
+// __ledf2 __gtdf2
+// __ltdf2
+// __nedf2
+//
+// The semantics of the routines grouped in each column are identical, so there
+// is a single implementation for each, and wrappers to provide the other names.
+//
+// The main routines behave as follows:
+//
+// __ledf2(a,b) returns -1 if a < b
+// 0 if a == b
+// 1 if a > b
+// 1 if either a or b is NaN
+//
+// __gedf2(a,b) returns -1 if a < b
+// 0 if a == b
+// 1 if a > b
+// -1 if either a or b is NaN
+//
+// __unorddf2(a,b) returns 0 if both a and b are numbers
+// 1 if either a or b is NaN
+//
+// Note that __ledf2( ) and __gedf2( ) are identical except in their handling of
+// NaN values.
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+enum LE_RESULT {
+ LE_LESS = -1,
+ LE_EQUAL = 0,
+ LE_GREATER = 1,
+ LE_UNORDERED = 1
+};
+
+COMPILER_RT_ABI enum LE_RESULT
+__ledf2(fp_t a, fp_t b) {
+
+ const srep_t aInt = toRep(a);
+ const srep_t bInt = toRep(b);
+ const rep_t aAbs = aInt & absMask;
+ const rep_t bAbs = bInt & absMask;
+
+ // If either a or b is NaN, they are unordered.
+ if (aAbs > infRep || bAbs > infRep) return LE_UNORDERED;
+
+ // If a and b are both zeros, they are equal.
+ if ((aAbs | bAbs) == 0) return LE_EQUAL;
+
+ // If at least one of a and b is positive, we get the same result comparing
+ // a and b as signed integers as we would with a floating-point compare.
+ if ((aInt & bInt) >= 0) {
+ if (aInt < bInt) return LE_LESS;
+ else if (aInt == bInt) return LE_EQUAL;
+ else return LE_GREATER;
+ }
+
+ // Otherwise, both are negative, so we need to flip the sense of the
+ // comparison to get the correct result. (This assumes a twos- or ones-
+ // complement integer representation; if integers are represented in a
+ // sign-magnitude representation, then this flip is incorrect).
+ else {
+ if (aInt > bInt) return LE_LESS;
+ else if (aInt == bInt) return LE_EQUAL;
+ else return LE_GREATER;
+ }
+}
+
+#if defined(__ELF__)
+// Alias for libgcc compatibility
+FNALIAS(__cmpdf2, __ledf2);
+#endif
+
+enum GE_RESULT {
+ GE_LESS = -1,
+ GE_EQUAL = 0,
+ GE_GREATER = 1,
+ GE_UNORDERED = -1 // Note: different from LE_UNORDERED
+};
+
+COMPILER_RT_ABI enum GE_RESULT
+__gedf2(fp_t a, fp_t b) {
+
+ const srep_t aInt = toRep(a);
+ const srep_t bInt = toRep(b);
+ const rep_t aAbs = aInt & absMask;
+ const rep_t bAbs = bInt & absMask;
+
+ if (aAbs > infRep || bAbs > infRep) return GE_UNORDERED;
+ if ((aAbs | bAbs) == 0) return GE_EQUAL;
+ if ((aInt & bInt) >= 0) {
+ if (aInt < bInt) return GE_LESS;
+ else if (aInt == bInt) return GE_EQUAL;
+ else return GE_GREATER;
+ } else {
+ if (aInt > bInt) return GE_LESS;
+ else if (aInt == bInt) return GE_EQUAL;
+ else return GE_GREATER;
+ }
+}
+
+COMPILER_RT_ABI int
+__unorddf2(fp_t a, fp_t b) {
+ const rep_t aAbs = toRep(a) & absMask;
+ const rep_t bAbs = toRep(b) & absMask;
+ return aAbs > infRep || bAbs > infRep;
+}
+
+// The following are alternative names for the preceding routines.
+
+COMPILER_RT_ABI enum LE_RESULT
+__eqdf2(fp_t a, fp_t b) {
+ return __ledf2(a, b);
+}
+
+COMPILER_RT_ABI enum LE_RESULT
+__ltdf2(fp_t a, fp_t b) {
+ return __ledf2(a, b);
+}
+
+COMPILER_RT_ABI enum LE_RESULT
+__nedf2(fp_t a, fp_t b) {
+ return __ledf2(a, b);
+}
+
+COMPILER_RT_ABI enum GE_RESULT
+__gtdf2(fp_t a, fp_t b) {
+ return __gedf2(a, b);
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI int __aeabi_dcmpun(fp_t a, fp_t b) {
+ return __unorddf2(a, b);
+}
+#else
+AEABI_RTABI int __aeabi_dcmpun(fp_t a, fp_t b) COMPILER_RT_ALIAS(__unorddf2);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/comparesf2.c b/contrib/compiler-rt/lib/builtins/comparesf2.c
new file mode 100644
index 000000000000..43cd6a6a7003
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/comparesf2.c
@@ -0,0 +1,153 @@
+//===-- lib/comparesf2.c - Single-precision comparisons -----------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the following soft-fp_t comparison routines:
+//
+// __eqsf2 __gesf2 __unordsf2
+// __lesf2 __gtsf2
+// __ltsf2
+// __nesf2
+//
+// The semantics of the routines grouped in each column are identical, so there
+// is a single implementation for each, and wrappers to provide the other names.
+//
+// The main routines behave as follows:
+//
+// __lesf2(a,b) returns -1 if a < b
+// 0 if a == b
+// 1 if a > b
+// 1 if either a or b is NaN
+//
+// __gesf2(a,b) returns -1 if a < b
+// 0 if a == b
+// 1 if a > b
+// -1 if either a or b is NaN
+//
+// __unordsf2(a,b) returns 0 if both a and b are numbers
+// 1 if either a or b is NaN
+//
+// Note that __lesf2( ) and __gesf2( ) are identical except in their handling of
+// NaN values.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+enum LE_RESULT {
+ LE_LESS = -1,
+ LE_EQUAL = 0,
+ LE_GREATER = 1,
+ LE_UNORDERED = 1
+};
+
+COMPILER_RT_ABI enum LE_RESULT
+__lesf2(fp_t a, fp_t b) {
+
+ const srep_t aInt = toRep(a);
+ const srep_t bInt = toRep(b);
+ const rep_t aAbs = aInt & absMask;
+ const rep_t bAbs = bInt & absMask;
+
+ // If either a or b is NaN, they are unordered.
+ if (aAbs > infRep || bAbs > infRep) return LE_UNORDERED;
+
+ // If a and b are both zeros, they are equal.
+ if ((aAbs | bAbs) == 0) return LE_EQUAL;
+
+ // If at least one of a and b is positive, we get the same result comparing
+ // a and b as signed integers as we would with a fp_ting-point compare.
+ if ((aInt & bInt) >= 0) {
+ if (aInt < bInt) return LE_LESS;
+ else if (aInt == bInt) return LE_EQUAL;
+ else return LE_GREATER;
+ }
+
+ // Otherwise, both are negative, so we need to flip the sense of the
+ // comparison to get the correct result. (This assumes a twos- or ones-
+ // complement integer representation; if integers are represented in a
+ // sign-magnitude representation, then this flip is incorrect).
+ else {
+ if (aInt > bInt) return LE_LESS;
+ else if (aInt == bInt) return LE_EQUAL;
+ else return LE_GREATER;
+ }
+}
+
+#if defined(__ELF__)
+// Alias for libgcc compatibility
+FNALIAS(__cmpsf2, __lesf2);
+#endif
+
+enum GE_RESULT {
+ GE_LESS = -1,
+ GE_EQUAL = 0,
+ GE_GREATER = 1,
+ GE_UNORDERED = -1 // Note: different from LE_UNORDERED
+};
+
+COMPILER_RT_ABI enum GE_RESULT
+__gesf2(fp_t a, fp_t b) {
+
+ const srep_t aInt = toRep(a);
+ const srep_t bInt = toRep(b);
+ const rep_t aAbs = aInt & absMask;
+ const rep_t bAbs = bInt & absMask;
+
+ if (aAbs > infRep || bAbs > infRep) return GE_UNORDERED;
+ if ((aAbs | bAbs) == 0) return GE_EQUAL;
+ if ((aInt & bInt) >= 0) {
+ if (aInt < bInt) return GE_LESS;
+ else if (aInt == bInt) return GE_EQUAL;
+ else return GE_GREATER;
+ } else {
+ if (aInt > bInt) return GE_LESS;
+ else if (aInt == bInt) return GE_EQUAL;
+ else return GE_GREATER;
+ }
+}
+
+COMPILER_RT_ABI int
+__unordsf2(fp_t a, fp_t b) {
+ const rep_t aAbs = toRep(a) & absMask;
+ const rep_t bAbs = toRep(b) & absMask;
+ return aAbs > infRep || bAbs > infRep;
+}
+
+// The following are alternative names for the preceding routines.
+
+COMPILER_RT_ABI enum LE_RESULT
+__eqsf2(fp_t a, fp_t b) {
+ return __lesf2(a, b);
+}
+
+COMPILER_RT_ABI enum LE_RESULT
+__ltsf2(fp_t a, fp_t b) {
+ return __lesf2(a, b);
+}
+
+COMPILER_RT_ABI enum LE_RESULT
+__nesf2(fp_t a, fp_t b) {
+ return __lesf2(a, b);
+}
+
+COMPILER_RT_ABI enum GE_RESULT
+__gtsf2(fp_t a, fp_t b) {
+ return __gesf2(a, b);
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI int __aeabi_fcmpun(fp_t a, fp_t b) {
+ return __unordsf2(a, b);
+}
+#else
+AEABI_RTABI int __aeabi_fcmpun(fp_t a, fp_t b) COMPILER_RT_ALIAS(__unordsf2);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/comparetf2.c b/contrib/compiler-rt/lib/builtins/comparetf2.c
new file mode 100644
index 000000000000..c0ad8ed0aecd
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/comparetf2.c
@@ -0,0 +1,138 @@
+//===-- lib/comparetf2.c - Quad-precision comparisons -------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// // This file implements the following soft-float comparison routines:
+//
+// __eqtf2 __getf2 __unordtf2
+// __letf2 __gttf2
+// __lttf2
+// __netf2
+//
+// The semantics of the routines grouped in each column are identical, so there
+// is a single implementation for each, and wrappers to provide the other names.
+//
+// The main routines behave as follows:
+//
+// __letf2(a,b) returns -1 if a < b
+// 0 if a == b
+// 1 if a > b
+// 1 if either a or b is NaN
+//
+// __getf2(a,b) returns -1 if a < b
+// 0 if a == b
+// 1 if a > b
+// -1 if either a or b is NaN
+//
+// __unordtf2(a,b) returns 0 if both a and b are numbers
+// 1 if either a or b is NaN
+//
+// Note that __letf2( ) and __getf2( ) are identical except in their handling of
+// NaN values.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+enum LE_RESULT {
+ LE_LESS = -1,
+ LE_EQUAL = 0,
+ LE_GREATER = 1,
+ LE_UNORDERED = 1
+};
+
+COMPILER_RT_ABI enum LE_RESULT __letf2(fp_t a, fp_t b) {
+
+ const srep_t aInt = toRep(a);
+ const srep_t bInt = toRep(b);
+ const rep_t aAbs = aInt & absMask;
+ const rep_t bAbs = bInt & absMask;
+
+ // If either a or b is NaN, they are unordered.
+ if (aAbs > infRep || bAbs > infRep) return LE_UNORDERED;
+
+ // If a and b are both zeros, they are equal.
+ if ((aAbs | bAbs) == 0) return LE_EQUAL;
+
+ // If at least one of a and b is positive, we get the same result comparing
+ // a and b as signed integers as we would with a floating-point compare.
+ if ((aInt & bInt) >= 0) {
+ if (aInt < bInt) return LE_LESS;
+ else if (aInt == bInt) return LE_EQUAL;
+ else return LE_GREATER;
+ }
+ else {
+ // Otherwise, both are negative, so we need to flip the sense of the
+ // comparison to get the correct result. (This assumes a twos- or ones-
+ // complement integer representation; if integers are represented in a
+ // sign-magnitude representation, then this flip is incorrect).
+ if (aInt > bInt) return LE_LESS;
+ else if (aInt == bInt) return LE_EQUAL;
+ else return LE_GREATER;
+ }
+}
+
+#if defined(__ELF__)
+// Alias for libgcc compatibility
+FNALIAS(__cmptf2, __letf2);
+#endif
+
+enum GE_RESULT {
+ GE_LESS = -1,
+ GE_EQUAL = 0,
+ GE_GREATER = 1,
+ GE_UNORDERED = -1 // Note: different from LE_UNORDERED
+};
+
+COMPILER_RT_ABI enum GE_RESULT __getf2(fp_t a, fp_t b) {
+
+ const srep_t aInt = toRep(a);
+ const srep_t bInt = toRep(b);
+ const rep_t aAbs = aInt & absMask;
+ const rep_t bAbs = bInt & absMask;
+
+ if (aAbs > infRep || bAbs > infRep) return GE_UNORDERED;
+ if ((aAbs | bAbs) == 0) return GE_EQUAL;
+ if ((aInt & bInt) >= 0) {
+ if (aInt < bInt) return GE_LESS;
+ else if (aInt == bInt) return GE_EQUAL;
+ else return GE_GREATER;
+ } else {
+ if (aInt > bInt) return GE_LESS;
+ else if (aInt == bInt) return GE_EQUAL;
+ else return GE_GREATER;
+ }
+}
+
+COMPILER_RT_ABI int __unordtf2(fp_t a, fp_t b) {
+ const rep_t aAbs = toRep(a) & absMask;
+ const rep_t bAbs = toRep(b) & absMask;
+ return aAbs > infRep || bAbs > infRep;
+}
+
+// The following are alternative names for the preceding routines.
+
+COMPILER_RT_ABI enum LE_RESULT __eqtf2(fp_t a, fp_t b) {
+ return __letf2(a, b);
+}
+
+COMPILER_RT_ABI enum LE_RESULT __lttf2(fp_t a, fp_t b) {
+ return __letf2(a, b);
+}
+
+COMPILER_RT_ABI enum LE_RESULT __netf2(fp_t a, fp_t b) {
+ return __letf2(a, b);
+}
+
+COMPILER_RT_ABI enum GE_RESULT __gttf2(fp_t a, fp_t b) {
+ return __getf2(a, b);
+}
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/cpu_model.c b/contrib/compiler-rt/lib/builtins/cpu_model.c
new file mode 100644
index 000000000000..fb2b899fc70a
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/cpu_model.c
@@ -0,0 +1,651 @@
+//===-- cpu_model.c - Support for __cpu_model builtin ------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is based on LLVM's lib/Support/Host.cpp.
+// It implements the operating system Host concept and builtin
+// __cpu_model for the compiler_rt library, for x86 only.
+//
+//===----------------------------------------------------------------------===//
+
+#if (defined(__i386__) || defined(_M_IX86) || \
+ defined(__x86_64__) || defined(_M_X64)) && \
+ (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
+
+#include <assert.h>
+
+#define bool int
+#define true 1
+#define false 0
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
+#ifndef __has_attribute
+#define __has_attribute(attr) 0
+#endif
+
+enum VendorSignatures {
+ SIG_INTEL = 0x756e6547 /* Genu */,
+ SIG_AMD = 0x68747541 /* Auth */
+};
+
+enum ProcessorVendors {
+ VENDOR_INTEL = 1,
+ VENDOR_AMD,
+ VENDOR_OTHER,
+ VENDOR_MAX
+};
+
+enum ProcessorTypes {
+ INTEL_BONNELL = 1,
+ INTEL_CORE2,
+ INTEL_COREI7,
+ AMDFAM10H,
+ AMDFAM15H,
+ INTEL_SILVERMONT,
+ INTEL_KNL,
+ AMD_BTVER1,
+ AMD_BTVER2,
+ AMDFAM17H,
+ INTEL_KNM,
+ INTEL_GOLDMONT,
+ INTEL_GOLDMONT_PLUS,
+ INTEL_TREMONT,
+ CPU_TYPE_MAX
+};
+
+enum ProcessorSubtypes {
+ INTEL_COREI7_NEHALEM = 1,
+ INTEL_COREI7_WESTMERE,
+ INTEL_COREI7_SANDYBRIDGE,
+ AMDFAM10H_BARCELONA,
+ AMDFAM10H_SHANGHAI,
+ AMDFAM10H_ISTANBUL,
+ AMDFAM15H_BDVER1,
+ AMDFAM15H_BDVER2,
+ AMDFAM15H_BDVER3,
+ AMDFAM15H_BDVER4,
+ AMDFAM17H_ZNVER1,
+ INTEL_COREI7_IVYBRIDGE,
+ INTEL_COREI7_HASWELL,
+ INTEL_COREI7_BROADWELL,
+ INTEL_COREI7_SKYLAKE,
+ INTEL_COREI7_SKYLAKE_AVX512,
+ INTEL_COREI7_CANNONLAKE,
+ INTEL_COREI7_ICELAKE_CLIENT,
+ INTEL_COREI7_ICELAKE_SERVER,
+ CPU_SUBTYPE_MAX
+};
+
+enum ProcessorFeatures {
+ FEATURE_CMOV = 0,
+ FEATURE_MMX,
+ FEATURE_POPCNT,
+ FEATURE_SSE,
+ FEATURE_SSE2,
+ FEATURE_SSE3,
+ FEATURE_SSSE3,
+ FEATURE_SSE4_1,
+ FEATURE_SSE4_2,
+ FEATURE_AVX,
+ FEATURE_AVX2,
+ FEATURE_SSE4_A,
+ FEATURE_FMA4,
+ FEATURE_XOP,
+ FEATURE_FMA,
+ FEATURE_AVX512F,
+ FEATURE_BMI,
+ FEATURE_BMI2,
+ FEATURE_AES,
+ FEATURE_PCLMUL,
+ FEATURE_AVX512VL,
+ FEATURE_AVX512BW,
+ FEATURE_AVX512DQ,
+ FEATURE_AVX512CD,
+ FEATURE_AVX512ER,
+ FEATURE_AVX512PF,
+ FEATURE_AVX512VBMI,
+ FEATURE_AVX512IFMA,
+ FEATURE_AVX5124VNNIW,
+ FEATURE_AVX5124FMAPS,
+ FEATURE_AVX512VPOPCNTDQ,
+ FEATURE_AVX512VBMI2,
+ FEATURE_GFNI,
+ FEATURE_VPCLMULQDQ,
+ FEATURE_AVX512VNNI,
+ FEATURE_AVX512BITALG
+};
+
+// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
+// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
+// support. Consequently, for i386, the presence of CPUID is checked first
+// via the corresponding eflags bit.
+static bool isCpuIdSupported() {
+#if defined(__GNUC__) || defined(__clang__)
+#if defined(__i386__)
+ int __cpuid_supported;
+ __asm__(" pushfl\n"
+ " popl %%eax\n"
+ " movl %%eax,%%ecx\n"
+ " xorl $0x00200000,%%eax\n"
+ " pushl %%eax\n"
+ " popfl\n"
+ " pushfl\n"
+ " popl %%eax\n"
+ " movl $0,%0\n"
+ " cmpl %%eax,%%ecx\n"
+ " je 1f\n"
+ " movl $1,%0\n"
+ "1:"
+ : "=r"(__cpuid_supported)
+ :
+ : "eax", "ecx");
+ if (!__cpuid_supported)
+ return false;
+#endif
+ return true;
+#endif
+ return true;
+}
+
+// This code is copied from lib/Support/Host.cpp.
+// Changes to either file should be mirrored in the other.
+
+/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
+/// the specified arguments. If we can't run cpuid on the host, return true.
+static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
+ unsigned *rECX, unsigned *rEDX) {
+#if defined(__GNUC__) || defined(__clang__)
+#if defined(__x86_64__)
+ // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
+ // FIXME: should we save this for Clang?
+ __asm__("movq\t%%rbx, %%rsi\n\t"
+ "cpuid\n\t"
+ "xchgq\t%%rbx, %%rsi\n\t"
+ : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
+ : "a"(value));
+ return false;
+#elif defined(__i386__)
+ __asm__("movl\t%%ebx, %%esi\n\t"
+ "cpuid\n\t"
+ "xchgl\t%%ebx, %%esi\n\t"
+ : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
+ : "a"(value));
+ return false;
+#else
+ return true;
+#endif
+#elif defined(_MSC_VER)
+ // The MSVC intrinsic is portable across x86 and x64.
+ int registers[4];
+ __cpuid(registers, value);
+ *rEAX = registers[0];
+ *rEBX = registers[1];
+ *rECX = registers[2];
+ *rEDX = registers[3];
+ return false;
+#else
+ return true;
+#endif
+}
+
+/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
+/// the 4 values in the specified arguments. If we can't run cpuid on the host,
+/// return true.
+static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
+ unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
+ unsigned *rEDX) {
+#if defined(__GNUC__) || defined(__clang__)
+#if defined(__x86_64__)
+ // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
+ // FIXME: should we save this for Clang?
+ __asm__("movq\t%%rbx, %%rsi\n\t"
+ "cpuid\n\t"
+ "xchgq\t%%rbx, %%rsi\n\t"
+ : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
+ : "a"(value), "c"(subleaf));
+ return false;
+#elif defined(__i386__)
+ __asm__("movl\t%%ebx, %%esi\n\t"
+ "cpuid\n\t"
+ "xchgl\t%%ebx, %%esi\n\t"
+ : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
+ : "a"(value), "c"(subleaf));
+ return false;
+#else
+ return true;
+#endif
+#elif defined(_MSC_VER)
+ int registers[4];
+ __cpuidex(registers, value, subleaf);
+ *rEAX = registers[0];
+ *rEBX = registers[1];
+ *rECX = registers[2];
+ *rEDX = registers[3];
+ return false;
+#else
+ return true;
+#endif
+}
+
+// Read control register 0 (XCR0). Used to detect features such as AVX.
+static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
+#if defined(__GNUC__) || defined(__clang__)
+ // Check xgetbv; this uses a .byte sequence instead of the instruction
+ // directly because older assemblers do not include support for xgetbv and
+ // there is no easy way to conditionally compile based on the assembler used.
+ __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
+ return false;
+#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
+ unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
+ *rEAX = Result;
+ *rEDX = Result >> 32;
+ return false;
+#else
+ return true;
+#endif
+}
+
+static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
+ unsigned *Model) {
+ *Family = (EAX >> 8) & 0xf; // Bits 8 - 11
+ *Model = (EAX >> 4) & 0xf; // Bits 4 - 7
+ if (*Family == 6 || *Family == 0xf) {
+ if (*Family == 0xf)
+ // Examine extended family ID if family ID is F.
+ *Family += (EAX >> 20) & 0xff; // Bits 20 - 27
+ // Examine extended model ID if family ID is 6 or F.
+ *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
+ }
+}
+
+static void
+getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
+ unsigned Brand_id, unsigned Features,
+ unsigned *Type, unsigned *Subtype) {
+ if (Brand_id != 0)
+ return;
+ switch (Family) {
+ case 6:
+ switch (Model) {
+ case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
+ // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
+ // mobile processor, Intel Core 2 Extreme processor, Intel
+ // Pentium Dual-Core processor, Intel Xeon processor, model
+ // 0Fh. All processors are manufactured using the 65 nm process.
+ case 0x16: // Intel Celeron processor model 16h. All processors are
+ // manufactured using the 65 nm process
+ case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
+ // 17h. All processors are manufactured using the 45 nm process.
+ //
+ // 45nm: Penryn , Wolfdale, Yorkfield (XE)
+ case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
+ // the 45 nm process.
+ *Type = INTEL_CORE2; // "penryn"
+ break;
+ case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
+ // processors are manufactured using the 45 nm process.
+ case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
+ // As found in a Summer 2010 model iMac.
+ case 0x1f:
+ case 0x2e: // Nehalem EX
+ *Type = INTEL_COREI7; // "nehalem"
+ *Subtype = INTEL_COREI7_NEHALEM;
+ break;
+ case 0x25: // Intel Core i7, laptop version.
+ case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
+ // processors are manufactured using the 32 nm process.
+ case 0x2f: // Westmere EX
+ *Type = INTEL_COREI7; // "westmere"
+ *Subtype = INTEL_COREI7_WESTMERE;
+ break;
+ case 0x2a: // Intel Core i7 processor. All processors are manufactured
+ // using the 32 nm process.
+ case 0x2d:
+ *Type = INTEL_COREI7; //"sandybridge"
+ *Subtype = INTEL_COREI7_SANDYBRIDGE;
+ break;
+ case 0x3a:
+ case 0x3e: // Ivy Bridge EP
+ *Type = INTEL_COREI7; // "ivybridge"
+ *Subtype = INTEL_COREI7_IVYBRIDGE;
+ break;
+
+ // Haswell:
+ case 0x3c:
+ case 0x3f:
+ case 0x45:
+ case 0x46:
+ *Type = INTEL_COREI7; // "haswell"
+ *Subtype = INTEL_COREI7_HASWELL;
+ break;
+
+ // Broadwell:
+ case 0x3d:
+ case 0x47:
+ case 0x4f:
+ case 0x56:
+ *Type = INTEL_COREI7; // "broadwell"
+ *Subtype = INTEL_COREI7_BROADWELL;
+ break;
+
+ // Skylake:
+ case 0x4e: // Skylake mobile
+ case 0x5e: // Skylake desktop
+ case 0x8e: // Kaby Lake mobile
+ case 0x9e: // Kaby Lake desktop
+ *Type = INTEL_COREI7; // "skylake"
+ *Subtype = INTEL_COREI7_SKYLAKE;
+ break;
+
+ // Skylake Xeon:
+ case 0x55:
+ *Type = INTEL_COREI7;
+ *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
+ break;
+
+ // Cannonlake:
+ case 0x66:
+ *Type = INTEL_COREI7;
+ *Subtype = INTEL_COREI7_CANNONLAKE; // "cannonlake"
+ break;
+
+ case 0x1c: // Most 45 nm Intel Atom processors
+ case 0x26: // 45 nm Atom Lincroft
+ case 0x27: // 32 nm Atom Medfield
+ case 0x35: // 32 nm Atom Midview
+ case 0x36: // 32 nm Atom Midview
+ *Type = INTEL_BONNELL;
+ break; // "bonnell"
+
+ // Atom Silvermont codes from the Intel software optimization guide.
+ case 0x37:
+ case 0x4a:
+ case 0x4d:
+ case 0x5a:
+ case 0x5d:
+ case 0x4c: // really airmont
+ *Type = INTEL_SILVERMONT;
+ break; // "silvermont"
+ // Goldmont:
+ case 0x5c: // Apollo Lake
+ case 0x5f: // Denverton
+ *Type = INTEL_GOLDMONT;
+ break; // "goldmont"
+ case 0x7a:
+ *Type = INTEL_GOLDMONT_PLUS;
+ break;
+
+ case 0x57:
+ *Type = INTEL_KNL; // knl
+ break;
+
+ case 0x85:
+ *Type = INTEL_KNM; // knm
+ break;
+
+ default: // Unknown family 6 CPU.
+ break;
+ break;
+ }
+ default:
+ break; // Unknown.
+ }
+}
+
+static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
+ unsigned Features, unsigned *Type,
+ unsigned *Subtype) {
+ // FIXME: this poorly matches the generated SubtargetFeatureKV table. There
+ // appears to be no way to generate the wide variety of AMD-specific targets
+ // from the information returned from CPUID.
+ switch (Family) {
+ case 16:
+ *Type = AMDFAM10H; // "amdfam10"
+ switch (Model) {
+ case 2:
+ *Subtype = AMDFAM10H_BARCELONA;
+ break;
+ case 4:
+ *Subtype = AMDFAM10H_SHANGHAI;
+ break;
+ case 8:
+ *Subtype = AMDFAM10H_ISTANBUL;
+ break;
+ }
+ break;
+ case 20:
+ *Type = AMD_BTVER1;
+ break; // "btver1";
+ case 21:
+ *Type = AMDFAM15H;
+ if (Model >= 0x60 && Model <= 0x7f) {
+ *Subtype = AMDFAM15H_BDVER4;
+ break; // "bdver4"; 60h-7Fh: Excavator
+ }
+ if (Model >= 0x30 && Model <= 0x3f) {
+ *Subtype = AMDFAM15H_BDVER3;
+ break; // "bdver3"; 30h-3Fh: Steamroller
+ }
+ if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
+ *Subtype = AMDFAM15H_BDVER2;
+ break; // "bdver2"; 02h, 10h-1Fh: Piledriver
+ }
+ if (Model <= 0x0f) {
+ *Subtype = AMDFAM15H_BDVER1;
+ break; // "bdver1"; 00h-0Fh: Bulldozer
+ }
+ break;
+ case 22:
+ *Type = AMD_BTVER2;
+ break; // "btver2"
+ case 23:
+ *Type = AMDFAM17H;
+ *Subtype = AMDFAM17H_ZNVER1;
+ break;
+ default:
+ break; // "generic"
+ }
+}
+
+static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
+ unsigned *FeaturesOut,
+ unsigned *Features2Out) {
+ unsigned Features = 0;
+ unsigned Features2 = 0;
+ unsigned EAX, EBX;
+
+#define setFeature(F) \
+ do { \
+ if (F < 32) \
+ Features |= 1U << (F & 0x1f); \
+ else if (F < 64) \
+ Features2 |= 1U << ((F - 32) & 0x1f); \
+ } while (0)
+
+ if ((EDX >> 15) & 1)
+ setFeature(FEATURE_CMOV);
+ if ((EDX >> 23) & 1)
+ setFeature(FEATURE_MMX);
+ if ((EDX >> 25) & 1)
+ setFeature(FEATURE_SSE);
+ if ((EDX >> 26) & 1)
+ setFeature(FEATURE_SSE2);
+
+ if ((ECX >> 0) & 1)
+ setFeature(FEATURE_SSE3);
+ if ((ECX >> 1) & 1)
+ setFeature(FEATURE_PCLMUL);
+ if ((ECX >> 9) & 1)
+ setFeature(FEATURE_SSSE3);
+ if ((ECX >> 12) & 1)
+ setFeature(FEATURE_FMA);
+ if ((ECX >> 19) & 1)
+ setFeature(FEATURE_SSE4_1);
+ if ((ECX >> 20) & 1)
+ setFeature(FEATURE_SSE4_2);
+ if ((ECX >> 23) & 1)
+ setFeature(FEATURE_POPCNT);
+ if ((ECX >> 25) & 1)
+ setFeature(FEATURE_AES);
+
+ // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
+ // indicates that the AVX registers will be saved and restored on context
+ // switch, then we have full AVX support.
+ const unsigned AVXBits = (1 << 27) | (1 << 28);
+ bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
+ ((EAX & 0x6) == 0x6);
+ bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
+
+ if (HasAVX)
+ setFeature(FEATURE_AVX);
+
+ bool HasLeaf7 =
+ MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
+
+ if (HasLeaf7 && ((EBX >> 3) & 1))
+ setFeature(FEATURE_BMI);
+ if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
+ setFeature(FEATURE_AVX2);
+ if (HasLeaf7 && ((EBX >> 9) & 1))
+ setFeature(FEATURE_BMI2);
+ if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512F);
+ if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512DQ);
+ if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512IFMA);
+ if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512PF);
+ if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512ER);
+ if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512CD);
+ if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512BW);
+ if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512VL);
+
+ if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512VBMI);
+ if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512VBMI2);
+ if (HasLeaf7 && ((ECX >> 8) & 1))
+ setFeature(FEATURE_GFNI);
+ if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
+ setFeature(FEATURE_VPCLMULQDQ);
+ if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512VNNI);
+ if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512BITALG);
+ if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX512VPOPCNTDQ);
+
+ if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX5124VNNIW);
+ if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
+ setFeature(FEATURE_AVX5124FMAPS);
+
+ unsigned MaxExtLevel;
+ getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
+
+ bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
+ !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+ if (HasExtLeaf1 && ((ECX >> 6) & 1))
+ setFeature(FEATURE_SSE4_A);
+ if (HasExtLeaf1 && ((ECX >> 11) & 1))
+ setFeature(FEATURE_XOP);
+ if (HasExtLeaf1 && ((ECX >> 16) & 1))
+ setFeature(FEATURE_FMA4);
+
+ *FeaturesOut = Features;
+ *Features2Out = Features2;
+#undef setFeature
+}
+
+#if defined(HAVE_INIT_PRIORITY)
+#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__ 101))
+#elif __has_attribute(__constructor__)
+#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
+#else
+// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
+// this runs during initialization.
+#define CONSTRUCTOR_ATTRIBUTE
+#endif
+
+int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;
+
+struct __processor_model {
+ unsigned int __cpu_vendor;
+ unsigned int __cpu_type;
+ unsigned int __cpu_subtype;
+ unsigned int __cpu_features[1];
+} __cpu_model = {0, 0, 0, {0}};
+unsigned int __cpu_features2;
+
+/* A constructor function that is sets __cpu_model and __cpu_features2 with
+ the right values. This needs to run only once. This constructor is
+ given the highest priority and it should run before constructors without
+ the priority set. However, it still runs after ifunc initializers and
+ needs to be called explicitly there. */
+
+int CONSTRUCTOR_ATTRIBUTE
+__cpu_indicator_init(void) {
+ unsigned EAX, EBX, ECX, EDX;
+ unsigned MaxLeaf = 5;
+ unsigned Vendor;
+ unsigned Model, Family, Brand_id;
+ unsigned Features = 0;
+ unsigned Features2 = 0;
+
+ /* This function needs to run just once. */
+ if (__cpu_model.__cpu_vendor)
+ return 0;
+
+ if (!isCpuIdSupported())
+ return -1;
+
+ /* Assume cpuid insn present. Run in level 0 to get vendor id. */
+ if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
+ __cpu_model.__cpu_vendor = VENDOR_OTHER;
+ return -1;
+ }
+ getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
+ detectX86FamilyModel(EAX, &Family, &Model);
+ Brand_id = EBX & 0xff;
+
+ /* Find available features. */
+ getAvailableFeatures(ECX, EDX, MaxLeaf, &Features, &Features2);
+ __cpu_model.__cpu_features[0] = Features;
+ __cpu_features2 = Features2;
+
+ if (Vendor == SIG_INTEL) {
+ /* Get CPU type. */
+ getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features,
+ &(__cpu_model.__cpu_type),
+ &(__cpu_model.__cpu_subtype));
+ __cpu_model.__cpu_vendor = VENDOR_INTEL;
+ } else if (Vendor == SIG_AMD) {
+ /* Get CPU type. */
+ getAMDProcessorTypeAndSubtype(Family, Model, Features,
+ &(__cpu_model.__cpu_type),
+ &(__cpu_model.__cpu_subtype));
+ __cpu_model.__cpu_vendor = VENDOR_AMD;
+ } else
+ __cpu_model.__cpu_vendor = VENDOR_OTHER;
+
+ assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
+ assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
+ assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);
+
+ return 0;
+}
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/ctzdi2.c b/contrib/compiler-rt/lib/builtins/ctzdi2.c
new file mode 100644
index 000000000000..ef6d7fea1365
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/ctzdi2.c
@@ -0,0 +1,40 @@
+/* ===-- ctzdi2.c - Implement __ctzdi2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __ctzdi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: the number of trailing 0-bits */
+
+#if !defined(__clang__) && \
+ ((defined(__sparc__) && defined(__arch64__)) || \
+ defined(__mips64) || \
+ (defined(__riscv) && __SIZEOF_POINTER__ >= 8))
+/* On 64-bit architectures with neither a native clz instruction nor a native
+ * ctz instruction, gcc resolves __builtin_ctz to __ctzdi2 rather than
+ * __ctzsi2, leading to infinite recursion. */
+#define __builtin_ctz(a) __ctzsi2(a)
+extern si_int __ctzsi2(si_int);
+#endif
+
+/* Precondition: a != 0 */
+
+COMPILER_RT_ABI si_int
+__ctzdi2(di_int a)
+{
+ dwords x;
+ x.all = a;
+ const si_int f = -(x.s.low == 0);
+ return __builtin_ctz((x.s.high & f) | (x.s.low & ~f)) +
+ (f & ((si_int)(sizeof(si_int) * CHAR_BIT)));
+}
diff --git a/contrib/compiler-rt/lib/builtins/ctzsi2.c b/contrib/compiler-rt/lib/builtins/ctzsi2.c
new file mode 100644
index 000000000000..c69486ea445f
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/ctzsi2.c
@@ -0,0 +1,57 @@
+/* ===-- ctzsi2.c - Implement __ctzsi2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __ctzsi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: the number of trailing 0-bits */
+
+/* Precondition: a != 0 */
+
+COMPILER_RT_ABI si_int
+__ctzsi2(si_int a)
+{
+ su_int x = (su_int)a;
+ si_int t = ((x & 0x0000FFFF) == 0) << 4; /* if (x has no small bits) t = 16 else 0 */
+ x >>= t; /* x = [0 - 0xFFFF] + higher garbage bits */
+ su_int r = t; /* r = [0, 16] */
+ /* return r + ctz(x) */
+ t = ((x & 0x00FF) == 0) << 3;
+ x >>= t; /* x = [0 - 0xFF] + higher garbage bits */
+ r += t; /* r = [0, 8, 16, 24] */
+ /* return r + ctz(x) */
+ t = ((x & 0x0F) == 0) << 2;
+ x >>= t; /* x = [0 - 0xF] + higher garbage bits */
+ r += t; /* r = [0, 4, 8, 12, 16, 20, 24, 28] */
+ /* return r + ctz(x) */
+ t = ((x & 0x3) == 0) << 1;
+ x >>= t;
+ x &= 3; /* x = [0 - 3] */
+ r += t; /* r = [0 - 30] and is even */
+ /* return r + ctz(x) */
+
+/* The branch-less return statement below is equivalent
+ * to the following switch statement:
+ * switch (x)
+ * {
+ * case 0:
+ * return r + 2;
+ * case 2:
+ * return r + 1;
+ * case 1:
+ * case 3:
+ * return r;
+ * }
+ */
+ return r + ((2 - (x >> 1)) & -((x & 1) == 0));
+}
diff --git a/contrib/compiler-rt/lib/builtins/ctzti2.c b/contrib/compiler-rt/lib/builtins/ctzti2.c
new file mode 100644
index 000000000000..45de682700cc
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/ctzti2.c
@@ -0,0 +1,33 @@
+/* ===-- ctzti2.c - Implement __ctzti2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __ctzti2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: the number of trailing 0-bits */
+
+/* Precondition: a != 0 */
+
+COMPILER_RT_ABI si_int
+__ctzti2(ti_int a)
+{
+ twords x;
+ x.all = a;
+ const di_int f = -(x.s.low == 0);
+ return __builtin_ctzll((x.s.high & f) | (x.s.low & ~f)) +
+ ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT)));
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/divdc3.c b/contrib/compiler-rt/lib/builtins/divdc3.c
new file mode 100644
index 000000000000..392d6ecacde1
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/divdc3.c
@@ -0,0 +1,62 @@
+/* ===-- divdc3.c - Implement __divdc3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __divdc3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+#include "int_lib.h"
+#include "int_math.h"
+
+/* Returns: the quotient of (a + ib) / (c + id) */
+
+COMPILER_RT_ABI Dcomplex
+__divdc3(double __a, double __b, double __c, double __d)
+{
+ int __ilogbw = 0;
+ double __logbw = __compiler_rt_logb(crt_fmax(crt_fabs(__c), crt_fabs(__d)));
+ if (crt_isfinite(__logbw))
+ {
+ __ilogbw = (int)__logbw;
+ __c = crt_scalbn(__c, -__ilogbw);
+ __d = crt_scalbn(__d, -__ilogbw);
+ }
+ double __denom = __c * __c + __d * __d;
+ Dcomplex z;
+ COMPLEX_REAL(z) = crt_scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
+ COMPLEX_IMAGINARY(z) = crt_scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
+ if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
+ {
+ if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b)))
+ {
+ COMPLEX_REAL(z) = crt_copysign(CRT_INFINITY, __c) * __a;
+ COMPLEX_IMAGINARY(z) = crt_copysign(CRT_INFINITY, __c) * __b;
+ }
+ else if ((crt_isinf(__a) || crt_isinf(__b)) &&
+ crt_isfinite(__c) && crt_isfinite(__d))
+ {
+ __a = crt_copysign(crt_isinf(__a) ? 1.0 : 0.0, __a);
+ __b = crt_copysign(crt_isinf(__b) ? 1.0 : 0.0, __b);
+ COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
+ COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
+ }
+ else if (crt_isinf(__logbw) && __logbw > 0.0 &&
+ crt_isfinite(__a) && crt_isfinite(__b))
+ {
+ __c = crt_copysign(crt_isinf(__c) ? 1.0 : 0.0, __c);
+ __d = crt_copysign(crt_isinf(__d) ? 1.0 : 0.0, __d);
+ COMPLEX_REAL(z) = 0.0 * (__a * __c + __b * __d);
+ COMPLEX_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d);
+ }
+ }
+ return z;
+}
diff --git a/contrib/compiler-rt/lib/builtins/divdf3.c b/contrib/compiler-rt/lib/builtins/divdf3.c
new file mode 100644
index 000000000000..411c82ebb87a
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/divdf3.c
@@ -0,0 +1,193 @@
+//===-- lib/divdf3.c - Double-precision division ------------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements double-precision soft-float division
+// with the IEEE-754 default rounding (to nearest, ties to even).
+//
+// For simplicity, this implementation currently flushes denormals to zero.
+// It should be a fairly straightforward exercise to implement gradual
+// underflow with correct rounding.
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+COMPILER_RT_ABI fp_t
+__divdf3(fp_t a, fp_t b) {
+
+ const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
+ const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
+ const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit;
+
+ rep_t aSignificand = toRep(a) & significandMask;
+ rep_t bSignificand = toRep(b) & significandMask;
+ int scale = 0;
+
+ // Detect if a or b is zero, denormal, infinity, or NaN.
+ if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) {
+
+ const rep_t aAbs = toRep(a) & absMask;
+ const rep_t bAbs = toRep(b) & absMask;
+
+ // NaN / anything = qNaN
+ if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
+ // anything / NaN = qNaN
+ if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
+
+ if (aAbs == infRep) {
+ // infinity / infinity = NaN
+ if (bAbs == infRep) return fromRep(qnanRep);
+ // infinity / anything else = +/- infinity
+ else return fromRep(aAbs | quotientSign);
+ }
+
+ // anything else / infinity = +/- 0
+ if (bAbs == infRep) return fromRep(quotientSign);
+
+ if (!aAbs) {
+ // zero / zero = NaN
+ if (!bAbs) return fromRep(qnanRep);
+ // zero / anything else = +/- zero
+ else return fromRep(quotientSign);
+ }
+ // anything else / zero = +/- infinity
+ if (!bAbs) return fromRep(infRep | quotientSign);
+
+ // one or both of a or b is denormal, the other (if applicable) is a
+ // normal number. Renormalize one or both of a and b, and set scale to
+ // include the necessary exponent adjustment.
+ if (aAbs < implicitBit) scale += normalize(&aSignificand);
+ if (bAbs < implicitBit) scale -= normalize(&bSignificand);
+ }
+
+ // Or in the implicit significand bit. (If we fell through from the
+ // denormal path it was already set by normalize( ), but setting it twice
+ // won't hurt anything.)
+ aSignificand |= implicitBit;
+ bSignificand |= implicitBit;
+ int quotientExponent = aExponent - bExponent + scale;
+
+ // Align the significand of b as a Q31 fixed-point number in the range
+ // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax
+ // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This
+ // is accurate to about 3.5 binary digits.
+ const uint32_t q31b = bSignificand >> 21;
+ uint32_t recip32 = UINT32_C(0x7504f333) - q31b;
+
+ // Now refine the reciprocal estimate using a Newton-Raphson iteration:
+ //
+ // x1 = x0 * (2 - x0 * b)
+ //
+ // This doubles the number of correct binary digits in the approximation
+ // with each iteration, so after three iterations, we have about 28 binary
+ // digits of accuracy.
+ uint32_t correction32;
+ correction32 = -((uint64_t)recip32 * q31b >> 32);
+ recip32 = (uint64_t)recip32 * correction32 >> 31;
+ correction32 = -((uint64_t)recip32 * q31b >> 32);
+ recip32 = (uint64_t)recip32 * correction32 >> 31;
+ correction32 = -((uint64_t)recip32 * q31b >> 32);
+ recip32 = (uint64_t)recip32 * correction32 >> 31;
+
+ // recip32 might have overflowed to exactly zero in the preceding
+ // computation if the high word of b is exactly 1.0. This would sabotage
+ // the full-width final stage of the computation that follows, so we adjust
+ // recip32 downward by one bit.
+ recip32--;
+
+ // We need to perform one more iteration to get us to 56 binary digits;
+ // The last iteration needs to happen with extra precision.
+ const uint32_t q63blo = bSignificand << 11;
+ uint64_t correction, reciprocal;
+ correction = -((uint64_t)recip32*q31b + ((uint64_t)recip32*q63blo >> 32));
+ uint32_t cHi = correction >> 32;
+ uint32_t cLo = correction;
+ reciprocal = (uint64_t)recip32*cHi + ((uint64_t)recip32*cLo >> 32);
+
+ // We already adjusted the 32-bit estimate, now we need to adjust the final
+ // 64-bit reciprocal estimate downward to ensure that it is strictly smaller
+ // than the infinitely precise exact reciprocal. Because the computation
+ // of the Newton-Raphson step is truncating at every step, this adjustment
+ // is small; most of the work is already done.
+ reciprocal -= 2;
+
+ // The numerical reciprocal is accurate to within 2^-56, lies in the
+ // interval [0.5, 1.0), and is strictly smaller than the true reciprocal
+ // of b. Multiplying a by this reciprocal thus gives a numerical q = a/b
+ // in Q53 with the following properties:
+ //
+ // 1. q < a/b
+ // 2. q is in the interval [0.5, 2.0)
+ // 3. the error in q is bounded away from 2^-53 (actually, we have a
+ // couple of bits to spare, but this is all we need).
+
+ // We need a 64 x 64 multiply high to compute q, which isn't a basic
+ // operation in C, so we need to be a little bit fussy.
+ rep_t quotient, quotientLo;
+ wideMultiply(aSignificand << 2, reciprocal, &quotient, &quotientLo);
+
+ // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).
+ // In either case, we are going to compute a residual of the form
+ //
+ // r = a - q*b
+ //
+ // We know from the construction of q that r satisfies:
+ //
+ // 0 <= r < ulp(q)*b
+ //
+ // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we
+ // already have the correct result. The exact halfway case cannot occur.
+ // We also take this time to right shift quotient if it falls in the [1,2)
+ // range and adjust the exponent accordingly.
+ rep_t residual;
+ if (quotient < (implicitBit << 1)) {
+ residual = (aSignificand << 53) - quotient * bSignificand;
+ quotientExponent--;
+ } else {
+ quotient >>= 1;
+ residual = (aSignificand << 52) - quotient * bSignificand;
+ }
+
+ const int writtenExponent = quotientExponent + exponentBias;
+
+ if (writtenExponent >= maxExponent) {
+ // If we have overflowed the exponent, return infinity.
+ return fromRep(infRep | quotientSign);
+ }
+
+ else if (writtenExponent < 1) {
+ // Flush denormals to zero. In the future, it would be nice to add
+ // code to round them correctly.
+ return fromRep(quotientSign);
+ }
+
+ else {
+ const bool round = (residual << 1) > bSignificand;
+ // Clear the implicit bit
+ rep_t absResult = quotient & significandMask;
+ // Insert the exponent
+ absResult |= (rep_t)writtenExponent << significandBits;
+ // Round
+ absResult += round;
+ // Insert the sign and return
+ const double result = fromRep(absResult | quotientSign);
+ return result;
+ }
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_ddiv(fp_t a, fp_t b) {
+ return __divdf3(a, b);
+}
+#else
+AEABI_RTABI fp_t __aeabi_ddiv(fp_t a, fp_t b) COMPILER_RT_ALIAS(__divdf3);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/divdi3.c b/contrib/compiler-rt/lib/builtins/divdi3.c
new file mode 100644
index 000000000000..b8eebcb20465
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/divdi3.c
@@ -0,0 +1,29 @@
+/* ===-- divdi3.c - Implement __divdi3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __divdi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a / b */
+
+COMPILER_RT_ABI di_int
+__divdi3(di_int a, di_int b)
+{
+ const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1;
+ di_int s_a = a >> bits_in_dword_m1; /* s_a = a < 0 ? -1 : 0 */
+ di_int s_b = b >> bits_in_dword_m1; /* s_b = b < 0 ? -1 : 0 */
+ a = (a ^ s_a) - s_a; /* negate if s_a == -1 */
+ b = (b ^ s_b) - s_b; /* negate if s_b == -1 */
+ s_a ^= s_b; /*sign of quotient */
+ return (__udivmoddi4(a, b, (du_int*)0) ^ s_a) - s_a; /* negate if s_a == -1 */
+}
diff --git a/contrib/compiler-rt/lib/builtins/divmoddi4.c b/contrib/compiler-rt/lib/builtins/divmoddi4.c
new file mode 100644
index 000000000000..0d4df67a63e0
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/divmoddi4.c
@@ -0,0 +1,25 @@
+/*===-- divmoddi4.c - Implement __divmoddi4 --------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __divmoddi4 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a / b, *rem = a % b */
+
+COMPILER_RT_ABI di_int
+__divmoddi4(di_int a, di_int b, di_int* rem)
+{
+ di_int d = __divdi3(a,b);
+ *rem = a - (d*b);
+ return d;
+}
diff --git a/contrib/compiler-rt/lib/builtins/divmodsi4.c b/contrib/compiler-rt/lib/builtins/divmodsi4.c
new file mode 100644
index 000000000000..dabe2874397c
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/divmodsi4.c
@@ -0,0 +1,27 @@
+/*===-- divmodsi4.c - Implement __divmodsi4 --------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __divmodsi4 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a / b, *rem = a % b */
+
+COMPILER_RT_ABI si_int
+__divmodsi4(si_int a, si_int b, si_int* rem)
+{
+ si_int d = __divsi3(a,b);
+ *rem = a - (d*b);
+ return d;
+}
+
+
diff --git a/contrib/compiler-rt/lib/builtins/divsc3.c b/contrib/compiler-rt/lib/builtins/divsc3.c
new file mode 100644
index 000000000000..0d18a256c3de
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/divsc3.c
@@ -0,0 +1,63 @@
+/*===-- divsc3.c - Implement __divsc3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __divsc3 for the compiler_rt library.
+ *
+ *===----------------------------------------------------------------------===
+ */
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+#include "int_lib.h"
+#include "int_math.h"
+
+/* Returns: the quotient of (a + ib) / (c + id) */
+
+COMPILER_RT_ABI Fcomplex
+__divsc3(float __a, float __b, float __c, float __d)
+{
+ int __ilogbw = 0;
+ float __logbw =
+ __compiler_rt_logbf(crt_fmaxf(crt_fabsf(__c), crt_fabsf(__d)));
+ if (crt_isfinite(__logbw))
+ {
+ __ilogbw = (int)__logbw;
+ __c = crt_scalbnf(__c, -__ilogbw);
+ __d = crt_scalbnf(__d, -__ilogbw);
+ }
+ float __denom = __c * __c + __d * __d;
+ Fcomplex z;
+ COMPLEX_REAL(z) = crt_scalbnf((__a * __c + __b * __d) / __denom, -__ilogbw);
+ COMPLEX_IMAGINARY(z) = crt_scalbnf((__b * __c - __a * __d) / __denom, -__ilogbw);
+ if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
+ {
+ if ((__denom == 0) && (!crt_isnan(__a) || !crt_isnan(__b)))
+ {
+ COMPLEX_REAL(z) = crt_copysignf(CRT_INFINITY, __c) * __a;
+ COMPLEX_IMAGINARY(z) = crt_copysignf(CRT_INFINITY, __c) * __b;
+ }
+ else if ((crt_isinf(__a) || crt_isinf(__b)) &&
+ crt_isfinite(__c) && crt_isfinite(__d))
+ {
+ __a = crt_copysignf(crt_isinf(__a) ? 1 : 0, __a);
+ __b = crt_copysignf(crt_isinf(__b) ? 1 : 0, __b);
+ COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
+ COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
+ }
+ else if (crt_isinf(__logbw) && __logbw > 0 &&
+ crt_isfinite(__a) && crt_isfinite(__b))
+ {
+ __c = crt_copysignf(crt_isinf(__c) ? 1 : 0, __c);
+ __d = crt_copysignf(crt_isinf(__d) ? 1 : 0, __d);
+ COMPLEX_REAL(z) = 0 * (__a * __c + __b * __d);
+ COMPLEX_IMAGINARY(z) = 0 * (__b * __c - __a * __d);
+ }
+ }
+ return z;
+}
diff --git a/contrib/compiler-rt/lib/builtins/divsf3.c b/contrib/compiler-rt/lib/builtins/divsf3.c
new file mode 100644
index 000000000000..a74917fd1de5
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/divsf3.c
@@ -0,0 +1,177 @@
+//===-- lib/divsf3.c - Single-precision division ------------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements single-precision soft-float division
+// with the IEEE-754 default rounding (to nearest, ties to even).
+//
+// For simplicity, this implementation currently flushes denormals to zero.
+// It should be a fairly straightforward exercise to implement gradual
+// underflow with correct rounding.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+COMPILER_RT_ABI fp_t
+__divsf3(fp_t a, fp_t b) {
+
+ const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
+ const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
+ const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit;
+
+ rep_t aSignificand = toRep(a) & significandMask;
+ rep_t bSignificand = toRep(b) & significandMask;
+ int scale = 0;
+
+ // Detect if a or b is zero, denormal, infinity, or NaN.
+ if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) {
+
+ const rep_t aAbs = toRep(a) & absMask;
+ const rep_t bAbs = toRep(b) & absMask;
+
+ // NaN / anything = qNaN
+ if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
+ // anything / NaN = qNaN
+ if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
+
+ if (aAbs == infRep) {
+ // infinity / infinity = NaN
+ if (bAbs == infRep) return fromRep(qnanRep);
+ // infinity / anything else = +/- infinity
+ else return fromRep(aAbs | quotientSign);
+ }
+
+ // anything else / infinity = +/- 0
+ if (bAbs == infRep) return fromRep(quotientSign);
+
+ if (!aAbs) {
+ // zero / zero = NaN
+ if (!bAbs) return fromRep(qnanRep);
+ // zero / anything else = +/- zero
+ else return fromRep(quotientSign);
+ }
+ // anything else / zero = +/- infinity
+ if (!bAbs) return fromRep(infRep | quotientSign);
+
+ // one or both of a or b is denormal, the other (if applicable) is a
+ // normal number. Renormalize one or both of a and b, and set scale to
+ // include the necessary exponent adjustment.
+ if (aAbs < implicitBit) scale += normalize(&aSignificand);
+ if (bAbs < implicitBit) scale -= normalize(&bSignificand);
+ }
+
+ // Or in the implicit significand bit. (If we fell through from the
+ // denormal path it was already set by normalize( ), but setting it twice
+ // won't hurt anything.)
+ aSignificand |= implicitBit;
+ bSignificand |= implicitBit;
+ int quotientExponent = aExponent - bExponent + scale;
+
+ // Align the significand of b as a Q31 fixed-point number in the range
+ // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax
+ // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This
+ // is accurate to about 3.5 binary digits.
+ uint32_t q31b = bSignificand << 8;
+ uint32_t reciprocal = UINT32_C(0x7504f333) - q31b;
+
+ // Now refine the reciprocal estimate using a Newton-Raphson iteration:
+ //
+ // x1 = x0 * (2 - x0 * b)
+ //
+ // This doubles the number of correct binary digits in the approximation
+ // with each iteration, so after three iterations, we have about 28 binary
+ // digits of accuracy.
+ uint32_t correction;
+ correction = -((uint64_t)reciprocal * q31b >> 32);
+ reciprocal = (uint64_t)reciprocal * correction >> 31;
+ correction = -((uint64_t)reciprocal * q31b >> 32);
+ reciprocal = (uint64_t)reciprocal * correction >> 31;
+ correction = -((uint64_t)reciprocal * q31b >> 32);
+ reciprocal = (uint64_t)reciprocal * correction >> 31;
+
+ // Exhaustive testing shows that the error in reciprocal after three steps
+ // is in the interval [-0x1.f58108p-31, 0x1.d0e48cp-29], in line with our
+ // expectations. We bump the reciprocal by a tiny value to force the error
+ // to be strictly positive (in the range [0x1.4fdfp-37,0x1.287246p-29], to
+ // be specific). This also causes 1/1 to give a sensible approximation
+ // instead of zero (due to overflow).
+ reciprocal -= 2;
+
+ // The numerical reciprocal is accurate to within 2^-28, lies in the
+ // interval [0x1.000000eep-1, 0x1.fffffffcp-1], and is strictly smaller
+ // than the true reciprocal of b. Multiplying a by this reciprocal thus
+ // gives a numerical q = a/b in Q24 with the following properties:
+ //
+ // 1. q < a/b
+ // 2. q is in the interval [0x1.000000eep-1, 0x1.fffffffcp0)
+ // 3. the error in q is at most 2^-24 + 2^-27 -- the 2^24 term comes
+ // from the fact that we truncate the product, and the 2^27 term
+ // is the error in the reciprocal of b scaled by the maximum
+ // possible value of a. As a consequence of this error bound,
+ // either q or nextafter(q) is the correctly rounded
+ rep_t quotient = (uint64_t)reciprocal*(aSignificand << 1) >> 32;
+
+ // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).
+ // In either case, we are going to compute a residual of the form
+ //
+ // r = a - q*b
+ //
+ // We know from the construction of q that r satisfies:
+ //
+ // 0 <= r < ulp(q)*b
+ //
+ // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we
+ // already have the correct result. The exact halfway case cannot occur.
+ // We also take this time to right shift quotient if it falls in the [1,2)
+ // range and adjust the exponent accordingly.
+ rep_t residual;
+ if (quotient < (implicitBit << 1)) {
+ residual = (aSignificand << 24) - quotient * bSignificand;
+ quotientExponent--;
+ } else {
+ quotient >>= 1;
+ residual = (aSignificand << 23) - quotient * bSignificand;
+ }
+
+ const int writtenExponent = quotientExponent + exponentBias;
+
+ if (writtenExponent >= maxExponent) {
+ // If we have overflowed the exponent, return infinity.
+ return fromRep(infRep | quotientSign);
+ }
+
+ else if (writtenExponent < 1) {
+ // Flush denormals to zero. In the future, it would be nice to add
+ // code to round them correctly.
+ return fromRep(quotientSign);
+ }
+
+ else {
+ const bool round = (residual << 1) > bSignificand;
+ // Clear the implicit bit
+ rep_t absResult = quotient & significandMask;
+ // Insert the exponent
+ absResult |= (rep_t)writtenExponent << significandBits;
+ // Round
+ absResult += round;
+ // Insert the sign and return
+ return fromRep(absResult | quotientSign);
+ }
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_fdiv(fp_t a, fp_t b) {
+ return __divsf3(a, b);
+}
+#else
+AEABI_RTABI fp_t __aeabi_fdiv(fp_t a, fp_t b) COMPILER_RT_ALIAS(__divsf3);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/divsi3.c b/contrib/compiler-rt/lib/builtins/divsi3.c
new file mode 100644
index 000000000000..75aea008ddc1
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/divsi3.c
@@ -0,0 +1,39 @@
+/* ===-- divsi3.c - Implement __divsi3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __divsi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a / b */
+
+COMPILER_RT_ABI si_int
+__divsi3(si_int a, si_int b)
+{
+ const int bits_in_word_m1 = (int)(sizeof(si_int) * CHAR_BIT) - 1;
+ si_int s_a = a >> bits_in_word_m1; /* s_a = a < 0 ? -1 : 0 */
+ si_int s_b = b >> bits_in_word_m1; /* s_b = b < 0 ? -1 : 0 */
+ a = (a ^ s_a) - s_a; /* negate if s_a == -1 */
+ b = (b ^ s_b) - s_b; /* negate if s_b == -1 */
+ s_a ^= s_b; /* sign of quotient */
+ /*
+ * On CPUs without unsigned hardware division support,
+ * this calls __udivsi3 (notice the cast to su_int).
+ * On CPUs with unsigned hardware division support,
+ * this uses the unsigned division instruction.
+ */
+ return ((su_int)a/(su_int)b ^ s_a) - s_a; /* negate if s_a == -1 */
+}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI si_int __aeabi_idiv(si_int a, si_int b) COMPILER_RT_ALIAS(__divsi3);
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/divtc3.c b/contrib/compiler-rt/lib/builtins/divtc3.c
new file mode 100644
index 000000000000..e5ea00d841e3
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/divtc3.c
@@ -0,0 +1,63 @@
+/*===-- divtc3.c - Implement __divtc3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __divtc3 for the compiler_rt library.
+ *
+ *===----------------------------------------------------------------------===
+ */
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+#include "int_lib.h"
+#include "int_math.h"
+
+/* Returns: the quotient of (a + ib) / (c + id) */
+
+COMPILER_RT_ABI Lcomplex
+__divtc3(long double __a, long double __b, long double __c, long double __d)
+{
+ int __ilogbw = 0;
+ long double __logbw =
+ __compiler_rt_logbl(crt_fmaxl(crt_fabsl(__c), crt_fabsl(__d)));
+ if (crt_isfinite(__logbw))
+ {
+ __ilogbw = (int)__logbw;
+ __c = crt_scalbnl(__c, -__ilogbw);
+ __d = crt_scalbnl(__d, -__ilogbw);
+ }
+ long double __denom = __c * __c + __d * __d;
+ Lcomplex z;
+ COMPLEX_REAL(z) = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw);
+ COMPLEX_IMAGINARY(z) = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw);
+ if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
+ {
+ if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b)))
+ {
+ COMPLEX_REAL(z) = crt_copysignl(CRT_INFINITY, __c) * __a;
+ COMPLEX_IMAGINARY(z) = crt_copysignl(CRT_INFINITY, __c) * __b;
+ }
+ else if ((crt_isinf(__a) || crt_isinf(__b)) &&
+ crt_isfinite(__c) && crt_isfinite(__d))
+ {
+ __a = crt_copysignl(crt_isinf(__a) ? 1.0 : 0.0, __a);
+ __b = crt_copysignl(crt_isinf(__b) ? 1.0 : 0.0, __b);
+ COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
+ COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
+ }
+ else if (crt_isinf(__logbw) && __logbw > 0.0 &&
+ crt_isfinite(__a) && crt_isfinite(__b))
+ {
+ __c = crt_copysignl(crt_isinf(__c) ? 1.0 : 0.0, __c);
+ __d = crt_copysignl(crt_isinf(__d) ? 1.0 : 0.0, __d);
+ COMPLEX_REAL(z) = 0.0 * (__a * __c + __b * __d);
+ COMPLEX_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d);
+ }
+ }
+ return z;
+}
diff --git a/contrib/compiler-rt/lib/builtins/divtf3.c b/contrib/compiler-rt/lib/builtins/divtf3.c
new file mode 100644
index 000000000000..e81dab826bdd
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/divtf3.c
@@ -0,0 +1,203 @@
+//===-- lib/divtf3.c - Quad-precision division --------------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements quad-precision soft-float division
+// with the IEEE-754 default rounding (to nearest, ties to even).
+//
+// For simplicity, this implementation currently flushes denormals to zero.
+// It should be a fairly straightforward exercise to implement gradual
+// underflow with correct rounding.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+COMPILER_RT_ABI fp_t __divtf3(fp_t a, fp_t b) {
+
+ const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
+ const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
+ const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit;
+
+ rep_t aSignificand = toRep(a) & significandMask;
+ rep_t bSignificand = toRep(b) & significandMask;
+ int scale = 0;
+
+ // Detect if a or b is zero, denormal, infinity, or NaN.
+ if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) {
+
+ const rep_t aAbs = toRep(a) & absMask;
+ const rep_t bAbs = toRep(b) & absMask;
+
+ // NaN / anything = qNaN
+ if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
+ // anything / NaN = qNaN
+ if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
+
+ if (aAbs == infRep) {
+ // infinity / infinity = NaN
+ if (bAbs == infRep) return fromRep(qnanRep);
+ // infinity / anything else = +/- infinity
+ else return fromRep(aAbs | quotientSign);
+ }
+
+ // anything else / infinity = +/- 0
+ if (bAbs == infRep) return fromRep(quotientSign);
+
+ if (!aAbs) {
+ // zero / zero = NaN
+ if (!bAbs) return fromRep(qnanRep);
+ // zero / anything else = +/- zero
+ else return fromRep(quotientSign);
+ }
+ // anything else / zero = +/- infinity
+ if (!bAbs) return fromRep(infRep | quotientSign);
+
+ // one or both of a or b is denormal, the other (if applicable) is a
+ // normal number. Renormalize one or both of a and b, and set scale to
+ // include the necessary exponent adjustment.
+ if (aAbs < implicitBit) scale += normalize(&aSignificand);
+ if (bAbs < implicitBit) scale -= normalize(&bSignificand);
+ }
+
+ // Or in the implicit significand bit. (If we fell through from the
+ // denormal path it was already set by normalize( ), but setting it twice
+ // won't hurt anything.)
+ aSignificand |= implicitBit;
+ bSignificand |= implicitBit;
+ int quotientExponent = aExponent - bExponent + scale;
+
+ // Align the significand of b as a Q63 fixed-point number in the range
+ // [1, 2.0) and get a Q64 approximate reciprocal using a small minimax
+ // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This
+ // is accurate to about 3.5 binary digits.
+ const uint64_t q63b = bSignificand >> 49;
+ uint64_t recip64 = UINT64_C(0x7504f333F9DE6484) - q63b;
+ // 0x7504f333F9DE6484 / 2^64 + 1 = 3/4 + 1/sqrt(2)
+
+ // Now refine the reciprocal estimate using a Newton-Raphson iteration:
+ //
+ // x1 = x0 * (2 - x0 * b)
+ //
+ // This doubles the number of correct binary digits in the approximation
+ // with each iteration.
+ uint64_t correction64;
+ correction64 = -((rep_t)recip64 * q63b >> 64);
+ recip64 = (rep_t)recip64 * correction64 >> 63;
+ correction64 = -((rep_t)recip64 * q63b >> 64);
+ recip64 = (rep_t)recip64 * correction64 >> 63;
+ correction64 = -((rep_t)recip64 * q63b >> 64);
+ recip64 = (rep_t)recip64 * correction64 >> 63;
+ correction64 = -((rep_t)recip64 * q63b >> 64);
+ recip64 = (rep_t)recip64 * correction64 >> 63;
+ correction64 = -((rep_t)recip64 * q63b >> 64);
+ recip64 = (rep_t)recip64 * correction64 >> 63;
+
+ // recip64 might have overflowed to exactly zero in the preceeding
+ // computation if the high word of b is exactly 1.0. This would sabotage
+ // the full-width final stage of the computation that follows, so we adjust
+ // recip64 downward by one bit.
+ recip64--;
+
+ // We need to perform one more iteration to get us to 112 binary digits;
+ // The last iteration needs to happen with extra precision.
+ const uint64_t q127blo = bSignificand << 15;
+ rep_t correction, reciprocal;
+
+ // NOTE: This operation is equivalent to __multi3, which is not implemented
+ // in some architechure
+ rep_t r64q63, r64q127, r64cH, r64cL, dummy;
+ wideMultiply((rep_t)recip64, (rep_t)q63b, &dummy, &r64q63);
+ wideMultiply((rep_t)recip64, (rep_t)q127blo, &dummy, &r64q127);
+
+ correction = -(r64q63 + (r64q127 >> 64));
+
+ uint64_t cHi = correction >> 64;
+ uint64_t cLo = correction;
+
+ wideMultiply((rep_t)recip64, (rep_t)cHi, &dummy, &r64cH);
+ wideMultiply((rep_t)recip64, (rep_t)cLo, &dummy, &r64cL);
+
+ reciprocal = r64cH + (r64cL >> 64);
+
+ // We already adjusted the 64-bit estimate, now we need to adjust the final
+ // 128-bit reciprocal estimate downward to ensure that it is strictly smaller
+ // than the infinitely precise exact reciprocal. Because the computation
+ // of the Newton-Raphson step is truncating at every step, this adjustment
+ // is small; most of the work is already done.
+ reciprocal -= 2;
+
+ // The numerical reciprocal is accurate to within 2^-112, lies in the
+ // interval [0.5, 1.0), and is strictly smaller than the true reciprocal
+ // of b. Multiplying a by this reciprocal thus gives a numerical q = a/b
+ // in Q127 with the following properties:
+ //
+ // 1. q < a/b
+ // 2. q is in the interval [0.5, 2.0)
+ // 3. the error in q is bounded away from 2^-113 (actually, we have a
+ // couple of bits to spare, but this is all we need).
+
+ // We need a 128 x 128 multiply high to compute q, which isn't a basic
+ // operation in C, so we need to be a little bit fussy.
+ rep_t quotient, quotientLo;
+ wideMultiply(aSignificand << 2, reciprocal, &quotient, &quotientLo);
+
+ // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).
+ // In either case, we are going to compute a residual of the form
+ //
+ // r = a - q*b
+ //
+ // We know from the construction of q that r satisfies:
+ //
+ // 0 <= r < ulp(q)*b
+ //
+ // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we
+ // already have the correct result. The exact halfway case cannot occur.
+ // We also take this time to right shift quotient if it falls in the [1,2)
+ // range and adjust the exponent accordingly.
+ rep_t residual;
+ rep_t qb;
+
+ if (quotient < (implicitBit << 1)) {
+ wideMultiply(quotient, bSignificand, &dummy, &qb);
+ residual = (aSignificand << 113) - qb;
+ quotientExponent--;
+ } else {
+ quotient >>= 1;
+ wideMultiply(quotient, bSignificand, &dummy, &qb);
+ residual = (aSignificand << 112) - qb;
+ }
+
+ const int writtenExponent = quotientExponent + exponentBias;
+
+ if (writtenExponent >= maxExponent) {
+ // If we have overflowed the exponent, return infinity.
+ return fromRep(infRep | quotientSign);
+ }
+ else if (writtenExponent < 1) {
+ // Flush denormals to zero. In the future, it would be nice to add
+ // code to round them correctly.
+ return fromRep(quotientSign);
+ }
+ else {
+ const bool round = (residual << 1) >= bSignificand;
+ // Clear the implicit bit
+ rep_t absResult = quotient & significandMask;
+ // Insert the exponent
+ absResult |= (rep_t)writtenExponent << significandBits;
+ // Round
+ absResult += round;
+ // Insert the sign and return
+ const long double result = fromRep(absResult | quotientSign);
+ return result;
+ }
+}
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/divti3.c b/contrib/compiler-rt/lib/builtins/divti3.c
new file mode 100644
index 000000000000..c73eae28fe08
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/divti3.c
@@ -0,0 +1,33 @@
+/* ===-- divti3.c - Implement __divti3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __divti3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: a / b */
+
+COMPILER_RT_ABI ti_int
+__divti3(ti_int a, ti_int b)
+{
+ const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1;
+ ti_int s_a = a >> bits_in_tword_m1; /* s_a = a < 0 ? -1 : 0 */
+ ti_int s_b = b >> bits_in_tword_m1; /* s_b = b < 0 ? -1 : 0 */
+ a = (a ^ s_a) - s_a; /* negate if s_a == -1 */
+ b = (b ^ s_b) - s_b; /* negate if s_b == -1 */
+ s_a ^= s_b; /* sign of quotient */
+ return (__udivmodti4(a, b, (tu_int*)0) ^ s_a) - s_a; /* negate if s_a == -1 */
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/divxc3.c b/contrib/compiler-rt/lib/builtins/divxc3.c
new file mode 100644
index 000000000000..6f49280e5f61
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/divxc3.c
@@ -0,0 +1,63 @@
+/* ===-- divxc3.c - Implement __divxc3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __divxc3 for the compiler_rt library.
+ *
+ */
+
+#if !_ARCH_PPC
+
+#include "int_lib.h"
+#include "int_math.h"
+
+/* Returns: the quotient of (a + ib) / (c + id) */
+
+COMPILER_RT_ABI Lcomplex
+__divxc3(long double __a, long double __b, long double __c, long double __d)
+{
+ int __ilogbw = 0;
+ long double __logbw = crt_logbl(crt_fmaxl(crt_fabsl(__c), crt_fabsl(__d)));
+ if (crt_isfinite(__logbw))
+ {
+ __ilogbw = (int)__logbw;
+ __c = crt_scalbnl(__c, -__ilogbw);
+ __d = crt_scalbnl(__d, -__ilogbw);
+ }
+ long double __denom = __c * __c + __d * __d;
+ Lcomplex z;
+ COMPLEX_REAL(z) = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw);
+ COMPLEX_IMAGINARY(z) = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw);
+ if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
+ {
+ if ((__denom == 0) && (!crt_isnan(__a) || !crt_isnan(__b)))
+ {
+ COMPLEX_REAL(z) = crt_copysignl(CRT_INFINITY, __c) * __a;
+ COMPLEX_IMAGINARY(z) = crt_copysignl(CRT_INFINITY, __c) * __b;
+ }
+ else if ((crt_isinf(__a) || crt_isinf(__b)) &&
+ crt_isfinite(__c) && crt_isfinite(__d))
+ {
+ __a = crt_copysignl(crt_isinf(__a) ? 1 : 0, __a);
+ __b = crt_copysignl(crt_isinf(__b) ? 1 : 0, __b);
+ COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
+ COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
+ }
+ else if (crt_isinf(__logbw) && __logbw > 0 &&
+ crt_isfinite(__a) && crt_isfinite(__b))
+ {
+ __c = crt_copysignl(crt_isinf(__c) ? 1 : 0, __c);
+ __d = crt_copysignl(crt_isinf(__d) ? 1 : 0, __d);
+ COMPLEX_REAL(z) = 0 * (__a * __c + __b * __d);
+ COMPLEX_IMAGINARY(z) = 0 * (__b * __c - __a * __d);
+ }
+ }
+ return z;
+}
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/emutls.c b/contrib/compiler-rt/lib/builtins/emutls.c
new file mode 100644
index 000000000000..ef95a1c260c1
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/emutls.c
@@ -0,0 +1,405 @@
+/* ===---------- emutls.c - Implements __emutls_get_address ---------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "int_lib.h"
+#include "int_util.h"
+
+#ifdef __BIONIC__
+/* There are 4 pthread key cleanup rounds on Bionic. Delay emutls deallocation
+ to round 2. We need to delay deallocation because:
+ - Android versions older than M lack __cxa_thread_atexit_impl, so apps
+ use a pthread key destructor to call C++ destructors.
+ - Apps might use __thread/thread_local variables in pthread destructors.
+ We can't wait until the final two rounds, because jemalloc needs two rounds
+ after the final malloc/free call to free its thread-specific data (see
+ https://reviews.llvm.org/D46978#1107507). */
+#define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 1
+#else
+#define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 0
+#endif
+
+typedef struct emutls_address_array {
+ uintptr_t skip_destructor_rounds;
+ uintptr_t size; /* number of elements in the 'data' array */
+ void* data[];
+} emutls_address_array;
+
+static void emutls_shutdown(emutls_address_array *array);
+
+#ifndef _WIN32
+
+#include <pthread.h>
+
+static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_key_t emutls_pthread_key;
+static bool emutls_key_created = false;
+
+typedef unsigned int gcc_word __attribute__((mode(word)));
+typedef unsigned int gcc_pointer __attribute__((mode(pointer)));
+
+/* Default is not to use posix_memalign, so systems like Android
+ * can use thread local data without heavier POSIX memory allocators.
+ */
+#ifndef EMUTLS_USE_POSIX_MEMALIGN
+#define EMUTLS_USE_POSIX_MEMALIGN 0
+#endif
+
+static __inline void *emutls_memalign_alloc(size_t align, size_t size) {
+ void *base;
+#if EMUTLS_USE_POSIX_MEMALIGN
+ if (posix_memalign(&base, align, size) != 0)
+ abort();
+#else
+ #define EXTRA_ALIGN_PTR_BYTES (align - 1 + sizeof(void*))
+ char* object;
+ if ((object = (char*)malloc(EXTRA_ALIGN_PTR_BYTES + size)) == NULL)
+ abort();
+ base = (void*)(((uintptr_t)(object + EXTRA_ALIGN_PTR_BYTES))
+ & ~(uintptr_t)(align - 1));
+
+ ((void**)base)[-1] = object;
+#endif
+ return base;
+}
+
+static __inline void emutls_memalign_free(void *base) {
+#if EMUTLS_USE_POSIX_MEMALIGN
+ free(base);
+#else
+ /* The mallocated address is in ((void**)base)[-1] */
+ free(((void**)base)[-1]);
+#endif
+}
+
+static __inline void emutls_setspecific(emutls_address_array *value) {
+ pthread_setspecific(emutls_pthread_key, (void*) value);
+}
+
+static __inline emutls_address_array* emutls_getspecific() {
+ return (emutls_address_array*) pthread_getspecific(emutls_pthread_key);
+}
+
+static void emutls_key_destructor(void* ptr) {
+ emutls_address_array *array = (emutls_address_array*)ptr;
+ if (array->skip_destructor_rounds > 0) {
+ /* emutls is deallocated using a pthread key destructor. These
+ * destructors are called in several rounds to accommodate destructor
+ * functions that (re)initialize key values with pthread_setspecific.
+ * Delay the emutls deallocation to accommodate other end-of-thread
+ * cleanup tasks like calling thread_local destructors (e.g. the
+ * __cxa_thread_atexit fallback in libc++abi).
+ */
+ array->skip_destructor_rounds--;
+ emutls_setspecific(array);
+ } else {
+ emutls_shutdown(array);
+ free(ptr);
+ }
+}
+
+static __inline void emutls_init(void) {
+ if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0)
+ abort();
+ emutls_key_created = true;
+}
+
+static __inline void emutls_init_once(void) {
+ static pthread_once_t once = PTHREAD_ONCE_INIT;
+ pthread_once(&once, emutls_init);
+}
+
+static __inline void emutls_lock() {
+ pthread_mutex_lock(&emutls_mutex);
+}
+
+static __inline void emutls_unlock() {
+ pthread_mutex_unlock(&emutls_mutex);
+}
+
+#else /* _WIN32 */
+
+#include <windows.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <assert.h>
+
+static LPCRITICAL_SECTION emutls_mutex;
+static DWORD emutls_tls_index = TLS_OUT_OF_INDEXES;
+
+typedef uintptr_t gcc_word;
+typedef void * gcc_pointer;
+
+static void win_error(DWORD last_err, const char *hint) {
+ char *buffer = NULL;
+ if (FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER |
+ FORMAT_MESSAGE_FROM_SYSTEM |
+ FORMAT_MESSAGE_MAX_WIDTH_MASK,
+ NULL, last_err, 0, (LPSTR)&buffer, 1, NULL)) {
+ fprintf(stderr, "Windows error: %s\n", buffer);
+ } else {
+ fprintf(stderr, "Unkown Windows error: %s\n", hint);
+ }
+ LocalFree(buffer);
+}
+
+static __inline void win_abort(DWORD last_err, const char *hint) {
+ win_error(last_err, hint);
+ abort();
+}
+
+static __inline void *emutls_memalign_alloc(size_t align, size_t size) {
+ void *base = _aligned_malloc(size, align);
+ if (!base)
+ win_abort(GetLastError(), "_aligned_malloc");
+ return base;
+}
+
+static __inline void emutls_memalign_free(void *base) {
+ _aligned_free(base);
+}
+
+static void emutls_exit(void) {
+ if (emutls_mutex) {
+ DeleteCriticalSection(emutls_mutex);
+ _aligned_free(emutls_mutex);
+ emutls_mutex = NULL;
+ }
+ if (emutls_tls_index != TLS_OUT_OF_INDEXES) {
+ emutls_shutdown((emutls_address_array*)TlsGetValue(emutls_tls_index));
+ TlsFree(emutls_tls_index);
+ emutls_tls_index = TLS_OUT_OF_INDEXES;
+ }
+}
+
+#pragma warning (push)
+#pragma warning (disable : 4100)
+static BOOL CALLBACK emutls_init(PINIT_ONCE p0, PVOID p1, PVOID *p2) {
+ emutls_mutex = (LPCRITICAL_SECTION)_aligned_malloc(sizeof(CRITICAL_SECTION), 16);
+ if (!emutls_mutex) {
+ win_error(GetLastError(), "_aligned_malloc");
+ return FALSE;
+ }
+ InitializeCriticalSection(emutls_mutex);
+
+ emutls_tls_index = TlsAlloc();
+ if (emutls_tls_index == TLS_OUT_OF_INDEXES) {
+ emutls_exit();
+ win_error(GetLastError(), "TlsAlloc");
+ return FALSE;
+ }
+ atexit(&emutls_exit);
+ return TRUE;
+}
+
+static __inline void emutls_init_once(void) {
+ static INIT_ONCE once;
+ InitOnceExecuteOnce(&once, emutls_init, NULL, NULL);
+}
+
+static __inline void emutls_lock() {
+ EnterCriticalSection(emutls_mutex);
+}
+
+static __inline void emutls_unlock() {
+ LeaveCriticalSection(emutls_mutex);
+}
+
+static __inline void emutls_setspecific(emutls_address_array *value) {
+ if (TlsSetValue(emutls_tls_index, (LPVOID) value) == 0)
+ win_abort(GetLastError(), "TlsSetValue");
+}
+
+static __inline emutls_address_array* emutls_getspecific() {
+ LPVOID value = TlsGetValue(emutls_tls_index);
+ if (value == NULL) {
+ const DWORD err = GetLastError();
+ if (err != ERROR_SUCCESS)
+ win_abort(err, "TlsGetValue");
+ }
+ return (emutls_address_array*) value;
+}
+
+/* Provide atomic load/store functions for emutls_get_index if built with MSVC.
+ */
+#if !defined(__ATOMIC_RELEASE)
+#include <intrin.h>
+
+enum { __ATOMIC_ACQUIRE = 2, __ATOMIC_RELEASE = 3 };
+
+static __inline uintptr_t __atomic_load_n(void *ptr, unsigned type) {
+ assert(type == __ATOMIC_ACQUIRE);
+ // These return the previous value - but since we do an OR with 0,
+ // it's equivalent to a plain load.
+#ifdef _WIN64
+ return InterlockedOr64(ptr, 0);
+#else
+ return InterlockedOr(ptr, 0);
+#endif
+}
+
+static __inline void __atomic_store_n(void *ptr, uintptr_t val, unsigned type) {
+ assert(type == __ATOMIC_RELEASE);
+ InterlockedExchangePointer((void *volatile *)ptr, (void *)val);
+}
+
+#endif /* __ATOMIC_RELEASE */
+
+#pragma warning (pop)
+
+#endif /* _WIN32 */
+
+static size_t emutls_num_object = 0; /* number of allocated TLS objects */
+
+/* Free the allocated TLS data
+ */
+static void emutls_shutdown(emutls_address_array *array) {
+ if (array) {
+ uintptr_t i;
+ for (i = 0; i < array->size; ++i) {
+ if (array->data[i])
+ emutls_memalign_free(array->data[i]);
+ }
+ }
+}
+
+/* For every TLS variable xyz,
+ * there is one __emutls_control variable named __emutls_v.xyz.
+ * If xyz has non-zero initial value, __emutls_v.xyz's "value"
+ * will point to __emutls_t.xyz, which has the initial value.
+ */
+typedef struct __emutls_control {
+ /* Must use gcc_word here, instead of size_t, to match GCC. When
+ gcc_word is larger than size_t, the upper extra bits are all
+ zeros. We can use variables of size_t to operate on size and
+ align. */
+ gcc_word size; /* size of the object in bytes */
+ gcc_word align; /* alignment of the object in bytes */
+ union {
+ uintptr_t index; /* data[index-1] is the object address */
+ void* address; /* object address, when in single thread env */
+ } object;
+ void* value; /* null or non-zero initial value for the object */
+} __emutls_control;
+
+/* Emulated TLS objects are always allocated at run-time. */
+static __inline void *emutls_allocate_object(__emutls_control *control) {
+ /* Use standard C types, check with gcc's emutls.o. */
+ COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(gcc_pointer));
+ COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(void*));
+
+ size_t size = control->size;
+ size_t align = control->align;
+ void* base;
+ if (align < sizeof(void*))
+ align = sizeof(void*);
+ /* Make sure that align is power of 2. */
+ if ((align & (align - 1)) != 0)
+ abort();
+
+ base = emutls_memalign_alloc(align, size);
+ if (control->value)
+ memcpy(base, control->value, size);
+ else
+ memset(base, 0, size);
+ return base;
+}
+
+
+/* Returns control->object.index; set index if not allocated yet. */
+static __inline uintptr_t emutls_get_index(__emutls_control *control) {
+ uintptr_t index = __atomic_load_n(&control->object.index, __ATOMIC_ACQUIRE);
+ if (!index) {
+ emutls_init_once();
+ emutls_lock();
+ index = control->object.index;
+ if (!index) {
+ index = ++emutls_num_object;
+ __atomic_store_n(&control->object.index, index, __ATOMIC_RELEASE);
+ }
+ emutls_unlock();
+ }
+ return index;
+}
+
+/* Updates newly allocated thread local emutls_address_array. */
+static __inline void emutls_check_array_set_size(emutls_address_array *array,
+ uintptr_t size) {
+ if (array == NULL)
+ abort();
+ array->size = size;
+ emutls_setspecific(array);
+}
+
+/* Returns the new 'data' array size, number of elements,
+ * which must be no smaller than the given index.
+ */
+static __inline uintptr_t emutls_new_data_array_size(uintptr_t index) {
+ /* Need to allocate emutls_address_array with extra slots
+ * to store the header.
+ * Round up the emutls_address_array size to multiple of 16.
+ */
+ uintptr_t header_words = sizeof(emutls_address_array) / sizeof(void *);
+ return ((index + header_words + 15) & ~((uintptr_t)15)) - header_words;
+}
+
+/* Returns the size in bytes required for an emutls_address_array with
+ * N number of elements for data field.
+ */
+static __inline uintptr_t emutls_asize(uintptr_t N) {
+ return N * sizeof(void *) + sizeof(emutls_address_array);
+}
+
+/* Returns the thread local emutls_address_array.
+ * Extends its size if necessary to hold address at index.
+ */
+static __inline emutls_address_array *
+emutls_get_address_array(uintptr_t index) {
+ emutls_address_array* array = emutls_getspecific();
+ if (array == NULL) {
+ uintptr_t new_size = emutls_new_data_array_size(index);
+ array = (emutls_address_array*) malloc(emutls_asize(new_size));
+ if (array) {
+ memset(array->data, 0, new_size * sizeof(void*));
+ array->skip_destructor_rounds = EMUTLS_SKIP_DESTRUCTOR_ROUNDS;
+ }
+ emutls_check_array_set_size(array, new_size);
+ } else if (index > array->size) {
+ uintptr_t orig_size = array->size;
+ uintptr_t new_size = emutls_new_data_array_size(index);
+ array = (emutls_address_array*) realloc(array, emutls_asize(new_size));
+ if (array)
+ memset(array->data + orig_size, 0,
+ (new_size - orig_size) * sizeof(void*));
+ emutls_check_array_set_size(array, new_size);
+ }
+ return array;
+}
+
+void* __emutls_get_address(__emutls_control* control) {
+ uintptr_t index = emutls_get_index(control);
+ emutls_address_array* array = emutls_get_address_array(index--);
+ if (array->data[index] == NULL)
+ array->data[index] = emutls_allocate_object(control);
+ return array->data[index];
+}
+
+#ifdef __BIONIC__
+/* Called by Bionic on dlclose to delete the emutls pthread key. */
+__attribute__((visibility("hidden")))
+void __emutls_unregister_key(void) {
+ if (emutls_key_created) {
+ pthread_key_delete(emutls_pthread_key);
+ emutls_key_created = false;
+ }
+}
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/enable_execute_stack.c b/contrib/compiler-rt/lib/builtins/enable_execute_stack.c
new file mode 100644
index 000000000000..327d460b4253
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/enable_execute_stack.c
@@ -0,0 +1,72 @@
+/* ===-- enable_execute_stack.c - Implement __enable_execute_stack ---------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifndef _WIN32
+#include <sys/mman.h>
+#endif
+
+/* #include "config.h"
+ * FIXME: CMake - include when cmake system is ready.
+ * Remove #define HAVE_SYSCONF 1 line.
+ */
+#define HAVE_SYSCONF 1
+
+#ifdef _WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#else
+#ifndef __APPLE__
+#include <unistd.h>
+#endif /* __APPLE__ */
+#endif /* _WIN32 */
+
+#if __LP64__
+ #define TRAMPOLINE_SIZE 48
+#else
+ #define TRAMPOLINE_SIZE 40
+#endif
+
+/*
+ * The compiler generates calls to __enable_execute_stack() when creating
+ * trampoline functions on the stack for use with nested functions.
+ * It is expected to mark the page(s) containing the address
+ * and the next 48 bytes as executable. Since the stack is normally rw-
+ * that means changing the protection on those page(s) to rwx.
+ */
+
+COMPILER_RT_ABI void
+__enable_execute_stack(void* addr)
+{
+
+#if _WIN32
+ MEMORY_BASIC_INFORMATION mbi;
+ if (!VirtualQuery (addr, &mbi, sizeof(mbi)))
+ return; /* We should probably assert here because there is no return value */
+ VirtualProtect (mbi.BaseAddress, mbi.RegionSize, PAGE_EXECUTE_READWRITE, &mbi.Protect);
+#else
+#if __APPLE__
+ /* On Darwin, pagesize is always 4096 bytes */
+ const uintptr_t pageSize = 4096;
+#elif !defined(HAVE_SYSCONF)
+#error "HAVE_SYSCONF not defined! See enable_execute_stack.c"
+#else
+ const uintptr_t pageSize = sysconf(_SC_PAGESIZE);
+#endif /* __APPLE__ */
+
+ const uintptr_t pageAlignMask = ~(pageSize-1);
+ uintptr_t p = (uintptr_t)addr;
+ unsigned char* startPage = (unsigned char*)(p & pageAlignMask);
+ unsigned char* endPage = (unsigned char*)((p+TRAMPOLINE_SIZE+pageSize) & pageAlignMask);
+ size_t length = endPage - startPage;
+ (void) mprotect((void *)startPage, length, PROT_READ | PROT_WRITE | PROT_EXEC);
+#endif
+}
diff --git a/contrib/compiler-rt/lib/builtins/eprintf.c b/contrib/compiler-rt/lib/builtins/eprintf.c
new file mode 100644
index 000000000000..89f34b154577
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/eprintf.c
@@ -0,0 +1,35 @@
+/* ===---------- eprintf.c - Implements __eprintf --------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+
+
+#include "int_lib.h"
+#include <stdio.h>
+
+
+/*
+ * __eprintf() was used in an old version of <assert.h>.
+ * It can eventually go away, but it is needed when linking
+ * .o files built with the old <assert.h>.
+ *
+ * It should never be exported from a dylib, so it is marked
+ * visibility hidden.
+ */
+#ifndef _WIN32
+__attribute__((visibility("hidden")))
+#endif
+COMPILER_RT_ABI void
+__eprintf(const char* format, const char* assertion_expression,
+ const char* line, const char* file)
+{
+ fprintf(stderr, format, assertion_expression, line, file);
+ fflush(stderr);
+ compilerrt_abort();
+}
diff --git a/contrib/compiler-rt/lib/builtins/extenddftf2.c b/contrib/compiler-rt/lib/builtins/extenddftf2.c
new file mode 100644
index 000000000000..86dab8f03a8f
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/extenddftf2.c
@@ -0,0 +1,23 @@
+//===-- lib/extenddftf2.c - double -> quad conversion -------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+#define SRC_DOUBLE
+#define DST_QUAD
+#include "fp_extend_impl.inc"
+
+COMPILER_RT_ABI long double __extenddftf2(double a) {
+ return __extendXfYf2__(a);
+}
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/extendhfsf2.c b/contrib/compiler-rt/lib/builtins/extendhfsf2.c
new file mode 100644
index 000000000000..d9c0db84b0ce
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/extendhfsf2.c
@@ -0,0 +1,33 @@
+//===-- lib/extendhfsf2.c - half -> single conversion -------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#define SRC_HALF
+#define DST_SINGLE
+#include "fp_extend_impl.inc"
+
+// Use a forwarding definition and noinline to implement a poor man's alias,
+// as there isn't a good cross-platform way of defining one.
+COMPILER_RT_ABI NOINLINE float __extendhfsf2(uint16_t a) {
+ return __extendXfYf2__(a);
+}
+
+COMPILER_RT_ABI float __gnu_h2f_ieee(uint16_t a) {
+ return __extendhfsf2(a);
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI float __aeabi_h2f(uint16_t a) {
+ return __extendhfsf2(a);
+}
+#else
+AEABI_RTABI float __aeabi_h2f(uint16_t a) COMPILER_RT_ALIAS(__extendhfsf2);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/extendsfdf2.c b/contrib/compiler-rt/lib/builtins/extendsfdf2.c
new file mode 100644
index 000000000000..3d84529a6c53
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/extendsfdf2.c
@@ -0,0 +1,27 @@
+//===-- lib/extendsfdf2.c - single -> double conversion -----------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#define SRC_SINGLE
+#define DST_DOUBLE
+#include "fp_extend_impl.inc"
+
+COMPILER_RT_ABI double __extendsfdf2(float a) {
+ return __extendXfYf2__(a);
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI double __aeabi_f2d(float a) {
+ return __extendsfdf2(a);
+}
+#else
+AEABI_RTABI double __aeabi_f2d(float a) COMPILER_RT_ALIAS(__extendsfdf2);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/extendsftf2.c b/contrib/compiler-rt/lib/builtins/extendsftf2.c
new file mode 100644
index 000000000000..2eeeba284845
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/extendsftf2.c
@@ -0,0 +1,23 @@
+//===-- lib/extendsftf2.c - single -> quad conversion -------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+#define SRC_SINGLE
+#define DST_QUAD
+#include "fp_extend_impl.inc"
+
+COMPILER_RT_ABI long double __extendsftf2(float a) {
+ return __extendXfYf2__(a);
+}
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/ffsdi2.c b/contrib/compiler-rt/lib/builtins/ffsdi2.c
new file mode 100644
index 000000000000..a5ac9900ff10
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/ffsdi2.c
@@ -0,0 +1,33 @@
+/* ===-- ffsdi2.c - Implement __ffsdi2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __ffsdi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: the index of the least significant 1-bit in a, or
+ * the value zero if a is zero. The least significant bit is index one.
+ */
+
+COMPILER_RT_ABI si_int
+__ffsdi2(di_int a)
+{
+ dwords x;
+ x.all = a;
+ if (x.s.low == 0)
+ {
+ if (x.s.high == 0)
+ return 0;
+ return __builtin_ctz(x.s.high) + (1 + sizeof(si_int) * CHAR_BIT);
+ }
+ return __builtin_ctz(x.s.low) + 1;
+}
diff --git a/contrib/compiler-rt/lib/builtins/ffssi2.c b/contrib/compiler-rt/lib/builtins/ffssi2.c
new file mode 100644
index 000000000000..e5180eff5e08
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/ffssi2.c
@@ -0,0 +1,29 @@
+/* ===-- ffssi2.c - Implement __ffssi2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __ffssi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: the index of the least significant 1-bit in a, or
+ * the value zero if a is zero. The least significant bit is index one.
+ */
+
+COMPILER_RT_ABI si_int
+__ffssi2(si_int a)
+{
+ if (a == 0)
+ {
+ return 0;
+ }
+ return __builtin_ctz(a) + 1;
+}
diff --git a/contrib/compiler-rt/lib/builtins/ffsti2.c b/contrib/compiler-rt/lib/builtins/ffsti2.c
new file mode 100644
index 000000000000..dcdb3bd7f807
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/ffsti2.c
@@ -0,0 +1,37 @@
+/* ===-- ffsti2.c - Implement __ffsti2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __ffsti2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: the index of the least significant 1-bit in a, or
+ * the value zero if a is zero. The least significant bit is index one.
+ */
+
+COMPILER_RT_ABI si_int
+__ffsti2(ti_int a)
+{
+ twords x;
+ x.all = a;
+ if (x.s.low == 0)
+ {
+ if (x.s.high == 0)
+ return 0;
+ return __builtin_ctzll(x.s.high) + (1 + sizeof(di_int) * CHAR_BIT);
+ }
+ return __builtin_ctzll(x.s.low) + 1;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/fixdfdi.c b/contrib/compiler-rt/lib/builtins/fixdfdi.c
new file mode 100644
index 000000000000..54e312d3c8f7
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fixdfdi.c
@@ -0,0 +1,55 @@
+/* ===-- fixdfdi.c - Implement __fixdfdi -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+#ifndef __SOFT_FP__
+/* Support for systems that have hardware floating-point; can set the invalid
+ * flag as a side-effect of computation.
+ */
+
+COMPILER_RT_ABI du_int __fixunsdfdi(double a);
+
+COMPILER_RT_ABI di_int
+__fixdfdi(double a)
+{
+ if (a < 0.0) {
+ return -__fixunsdfdi(-a);
+ }
+ return __fixunsdfdi(a);
+}
+
+#else
+/* Support for systems that don't have hardware floating-point; there are no
+ * flags to set, and we don't want to code-gen to an unknown soft-float
+ * implementation.
+ */
+
+typedef di_int fixint_t;
+typedef du_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI di_int
+__fixdfdi(fp_t a) {
+ return __fixint(a);
+}
+
+#endif
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI di_int __aeabi_d2lz(fp_t a) {
+ return __fixdfdi(a);
+}
+#else
+AEABI_RTABI di_int __aeabi_d2lz(fp_t a) COMPILER_RT_ALIAS(__fixdfdi);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/fixdfsi.c b/contrib/compiler-rt/lib/builtins/fixdfsi.c
new file mode 100644
index 000000000000..5b9588175717
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fixdfsi.c
@@ -0,0 +1,30 @@
+/* ===-- fixdfsi.c - Implement __fixdfsi -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+typedef si_int fixint_t;
+typedef su_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI si_int
+__fixdfsi(fp_t a) {
+ return __fixint(a);
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI si_int __aeabi_d2iz(fp_t a) {
+ return __fixdfsi(a);
+}
+#else
+AEABI_RTABI si_int __aeabi_d2iz(fp_t a) COMPILER_RT_ALIAS(__fixdfsi);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/fixdfti.c b/contrib/compiler-rt/lib/builtins/fixdfti.c
new file mode 100644
index 000000000000..aaf225e74f86
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fixdfti.c
@@ -0,0 +1,26 @@
+/* ===-- fixdfti.c - Implement __fixdfti -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+typedef ti_int fixint_t;
+typedef tu_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI ti_int
+__fixdfti(fp_t a) {
+ return __fixint(a);
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/fixsfdi.c b/contrib/compiler-rt/lib/builtins/fixsfdi.c
new file mode 100644
index 000000000000..32e87c60889f
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fixsfdi.c
@@ -0,0 +1,55 @@
+/* ===-- fixsfdi.c - Implement __fixsfdi -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+#ifndef __SOFT_FP__
+/* Support for systems that have hardware floating-point; can set the invalid
+ * flag as a side-effect of computation.
+ */
+
+COMPILER_RT_ABI du_int __fixunssfdi(float a);
+
+COMPILER_RT_ABI di_int
+__fixsfdi(float a)
+{
+ if (a < 0.0f) {
+ return -__fixunssfdi(-a);
+ }
+ return __fixunssfdi(a);
+}
+
+#else
+/* Support for systems that don't have hardware floating-point; there are no
+ * flags to set, and we don't want to code-gen to an unknown soft-float
+ * implementation.
+ */
+
+typedef di_int fixint_t;
+typedef du_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI di_int
+__fixsfdi(fp_t a) {
+ return __fixint(a);
+}
+
+#endif
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI di_int __aeabi_f2lz(fp_t a) {
+ return __fixsfdi(a);
+}
+#else
+AEABI_RTABI di_int __aeabi_f2lz(fp_t a) COMPILER_RT_ALIAS(__fixsfdi);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/fixsfsi.c b/contrib/compiler-rt/lib/builtins/fixsfsi.c
new file mode 100644
index 000000000000..e94e5f3dcd68
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fixsfsi.c
@@ -0,0 +1,30 @@
+/* ===-- fixsfsi.c - Implement __fixsfsi -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+typedef si_int fixint_t;
+typedef su_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI si_int
+__fixsfsi(fp_t a) {
+ return __fixint(a);
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI si_int __aeabi_f2iz(fp_t a) {
+ return __fixsfsi(a);
+}
+#else
+AEABI_RTABI si_int __aeabi_f2iz(fp_t a) COMPILER_RT_ALIAS(__fixsfsi);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/fixsfti.c b/contrib/compiler-rt/lib/builtins/fixsfti.c
new file mode 100644
index 000000000000..3a159b3e18e4
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fixsfti.c
@@ -0,0 +1,26 @@
+/* ===-- fixsfti.c - Implement __fixsfti -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+typedef ti_int fixint_t;
+typedef tu_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI ti_int
+__fixsfti(fp_t a) {
+ return __fixint(a);
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/fixtfdi.c b/contrib/compiler-rt/lib/builtins/fixtfdi.c
new file mode 100644
index 000000000000..bc9dea1f4f81
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fixtfdi.c
@@ -0,0 +1,23 @@
+/* ===-- fixtfdi.c - Implement __fixtfdi -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+typedef di_int fixint_t;
+typedef du_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI di_int
+__fixtfdi(fp_t a) {
+ return __fixint(a);
+}
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/fixtfsi.c b/contrib/compiler-rt/lib/builtins/fixtfsi.c
new file mode 100644
index 000000000000..feb3de885090
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fixtfsi.c
@@ -0,0 +1,23 @@
+/* ===-- fixtfsi.c - Implement __fixtfsi -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+typedef si_int fixint_t;
+typedef su_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI si_int
+__fixtfsi(fp_t a) {
+ return __fixint(a);
+}
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/fixtfti.c b/contrib/compiler-rt/lib/builtins/fixtfti.c
new file mode 100644
index 000000000000..ee4ada85cb4a
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fixtfti.c
@@ -0,0 +1,23 @@
+/* ===-- fixtfti.c - Implement __fixtfti -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+typedef ti_int fixint_t;
+typedef tu_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI ti_int
+__fixtfti(fp_t a) {
+ return __fixint(a);
+}
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/fixunsdfdi.c b/contrib/compiler-rt/lib/builtins/fixunsdfdi.c
new file mode 100644
index 000000000000..bfe4dbb25656
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fixunsdfdi.c
@@ -0,0 +1,52 @@
+/* ===-- fixunsdfdi.c - Implement __fixunsdfdi -----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+#ifndef __SOFT_FP__
+/* Support for systems that have hardware floating-point; can set the invalid
+ * flag as a side-effect of computation.
+ */
+
+COMPILER_RT_ABI du_int
+__fixunsdfdi(double a)
+{
+ if (a <= 0.0) return 0;
+ su_int high = a / 4294967296.f; /* a / 0x1p32f; */
+ su_int low = a - (double)high * 4294967296.f; /* high * 0x1p32f; */
+ return ((du_int)high << 32) | low;
+}
+
+#else
+/* Support for systems that don't have hardware floating-point; there are no
+ * flags to set, and we don't want to code-gen to an unknown soft-float
+ * implementation.
+ */
+
+typedef du_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI du_int
+__fixunsdfdi(fp_t a) {
+ return __fixuint(a);
+}
+
+#endif
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI du_int __aeabi_d2ulz(fp_t a) {
+ return __fixunsdfdi(a);
+}
+#else
+AEABI_RTABI du_int __aeabi_d2ulz(fp_t a) COMPILER_RT_ALIAS(__fixunsdfdi);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/fixunsdfsi.c b/contrib/compiler-rt/lib/builtins/fixunsdfsi.c
new file mode 100644
index 000000000000..3c5355beae1a
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fixunsdfsi.c
@@ -0,0 +1,29 @@
+/* ===-- fixunsdfsi.c - Implement __fixunsdfsi -----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+typedef su_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI su_int
+__fixunsdfsi(fp_t a) {
+ return __fixuint(a);
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI su_int __aeabi_d2uiz(fp_t a) {
+ return __fixunsdfsi(a);
+}
+#else
+AEABI_RTABI su_int __aeabi_d2uiz(fp_t a) COMPILER_RT_ALIAS(__fixunsdfsi);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/fixunsdfti.c b/contrib/compiler-rt/lib/builtins/fixunsdfti.c
new file mode 100644
index 000000000000..f8046a02632b
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fixunsdfti.c
@@ -0,0 +1,23 @@
+/* ===-- fixunsdfti.c - Implement __fixunsdfti -----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+typedef tu_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI tu_int
+__fixunsdfti(fp_t a) {
+ return __fixuint(a);
+}
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/fixunssfdi.c b/contrib/compiler-rt/lib/builtins/fixunssfdi.c
new file mode 100644
index 000000000000..080a25bb1e99
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fixunssfdi.c
@@ -0,0 +1,53 @@
+/* ===-- fixunssfdi.c - Implement __fixunssfdi -----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+#ifndef __SOFT_FP__
+/* Support for systems that have hardware floating-point; can set the invalid
+ * flag as a side-effect of computation.
+ */
+
+COMPILER_RT_ABI du_int
+__fixunssfdi(float a)
+{
+ if (a <= 0.0f) return 0;
+ double da = a;
+ su_int high = da / 4294967296.f; /* da / 0x1p32f; */
+ su_int low = da - (double)high * 4294967296.f; /* high * 0x1p32f; */
+ return ((du_int)high << 32) | low;
+}
+
+#else
+/* Support for systems that don't have hardware floating-point; there are no
+ * flags to set, and we don't want to code-gen to an unknown soft-float
+ * implementation.
+ */
+
+typedef du_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI du_int
+__fixunssfdi(fp_t a) {
+ return __fixuint(a);
+}
+
+#endif
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI du_int __aeabi_f2ulz(fp_t a) {
+ return __fixunssfdi(a);
+}
+#else
+AEABI_RTABI du_int __aeabi_f2ulz(fp_t a) COMPILER_RT_ALIAS(__fixunssfdi);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/fixunssfsi.c b/contrib/compiler-rt/lib/builtins/fixunssfsi.c
new file mode 100644
index 000000000000..eca2916a5c88
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fixunssfsi.c
@@ -0,0 +1,33 @@
+/* ===-- fixunssfsi.c - Implement __fixunssfsi -----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __fixunssfsi for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+typedef su_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI su_int
+__fixunssfsi(fp_t a) {
+ return __fixuint(a);
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI su_int __aeabi_f2uiz(fp_t a) {
+ return __fixunssfsi(a);
+}
+#else
+AEABI_RTABI su_int __aeabi_f2uiz(fp_t a) COMPILER_RT_ALIAS(__fixunssfsi);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/fixunssfti.c b/contrib/compiler-rt/lib/builtins/fixunssfti.c
new file mode 100644
index 000000000000..862d7bd6c7af
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fixunssfti.c
@@ -0,0 +1,26 @@
+/* ===-- fixunssfti.c - Implement __fixunssfti -----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __fixunssfti for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT)
+typedef tu_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI tu_int
+__fixunssfti(fp_t a) {
+ return __fixuint(a);
+}
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/fixunstfdi.c b/contrib/compiler-rt/lib/builtins/fixunstfdi.c
new file mode 100644
index 000000000000..b2995f65834a
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fixunstfdi.c
@@ -0,0 +1,22 @@
+/* ===-- fixunstfdi.c - Implement __fixunstfdi -----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+typedef du_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI du_int
+__fixunstfdi(fp_t a) {
+ return __fixuint(a);
+}
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/fixunstfsi.c b/contrib/compiler-rt/lib/builtins/fixunstfsi.c
new file mode 100644
index 000000000000..b5d3f6a7d38d
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fixunstfsi.c
@@ -0,0 +1,22 @@
+/* ===-- fixunstfsi.c - Implement __fixunstfsi -----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+typedef su_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI su_int
+__fixunstfsi(fp_t a) {
+ return __fixuint(a);
+}
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/fixunstfti.c b/contrib/compiler-rt/lib/builtins/fixunstfti.c
new file mode 100644
index 000000000000..22ff9dfc0339
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fixunstfti.c
@@ -0,0 +1,22 @@
+/* ===-- fixunstfsi.c - Implement __fixunstfsi -----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+typedef tu_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI tu_int
+__fixunstfti(fp_t a) {
+ return __fixuint(a);
+}
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/fixunsxfdi.c b/contrib/compiler-rt/lib/builtins/fixunsxfdi.c
new file mode 100644
index 000000000000..075304e78dc9
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fixunsxfdi.c
@@ -0,0 +1,46 @@
+/* ===-- fixunsxfdi.c - Implement __fixunsxfdi -----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __fixunsxfdi for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#if !_ARCH_PPC
+
+#include "int_lib.h"
+
+/* Returns: convert a to a unsigned long long, rounding toward zero.
+ * Negative values all become zero.
+ */
+
+/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes
+ * du_int is a 64 bit integral type
+ * value in long double is representable in du_int or is negative
+ * (no range checking performed)
+ */
+
+/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee |
+ * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+COMPILER_RT_ABI du_int
+__fixunsxfdi(long double a)
+{
+ long_double_bits fb;
+ fb.f = a;
+ int e = (fb.u.high.s.low & 0x00007FFF) - 16383;
+ if (e < 0 || (fb.u.high.s.low & 0x00008000))
+ return 0;
+ if ((unsigned)e > sizeof(du_int) * CHAR_BIT)
+ return ~(du_int)0;
+ return fb.u.low.all >> (63 - e);
+}
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/fixunsxfsi.c b/contrib/compiler-rt/lib/builtins/fixunsxfsi.c
new file mode 100644
index 000000000000..c3c70f743de8
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fixunsxfsi.c
@@ -0,0 +1,45 @@
+/* ===-- fixunsxfsi.c - Implement __fixunsxfsi -----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __fixunsxfsi for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#if !_ARCH_PPC
+
+#include "int_lib.h"
+
+/* Returns: convert a to a unsigned int, rounding toward zero.
+ * Negative values all become zero.
+ */
+
+/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes
+ * su_int is a 32 bit integral type
+ * value in long double is representable in su_int or is negative
+ */
+
+/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee |
+ * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+COMPILER_RT_ABI su_int
+__fixunsxfsi(long double a)
+{
+ long_double_bits fb;
+ fb.f = a;
+ int e = (fb.u.high.s.low & 0x00007FFF) - 16383;
+ if (e < 0 || (fb.u.high.s.low & 0x00008000))
+ return 0;
+ if ((unsigned)e > sizeof(su_int) * CHAR_BIT)
+ return ~(su_int)0;
+ return fb.u.low.s.high >> (31 - e);
+}
+
+#endif /* !_ARCH_PPC */
diff --git a/contrib/compiler-rt/lib/builtins/fixunsxfti.c b/contrib/compiler-rt/lib/builtins/fixunsxfti.c
new file mode 100644
index 000000000000..fb39d00ff5b2
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fixunsxfti.c
@@ -0,0 +1,50 @@
+/* ===-- fixunsxfti.c - Implement __fixunsxfti -----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __fixunsxfti for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: convert a to a unsigned long long, rounding toward zero.
+ * Negative values all become zero.
+ */
+
+/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes
+ * tu_int is a 128 bit integral type
+ * value in long double is representable in tu_int or is negative
+ */
+
+/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee |
+ * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+COMPILER_RT_ABI tu_int
+__fixunsxfti(long double a)
+{
+ long_double_bits fb;
+ fb.f = a;
+ int e = (fb.u.high.s.low & 0x00007FFF) - 16383;
+ if (e < 0 || (fb.u.high.s.low & 0x00008000))
+ return 0;
+ if ((unsigned)e > sizeof(tu_int) * CHAR_BIT)
+ return ~(tu_int)0;
+ tu_int r = fb.u.low.all;
+ if (e > 63)
+ r <<= (e - 63);
+ else
+ r >>= (63 - e);
+ return r;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/fixxfdi.c b/contrib/compiler-rt/lib/builtins/fixxfdi.c
new file mode 100644
index 000000000000..011787f9e4b4
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fixxfdi.c
@@ -0,0 +1,48 @@
+/* ===-- fixxfdi.c - Implement __fixxfdi -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __fixxfdi for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#if !_ARCH_PPC
+
+#include "int_lib.h"
+
+/* Returns: convert a to a signed long long, rounding toward zero. */
+
+/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes
+ * di_int is a 64 bit integral type
+ * value in long double is representable in di_int (no range checking performed)
+ */
+
+/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee |
+ * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+COMPILER_RT_ABI di_int
+__fixxfdi(long double a)
+{
+ const di_int di_max = (di_int)((~(du_int)0) / 2);
+ const di_int di_min = -di_max - 1;
+ long_double_bits fb;
+ fb.f = a;
+ int e = (fb.u.high.s.low & 0x00007FFF) - 16383;
+ if (e < 0)
+ return 0;
+ if ((unsigned)e >= sizeof(di_int) * CHAR_BIT)
+ return a > 0 ? di_max : di_min;
+ di_int s = -(si_int)((fb.u.high.s.low & 0x00008000) >> 15);
+ di_int r = fb.u.low.all;
+ r = (du_int)r >> (63 - e);
+ return (r ^ s) - s;
+}
+
+#endif /* !_ARCH_PPC */
diff --git a/contrib/compiler-rt/lib/builtins/fixxfti.c b/contrib/compiler-rt/lib/builtins/fixxfti.c
new file mode 100644
index 000000000000..968a4f0d5eea
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fixxfti.c
@@ -0,0 +1,51 @@
+/* ===-- fixxfti.c - Implement __fixxfti -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __fixxfti for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: convert a to a signed long long, rounding toward zero. */
+
+/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes
+ * ti_int is a 128 bit integral type
+ * value in long double is representable in ti_int
+ */
+
+/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee |
+ * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+COMPILER_RT_ABI ti_int
+__fixxfti(long double a)
+{
+ const ti_int ti_max = (ti_int)((~(tu_int)0) / 2);
+ const ti_int ti_min = -ti_max - 1;
+ long_double_bits fb;
+ fb.f = a;
+ int e = (fb.u.high.s.low & 0x00007FFF) - 16383;
+ if (e < 0)
+ return 0;
+ ti_int s = -(si_int)((fb.u.high.s.low & 0x00008000) >> 15);
+ ti_int r = fb.u.low.all;
+ if ((unsigned)e >= sizeof(ti_int) * CHAR_BIT)
+ return a > 0 ? ti_max : ti_min;
+ if (e > 63)
+ r <<= (e - 63);
+ else
+ r >>= (63 - e);
+ return (r ^ s) - s;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/floatdidf.c b/contrib/compiler-rt/lib/builtins/floatdidf.c
new file mode 100644
index 000000000000..36b856e078d4
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/floatdidf.c
@@ -0,0 +1,115 @@
+/*===-- floatdidf.c - Implement __floatdidf -------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===
+ *
+ * This file implements __floatdidf for the compiler_rt library.
+ *
+ *===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: convert a to a double, rounding toward even. */
+
+/* Assumption: double is a IEEE 64 bit floating point type
+ * di_int is a 64 bit integral type
+ */
+
+/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */
+
+#ifndef __SOFT_FP__
+/* Support for systems that have hardware floating-point; we'll set the inexact flag
+ * as a side-effect of this computation.
+ */
+
+COMPILER_RT_ABI double
+__floatdidf(di_int a)
+{
+ static const double twop52 = 4503599627370496.0; // 0x1.0p52
+ static const double twop32 = 4294967296.0; // 0x1.0p32
+
+ union { int64_t x; double d; } low = { .d = twop52 };
+
+ const double high = (int32_t)(a >> 32) * twop32;
+ low.x |= a & INT64_C(0x00000000ffffffff);
+
+ const double result = (high - twop52) + low.d;
+ return result;
+}
+
+#else
+/* Support for systems that don't have hardware floating-point; there are no flags to
+ * set, and we don't want to code-gen to an unknown soft-float implementation.
+ */
+
+COMPILER_RT_ABI double
+__floatdidf(di_int a)
+{
+ if (a == 0)
+ return 0.0;
+ const unsigned N = sizeof(di_int) * CHAR_BIT;
+ const di_int s = a >> (N-1);
+ a = (a ^ s) - s;
+ int sd = N - __builtin_clzll(a); /* number of significant digits */
+ int e = sd - 1; /* exponent */
+ if (sd > DBL_MANT_DIG)
+ {
+ /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+ * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+ * 12345678901234567890123456
+ * 1 = msb 1 bit
+ * P = bit DBL_MANT_DIG-1 bits to the right of 1
+ * Q = bit DBL_MANT_DIG bits to the right of 1
+ * R = "or" of all bits to the right of Q
+ */
+ switch (sd)
+ {
+ case DBL_MANT_DIG + 1:
+ a <<= 1;
+ break;
+ case DBL_MANT_DIG + 2:
+ break;
+ default:
+ a = ((du_int)a >> (sd - (DBL_MANT_DIG+2))) |
+ ((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0);
+ };
+ /* finish: */
+ a |= (a & 4) != 0; /* Or P into R */
+ ++a; /* round - this step may add a significant bit */
+ a >>= 2; /* dump Q and R */
+ /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */
+ if (a & ((du_int)1 << DBL_MANT_DIG))
+ {
+ a >>= 1;
+ ++e;
+ }
+ /* a is now rounded to DBL_MANT_DIG bits */
+ }
+ else
+ {
+ a <<= (DBL_MANT_DIG - sd);
+ /* a is now rounded to DBL_MANT_DIG bits */
+ }
+ double_bits fb;
+ fb.u.s.high = ((su_int)s & 0x80000000) | /* sign */
+ ((e + 1023) << 20) | /* exponent */
+ ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */
+ fb.u.s.low = (su_int)a; /* mantissa-low */
+ return fb.f;
+}
+#endif
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI double __aeabi_l2d(di_int a) {
+ return __floatdidf(a);
+}
+#else
+AEABI_RTABI double __aeabi_l2d(di_int a) COMPILER_RT_ALIAS(__floatdidf);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/floatdisf.c b/contrib/compiler-rt/lib/builtins/floatdisf.c
new file mode 100644
index 000000000000..a2f09eb2ed2c
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/floatdisf.c
@@ -0,0 +1,88 @@
+/*===-- floatdisf.c - Implement __floatdisf -------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===
+ *
+ * This file implements __floatdisf for the compiler_rt library.
+ *
+ *===----------------------------------------------------------------------===
+ */
+
+/* Returns: convert a to a float, rounding toward even.*/
+
+/* Assumption: float is a IEEE 32 bit floating point type
+ * di_int is a 64 bit integral type
+ */
+
+/* seee eeee emmm mmmm mmmm mmmm mmmm mmmm */
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI float
+__floatdisf(di_int a)
+{
+ if (a == 0)
+ return 0.0F;
+ const unsigned N = sizeof(di_int) * CHAR_BIT;
+ const di_int s = a >> (N-1);
+ a = (a ^ s) - s;
+ int sd = N - __builtin_clzll(a); /* number of significant digits */
+ int e = sd - 1; /* exponent */
+ if (sd > FLT_MANT_DIG)
+ {
+ /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+ * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+ * 12345678901234567890123456
+ * 1 = msb 1 bit
+ * P = bit FLT_MANT_DIG-1 bits to the right of 1
+ * Q = bit FLT_MANT_DIG bits to the right of 1
+ * R = "or" of all bits to the right of Q
+ */
+ switch (sd)
+ {
+ case FLT_MANT_DIG + 1:
+ a <<= 1;
+ break;
+ case FLT_MANT_DIG + 2:
+ break;
+ default:
+ a = ((du_int)a >> (sd - (FLT_MANT_DIG+2))) |
+ ((a & ((du_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0);
+ };
+ /* finish: */
+ a |= (a & 4) != 0; /* Or P into R */
+ ++a; /* round - this step may add a significant bit */
+ a >>= 2; /* dump Q and R */
+ /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */
+ if (a & ((du_int)1 << FLT_MANT_DIG))
+ {
+ a >>= 1;
+ ++e;
+ }
+ /* a is now rounded to FLT_MANT_DIG bits */
+ }
+ else
+ {
+ a <<= (FLT_MANT_DIG - sd);
+ /* a is now rounded to FLT_MANT_DIG bits */
+ }
+ float_bits fb;
+ fb.u = ((su_int)s & 0x80000000) | /* sign */
+ ((e + 127) << 23) | /* exponent */
+ ((su_int)a & 0x007FFFFF); /* mantissa */
+ return fb.f;
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI float __aeabi_l2f(di_int a) {
+ return __floatdisf(a);
+}
+#else
+AEABI_RTABI float __aeabi_l2f(di_int a) COMPILER_RT_ALIAS(__floatdisf);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/floatditf.c b/contrib/compiler-rt/lib/builtins/floatditf.c
new file mode 100644
index 000000000000..cd51dd8aade4
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/floatditf.c
@@ -0,0 +1,50 @@
+//===-- lib/floatditf.c - integer -> quad-precision conversion ----*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements di_int to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+COMPILER_RT_ABI fp_t __floatditf(di_int a) {
+
+ const int aWidth = sizeof a * CHAR_BIT;
+
+ // Handle zero as a special case to protect clz
+ if (a == 0)
+ return fromRep(0);
+
+ // All other cases begin by extracting the sign and absolute value of a
+ rep_t sign = 0;
+ du_int aAbs = (du_int)a;
+ if (a < 0) {
+ sign = signBit;
+ aAbs = ~(du_int)a + 1U;
+ }
+
+ // Exponent of (fp_t)a is the width of abs(a).
+ const int exponent = (aWidth - 1) - __builtin_clzll(aAbs);
+ rep_t result;
+
+ // Shift a into the significand field, rounding if it is a right-shift
+ const int shift = significandBits - exponent;
+ result = (rep_t)aAbs << shift ^ implicitBit;
+
+ // Insert the exponent
+ result += (rep_t)(exponent + exponentBias) << significandBits;
+ // Insert the sign bit and return
+ return fromRep(result | sign);
+}
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/floatdixf.c b/contrib/compiler-rt/lib/builtins/floatdixf.c
new file mode 100644
index 000000000000..d39e81d7ca75
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/floatdixf.c
@@ -0,0 +1,46 @@
+/* ===-- floatdixf.c - Implement __floatdixf -------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __floatdixf for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#if !_ARCH_PPC
+
+#include "int_lib.h"
+
+/* Returns: convert a to a long double, rounding toward even. */
+
+/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits
+ * di_int is a 64 bit integral type
+ */
+
+/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee |
+ * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+COMPILER_RT_ABI long double
+__floatdixf(di_int a)
+{
+ if (a == 0)
+ return 0.0;
+ const unsigned N = sizeof(di_int) * CHAR_BIT;
+ const di_int s = a >> (N-1);
+ a = (a ^ s) - s;
+ int clz = __builtin_clzll(a);
+ int e = (N - 1) - clz ; /* exponent */
+ long_double_bits fb;
+ fb.u.high.s.low = ((su_int)s & 0x00008000) | /* sign */
+ (e + 16383); /* exponent */
+ fb.u.low.all = a << clz; /* mantissa */
+ return fb.f;
+}
+
+#endif /* !_ARCH_PPC */
diff --git a/contrib/compiler-rt/lib/builtins/floatsidf.c b/contrib/compiler-rt/lib/builtins/floatsidf.c
new file mode 100644
index 000000000000..fe051123ce7c
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/floatsidf.c
@@ -0,0 +1,61 @@
+//===-- lib/floatsidf.c - integer -> double-precision conversion --*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements integer to double-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI fp_t
+__floatsidf(int a) {
+
+ const int aWidth = sizeof a * CHAR_BIT;
+
+ // Handle zero as a special case to protect clz
+ if (a == 0)
+ return fromRep(0);
+
+ // All other cases begin by extracting the sign and absolute value of a
+ rep_t sign = 0;
+ if (a < 0) {
+ sign = signBit;
+ a = -a;
+ }
+
+ // Exponent of (fp_t)a is the width of abs(a).
+ const int exponent = (aWidth - 1) - __builtin_clz(a);
+ rep_t result;
+
+ // Shift a into the significand field and clear the implicit bit. Extra
+ // cast to unsigned int is necessary to get the correct behavior for
+ // the input INT_MIN.
+ const int shift = significandBits - exponent;
+ result = (rep_t)(unsigned int)a << shift ^ implicitBit;
+
+ // Insert the exponent
+ result += (rep_t)(exponent + exponentBias) << significandBits;
+ // Insert the sign bit and return
+ return fromRep(result | sign);
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_i2d(int a) {
+ return __floatsidf(a);
+}
+#else
+AEABI_RTABI fp_t __aeabi_i2d(int a) COMPILER_RT_ALIAS(__floatsidf);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/floatsisf.c b/contrib/compiler-rt/lib/builtins/floatsisf.c
new file mode 100644
index 000000000000..bf087ee3c295
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/floatsisf.c
@@ -0,0 +1,67 @@
+//===-- lib/floatsisf.c - integer -> single-precision conversion --*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements integer to single-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI fp_t
+__floatsisf(int a) {
+
+ const int aWidth = sizeof a * CHAR_BIT;
+
+ // Handle zero as a special case to protect clz
+ if (a == 0)
+ return fromRep(0);
+
+ // All other cases begin by extracting the sign and absolute value of a
+ rep_t sign = 0;
+ if (a < 0) {
+ sign = signBit;
+ a = -a;
+ }
+
+ // Exponent of (fp_t)a is the width of abs(a).
+ const int exponent = (aWidth - 1) - __builtin_clz(a);
+ rep_t result;
+
+ // Shift a into the significand field, rounding if it is a right-shift
+ if (exponent <= significandBits) {
+ const int shift = significandBits - exponent;
+ result = (rep_t)a << shift ^ implicitBit;
+ } else {
+ const int shift = exponent - significandBits;
+ result = (rep_t)a >> shift ^ implicitBit;
+ rep_t round = (rep_t)a << (typeWidth - shift);
+ if (round > signBit) result++;
+ if (round == signBit) result += result & 1;
+ }
+
+ // Insert the exponent
+ result += (rep_t)(exponent + exponentBias) << significandBits;
+ // Insert the sign bit and return
+ return fromRep(result | sign);
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_i2f(int a) {
+ return __floatsisf(a);
+}
+#else
+AEABI_RTABI fp_t __aeabi_i2f(int a) COMPILER_RT_ALIAS(__floatsisf);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/floatsitf.c b/contrib/compiler-rt/lib/builtins/floatsitf.c
new file mode 100644
index 000000000000..f0abca363b5e
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/floatsitf.c
@@ -0,0 +1,50 @@
+//===-- lib/floatsitf.c - integer -> quad-precision conversion ----*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements integer to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+COMPILER_RT_ABI fp_t __floatsitf(int a) {
+
+ const int aWidth = sizeof a * CHAR_BIT;
+
+ // Handle zero as a special case to protect clz
+ if (a == 0)
+ return fromRep(0);
+
+ // All other cases begin by extracting the sign and absolute value of a
+ rep_t sign = 0;
+ unsigned aAbs = (unsigned)a;
+ if (a < 0) {
+ sign = signBit;
+ aAbs = ~(unsigned)a + 1U;
+ }
+
+ // Exponent of (fp_t)a is the width of abs(a).
+ const int exponent = (aWidth - 1) - __builtin_clz(aAbs);
+ rep_t result;
+
+ // Shift a into the significand field and clear the implicit bit.
+ const int shift = significandBits - exponent;
+ result = (rep_t)aAbs << shift ^ implicitBit;
+
+ // Insert the exponent
+ result += (rep_t)(exponent + exponentBias) << significandBits;
+ // Insert the sign bit and return
+ return fromRep(result | sign);
+}
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/floattidf.c b/contrib/compiler-rt/lib/builtins/floattidf.c
new file mode 100644
index 000000000000..2702a3c8a2db
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/floattidf.c
@@ -0,0 +1,83 @@
+/* ===-- floattidf.c - Implement __floattidf -------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __floattidf for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: convert a to a double, rounding toward even.*/
+
+/* Assumption: double is a IEEE 64 bit floating point type
+ * ti_int is a 128 bit integral type
+ */
+
+/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */
+
+COMPILER_RT_ABI double
+__floattidf(ti_int a)
+{
+ if (a == 0)
+ return 0.0;
+ const unsigned N = sizeof(ti_int) * CHAR_BIT;
+ const ti_int s = a >> (N-1);
+ a = (a ^ s) - s;
+ int sd = N - __clzti2(a); /* number of significant digits */
+ int e = sd - 1; /* exponent */
+ if (sd > DBL_MANT_DIG)
+ {
+ /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+ * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+ * 12345678901234567890123456
+ * 1 = msb 1 bit
+ * P = bit DBL_MANT_DIG-1 bits to the right of 1
+ * Q = bit DBL_MANT_DIG bits to the right of 1
+ * R = "or" of all bits to the right of Q
+ */
+ switch (sd)
+ {
+ case DBL_MANT_DIG + 1:
+ a <<= 1;
+ break;
+ case DBL_MANT_DIG + 2:
+ break;
+ default:
+ a = ((tu_int)a >> (sd - (DBL_MANT_DIG+2))) |
+ ((a & ((tu_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0);
+ };
+ /* finish: */
+ a |= (a & 4) != 0; /* Or P into R */
+ ++a; /* round - this step may add a significant bit */
+ a >>= 2; /* dump Q and R */
+ /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */
+ if (a & ((tu_int)1 << DBL_MANT_DIG))
+ {
+ a >>= 1;
+ ++e;
+ }
+ /* a is now rounded to DBL_MANT_DIG bits */
+ }
+ else
+ {
+ a <<= (DBL_MANT_DIG - sd);
+ /* a is now rounded to DBL_MANT_DIG bits */
+ }
+ double_bits fb;
+ fb.u.s.high = ((su_int)s & 0x80000000) | /* sign */
+ ((e + 1023) << 20) | /* exponent */
+ ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */
+ fb.u.s.low = (su_int)a; /* mantissa-low */
+ return fb.f;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/floattisf.c b/contrib/compiler-rt/lib/builtins/floattisf.c
new file mode 100644
index 000000000000..f1b585f2c326
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/floattisf.c
@@ -0,0 +1,82 @@
+/* ===-- floattisf.c - Implement __floattisf -------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __floattisf for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: convert a to a float, rounding toward even. */
+
+/* Assumption: float is a IEEE 32 bit floating point type
+ * ti_int is a 128 bit integral type
+ */
+
+/* seee eeee emmm mmmm mmmm mmmm mmmm mmmm */
+
+COMPILER_RT_ABI float
+__floattisf(ti_int a)
+{
+ if (a == 0)
+ return 0.0F;
+ const unsigned N = sizeof(ti_int) * CHAR_BIT;
+ const ti_int s = a >> (N-1);
+ a = (a ^ s) - s;
+ int sd = N - __clzti2(a); /* number of significant digits */
+ int e = sd - 1; /* exponent */
+ if (sd > FLT_MANT_DIG)
+ {
+ /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+ * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+ * 12345678901234567890123456
+ * 1 = msb 1 bit
+ * P = bit FLT_MANT_DIG-1 bits to the right of 1
+ * Q = bit FLT_MANT_DIG bits to the right of 1
+ * R = "or" of all bits to the right of Q
+ */
+ switch (sd)
+ {
+ case FLT_MANT_DIG + 1:
+ a <<= 1;
+ break;
+ case FLT_MANT_DIG + 2:
+ break;
+ default:
+ a = ((tu_int)a >> (sd - (FLT_MANT_DIG+2))) |
+ ((a & ((tu_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0);
+ };
+ /* finish: */
+ a |= (a & 4) != 0; /* Or P into R */
+ ++a; /* round - this step may add a significant bit */
+ a >>= 2; /* dump Q and R */
+ /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */
+ if (a & ((tu_int)1 << FLT_MANT_DIG))
+ {
+ a >>= 1;
+ ++e;
+ }
+ /* a is now rounded to FLT_MANT_DIG bits */
+ }
+ else
+ {
+ a <<= (FLT_MANT_DIG - sd);
+ /* a is now rounded to FLT_MANT_DIG bits */
+ }
+ float_bits fb;
+ fb.u = ((su_int)s & 0x80000000) | /* sign */
+ ((e + 127) << 23) | /* exponent */
+ ((su_int)a & 0x007FFFFF); /* mantissa */
+ return fb.f;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/floattitf.c b/contrib/compiler-rt/lib/builtins/floattitf.c
new file mode 100644
index 000000000000..994fded3947b
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/floattitf.c
@@ -0,0 +1,82 @@
+//===-- lib/floattitf.c - int128 -> quad-precision conversion -----*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ti_int to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+#include "int_lib.h"
+
+/* Returns: convert a ti_int to a fp_t, rounding toward even. */
+
+/* Assumption: fp_t is a IEEE 128 bit floating point type
+ * ti_int is a 128 bit integral type
+ */
+
+/* seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm |
+ * mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+COMPILER_RT_ABI fp_t
+__floattitf(ti_int a) {
+ if (a == 0)
+ return 0.0;
+ const unsigned N = sizeof(ti_int) * CHAR_BIT;
+ const ti_int s = a >> (N-1);
+ a = (a ^ s) - s;
+ int sd = N - __clzti2(a); /* number of significant digits */
+ int e = sd - 1; /* exponent */
+ if (sd > LDBL_MANT_DIG) {
+ /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+ * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+ * 12345678901234567890123456
+ * 1 = msb 1 bit
+ * P = bit LDBL_MANT_DIG-1 bits to the right of 1
+ * Q = bit LDBL_MANT_DIG bits to the right of 1
+ * R = "or" of all bits to the right of Q
+ */
+ switch (sd) {
+ case LDBL_MANT_DIG + 1:
+ a <<= 1;
+ break;
+ case LDBL_MANT_DIG + 2:
+ break;
+ default:
+ a = ((tu_int)a >> (sd - (LDBL_MANT_DIG+2))) |
+ ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0);
+ };
+ /* finish: */
+ a |= (a & 4) != 0; /* Or P into R */
+ ++a; /* round - this step may add a significant bit */
+ a >>= 2; /* dump Q and R */
+ /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */
+ if (a & ((tu_int)1 << LDBL_MANT_DIG)) {
+ a >>= 1;
+ ++e;
+ }
+ /* a is now rounded to LDBL_MANT_DIG bits */
+ } else {
+ a <<= (LDBL_MANT_DIG - sd);
+ /* a is now rounded to LDBL_MANT_DIG bits */
+ }
+
+ long_double_bits fb;
+ fb.u.high.all = (s & 0x8000000000000000LL) /* sign */
+ | (du_int)(e + 16383) << 48 /* exponent */
+ | ((a >> 64) & 0x0000ffffffffffffLL); /* significand */
+ fb.u.low.all = (du_int)(a);
+ return fb.f;
+}
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/floattixf.c b/contrib/compiler-rt/lib/builtins/floattixf.c
new file mode 100644
index 000000000000..1203b3a96e75
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/floattixf.c
@@ -0,0 +1,84 @@
+/* ===-- floattixf.c - Implement __floattixf -------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __floattixf for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: convert a to a long double, rounding toward even. */
+
+/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits
+ * ti_int is a 128 bit integral type
+ */
+
+/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee |
+ * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+COMPILER_RT_ABI long double
+__floattixf(ti_int a)
+{
+ if (a == 0)
+ return 0.0;
+ const unsigned N = sizeof(ti_int) * CHAR_BIT;
+ const ti_int s = a >> (N-1);
+ a = (a ^ s) - s;
+ int sd = N - __clzti2(a); /* number of significant digits */
+ int e = sd - 1; /* exponent */
+ if (sd > LDBL_MANT_DIG)
+ {
+ /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+ * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+ * 12345678901234567890123456
+ * 1 = msb 1 bit
+ * P = bit LDBL_MANT_DIG-1 bits to the right of 1
+ * Q = bit LDBL_MANT_DIG bits to the right of 1
+ * R = "or" of all bits to the right of Q
+ */
+ switch (sd)
+ {
+ case LDBL_MANT_DIG + 1:
+ a <<= 1;
+ break;
+ case LDBL_MANT_DIG + 2:
+ break;
+ default:
+ a = ((tu_int)a >> (sd - (LDBL_MANT_DIG+2))) |
+ ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0);
+ };
+ /* finish: */
+ a |= (a & 4) != 0; /* Or P into R */
+ ++a; /* round - this step may add a significant bit */
+ a >>= 2; /* dump Q and R */
+ /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */
+ if (a & ((tu_int)1 << LDBL_MANT_DIG))
+ {
+ a >>= 1;
+ ++e;
+ }
+ /* a is now rounded to LDBL_MANT_DIG bits */
+ }
+ else
+ {
+ a <<= (LDBL_MANT_DIG - sd);
+ /* a is now rounded to LDBL_MANT_DIG bits */
+ }
+ long_double_bits fb;
+ fb.u.high.s.low = ((su_int)s & 0x8000) | /* sign */
+ (e + 16383); /* exponent */
+ fb.u.low.all = (du_int)a; /* mantissa */
+ return fb.f;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/floatundidf.c b/contrib/compiler-rt/lib/builtins/floatundidf.c
new file mode 100644
index 000000000000..8bc2a096324f
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/floatundidf.c
@@ -0,0 +1,114 @@
+/* ===-- floatundidf.c - Implement __floatundidf ---------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __floatundidf for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+/* Returns: convert a to a double, rounding toward even. */
+
+/* Assumption: double is a IEEE 64 bit floating point type
+ * du_int is a 64 bit integral type
+ */
+
+/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */
+
+#include "int_lib.h"
+
+#ifndef __SOFT_FP__
+/* Support for systems that have hardware floating-point; we'll set the inexact flag
+ * as a side-effect of this computation.
+ */
+
+COMPILER_RT_ABI double
+__floatundidf(du_int a)
+{
+ static const double twop52 = 4503599627370496.0; // 0x1.0p52
+ static const double twop84 = 19342813113834066795298816.0; // 0x1.0p84
+ static const double twop84_plus_twop52 = 19342813118337666422669312.0; // 0x1.00000001p84
+
+ union { uint64_t x; double d; } high = { .d = twop84 };
+ union { uint64_t x; double d; } low = { .d = twop52 };
+
+ high.x |= a >> 32;
+ low.x |= a & UINT64_C(0x00000000ffffffff);
+
+ const double result = (high.d - twop84_plus_twop52) + low.d;
+ return result;
+}
+
+#else
+/* Support for systems that don't have hardware floating-point; there are no flags to
+ * set, and we don't want to code-gen to an unknown soft-float implementation.
+ */
+
+COMPILER_RT_ABI double
+__floatundidf(du_int a)
+{
+ if (a == 0)
+ return 0.0;
+ const unsigned N = sizeof(du_int) * CHAR_BIT;
+ int sd = N - __builtin_clzll(a); /* number of significant digits */
+ int e = sd - 1; /* exponent */
+ if (sd > DBL_MANT_DIG)
+ {
+ /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+ * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+ * 12345678901234567890123456
+ * 1 = msb 1 bit
+ * P = bit DBL_MANT_DIG-1 bits to the right of 1
+ * Q = bit DBL_MANT_DIG bits to the right of 1
+ * R = "or" of all bits to the right of Q
+ */
+ switch (sd)
+ {
+ case DBL_MANT_DIG + 1:
+ a <<= 1;
+ break;
+ case DBL_MANT_DIG + 2:
+ break;
+ default:
+ a = (a >> (sd - (DBL_MANT_DIG+2))) |
+ ((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0);
+ };
+ /* finish: */
+ a |= (a & 4) != 0; /* Or P into R */
+ ++a; /* round - this step may add a significant bit */
+ a >>= 2; /* dump Q and R */
+ /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */
+ if (a & ((du_int)1 << DBL_MANT_DIG))
+ {
+ a >>= 1;
+ ++e;
+ }
+ /* a is now rounded to DBL_MANT_DIG bits */
+ }
+ else
+ {
+ a <<= (DBL_MANT_DIG - sd);
+ /* a is now rounded to DBL_MANT_DIG bits */
+ }
+ double_bits fb;
+ fb.u.s.high = ((e + 1023) << 20) | /* exponent */
+ ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */
+ fb.u.s.low = (su_int)a; /* mantissa-low */
+ return fb.f;
+}
+#endif
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI double __aeabi_ul2d(du_int a) {
+ return __floatundidf(a);
+}
+#else
+AEABI_RTABI double __aeabi_ul2d(du_int a) COMPILER_RT_ALIAS(__floatundidf);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/floatundisf.c b/contrib/compiler-rt/lib/builtins/floatundisf.c
new file mode 100644
index 000000000000..844786ea7777
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/floatundisf.c
@@ -0,0 +1,85 @@
+/*===-- floatundisf.c - Implement __floatundisf ---------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __floatundisf for the compiler_rt library.
+ *
+ *===----------------------------------------------------------------------===
+ */
+
+/* Returns: convert a to a float, rounding toward even. */
+
+/* Assumption: float is a IEEE 32 bit floating point type
+ * du_int is a 64 bit integral type
+ */
+
+/* seee eeee emmm mmmm mmmm mmmm mmmm mmmm */
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI float
+__floatundisf(du_int a)
+{
+ if (a == 0)
+ return 0.0F;
+ const unsigned N = sizeof(du_int) * CHAR_BIT;
+ int sd = N - __builtin_clzll(a); /* number of significant digits */
+ int e = sd - 1; /* 8 exponent */
+ if (sd > FLT_MANT_DIG)
+ {
+ /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+ * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+ * 12345678901234567890123456
+ * 1 = msb 1 bit
+ * P = bit FLT_MANT_DIG-1 bits to the right of 1
+ * Q = bit FLT_MANT_DIG bits to the right of 1
+ * R = "or" of all bits to the right of Q
+ */
+ switch (sd)
+ {
+ case FLT_MANT_DIG + 1:
+ a <<= 1;
+ break;
+ case FLT_MANT_DIG + 2:
+ break;
+ default:
+ a = (a >> (sd - (FLT_MANT_DIG+2))) |
+ ((a & ((du_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0);
+ };
+ /* finish: */
+ a |= (a & 4) != 0; /* Or P into R */
+ ++a; /* round - this step may add a significant bit */
+ a >>= 2; /* dump Q and R */
+ /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */
+ if (a & ((du_int)1 << FLT_MANT_DIG))
+ {
+ a >>= 1;
+ ++e;
+ }
+ /* a is now rounded to FLT_MANT_DIG bits */
+ }
+ else
+ {
+ a <<= (FLT_MANT_DIG - sd);
+ /* a is now rounded to FLT_MANT_DIG bits */
+ }
+ float_bits fb;
+ fb.u = ((e + 127) << 23) | /* exponent */
+ ((su_int)a & 0x007FFFFF); /* mantissa */
+ return fb.f;
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI float __aeabi_ul2f(du_int a) {
+ return __floatundisf(a);
+}
+#else
+AEABI_RTABI float __aeabi_ul2f(du_int a) COMPILER_RT_ALIAS(__floatundisf);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/floatunditf.c b/contrib/compiler-rt/lib/builtins/floatunditf.c
new file mode 100644
index 000000000000..8098e95e82bc
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/floatunditf.c
@@ -0,0 +1,40 @@
+//===-- lib/floatunditf.c - uint -> quad-precision conversion -----*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements du_int to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+COMPILER_RT_ABI fp_t __floatunditf(du_int a) {
+
+ const int aWidth = sizeof a * CHAR_BIT;
+
+ // Handle zero as a special case to protect clz
+ if (a == 0) return fromRep(0);
+
+ // Exponent of (fp_t)a is the width of abs(a).
+ const int exponent = (aWidth - 1) - __builtin_clzll(a);
+ rep_t result;
+
+ // Shift a into the significand field and clear the implicit bit.
+ const int shift = significandBits - exponent;
+ result = (rep_t)a << shift ^ implicitBit;
+
+ // Insert the exponent
+ result += (rep_t)(exponent + exponentBias) << significandBits;
+ return fromRep(result);
+}
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/floatundixf.c b/contrib/compiler-rt/lib/builtins/floatundixf.c
new file mode 100644
index 000000000000..ca5e06d64dcd
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/floatundixf.c
@@ -0,0 +1,42 @@
+/* ===-- floatundixf.c - Implement __floatundixf ---------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __floatundixf for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#if !_ARCH_PPC
+
+#include "int_lib.h"
+
+/* Returns: convert a to a long double, rounding toward even. */
+
+/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits
+ * du_int is a 64 bit integral type
+ */
+
+/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee |
+ * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+COMPILER_RT_ABI long double
+__floatundixf(du_int a)
+{
+ if (a == 0)
+ return 0.0;
+ const unsigned N = sizeof(du_int) * CHAR_BIT;
+ int clz = __builtin_clzll(a);
+ int e = (N - 1) - clz ; /* exponent */
+ long_double_bits fb;
+ fb.u.high.s.low = (e + 16383); /* exponent */
+ fb.u.low.all = a << clz; /* mantissa */
+ return fb.f;
+}
+
+#endif /* _ARCH_PPC */
diff --git a/contrib/compiler-rt/lib/builtins/floatunsidf.c b/contrib/compiler-rt/lib/builtins/floatunsidf.c
new file mode 100644
index 000000000000..75cf6b9177df
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/floatunsidf.c
@@ -0,0 +1,50 @@
+//===-- lib/floatunsidf.c - uint -> double-precision conversion ---*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements unsigned integer to double-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI fp_t
+__floatunsidf(unsigned int a) {
+
+ const int aWidth = sizeof a * CHAR_BIT;
+
+ // Handle zero as a special case to protect clz
+ if (a == 0) return fromRep(0);
+
+ // Exponent of (fp_t)a is the width of abs(a).
+ const int exponent = (aWidth - 1) - __builtin_clz(a);
+ rep_t result;
+
+ // Shift a into the significand field and clear the implicit bit.
+ const int shift = significandBits - exponent;
+ result = (rep_t)a << shift ^ implicitBit;
+
+ // Insert the exponent
+ result += (rep_t)(exponent + exponentBias) << significandBits;
+ return fromRep(result);
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_ui2d(unsigned int a) {
+ return __floatunsidf(a);
+}
+#else
+AEABI_RTABI fp_t __aeabi_ui2d(unsigned int a) COMPILER_RT_ALIAS(__floatunsidf);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/floatunsisf.c b/contrib/compiler-rt/lib/builtins/floatunsisf.c
new file mode 100644
index 000000000000..29525ccedbbe
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/floatunsisf.c
@@ -0,0 +1,58 @@
+//===-- lib/floatunsisf.c - uint -> single-precision conversion ---*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements unsigned integer to single-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI fp_t
+__floatunsisf(unsigned int a) {
+
+ const int aWidth = sizeof a * CHAR_BIT;
+
+ // Handle zero as a special case to protect clz
+ if (a == 0) return fromRep(0);
+
+ // Exponent of (fp_t)a is the width of abs(a).
+ const int exponent = (aWidth - 1) - __builtin_clz(a);
+ rep_t result;
+
+ // Shift a into the significand field, rounding if it is a right-shift
+ if (exponent <= significandBits) {
+ const int shift = significandBits - exponent;
+ result = (rep_t)a << shift ^ implicitBit;
+ } else {
+ const int shift = exponent - significandBits;
+ result = (rep_t)a >> shift ^ implicitBit;
+ rep_t round = (rep_t)a << (typeWidth - shift);
+ if (round > signBit) result++;
+ if (round == signBit) result += result & 1;
+ }
+
+ // Insert the exponent
+ result += (rep_t)(exponent + exponentBias) << significandBits;
+ return fromRep(result);
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_ui2f(unsigned int a) {
+ return __floatunsisf(a);
+}
+#else
+AEABI_RTABI fp_t __aeabi_ui2f(unsigned int a) COMPILER_RT_ALIAS(__floatunsisf);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/floatunsitf.c b/contrib/compiler-rt/lib/builtins/floatunsitf.c
new file mode 100644
index 000000000000..1cd1842e709a
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/floatunsitf.c
@@ -0,0 +1,40 @@
+//===-- lib/floatunsitf.c - uint -> quad-precision conversion -----*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements unsigned integer to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+COMPILER_RT_ABI fp_t __floatunsitf(unsigned int a) {
+
+ const int aWidth = sizeof a * CHAR_BIT;
+
+ // Handle zero as a special case to protect clz
+ if (a == 0) return fromRep(0);
+
+ // Exponent of (fp_t)a is the width of abs(a).
+ const int exponent = (aWidth - 1) - __builtin_clz(a);
+ rep_t result;
+
+ // Shift a into the significand field and clear the implicit bit.
+ const int shift = significandBits - exponent;
+ result = (rep_t)a << shift ^ implicitBit;
+
+ // Insert the exponent
+ result += (rep_t)(exponent + exponentBias) << significandBits;
+ return fromRep(result);
+}
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/floatuntidf.c b/contrib/compiler-rt/lib/builtins/floatuntidf.c
new file mode 100644
index 000000000000..960265d80772
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/floatuntidf.c
@@ -0,0 +1,80 @@
+/* ===-- floatuntidf.c - Implement __floatuntidf ---------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __floatuntidf for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: convert a to a double, rounding toward even. */
+
+/* Assumption: double is a IEEE 64 bit floating point type
+ * tu_int is a 128 bit integral type
+ */
+
+/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */
+
+COMPILER_RT_ABI double
+__floatuntidf(tu_int a)
+{
+ if (a == 0)
+ return 0.0;
+ const unsigned N = sizeof(tu_int) * CHAR_BIT;
+ int sd = N - __clzti2(a); /* number of significant digits */
+ int e = sd - 1; /* exponent */
+ if (sd > DBL_MANT_DIG)
+ {
+ /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+ * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+ * 12345678901234567890123456
+ * 1 = msb 1 bit
+ * P = bit DBL_MANT_DIG-1 bits to the right of 1
+ * Q = bit DBL_MANT_DIG bits to the right of 1
+ * R = "or" of all bits to the right of Q
+ */
+ switch (sd)
+ {
+ case DBL_MANT_DIG + 1:
+ a <<= 1;
+ break;
+ case DBL_MANT_DIG + 2:
+ break;
+ default:
+ a = (a >> (sd - (DBL_MANT_DIG+2))) |
+ ((a & ((tu_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0);
+ };
+ /* finish: */
+ a |= (a & 4) != 0; /* Or P into R */
+ ++a; /* round - this step may add a significant bit */
+ a >>= 2; /* dump Q and R */
+ /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */
+ if (a & ((tu_int)1 << DBL_MANT_DIG))
+ {
+ a >>= 1;
+ ++e;
+ }
+ /* a is now rounded to DBL_MANT_DIG bits */
+ }
+ else
+ {
+ a <<= (DBL_MANT_DIG - sd);
+ /* a is now rounded to DBL_MANT_DIG bits */
+ }
+ double_bits fb;
+ fb.u.s.high = ((e + 1023) << 20) | /* exponent */
+ ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */
+ fb.u.s.low = (su_int)a; /* mantissa-low */
+ return fb.f;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/floatuntisf.c b/contrib/compiler-rt/lib/builtins/floatuntisf.c
new file mode 100644
index 000000000000..c0dd0275ddba
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/floatuntisf.c
@@ -0,0 +1,79 @@
+/* ===-- floatuntisf.c - Implement __floatuntisf ---------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __floatuntisf for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: convert a to a float, rounding toward even. */
+
+/* Assumption: float is a IEEE 32 bit floating point type
+ * tu_int is a 128 bit integral type
+ */
+
+/* seee eeee emmm mmmm mmmm mmmm mmmm mmmm */
+
+COMPILER_RT_ABI float
+__floatuntisf(tu_int a)
+{
+ if (a == 0)
+ return 0.0F;
+ const unsigned N = sizeof(tu_int) * CHAR_BIT;
+ int sd = N - __clzti2(a); /* number of significant digits */
+ int e = sd - 1; /* exponent */
+ if (sd > FLT_MANT_DIG)
+ {
+ /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+ * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+ * 12345678901234567890123456
+ * 1 = msb 1 bit
+ * P = bit FLT_MANT_DIG-1 bits to the right of 1
+ * Q = bit FLT_MANT_DIG bits to the right of 1
+ * R = "or" of all bits to the right of Q
+ */
+ switch (sd)
+ {
+ case FLT_MANT_DIG + 1:
+ a <<= 1;
+ break;
+ case FLT_MANT_DIG + 2:
+ break;
+ default:
+ a = (a >> (sd - (FLT_MANT_DIG+2))) |
+ ((a & ((tu_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0);
+ };
+ /* finish: */
+ a |= (a & 4) != 0; /* Or P into R */
+ ++a; /* round - this step may add a significant bit */
+ a >>= 2; /* dump Q and R */
+ /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */
+ if (a & ((tu_int)1 << FLT_MANT_DIG))
+ {
+ a >>= 1;
+ ++e;
+ }
+ /* a is now rounded to FLT_MANT_DIG bits */
+ }
+ else
+ {
+ a <<= (FLT_MANT_DIG - sd);
+ /* a is now rounded to FLT_MANT_DIG bits */
+ }
+ float_bits fb;
+ fb.u = ((e + 127) << 23) | /* exponent */
+ ((su_int)a & 0x007FFFFF); /* mantissa */
+ return fb.f;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/floatuntitf.c b/contrib/compiler-rt/lib/builtins/floatuntitf.c
new file mode 100644
index 000000000000..e2518c93f234
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/floatuntitf.c
@@ -0,0 +1,79 @@
+//===-- lib/floatuntitf.c - uint128 -> quad-precision conversion --*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements tu_int to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+#include "int_lib.h"
+
+/* Returns: convert a tu_int to a fp_t, rounding toward even. */
+
+/* Assumption: fp_t is a IEEE 128 bit floating point type
+ * tu_int is a 128 bit integral type
+ */
+
+/* seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm |
+ * mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+COMPILER_RT_ABI fp_t
+__floatuntitf(tu_int a) {
+ if (a == 0)
+ return 0.0;
+ const unsigned N = sizeof(tu_int) * CHAR_BIT;
+ int sd = N - __clzti2(a); /* number of significant digits */
+ int e = sd - 1; /* exponent */
+ if (sd > LDBL_MANT_DIG) {
+ /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+ * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+ * 12345678901234567890123456
+ * 1 = msb 1 bit
+ * P = bit LDBL_MANT_DIG-1 bits to the right of 1
+ * Q = bit LDBL_MANT_DIG bits to the right of 1
+ * R = "or" of all bits to the right of Q
+ */
+ switch (sd) {
+ case LDBL_MANT_DIG + 1:
+ a <<= 1;
+ break;
+ case LDBL_MANT_DIG + 2:
+ break;
+ default:
+ a = (a >> (sd - (LDBL_MANT_DIG+2))) |
+ ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0);
+ };
+ /* finish: */
+ a |= (a & 4) != 0; /* Or P into R */
+ ++a; /* round - this step may add a significant bit */
+ a >>= 2; /* dump Q and R */
+ /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */
+ if (a & ((tu_int)1 << LDBL_MANT_DIG)) {
+ a >>= 1;
+ ++e;
+ }
+ /* a is now rounded to LDBL_MANT_DIG bits */
+ } else {
+ a <<= (LDBL_MANT_DIG - sd);
+ /* a is now rounded to LDBL_MANT_DIG bits */
+ }
+
+ long_double_bits fb;
+ fb.u.high.all = (du_int)(e + 16383) << 48 /* exponent */
+ | ((a >> 64) & 0x0000ffffffffffffLL); /* significand */
+ fb.u.low.all = (du_int)(a);
+ return fb.f;
+}
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/floatuntixf.c b/contrib/compiler-rt/lib/builtins/floatuntixf.c
new file mode 100644
index 000000000000..ea81cb1bcdae
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/floatuntixf.c
@@ -0,0 +1,81 @@
+/* ===-- floatuntixf.c - Implement __floatuntixf ---------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __floatuntixf for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: convert a to a long double, rounding toward even. */
+
+/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits
+ * tu_int is a 128 bit integral type
+ */
+
+/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee |
+ * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+COMPILER_RT_ABI long double
+__floatuntixf(tu_int a)
+{
+ if (a == 0)
+ return 0.0;
+ const unsigned N = sizeof(tu_int) * CHAR_BIT;
+ int sd = N - __clzti2(a); /* number of significant digits */
+ int e = sd - 1; /* exponent */
+ if (sd > LDBL_MANT_DIG)
+ {
+ /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+ * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+ * 12345678901234567890123456
+ * 1 = msb 1 bit
+ * P = bit LDBL_MANT_DIG-1 bits to the right of 1
+ * Q = bit LDBL_MANT_DIG bits to the right of 1
+ * R = "or" of all bits to the right of Q
+ */
+ switch (sd)
+ {
+ case LDBL_MANT_DIG + 1:
+ a <<= 1;
+ break;
+ case LDBL_MANT_DIG + 2:
+ break;
+ default:
+ a = (a >> (sd - (LDBL_MANT_DIG+2))) |
+ ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0);
+ };
+ /* finish: */
+ a |= (a & 4) != 0; /* Or P into R */
+ ++a; /* round - this step may add a significant bit */
+ a >>= 2; /* dump Q and R */
+ /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */
+ if (a & ((tu_int)1 << LDBL_MANT_DIG))
+ {
+ a >>= 1;
+ ++e;
+ }
+ /* a is now rounded to LDBL_MANT_DIG bits */
+ }
+ else
+ {
+ a <<= (LDBL_MANT_DIG - sd);
+ /* a is now rounded to LDBL_MANT_DIG bits */
+ }
+ long_double_bits fb;
+ fb.u.high.s.low = (e + 16383); /* exponent */
+ fb.u.low.all = (du_int)a; /* mantissa */
+ return fb.f;
+}
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/fp_add_impl.inc b/contrib/compiler-rt/lib/builtins/fp_add_impl.inc
new file mode 100644
index 000000000000..b47be1b648e6
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fp_add_impl.inc
@@ -0,0 +1,144 @@
+//===----- lib/fp_add_impl.inc - floaing point addition -----------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements soft-float addition with the IEEE-754 default rounding
+// (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp_lib.h"
+
+static __inline fp_t __addXf3__(fp_t a, fp_t b) {
+ rep_t aRep = toRep(a);
+ rep_t bRep = toRep(b);
+ const rep_t aAbs = aRep & absMask;
+ const rep_t bAbs = bRep & absMask;
+
+ // Detect if a or b is zero, infinity, or NaN.
+ if (aAbs - REP_C(1) >= infRep - REP_C(1) ||
+ bAbs - REP_C(1) >= infRep - REP_C(1)) {
+ // NaN + anything = qNaN
+ if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
+ // anything + NaN = qNaN
+ if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
+
+ if (aAbs == infRep) {
+ // +/-infinity + -/+infinity = qNaN
+ if ((toRep(a) ^ toRep(b)) == signBit) return fromRep(qnanRep);
+ // +/-infinity + anything remaining = +/- infinity
+ else return a;
+ }
+
+ // anything remaining + +/-infinity = +/-infinity
+ if (bAbs == infRep) return b;
+
+ // zero + anything = anything
+ if (!aAbs) {
+ // but we need to get the sign right for zero + zero
+ if (!bAbs) return fromRep(toRep(a) & toRep(b));
+ else return b;
+ }
+
+ // anything + zero = anything
+ if (!bAbs) return a;
+ }
+
+ // Swap a and b if necessary so that a has the larger absolute value.
+ if (bAbs > aAbs) {
+ const rep_t temp = aRep;
+ aRep = bRep;
+ bRep = temp;
+ }
+
+ // Extract the exponent and significand from the (possibly swapped) a and b.
+ int aExponent = aRep >> significandBits & maxExponent;
+ int bExponent = bRep >> significandBits & maxExponent;
+ rep_t aSignificand = aRep & significandMask;
+ rep_t bSignificand = bRep & significandMask;
+
+ // Normalize any denormals, and adjust the exponent accordingly.
+ if (aExponent == 0) aExponent = normalize(&aSignificand);
+ if (bExponent == 0) bExponent = normalize(&bSignificand);
+
+ // The sign of the result is the sign of the larger operand, a. If they
+ // have opposite signs, we are performing a subtraction; otherwise addition.
+ const rep_t resultSign = aRep & signBit;
+ const bool subtraction = (aRep ^ bRep) & signBit;
+
+ // Shift the significands to give us round, guard and sticky, and or in the
+ // implicit significand bit. (If we fell through from the denormal path it
+ // was already set by normalize( ), but setting it twice won't hurt
+ // anything.)
+ aSignificand = (aSignificand | implicitBit) << 3;
+ bSignificand = (bSignificand | implicitBit) << 3;
+
+ // Shift the significand of b by the difference in exponents, with a sticky
+ // bottom bit to get rounding correct.
+ const unsigned int align = aExponent - bExponent;
+ if (align) {
+ if (align < typeWidth) {
+ const bool sticky = bSignificand << (typeWidth - align);
+ bSignificand = bSignificand >> align | sticky;
+ } else {
+ bSignificand = 1; // sticky; b is known to be non-zero.
+ }
+ }
+ if (subtraction) {
+ aSignificand -= bSignificand;
+ // If a == -b, return +zero.
+ if (aSignificand == 0) return fromRep(0);
+
+ // If partial cancellation occured, we need to left-shift the result
+ // and adjust the exponent:
+ if (aSignificand < implicitBit << 3) {
+ const int shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3);
+ aSignificand <<= shift;
+ aExponent -= shift;
+ }
+ }
+ else /* addition */ {
+ aSignificand += bSignificand;
+
+ // If the addition carried up, we need to right-shift the result and
+ // adjust the exponent:
+ if (aSignificand & implicitBit << 4) {
+ const bool sticky = aSignificand & 1;
+ aSignificand = aSignificand >> 1 | sticky;
+ aExponent += 1;
+ }
+ }
+
+ // If we have overflowed the type, return +/- infinity:
+ if (aExponent >= maxExponent) return fromRep(infRep | resultSign);
+
+ if (aExponent <= 0) {
+ // Result is denormal before rounding; the exponent is zero and we
+ // need to shift the significand.
+ const int shift = 1 - aExponent;
+ const bool sticky = aSignificand << (typeWidth - shift);
+ aSignificand = aSignificand >> shift | sticky;
+ aExponent = 0;
+ }
+
+ // Low three bits are round, guard, and sticky.
+ const int roundGuardSticky = aSignificand & 0x7;
+
+ // Shift the significand into place, and mask off the implicit bit.
+ rep_t result = aSignificand >> 3 & significandMask;
+
+ // Insert the exponent and sign.
+ result |= (rep_t)aExponent << significandBits;
+ result |= resultSign;
+
+ // Final rounding. The result may overflow to infinity, but that is the
+ // correct result in that case.
+ if (roundGuardSticky > 0x4) result++;
+ if (roundGuardSticky == 0x4) result += result & 1;
+ return fromRep(result);
+}
diff --git a/contrib/compiler-rt/lib/builtins/fp_extend.h b/contrib/compiler-rt/lib/builtins/fp_extend.h
new file mode 100644
index 000000000000..6d95a0680709
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fp_extend.h
@@ -0,0 +1,89 @@
+//===-lib/fp_extend.h - low precision -> high precision conversion -*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Set source and destination setting
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FP_EXTEND_HEADER
+#define FP_EXTEND_HEADER
+
+#include "int_lib.h"
+
+#if defined SRC_SINGLE
+typedef float src_t;
+typedef uint32_t src_rep_t;
+#define SRC_REP_C UINT32_C
+static const int srcSigBits = 23;
+#define src_rep_t_clz __builtin_clz
+
+#elif defined SRC_DOUBLE
+typedef double src_t;
+typedef uint64_t src_rep_t;
+#define SRC_REP_C UINT64_C
+static const int srcSigBits = 52;
+static __inline int src_rep_t_clz(src_rep_t a) {
+#if defined __LP64__
+ return __builtin_clzl(a);
+#else
+ if (a & REP_C(0xffffffff00000000))
+ return __builtin_clz(a >> 32);
+ else
+ return 32 + __builtin_clz(a & REP_C(0xffffffff));
+#endif
+}
+
+#elif defined SRC_HALF
+typedef uint16_t src_t;
+typedef uint16_t src_rep_t;
+#define SRC_REP_C UINT16_C
+static const int srcSigBits = 10;
+#define src_rep_t_clz __builtin_clz
+
+#else
+#error Source should be half, single, or double precision!
+#endif //end source precision
+
+#if defined DST_SINGLE
+typedef float dst_t;
+typedef uint32_t dst_rep_t;
+#define DST_REP_C UINT32_C
+static const int dstSigBits = 23;
+
+#elif defined DST_DOUBLE
+typedef double dst_t;
+typedef uint64_t dst_rep_t;
+#define DST_REP_C UINT64_C
+static const int dstSigBits = 52;
+
+#elif defined DST_QUAD
+typedef long double dst_t;
+typedef __uint128_t dst_rep_t;
+#define DST_REP_C (__uint128_t)
+static const int dstSigBits = 112;
+
+#else
+#error Destination should be single, double, or quad precision!
+#endif //end destination precision
+
+// End of specialization parameters. Two helper routines for conversion to and
+// from the representation of floating-point data as integer values follow.
+
+static __inline src_rep_t srcToRep(src_t x) {
+ const union { src_t f; src_rep_t i; } rep = {.f = x};
+ return rep.i;
+}
+
+static __inline dst_t dstFromRep(dst_rep_t x) {
+ const union { dst_t f; dst_rep_t i; } rep = {.i = x};
+ return rep.f;
+}
+// End helper routines. Conversion implementation follows.
+
+#endif //FP_EXTEND_HEADER
diff --git a/contrib/compiler-rt/lib/builtins/fp_extend_impl.inc b/contrib/compiler-rt/lib/builtins/fp_extend_impl.inc
new file mode 100644
index 000000000000..b785cc7687ad
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fp_extend_impl.inc
@@ -0,0 +1,108 @@
+//=-lib/fp_extend_impl.inc - low precision -> high precision conversion -*-- -//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a fairly generic conversion from a narrower to a wider
+// IEEE-754 floating-point type. The constants and types defined following the
+// includes below parameterize the conversion.
+//
+// It does not support types that don't use the usual IEEE-754 interchange
+// formats; specifically, some work would be needed to adapt it to
+// (for example) the Intel 80-bit format or PowerPC double-double format.
+//
+// Note please, however, that this implementation is only intended to support
+// *widening* operations; if you need to convert to a *narrower* floating-point
+// type (e.g. double -> float), then this routine will not do what you want it
+// to.
+//
+// It also requires that integer types at least as large as both formats
+// are available on the target platform; this may pose a problem when trying
+// to add support for quad on some 32-bit systems, for example. You also may
+// run into trouble finding an appropriate CLZ function for wide source types;
+// you will likely need to roll your own on some platforms.
+//
+// Finally, the following assumptions are made:
+//
+// 1. floating-point types and integer types have the same endianness on the
+// target platform
+//
+// 2. quiet NaNs, if supported, are indicated by the leading bit of the
+// significand field being set
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp_extend.h"
+
+static __inline dst_t __extendXfYf2__(src_t a) {
+ // Various constants whose values follow from the type parameters.
+ // Any reasonable optimizer will fold and propagate all of these.
+ const int srcBits = sizeof(src_t)*CHAR_BIT;
+ const int srcExpBits = srcBits - srcSigBits - 1;
+ const int srcInfExp = (1 << srcExpBits) - 1;
+ const int srcExpBias = srcInfExp >> 1;
+
+ const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits;
+ const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits;
+ const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits);
+ const src_rep_t srcAbsMask = srcSignMask - 1;
+ const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1);
+ const src_rep_t srcNaNCode = srcQNaN - 1;
+
+ const int dstBits = sizeof(dst_t)*CHAR_BIT;
+ const int dstExpBits = dstBits - dstSigBits - 1;
+ const int dstInfExp = (1 << dstExpBits) - 1;
+ const int dstExpBias = dstInfExp >> 1;
+
+ const dst_rep_t dstMinNormal = DST_REP_C(1) << dstSigBits;
+
+ // Break a into a sign and representation of the absolute value
+ const src_rep_t aRep = srcToRep(a);
+ const src_rep_t aAbs = aRep & srcAbsMask;
+ const src_rep_t sign = aRep & srcSignMask;
+ dst_rep_t absResult;
+
+ // If sizeof(src_rep_t) < sizeof(int), the subtraction result is promoted
+ // to (signed) int. To avoid that, explicitly cast to src_rep_t.
+ if ((src_rep_t)(aAbs - srcMinNormal) < srcInfinity - srcMinNormal) {
+ // a is a normal number.
+ // Extend to the destination type by shifting the significand and
+ // exponent into the proper position and rebiasing the exponent.
+ absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits);
+ absResult += (dst_rep_t)(dstExpBias - srcExpBias) << dstSigBits;
+ }
+
+ else if (aAbs >= srcInfinity) {
+ // a is NaN or infinity.
+ // Conjure the result by beginning with infinity, then setting the qNaN
+ // bit (if needed) and right-aligning the rest of the trailing NaN
+ // payload field.
+ absResult = (dst_rep_t)dstInfExp << dstSigBits;
+ absResult |= (dst_rep_t)(aAbs & srcQNaN) << (dstSigBits - srcSigBits);
+ absResult |= (dst_rep_t)(aAbs & srcNaNCode) << (dstSigBits - srcSigBits);
+ }
+
+ else if (aAbs) {
+ // a is denormal.
+ // renormalize the significand and clear the leading bit, then insert
+ // the correct adjusted exponent in the destination type.
+ const int scale = src_rep_t_clz(aAbs) - src_rep_t_clz(srcMinNormal);
+ absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits + scale);
+ absResult ^= dstMinNormal;
+ const int resultExponent = dstExpBias - srcExpBias - scale + 1;
+ absResult |= (dst_rep_t)resultExponent << dstSigBits;
+ }
+
+ else {
+ // a is zero.
+ absResult = 0;
+ }
+
+ // Apply the signbit to (dst_t)abs(a).
+ const dst_rep_t result = absResult | (dst_rep_t)sign << (dstBits - srcBits);
+ return dstFromRep(result);
+}
diff --git a/contrib/compiler-rt/lib/builtins/fp_fixint_impl.inc b/contrib/compiler-rt/lib/builtins/fp_fixint_impl.inc
new file mode 100644
index 000000000000..da70d4d39301
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fp_fixint_impl.inc
@@ -0,0 +1,41 @@
+//===-- lib/fixdfsi.c - Double-precision -> integer conversion ----*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements float to integer conversion for the
+// compiler-rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp_lib.h"
+
+static __inline fixint_t __fixint(fp_t a) {
+ const fixint_t fixint_max = (fixint_t)((~(fixuint_t)0) / 2);
+ const fixint_t fixint_min = -fixint_max - 1;
+ // Break a into sign, exponent, significand
+ const rep_t aRep = toRep(a);
+ const rep_t aAbs = aRep & absMask;
+ const fixint_t sign = aRep & signBit ? -1 : 1;
+ const int exponent = (aAbs >> significandBits) - exponentBias;
+ const rep_t significand = (aAbs & significandMask) | implicitBit;
+
+ // If exponent is negative, the result is zero.
+ if (exponent < 0)
+ return 0;
+
+ // If the value is too large for the integer type, saturate.
+ if ((unsigned)exponent >= sizeof(fixint_t) * CHAR_BIT)
+ return sign == 1 ? fixint_max : fixint_min;
+
+ // If 0 <= exponent < significandBits, right shift to get the result.
+ // Otherwise, shift left.
+ if (exponent < significandBits)
+ return sign * (significand >> (significandBits - exponent));
+ else
+ return sign * ((fixint_t)significand << (exponent - significandBits));
+}
diff --git a/contrib/compiler-rt/lib/builtins/fp_fixuint_impl.inc b/contrib/compiler-rt/lib/builtins/fp_fixuint_impl.inc
new file mode 100644
index 000000000000..d68ccf27a79c
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fp_fixuint_impl.inc
@@ -0,0 +1,39 @@
+//===-- lib/fixdfsi.c - Double-precision -> integer conversion ----*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements float to unsigned integer conversion for the
+// compiler-rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp_lib.h"
+
+static __inline fixuint_t __fixuint(fp_t a) {
+ // Break a into sign, exponent, significand
+ const rep_t aRep = toRep(a);
+ const rep_t aAbs = aRep & absMask;
+ const int sign = aRep & signBit ? -1 : 1;
+ const int exponent = (aAbs >> significandBits) - exponentBias;
+ const rep_t significand = (aAbs & significandMask) | implicitBit;
+
+ // If either the value or the exponent is negative, the result is zero.
+ if (sign == -1 || exponent < 0)
+ return 0;
+
+ // If the value is too large for the integer type, saturate.
+ if ((unsigned)exponent >= sizeof(fixuint_t) * CHAR_BIT)
+ return ~(fixuint_t)0;
+
+ // If 0 <= exponent < significandBits, right shift to get the result.
+ // Otherwise, shift left.
+ if (exponent < significandBits)
+ return significand >> (significandBits - exponent);
+ else
+ return (fixuint_t)significand << (exponent - significandBits);
+}
diff --git a/contrib/compiler-rt/lib/builtins/fp_lib.h b/contrib/compiler-rt/lib/builtins/fp_lib.h
new file mode 100644
index 000000000000..a0e19ab6a8f6
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fp_lib.h
@@ -0,0 +1,327 @@
+//===-- lib/fp_lib.h - Floating-point utilities -------------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a configuration header for soft-float routines in compiler-rt.
+// This file does not provide any part of the compiler-rt interface, but defines
+// many useful constants and utility routines that are used in the
+// implementation of the soft-float routines in compiler-rt.
+//
+// Assumes that float, double and long double correspond to the IEEE-754
+// binary32, binary64 and binary 128 types, respectively, and that integer
+// endianness matches floating point endianness on the target platform.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FP_LIB_HEADER
+#define FP_LIB_HEADER
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <limits.h>
+#include "int_lib.h"
+#include "int_math.h"
+
+// x86_64 FreeBSD prior v9.3 define fixed-width types incorrectly in
+// 32-bit mode.
+#if defined(__FreeBSD__) && defined(__i386__)
+# include <sys/param.h>
+# if __FreeBSD_version < 903000 // v9.3
+# define uint64_t unsigned long long
+# define int64_t long long
+# undef UINT64_C
+# define UINT64_C(c) (c ## ULL)
+# endif
+#endif
+
+#if defined SINGLE_PRECISION
+
+typedef uint32_t rep_t;
+typedef int32_t srep_t;
+typedef float fp_t;
+#define REP_C UINT32_C
+#define significandBits 23
+
+static __inline int rep_clz(rep_t a) {
+ return __builtin_clz(a);
+}
+
+// 32x32 --> 64 bit multiply
+static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
+ const uint64_t product = (uint64_t)a*b;
+ *hi = product >> 32;
+ *lo = product;
+}
+COMPILER_RT_ABI fp_t __addsf3(fp_t a, fp_t b);
+
+#elif defined DOUBLE_PRECISION
+
+typedef uint64_t rep_t;
+typedef int64_t srep_t;
+typedef double fp_t;
+#define REP_C UINT64_C
+#define significandBits 52
+
+static __inline int rep_clz(rep_t a) {
+#if defined __LP64__
+ return __builtin_clzl(a);
+#else
+ if (a & REP_C(0xffffffff00000000))
+ return __builtin_clz(a >> 32);
+ else
+ return 32 + __builtin_clz(a & REP_C(0xffffffff));
+#endif
+}
+
+#define loWord(a) (a & 0xffffffffU)
+#define hiWord(a) (a >> 32)
+
+// 64x64 -> 128 wide multiply for platforms that don't have such an operation;
+// many 64-bit platforms have this operation, but they tend to have hardware
+// floating-point, so we don't bother with a special case for them here.
+static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
+ // Each of the component 32x32 -> 64 products
+ const uint64_t plolo = loWord(a) * loWord(b);
+ const uint64_t plohi = loWord(a) * hiWord(b);
+ const uint64_t philo = hiWord(a) * loWord(b);
+ const uint64_t phihi = hiWord(a) * hiWord(b);
+ // Sum terms that contribute to lo in a way that allows us to get the carry
+ const uint64_t r0 = loWord(plolo);
+ const uint64_t r1 = hiWord(plolo) + loWord(plohi) + loWord(philo);
+ *lo = r0 + (r1 << 32);
+ // Sum terms contributing to hi with the carry from lo
+ *hi = hiWord(plohi) + hiWord(philo) + hiWord(r1) + phihi;
+}
+#undef loWord
+#undef hiWord
+
+COMPILER_RT_ABI fp_t __adddf3(fp_t a, fp_t b);
+
+#elif defined QUAD_PRECISION
+#if __LDBL_MANT_DIG__ == 113
+#define CRT_LDBL_128BIT
+typedef __uint128_t rep_t;
+typedef __int128_t srep_t;
+typedef long double fp_t;
+#define REP_C (__uint128_t)
+// Note: Since there is no explicit way to tell compiler the constant is a
+// 128-bit integer, we let the constant be casted to 128-bit integer
+#define significandBits 112
+
+static __inline int rep_clz(rep_t a) {
+ const union
+ {
+ __uint128_t ll;
+#if _YUGA_BIG_ENDIAN
+ struct { uint64_t high, low; } s;
+#else
+ struct { uint64_t low, high; } s;
+#endif
+ } uu = { .ll = a };
+
+ uint64_t word;
+ uint64_t add;
+
+ if (uu.s.high){
+ word = uu.s.high;
+ add = 0;
+ }
+ else{
+ word = uu.s.low;
+ add = 64;
+ }
+ return __builtin_clzll(word) + add;
+}
+
+#define Word_LoMask UINT64_C(0x00000000ffffffff)
+#define Word_HiMask UINT64_C(0xffffffff00000000)
+#define Word_FullMask UINT64_C(0xffffffffffffffff)
+#define Word_1(a) (uint64_t)((a >> 96) & Word_LoMask)
+#define Word_2(a) (uint64_t)((a >> 64) & Word_LoMask)
+#define Word_3(a) (uint64_t)((a >> 32) & Word_LoMask)
+#define Word_4(a) (uint64_t)(a & Word_LoMask)
+
+// 128x128 -> 256 wide multiply for platforms that don't have such an operation;
+// many 64-bit platforms have this operation, but they tend to have hardware
+// floating-point, so we don't bother with a special case for them here.
+static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
+
+ const uint64_t product11 = Word_1(a) * Word_1(b);
+ const uint64_t product12 = Word_1(a) * Word_2(b);
+ const uint64_t product13 = Word_1(a) * Word_3(b);
+ const uint64_t product14 = Word_1(a) * Word_4(b);
+ const uint64_t product21 = Word_2(a) * Word_1(b);
+ const uint64_t product22 = Word_2(a) * Word_2(b);
+ const uint64_t product23 = Word_2(a) * Word_3(b);
+ const uint64_t product24 = Word_2(a) * Word_4(b);
+ const uint64_t product31 = Word_3(a) * Word_1(b);
+ const uint64_t product32 = Word_3(a) * Word_2(b);
+ const uint64_t product33 = Word_3(a) * Word_3(b);
+ const uint64_t product34 = Word_3(a) * Word_4(b);
+ const uint64_t product41 = Word_4(a) * Word_1(b);
+ const uint64_t product42 = Word_4(a) * Word_2(b);
+ const uint64_t product43 = Word_4(a) * Word_3(b);
+ const uint64_t product44 = Word_4(a) * Word_4(b);
+
+ const __uint128_t sum0 = (__uint128_t)product44;
+ const __uint128_t sum1 = (__uint128_t)product34 +
+ (__uint128_t)product43;
+ const __uint128_t sum2 = (__uint128_t)product24 +
+ (__uint128_t)product33 +
+ (__uint128_t)product42;
+ const __uint128_t sum3 = (__uint128_t)product14 +
+ (__uint128_t)product23 +
+ (__uint128_t)product32 +
+ (__uint128_t)product41;
+ const __uint128_t sum4 = (__uint128_t)product13 +
+ (__uint128_t)product22 +
+ (__uint128_t)product31;
+ const __uint128_t sum5 = (__uint128_t)product12 +
+ (__uint128_t)product21;
+ const __uint128_t sum6 = (__uint128_t)product11;
+
+ const __uint128_t r0 = (sum0 & Word_FullMask) +
+ ((sum1 & Word_LoMask) << 32);
+ const __uint128_t r1 = (sum0 >> 64) +
+ ((sum1 >> 32) & Word_FullMask) +
+ (sum2 & Word_FullMask) +
+ ((sum3 << 32) & Word_HiMask);
+
+ *lo = r0 + (r1 << 64);
+ *hi = (r1 >> 64) +
+ (sum1 >> 96) +
+ (sum2 >> 64) +
+ (sum3 >> 32) +
+ sum4 +
+ (sum5 << 32) +
+ (sum6 << 64);
+}
+#undef Word_1
+#undef Word_2
+#undef Word_3
+#undef Word_4
+#undef Word_HiMask
+#undef Word_LoMask
+#undef Word_FullMask
+#endif // __LDBL_MANT_DIG__ == 113
+#else
+#error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined.
+#endif
+
+#if defined(SINGLE_PRECISION) || defined(DOUBLE_PRECISION) || defined(CRT_LDBL_128BIT)
+#define typeWidth (sizeof(rep_t)*CHAR_BIT)
+#define exponentBits (typeWidth - significandBits - 1)
+#define maxExponent ((1 << exponentBits) - 1)
+#define exponentBias (maxExponent >> 1)
+
+#define implicitBit (REP_C(1) << significandBits)
+#define significandMask (implicitBit - 1U)
+#define signBit (REP_C(1) << (significandBits + exponentBits))
+#define absMask (signBit - 1U)
+#define exponentMask (absMask ^ significandMask)
+#define oneRep ((rep_t)exponentBias << significandBits)
+#define infRep exponentMask
+#define quietBit (implicitBit >> 1)
+#define qnanRep (exponentMask | quietBit)
+
+static __inline rep_t toRep(fp_t x) {
+ const union { fp_t f; rep_t i; } rep = {.f = x};
+ return rep.i;
+}
+
+static __inline fp_t fromRep(rep_t x) {
+ const union { fp_t f; rep_t i; } rep = {.i = x};
+ return rep.f;
+}
+
+static __inline int normalize(rep_t *significand) {
+ const int shift = rep_clz(*significand) - rep_clz(implicitBit);
+ *significand <<= shift;
+ return 1 - shift;
+}
+
+static __inline void wideLeftShift(rep_t *hi, rep_t *lo, int count) {
+ *hi = *hi << count | *lo >> (typeWidth - count);
+ *lo = *lo << count;
+}
+
+static __inline void wideRightShiftWithSticky(rep_t *hi, rep_t *lo, unsigned int count) {
+ if (count < typeWidth) {
+ const bool sticky = *lo << (typeWidth - count);
+ *lo = *hi << (typeWidth - count) | *lo >> count | sticky;
+ *hi = *hi >> count;
+ }
+ else if (count < 2*typeWidth) {
+ const bool sticky = *hi << (2*typeWidth - count) | *lo;
+ *lo = *hi >> (count - typeWidth) | sticky;
+ *hi = 0;
+ } else {
+ const bool sticky = *hi | *lo;
+ *lo = sticky;
+ *hi = 0;
+ }
+}
+
+// Implements logb methods (logb, logbf, logbl) for IEEE-754. This avoids
+// pulling in a libm dependency from compiler-rt, but is not meant to replace
+// it (i.e. code calling logb() should get the one from libm, not this), hence
+// the __compiler_rt prefix.
+static __inline fp_t __compiler_rt_logbX(fp_t x) {
+ rep_t rep = toRep(x);
+ int exp = (rep & exponentMask) >> significandBits;
+
+ // Abnormal cases:
+ // 1) +/- inf returns +inf; NaN returns NaN
+ // 2) 0.0 returns -inf
+ if (exp == maxExponent) {
+ if (((rep & signBit) == 0) || (x != x)) {
+ return x; // NaN or +inf: return x
+ } else {
+ return -x; // -inf: return -x
+ }
+ } else if (x == 0.0) {
+ // 0.0: return -inf
+ return fromRep(infRep | signBit);
+ }
+
+ if (exp != 0) {
+ // Normal number
+ return exp - exponentBias; // Unbias exponent
+ } else {
+ // Subnormal number; normalize and repeat
+ rep &= absMask;
+ const int shift = 1 - normalize(&rep);
+ exp = (rep & exponentMask) >> significandBits;
+ return exp - exponentBias - shift; // Unbias exponent
+ }
+}
+#endif
+
+#if defined(SINGLE_PRECISION)
+static __inline fp_t __compiler_rt_logbf(fp_t x) {
+ return __compiler_rt_logbX(x);
+}
+#elif defined(DOUBLE_PRECISION)
+static __inline fp_t __compiler_rt_logb(fp_t x) {
+ return __compiler_rt_logbX(x);
+}
+#elif defined(QUAD_PRECISION)
+ #if defined(CRT_LDBL_128BIT)
+static __inline fp_t __compiler_rt_logbl(fp_t x) {
+ return __compiler_rt_logbX(x);
+}
+ #else
+// The generic implementation only works for ieee754 floating point. For other
+// floating point types, continue to rely on the libm implementation for now.
+static __inline long double __compiler_rt_logbl(long double x) {
+ return crt_logbl(x);
+}
+ #endif
+#endif
+
+#endif // FP_LIB_HEADER
diff --git a/contrib/compiler-rt/lib/builtins/fp_mul_impl.inc b/contrib/compiler-rt/lib/builtins/fp_mul_impl.inc
new file mode 100644
index 000000000000..b34aa1b8f544
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fp_mul_impl.inc
@@ -0,0 +1,116 @@
+//===---- lib/fp_mul_impl.inc - floating point multiplication -----*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements soft-float multiplication with the IEEE-754 default
+// rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp_lib.h"
+
+static __inline fp_t __mulXf3__(fp_t a, fp_t b) {
+ const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
+ const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
+ const rep_t productSign = (toRep(a) ^ toRep(b)) & signBit;
+
+ rep_t aSignificand = toRep(a) & significandMask;
+ rep_t bSignificand = toRep(b) & significandMask;
+ int scale = 0;
+
+ // Detect if a or b is zero, denormal, infinity, or NaN.
+ if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) {
+
+ const rep_t aAbs = toRep(a) & absMask;
+ const rep_t bAbs = toRep(b) & absMask;
+
+ // NaN * anything = qNaN
+ if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
+ // anything * NaN = qNaN
+ if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
+
+ if (aAbs == infRep) {
+ // infinity * non-zero = +/- infinity
+ if (bAbs) return fromRep(aAbs | productSign);
+ // infinity * zero = NaN
+ else return fromRep(qnanRep);
+ }
+
+ if (bAbs == infRep) {
+ //? non-zero * infinity = +/- infinity
+ if (aAbs) return fromRep(bAbs | productSign);
+ // zero * infinity = NaN
+ else return fromRep(qnanRep);
+ }
+
+ // zero * anything = +/- zero
+ if (!aAbs) return fromRep(productSign);
+ // anything * zero = +/- zero
+ if (!bAbs) return fromRep(productSign);
+
+ // one or both of a or b is denormal, the other (if applicable) is a
+ // normal number. Renormalize one or both of a and b, and set scale to
+ // include the necessary exponent adjustment.
+ if (aAbs < implicitBit) scale += normalize(&aSignificand);
+ if (bAbs < implicitBit) scale += normalize(&bSignificand);
+ }
+
+ // Or in the implicit significand bit. (If we fell through from the
+ // denormal path it was already set by normalize( ), but setting it twice
+ // won't hurt anything.)
+ aSignificand |= implicitBit;
+ bSignificand |= implicitBit;
+
+ // Get the significand of a*b. Before multiplying the significands, shift
+ // one of them left to left-align it in the field. Thus, the product will
+ // have (exponentBits + 2) integral digits, all but two of which must be
+ // zero. Normalizing this result is just a conditional left-shift by one
+ // and bumping the exponent accordingly.
+ rep_t productHi, productLo;
+ wideMultiply(aSignificand, bSignificand << exponentBits,
+ &productHi, &productLo);
+
+ int productExponent = aExponent + bExponent - exponentBias + scale;
+
+ // Normalize the significand, adjust exponent if needed.
+ if (productHi & implicitBit) productExponent++;
+ else wideLeftShift(&productHi, &productLo, 1);
+
+ // If we have overflowed the type, return +/- infinity.
+ if (productExponent >= maxExponent) return fromRep(infRep | productSign);
+
+ if (productExponent <= 0) {
+ // Result is denormal before rounding
+ //
+ // If the result is so small that it just underflows to zero, return
+ // a zero of the appropriate sign. Mathematically there is no need to
+ // handle this case separately, but we make it a special case to
+ // simplify the shift logic.
+ const unsigned int shift = REP_C(1) - (unsigned int)productExponent;
+ if (shift >= typeWidth) return fromRep(productSign);
+
+ // Otherwise, shift the significand of the result so that the round
+ // bit is the high bit of productLo.
+ wideRightShiftWithSticky(&productHi, &productLo, shift);
+ }
+ else {
+ // Result is normal before rounding; insert the exponent.
+ productHi &= significandMask;
+ productHi |= (rep_t)productExponent << significandBits;
+ }
+
+ // Insert the sign of the result:
+ productHi |= productSign;
+
+ // Final rounding. The final result may overflow to infinity, or underflow
+ // to zero, but those are the correct results in those cases. We use the
+ // default IEEE-754 round-to-nearest, ties-to-even rounding mode.
+ if (productLo > signBit) productHi++;
+ if (productLo == signBit) productHi += productHi & 1;
+ return fromRep(productHi);
+}
diff --git a/contrib/compiler-rt/lib/builtins/fp_trunc.h b/contrib/compiler-rt/lib/builtins/fp_trunc.h
new file mode 100644
index 000000000000..d5e79bb5b863
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fp_trunc.h
@@ -0,0 +1,76 @@
+//=== lib/fp_trunc.h - high precision -> low precision conversion *- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Set source and destination precision setting
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FP_TRUNC_HEADER
+#define FP_TRUNC_HEADER
+
+#include "int_lib.h"
+
+#if defined SRC_SINGLE
+typedef float src_t;
+typedef uint32_t src_rep_t;
+#define SRC_REP_C UINT32_C
+static const int srcSigBits = 23;
+
+#elif defined SRC_DOUBLE
+typedef double src_t;
+typedef uint64_t src_rep_t;
+#define SRC_REP_C UINT64_C
+static const int srcSigBits = 52;
+
+#elif defined SRC_QUAD
+typedef long double src_t;
+typedef __uint128_t src_rep_t;
+#define SRC_REP_C (__uint128_t)
+static const int srcSigBits = 112;
+
+#else
+#error Source should be double precision or quad precision!
+#endif //end source precision
+
+#if defined DST_DOUBLE
+typedef double dst_t;
+typedef uint64_t dst_rep_t;
+#define DST_REP_C UINT64_C
+static const int dstSigBits = 52;
+
+#elif defined DST_SINGLE
+typedef float dst_t;
+typedef uint32_t dst_rep_t;
+#define DST_REP_C UINT32_C
+static const int dstSigBits = 23;
+
+#elif defined DST_HALF
+typedef uint16_t dst_t;
+typedef uint16_t dst_rep_t;
+#define DST_REP_C UINT16_C
+static const int dstSigBits = 10;
+
+#else
+#error Destination should be single precision or double precision!
+#endif //end destination precision
+
+// End of specialization parameters. Two helper routines for conversion to and
+// from the representation of floating-point data as integer values follow.
+
+static __inline src_rep_t srcToRep(src_t x) {
+ const union { src_t f; src_rep_t i; } rep = {.f = x};
+ return rep.i;
+}
+
+static __inline dst_t dstFromRep(dst_rep_t x) {
+ const union { dst_t f; dst_rep_t i; } rep = {.i = x};
+ return rep.f;
+}
+
+#endif // FP_TRUNC_HEADER
diff --git a/contrib/compiler-rt/lib/builtins/fp_trunc_impl.inc b/contrib/compiler-rt/lib/builtins/fp_trunc_impl.inc
new file mode 100644
index 000000000000..d88ae060913f
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/fp_trunc_impl.inc
@@ -0,0 +1,135 @@
+//= lib/fp_trunc_impl.inc - high precision -> low precision conversion *-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a fairly generic conversion from a wider to a narrower
+// IEEE-754 floating-point type in the default (round to nearest, ties to even)
+// rounding mode. The constants and types defined following the includes below
+// parameterize the conversion.
+//
+// This routine can be trivially adapted to support conversions to
+// half-precision or from quad-precision. It does not support types that don't
+// use the usual IEEE-754 interchange formats; specifically, some work would be
+// needed to adapt it to (for example) the Intel 80-bit format or PowerPC
+// double-double format.
+//
+// Note please, however, that this implementation is only intended to support
+// *narrowing* operations; if you need to convert to a *wider* floating-point
+// type (e.g. float -> double), then this routine will not do what you want it
+// to.
+//
+// It also requires that integer types at least as large as both formats
+// are available on the target platform; this may pose a problem when trying
+// to add support for quad on some 32-bit systems, for example.
+//
+// Finally, the following assumptions are made:
+//
+// 1. floating-point types and integer types have the same endianness on the
+// target platform
+//
+// 2. quiet NaNs, if supported, are indicated by the leading bit of the
+// significand field being set
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp_trunc.h"
+
+static __inline dst_t __truncXfYf2__(src_t a) {
+ // Various constants whose values follow from the type parameters.
+ // Any reasonable optimizer will fold and propagate all of these.
+ const int srcBits = sizeof(src_t)*CHAR_BIT;
+ const int srcExpBits = srcBits - srcSigBits - 1;
+ const int srcInfExp = (1 << srcExpBits) - 1;
+ const int srcExpBias = srcInfExp >> 1;
+
+ const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits;
+ const src_rep_t srcSignificandMask = srcMinNormal - 1;
+ const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits;
+ const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits);
+ const src_rep_t srcAbsMask = srcSignMask - 1;
+ const src_rep_t roundMask = (SRC_REP_C(1) << (srcSigBits - dstSigBits)) - 1;
+ const src_rep_t halfway = SRC_REP_C(1) << (srcSigBits - dstSigBits - 1);
+ const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1);
+ const src_rep_t srcNaNCode = srcQNaN - 1;
+
+ const int dstBits = sizeof(dst_t)*CHAR_BIT;
+ const int dstExpBits = dstBits - dstSigBits - 1;
+ const int dstInfExp = (1 << dstExpBits) - 1;
+ const int dstExpBias = dstInfExp >> 1;
+
+ const int underflowExponent = srcExpBias + 1 - dstExpBias;
+ const int overflowExponent = srcExpBias + dstInfExp - dstExpBias;
+ const src_rep_t underflow = (src_rep_t)underflowExponent << srcSigBits;
+ const src_rep_t overflow = (src_rep_t)overflowExponent << srcSigBits;
+
+ const dst_rep_t dstQNaN = DST_REP_C(1) << (dstSigBits - 1);
+ const dst_rep_t dstNaNCode = dstQNaN - 1;
+
+ // Break a into a sign and representation of the absolute value
+ const src_rep_t aRep = srcToRep(a);
+ const src_rep_t aAbs = aRep & srcAbsMask;
+ const src_rep_t sign = aRep & srcSignMask;
+ dst_rep_t absResult;
+
+ if (aAbs - underflow < aAbs - overflow) {
+ // The exponent of a is within the range of normal numbers in the
+ // destination format. We can convert by simply right-shifting with
+ // rounding and adjusting the exponent.
+ absResult = aAbs >> (srcSigBits - dstSigBits);
+ absResult -= (dst_rep_t)(srcExpBias - dstExpBias) << dstSigBits;
+
+ const src_rep_t roundBits = aAbs & roundMask;
+ // Round to nearest
+ if (roundBits > halfway)
+ absResult++;
+ // Ties to even
+ else if (roundBits == halfway)
+ absResult += absResult & 1;
+ }
+ else if (aAbs > srcInfinity) {
+ // a is NaN.
+ // Conjure the result by beginning with infinity, setting the qNaN
+ // bit and inserting the (truncated) trailing NaN field.
+ absResult = (dst_rep_t)dstInfExp << dstSigBits;
+ absResult |= dstQNaN;
+ absResult |= ((aAbs & srcNaNCode) >> (srcSigBits - dstSigBits)) & dstNaNCode;
+ }
+ else if (aAbs >= overflow) {
+ // a overflows to infinity.
+ absResult = (dst_rep_t)dstInfExp << dstSigBits;
+ }
+ else {
+ // a underflows on conversion to the destination type or is an exact
+ // zero. The result may be a denormal or zero. Extract the exponent
+ // to get the shift amount for the denormalization.
+ const int aExp = aAbs >> srcSigBits;
+ const int shift = srcExpBias - dstExpBias - aExp + 1;
+
+ const src_rep_t significand = (aRep & srcSignificandMask) | srcMinNormal;
+
+ // Right shift by the denormalization amount with sticky.
+ if (shift > srcSigBits) {
+ absResult = 0;
+ } else {
+ const bool sticky = significand << (srcBits - shift);
+ src_rep_t denormalizedSignificand = significand >> shift | sticky;
+ absResult = denormalizedSignificand >> (srcSigBits - dstSigBits);
+ const src_rep_t roundBits = denormalizedSignificand & roundMask;
+ // Round to nearest
+ if (roundBits > halfway)
+ absResult++;
+ // Ties to even
+ else if (roundBits == halfway)
+ absResult += absResult & 1;
+ }
+ }
+
+ // Apply the signbit to (dst_t)abs(a).
+ const dst_rep_t result = absResult | sign >> (srcBits - dstBits);
+ return dstFromRep(result);
+}
diff --git a/contrib/compiler-rt/lib/builtins/gcc_personality_v0.c b/contrib/compiler-rt/lib/builtins/gcc_personality_v0.c
new file mode 100644
index 000000000000..68581ef16434
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/gcc_personality_v0.c
@@ -0,0 +1,251 @@
+/* ===-- gcc_personality_v0.c - Implement __gcc_personality_v0 -------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ */
+
+#include "int_lib.h"
+
+#include <unwind.h>
+#if defined(__arm__) && !defined(__ARM_DWARF_EH__) && !defined(__USING_SJLJ_EXCEPTIONS__)
+/*
+ * When building with older compilers (e.g. clang <3.9), it is possible that we
+ * have a version of unwind.h which does not provide the EHABI declarations
+ * which are quired for the C personality to conform to the specification. In
+ * order to provide forward compatibility for such compilers, we re-declare the
+ * necessary interfaces in the helper to permit a standalone compilation of the
+ * builtins (which contains the C unwinding personality for historical reasons).
+ */
+#include "unwind-ehabi-helpers.h"
+#endif
+
+/*
+ * Pointer encodings documented at:
+ * http://refspecs.freestandards.org/LSB_1.3.0/gLSB/gLSB/ehframehdr.html
+ */
+
+#define DW_EH_PE_omit 0xff /* no data follows */
+
+#define DW_EH_PE_absptr 0x00
+#define DW_EH_PE_uleb128 0x01
+#define DW_EH_PE_udata2 0x02
+#define DW_EH_PE_udata4 0x03
+#define DW_EH_PE_udata8 0x04
+#define DW_EH_PE_sleb128 0x09
+#define DW_EH_PE_sdata2 0x0A
+#define DW_EH_PE_sdata4 0x0B
+#define DW_EH_PE_sdata8 0x0C
+
+#define DW_EH_PE_pcrel 0x10
+#define DW_EH_PE_textrel 0x20
+#define DW_EH_PE_datarel 0x30
+#define DW_EH_PE_funcrel 0x40
+#define DW_EH_PE_aligned 0x50
+#define DW_EH_PE_indirect 0x80 /* gcc extension */
+
+
+
+/* read a uleb128 encoded value and advance pointer */
+static uintptr_t readULEB128(const uint8_t** data)
+{
+ uintptr_t result = 0;
+ uintptr_t shift = 0;
+ unsigned char byte;
+ const uint8_t* p = *data;
+ do {
+ byte = *p++;
+ result |= (byte & 0x7f) << shift;
+ shift += 7;
+ } while (byte & 0x80);
+ *data = p;
+ return result;
+}
+
+/* read a pointer encoded value and advance pointer */
+static uintptr_t readEncodedPointer(const uint8_t** data, uint8_t encoding)
+{
+ const uint8_t* p = *data;
+ uintptr_t result = 0;
+
+ if ( encoding == DW_EH_PE_omit )
+ return 0;
+
+ /* first get value */
+ switch (encoding & 0x0F) {
+ case DW_EH_PE_absptr:
+ result = *((const uintptr_t*)p);
+ p += sizeof(uintptr_t);
+ break;
+ case DW_EH_PE_uleb128:
+ result = readULEB128(&p);
+ break;
+ case DW_EH_PE_udata2:
+ result = *((const uint16_t*)p);
+ p += sizeof(uint16_t);
+ break;
+ case DW_EH_PE_udata4:
+ result = *((const uint32_t*)p);
+ p += sizeof(uint32_t);
+ break;
+ case DW_EH_PE_udata8:
+ result = *((const uint64_t*)p);
+ p += sizeof(uint64_t);
+ break;
+ case DW_EH_PE_sdata2:
+ result = *((const int16_t*)p);
+ p += sizeof(int16_t);
+ break;
+ case DW_EH_PE_sdata4:
+ result = *((const int32_t*)p);
+ p += sizeof(int32_t);
+ break;
+ case DW_EH_PE_sdata8:
+ result = *((const int64_t*)p);
+ p += sizeof(int64_t);
+ break;
+ case DW_EH_PE_sleb128:
+ default:
+ /* not supported */
+ compilerrt_abort();
+ break;
+ }
+
+ /* then add relative offset */
+ switch ( encoding & 0x70 ) {
+ case DW_EH_PE_absptr:
+ /* do nothing */
+ break;
+ case DW_EH_PE_pcrel:
+ result += (uintptr_t)(*data);
+ break;
+ case DW_EH_PE_textrel:
+ case DW_EH_PE_datarel:
+ case DW_EH_PE_funcrel:
+ case DW_EH_PE_aligned:
+ default:
+ /* not supported */
+ compilerrt_abort();
+ break;
+ }
+
+ /* then apply indirection */
+ if (encoding & DW_EH_PE_indirect) {
+ result = *((const uintptr_t*)result);
+ }
+
+ *data = p;
+ return result;
+}
+
+#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \
+ !defined(__ARM_DWARF_EH__)
+#define USING_ARM_EHABI 1
+_Unwind_Reason_Code __gnu_unwind_frame(struct _Unwind_Exception *,
+ struct _Unwind_Context *);
+#endif
+
+static inline _Unwind_Reason_Code
+continueUnwind(struct _Unwind_Exception *exceptionObject,
+ struct _Unwind_Context *context) {
+#if USING_ARM_EHABI
+ /*
+ * On ARM EHABI the personality routine is responsible for actually
+ * unwinding a single stack frame before returning (ARM EHABI Sec. 6.1).
+ */
+ if (__gnu_unwind_frame(exceptionObject, context) != _URC_OK)
+ return _URC_FAILURE;
+#endif
+ return _URC_CONTINUE_UNWIND;
+}
+
+/*
+ * The C compiler makes references to __gcc_personality_v0 in
+ * the dwarf unwind information for translation units that use
+ * __attribute__((cleanup(xx))) on local variables.
+ * This personality routine is called by the system unwinder
+ * on each frame as the stack is unwound during a C++ exception
+ * throw through a C function compiled with -fexceptions.
+ */
+#if __USING_SJLJ_EXCEPTIONS__
+/* the setjump-longjump based exceptions personality routine has a
+ * different name */
+COMPILER_RT_ABI _Unwind_Reason_Code
+__gcc_personality_sj0(int version, _Unwind_Action actions,
+ uint64_t exceptionClass, struct _Unwind_Exception* exceptionObject,
+ struct _Unwind_Context *context)
+#elif USING_ARM_EHABI
+/* The ARM EHABI personality routine has a different signature. */
+COMPILER_RT_ABI _Unwind_Reason_Code __gcc_personality_v0(
+ _Unwind_State state, struct _Unwind_Exception *exceptionObject,
+ struct _Unwind_Context *context)
+#else
+COMPILER_RT_ABI _Unwind_Reason_Code
+__gcc_personality_v0(int version, _Unwind_Action actions,
+ uint64_t exceptionClass, struct _Unwind_Exception* exceptionObject,
+ struct _Unwind_Context *context)
+#endif
+{
+ /* Since C does not have catch clauses, there is nothing to do during */
+ /* phase 1 (the search phase). */
+#if USING_ARM_EHABI
+ /* After resuming from a cleanup we should also continue on to the next
+ * frame straight away. */
+ if ((state & _US_ACTION_MASK) != _US_UNWIND_FRAME_STARTING)
+#else
+ if ( actions & _UA_SEARCH_PHASE )
+#endif
+ return continueUnwind(exceptionObject, context);
+
+ /* There is nothing to do if there is no LSDA for this frame. */
+ const uint8_t* lsda = (uint8_t*)_Unwind_GetLanguageSpecificData(context);
+ if ( lsda == (uint8_t*) 0 )
+ return continueUnwind(exceptionObject, context);
+
+ uintptr_t pc = (uintptr_t)_Unwind_GetIP(context)-1;
+ uintptr_t funcStart = (uintptr_t)_Unwind_GetRegionStart(context);
+ uintptr_t pcOffset = pc - funcStart;
+
+ /* Parse LSDA header. */
+ uint8_t lpStartEncoding = *lsda++;
+ if (lpStartEncoding != DW_EH_PE_omit) {
+ readEncodedPointer(&lsda, lpStartEncoding);
+ }
+ uint8_t ttypeEncoding = *lsda++;
+ if (ttypeEncoding != DW_EH_PE_omit) {
+ readULEB128(&lsda);
+ }
+ /* Walk call-site table looking for range that includes current PC. */
+ uint8_t callSiteEncoding = *lsda++;
+ uint32_t callSiteTableLength = readULEB128(&lsda);
+ const uint8_t* callSiteTableStart = lsda;
+ const uint8_t* callSiteTableEnd = callSiteTableStart + callSiteTableLength;
+ const uint8_t* p=callSiteTableStart;
+ while (p < callSiteTableEnd) {
+ uintptr_t start = readEncodedPointer(&p, callSiteEncoding);
+ uintptr_t length = readEncodedPointer(&p, callSiteEncoding);
+ uintptr_t landingPad = readEncodedPointer(&p, callSiteEncoding);
+ readULEB128(&p); /* action value not used for C code */
+ if ( landingPad == 0 )
+ continue; /* no landing pad for this entry */
+ if ( (start <= pcOffset) && (pcOffset < (start+length)) ) {
+ /* Found landing pad for the PC.
+ * Set Instruction Pointer to so we re-enter function
+ * at landing pad. The landing pad is created by the compiler
+ * to take two parameters in registers.
+ */
+ _Unwind_SetGR(context, __builtin_eh_return_data_regno(0),
+ (uintptr_t)exceptionObject);
+ _Unwind_SetGR(context, __builtin_eh_return_data_regno(1), 0);
+ _Unwind_SetIP(context, (funcStart + landingPad));
+ return _URC_INSTALL_CONTEXT;
+ }
+ }
+
+ /* No landing pad found, continue unwinding. */
+ return continueUnwind(exceptionObject, context);
+}
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/common_entry_exit_abi1.S b/contrib/compiler-rt/lib/builtins/hexagon/common_entry_exit_abi1.S
new file mode 100644
index 000000000000..d5479d2a52fa
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/common_entry_exit_abi1.S
@@ -0,0 +1,103 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+/* Functions that implement common sequences in function prologues and epilogues
+ used to save code size */
+
+ .macro FUNCTION_BEGIN name
+ .text
+ .globl \name
+ .type \name, @function
+ .falign
+\name:
+ .endm
+
+ .macro FUNCTION_END name
+ .size \name, . - \name
+ .endm
+
+ .macro FALLTHROUGH_TAIL_CALL name0 name1
+ .size \name0, . - \name0
+ .globl \name1
+ .type \name1, @function
+ .falign
+\name1:
+ .endm
+
+
+
+
+/* Save r25:24 at fp+#-8 and r27:26 at fp+#-16. */
+
+
+
+
+/* The compiler knows that the __save_* functions clobber LR. No other
+ registers should be used without informing the compiler. */
+
+/* Since we can only issue one store per packet, we don't hurt performance by
+ simply jumping to the right point in this sequence of stores. */
+
+FUNCTION_BEGIN __save_r24_through_r27
+ memd(fp+#-16) = r27:26
+FALLTHROUGH_TAIL_CALL __save_r24_through_r27 __save_r24_through_r25
+ {
+ memd(fp+#-8) = r25:24
+ jumpr lr
+ }
+FUNCTION_END __save_r24_through_r25
+
+
+
+
+/* For each of the *_before_tailcall functions, jumpr lr is executed in parallel
+ with deallocframe. That way, the return gets the old value of lr, which is
+ where these functions need to return, and at the same time, lr gets the value
+ it needs going into the tail call. */
+
+FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe_before_tailcall
+ r27:26 = memd(fp+#-16)
+FALLTHROUGH_TAIL_CALL __restore_r24_through_r27_and_deallocframe_before_tailcall __restore_r24_through_r25_and_deallocframe_before_tailcall
+ {
+ r25:24 = memd(fp+#-8)
+ deallocframe
+ jumpr lr
+ }
+FUNCTION_END __restore_r24_through_r25_and_deallocframe_before_tailcall
+
+
+
+
+/* Here we use the extra load bandwidth to restore LR early, allowing the return
+ to occur in parallel with the deallocframe. */
+
+FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe
+ {
+ lr = memw(fp+#4)
+ r27:26 = memd(fp+#-16)
+ }
+ {
+ r25:24 = memd(fp+#-8)
+ deallocframe
+ jumpr lr
+ }
+FUNCTION_END __restore_r24_through_r27_and_deallocframe
+
+
+
+
+/* Here the load bandwidth is maximized. */
+
+FUNCTION_BEGIN __restore_r24_through_r25_and_deallocframe
+ {
+ r25:24 = memd(fp+#-8)
+ deallocframe
+ }
+ jumpr lr
+FUNCTION_END __restore_r24_through_r25_and_deallocframe
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/common_entry_exit_abi2.S b/contrib/compiler-rt/lib/builtins/hexagon/common_entry_exit_abi2.S
new file mode 100644
index 000000000000..6f470343db49
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/common_entry_exit_abi2.S
@@ -0,0 +1,268 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+/* Functions that implement common sequences in function prologues and epilogues
+ used to save code size */
+
+ .macro FUNCTION_BEGIN name
+ .p2align 2
+ .section .text.\name,"ax",@progbits
+ .globl \name
+ .type \name, @function
+\name:
+ .endm
+
+ .macro FUNCTION_END name
+ .size \name, . - \name
+ .endm
+
+ .macro FALLTHROUGH_TAIL_CALL name0 name1
+ .p2align 2
+ .size \name0, . - \name0
+ .globl \name1
+ .type \name1, @function
+\name1:
+ .endm
+
+
+
+
+/* Save r17:16 at fp+#-8, r19:18 at fp+#-16, r21:20 at fp+#-24, r23:22 at
+ fp+#-32, r25:24 at fp+#-40, and r27:26 at fp+#-48.
+ The compiler knows that the __save_* functions clobber LR. No other
+ registers should be used without informing the compiler. */
+
+FUNCTION_BEGIN __save_r16_through_r27
+ {
+ memd(fp+#-48) = r27:26
+ memd(fp+#-40) = r25:24
+ }
+ {
+ memd(fp+#-32) = r23:22
+ memd(fp+#-24) = r21:20
+ }
+ {
+ memd(fp+#-16) = r19:18
+ memd(fp+#-8) = r17:16
+ jumpr lr
+ }
+FUNCTION_END __save_r16_through_r27
+
+FUNCTION_BEGIN __save_r16_through_r25
+ {
+ memd(fp+#-40) = r25:24
+ memd(fp+#-32) = r23:22
+ }
+ {
+ memd(fp+#-24) = r21:20
+ memd(fp+#-16) = r19:18
+ }
+ {
+ memd(fp+#-8) = r17:16
+ jumpr lr
+ }
+FUNCTION_END __save_r16_through_r25
+
+FUNCTION_BEGIN __save_r16_through_r23
+ {
+ memd(fp+#-32) = r23:22
+ memd(fp+#-24) = r21:20
+ }
+ {
+ memd(fp+#-16) = r19:18
+ memd(fp+#-8) = r17:16
+ jumpr lr
+ }
+FUNCTION_END __save_r16_through_r23
+
+FUNCTION_BEGIN __save_r16_through_r21
+ {
+ memd(fp+#-24) = r21:20
+ memd(fp+#-16) = r19:18
+ }
+ {
+ memd(fp+#-8) = r17:16
+ jumpr lr
+ }
+FUNCTION_END __save_r16_through_r21
+
+FUNCTION_BEGIN __save_r16_through_r19
+ {
+ memd(fp+#-16) = r19:18
+ memd(fp+#-8) = r17:16
+ jumpr lr
+ }
+FUNCTION_END __save_r16_through_r19
+
+FUNCTION_BEGIN __save_r16_through_r17
+ {
+ memd(fp+#-8) = r17:16
+ jumpr lr
+ }
+FUNCTION_END __save_r16_through_r17
+
+/* For each of the *_before_tailcall functions, jumpr lr is executed in parallel
+ with deallocframe. That way, the return gets the old value of lr, which is
+ where these functions need to return, and at the same time, lr gets the value
+ it needs going into the tail call. */
+
+
+FUNCTION_BEGIN __restore_r16_through_r27_and_deallocframe_before_tailcall
+ r27:26 = memd(fp+#-48)
+ {
+ r25:24 = memd(fp+#-40)
+ r23:22 = memd(fp+#-32)
+ }
+ {
+ r21:20 = memd(fp+#-24)
+ r19:18 = memd(fp+#-16)
+ }
+ {
+ r17:16 = memd(fp+#-8)
+ deallocframe
+ jumpr lr
+ }
+FUNCTION_END __restore_r16_through_r27_and_deallocframe_before_tailcall
+
+FUNCTION_BEGIN __restore_r16_through_r25_and_deallocframe_before_tailcall
+ {
+ r25:24 = memd(fp+#-40)
+ r23:22 = memd(fp+#-32)
+ }
+ {
+ r21:20 = memd(fp+#-24)
+ r19:18 = memd(fp+#-16)
+ }
+ {
+ r17:16 = memd(fp+#-8)
+ deallocframe
+ jumpr lr
+ }
+FUNCTION_END __restore_r16_through_r25_and_deallocframe_before_tailcall
+
+FUNCTION_BEGIN __restore_r16_through_r23_and_deallocframe_before_tailcall
+ {
+ r23:22 = memd(fp+#-32)
+ r21:20 = memd(fp+#-24)
+ }
+ r19:18 = memd(fp+#-16)
+ {
+ r17:16 = memd(fp+#-8)
+ deallocframe
+ jumpr lr
+ }
+FUNCTION_END __restore_r16_through_r23_and_deallocframe_before_tailcall
+
+
+FUNCTION_BEGIN __restore_r16_through_r21_and_deallocframe_before_tailcall
+ {
+ r21:20 = memd(fp+#-24)
+ r19:18 = memd(fp+#-16)
+ }
+ {
+ r17:16 = memd(fp+#-8)
+ deallocframe
+ jumpr lr
+ }
+FUNCTION_END __restore_r16_through_r19_and_deallocframe_before_tailcall
+
+FUNCTION_BEGIN __restore_r16_through_r19_and_deallocframe_before_tailcall
+ r19:18 = memd(fp+#-16)
+ {
+ r17:16 = memd(fp+#-8)
+ deallocframe
+ jumpr lr
+ }
+FUNCTION_END __restore_r16_through_r19_and_deallocframe_before_tailcall
+
+FUNCTION_BEGIN __restore_r16_through_r17_and_deallocframe_before_tailcall
+ {
+ r17:16 = memd(fp+#-8)
+ deallocframe
+ jumpr lr
+ }
+FUNCTION_END __restore_r16_through_r17_and_deallocframe_before_tailcall
+
+
+FUNCTION_BEGIN __restore_r16_through_r27_and_deallocframe
+ r27:26 = memd(fp+#-48)
+ {
+ r25:24 = memd(fp+#-40)
+ r23:22 = memd(fp+#-32)
+ }
+ {
+ r21:20 = memd(fp+#-24)
+ r19:18 = memd(fp+#-16)
+ }
+ {
+ r17:16 = memd(fp+#-8)
+ dealloc_return
+ }
+FUNCTION_END __restore_r16_through_r27_and_deallocframe
+
+FUNCTION_BEGIN __restore_r16_through_r25_and_deallocframe
+ {
+ r25:24 = memd(fp+#-40)
+ r23:22 = memd(fp+#-32)
+ }
+ {
+ r21:20 = memd(fp+#-24)
+ r19:18 = memd(fp+#-16)
+ }
+ {
+ r17:16 = memd(fp+#-8)
+ dealloc_return
+ }
+FUNCTION_END __restore_r16_through_r25_and_deallocframe
+
+FUNCTION_BEGIN __restore_r16_through_r23_and_deallocframe
+ {
+ r23:22 = memd(fp+#-32)
+ }
+ {
+ r21:20 = memd(fp+#-24)
+ r19:18 = memd(fp+#-16)
+ }
+ {
+ r17:16 = memd(fp+#-8)
+ dealloc_return
+ }
+FUNCTION_END __restore_r16_through_r23_and_deallocframe
+
+FUNCTION_BEGIN __restore_r16_through_r21_and_deallocframe
+ {
+ r21:20 = memd(fp+#-24)
+ r19:18 = memd(fp+#-16)
+ }
+ {
+ r17:16 = memd(fp+#-8)
+ dealloc_return
+ }
+FUNCTION_END __restore_r16_through_r21_and_deallocframe
+
+FUNCTION_BEGIN __restore_r16_through_r19_and_deallocframe
+ {
+ r19:18 = memd(fp+#-16)
+ r17:16 = memd(fp+#-8)
+ }
+ {
+ dealloc_return
+ }
+FUNCTION_END __restore_r16_through_r19_and_deallocframe
+
+FUNCTION_BEGIN __restore_r16_through_r17_and_deallocframe
+ {
+ r17:16 = memd(fp+#-8)
+ dealloc_return
+ }
+FUNCTION_END __restore_r16_through_r17_and_deallocframe
+
+FUNCTION_BEGIN __deallocframe
+ dealloc_return
+FUNCTION_END __deallocframe
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/common_entry_exit_legacy.S b/contrib/compiler-rt/lib/builtins/hexagon/common_entry_exit_legacy.S
new file mode 100644
index 000000000000..3258f15a3267
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/common_entry_exit_legacy.S
@@ -0,0 +1,157 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+/* Functions that implement common sequences in function prologues and epilogues
+ used to save code size */
+
+ .macro FUNCTION_BEGIN name
+ .text
+ .globl \name
+ .type \name, @function
+ .falign
+\name:
+ .endm
+
+ .macro FUNCTION_END name
+ .size \name, . - \name
+ .endm
+
+ .macro FALLTHROUGH_TAIL_CALL name0 name1
+ .size \name0, . - \name0
+ .globl \name1
+ .type \name1, @function
+ .falign
+\name1:
+ .endm
+
+
+
+
+/* Save r27:26 at fp+#-8, r25:24 at fp+#-16, r23:22 at fp+#-24, r21:20 at
+ fp+#-32, r19:18 at fp+#-40, and r17:16 at fp+#-48. */
+
+
+
+
+/* The compiler knows that the __save_* functions clobber LR. No other
+ registers should be used without informing the compiler. */
+
+/* Since we can only issue one store per packet, we don't hurt performance by
+ simply jumping to the right point in this sequence of stores. */
+
+FUNCTION_BEGIN __save_r27_through_r16
+ memd(fp+#-48) = r17:16
+FALLTHROUGH_TAIL_CALL __save_r27_through_r16 __save_r27_through_r18
+ memd(fp+#-40) = r19:18
+FALLTHROUGH_TAIL_CALL __save_r27_through_r18 __save_r27_through_r20
+ memd(fp+#-32) = r21:20
+FALLTHROUGH_TAIL_CALL __save_r27_through_r20 __save_r27_through_r22
+ memd(fp+#-24) = r23:22
+FALLTHROUGH_TAIL_CALL __save_r27_through_r22 __save_r27_through_r24
+ memd(fp+#-16) = r25:24
+ {
+ memd(fp+#-8) = r27:26
+ jumpr lr
+ }
+FUNCTION_END __save_r27_through_r24
+
+
+
+
+/* For each of the *_before_sibcall functions, jumpr lr is executed in parallel
+ with deallocframe. That way, the return gets the old value of lr, which is
+ where these functions need to return, and at the same time, lr gets the value
+ it needs going into the sibcall. */
+
+FUNCTION_BEGIN __restore_r27_through_r20_and_deallocframe_before_sibcall
+ {
+ r21:20 = memd(fp+#-32)
+ r23:22 = memd(fp+#-24)
+ }
+FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe_before_sibcall __restore_r27_through_r24_and_deallocframe_before_sibcall
+ {
+ r25:24 = memd(fp+#-16)
+ jump __restore_r27_through_r26_and_deallocframe_before_sibcall
+ }
+FUNCTION_END __restore_r27_through_r24_and_deallocframe_before_sibcall
+
+
+
+
+FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe_before_sibcall
+ r17:16 = memd(fp+#-48)
+FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe_before_sibcall __restore_r27_through_r18_and_deallocframe_before_sibcall
+ {
+ r19:18 = memd(fp+#-40)
+ r21:20 = memd(fp+#-32)
+ }
+FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe_before_sibcall __restore_r27_through_r22_and_deallocframe_before_sibcall
+ {
+ r23:22 = memd(fp+#-24)
+ r25:24 = memd(fp+#-16)
+ }
+FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe_before_sibcall __restore_r27_through_r26_and_deallocframe_before_sibcall
+ {
+ r27:26 = memd(fp+#-8)
+ deallocframe
+ jumpr lr
+ }
+FUNCTION_END __restore_r27_through_r26_and_deallocframe_before_sibcall
+
+
+
+
+/* Here we use the extra load bandwidth to restore LR early, allowing the return
+ to occur in parallel with the deallocframe. */
+
+FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe
+ {
+ r17:16 = memd(fp+#-48)
+ r19:18 = memd(fp+#-40)
+ }
+FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe __restore_r27_through_r20_and_deallocframe
+ {
+ r21:20 = memd(fp+#-32)
+ r23:22 = memd(fp+#-24)
+ }
+FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe __restore_r27_through_r24_and_deallocframe
+ {
+ lr = memw(fp+#4)
+ r25:24 = memd(fp+#-16)
+ }
+ {
+ r27:26 = memd(fp+#-8)
+ deallocframe
+ jumpr lr
+ }
+FUNCTION_END __restore_r27_through_r24_and_deallocframe
+
+
+
+
+/* Here the load bandwidth is maximized for all three functions. */
+
+FUNCTION_BEGIN __restore_r27_through_r18_and_deallocframe
+ {
+ r19:18 = memd(fp+#-40)
+ r21:20 = memd(fp+#-32)
+ }
+FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe __restore_r27_through_r22_and_deallocframe
+ {
+ r23:22 = memd(fp+#-24)
+ r25:24 = memd(fp+#-16)
+ }
+FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe __restore_r27_through_r26_and_deallocframe
+ {
+ r27:26 = memd(fp+#-8)
+ deallocframe
+ }
+ jumpr lr
+FUNCTION_END __restore_r27_through_r26_and_deallocframe
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/dfaddsub.S b/contrib/compiler-rt/lib/builtins/hexagon/dfaddsub.S
new file mode 100644
index 000000000000..4173f86a4f54
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/dfaddsub.S
@@ -0,0 +1,398 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+/* Double Precision Multiply */
+
+#define A r1:0
+#define AH r1
+#define AL r0
+#define B r3:2
+#define BH r3
+#define BL r2
+
+#define EXPA r4
+#define EXPB r5
+#define EXPB_A r5:4
+
+#define ZTMP r7:6
+#define ZTMPH r7
+#define ZTMPL r6
+
+#define ATMP r13:12
+#define ATMPH r13
+#define ATMPL r12
+
+#define BTMP r9:8
+#define BTMPH r9
+#define BTMPL r8
+
+#define ATMP2 r11:10
+#define ATMP2H r11
+#define ATMP2L r10
+
+#define EXPDIFF r15
+#define EXTRACTOFF r14
+#define EXTRACTAMT r15:14
+
+#define TMP r28
+
+#define MANTBITS 52
+#define HI_MANTBITS 20
+#define EXPBITS 11
+#define BIAS 1024
+#define MANTISSA_TO_INT_BIAS 52
+#define SR_BIT_INEXACT 5
+
+#ifndef SR_ROUND_OFF
+#define SR_ROUND_OFF 22
+#endif
+
+#define NORMAL p3
+#define BIGB p2
+
+#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
+#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
+#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
+#define END(TAG) .size TAG,.-TAG
+
+ .text
+ .global __hexagon_adddf3
+ .global __hexagon_subdf3
+ .type __hexagon_adddf3, @function
+ .type __hexagon_subdf3, @function
+
+Q6_ALIAS(adddf3)
+FAST_ALIAS(adddf3)
+FAST2_ALIAS(adddf3)
+Q6_ALIAS(subdf3)
+FAST_ALIAS(subdf3)
+FAST2_ALIAS(subdf3)
+
+ .p2align 5
+__hexagon_adddf3:
+ {
+ EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
+ EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)
+ ATMP = combine(##0x20000000,#0)
+ }
+ {
+ NORMAL = dfclass(A,#2)
+ NORMAL = dfclass(B,#2)
+ BTMP = ATMP
+ BIGB = cmp.gtu(EXPB,EXPA) // Is B substantially greater than A?
+ }
+ {
+ if (!NORMAL) jump .Ladd_abnormal // If abnormal, go to special code
+ if (BIGB) A = B // if B >> A, swap A and B
+ if (BIGB) B = A // If B >> A, swap A and B
+ if (BIGB) EXPB_A = combine(EXPA,EXPB) // swap exponents
+ }
+ {
+ ATMP = insert(A,#MANTBITS,#EXPBITS-2) // Q1.62
+ BTMP = insert(B,#MANTBITS,#EXPBITS-2) // Q1.62
+ EXPDIFF = sub(EXPA,EXPB)
+ ZTMP = combine(#62,#1)
+ }
+#undef BIGB
+#undef NORMAL
+#define B_POS p3
+#define A_POS p2
+#define NO_STICKIES p1
+.Ladd_continue:
+ {
+ EXPDIFF = min(EXPDIFF,ZTMPH) // If exponent difference >= ~60,
+ // will collapse to sticky bit
+ ATMP2 = neg(ATMP)
+ A_POS = cmp.gt(AH,#-1)
+ EXTRACTOFF = #0
+ }
+ {
+ if (!A_POS) ATMP = ATMP2
+ ATMP2 = extractu(BTMP,EXTRACTAMT)
+ BTMP = ASR(BTMP,EXPDIFF)
+#undef EXTRACTAMT
+#undef EXPDIFF
+#undef EXTRACTOFF
+#define ZERO r15:14
+ ZERO = #0
+ }
+ {
+ NO_STICKIES = cmp.eq(ATMP2,ZERO)
+ if (!NO_STICKIES.new) BTMPL = or(BTMPL,ZTMPL)
+ EXPB = add(EXPA,#-BIAS-60)
+ B_POS = cmp.gt(BH,#-1)
+ }
+ {
+ ATMP = add(ATMP,BTMP) // ADD!!!
+ ATMP2 = sub(ATMP,BTMP) // Negate and ADD --> SUB!!!
+ ZTMP = combine(#54,##2045)
+ }
+ {
+ p0 = cmp.gtu(EXPA,ZTMPH) // must be pretty high in case of large cancellation
+ p0 = !cmp.gtu(EXPA,ZTMPL)
+ if (!p0.new) jump:nt .Ladd_ovf_unf
+ if (!B_POS) ATMP = ATMP2 // if B neg, pick difference
+ }
+ {
+ A = convert_d2df(ATMP) // Convert to Double Precision, taking care of flags, etc. So nice!
+ p0 = cmp.eq(ATMPH,#0)
+ p0 = cmp.eq(ATMPL,#0)
+ if (p0.new) jump:nt .Ladd_zero // or maybe conversion handles zero case correctly?
+ }
+ {
+ AH += asl(EXPB,#HI_MANTBITS)
+ jumpr r31
+ }
+ .falign
+__hexagon_subdf3:
+ {
+ BH = togglebit(BH,#31)
+ jump __qdsp_adddf3
+ }
+
+
+ .falign
+.Ladd_zero:
+ // True zero, full cancellation
+ // +0 unless round towards negative infinity
+ {
+ TMP = USR
+ A = #0
+ BH = #1
+ }
+ {
+ TMP = extractu(TMP,#2,#22)
+ BH = asl(BH,#31)
+ }
+ {
+ p0 = cmp.eq(TMP,#2)
+ if (p0.new) AH = xor(AH,BH)
+ jumpr r31
+ }
+ .falign
+.Ladd_ovf_unf:
+ // Overflow or Denormal is possible
+ // Good news: Underflow flag is not possible!
+ /*
+ * ATMP has 2's complement value
+ *
+ * EXPA has A's exponent, EXPB has EXPA-BIAS-60
+ *
+ * Convert, extract exponent, add adjustment.
+ * If > 2046, overflow
+ * If <= 0, denormal
+ *
+ * Note that we've not done our zero check yet, so do that too
+ *
+ */
+ {
+ A = convert_d2df(ATMP)
+ p0 = cmp.eq(ATMPH,#0)
+ p0 = cmp.eq(ATMPL,#0)
+ if (p0.new) jump:nt .Ladd_zero
+ }
+ {
+ TMP = extractu(AH,#EXPBITS,#HI_MANTBITS)
+ AH += asl(EXPB,#HI_MANTBITS)
+ }
+ {
+ EXPB = add(EXPB,TMP)
+ B = combine(##0x00100000,#0)
+ }
+ {
+ p0 = cmp.gt(EXPB,##BIAS+BIAS-2)
+ if (p0.new) jump:nt .Ladd_ovf
+ }
+ {
+ p0 = cmp.gt(EXPB,#0)
+ if (p0.new) jumpr:t r31
+ TMP = sub(#1,EXPB)
+ }
+ {
+ B = insert(A,#MANTBITS,#0)
+ A = ATMP
+ }
+ {
+ B = lsr(B,TMP)
+ }
+ {
+ A = insert(B,#63,#0)
+ jumpr r31
+ }
+ .falign
+.Ladd_ovf:
+ // We get either max finite value or infinity. Either way, overflow+inexact
+ {
+ A = ATMP // 2's complement value
+ TMP = USR
+ ATMP = combine(##0x7fefffff,#-1) // positive max finite
+ }
+ {
+ EXPB = extractu(TMP,#2,#SR_ROUND_OFF) // rounding bits
+ TMP = or(TMP,#0x28) // inexact + overflow
+ BTMP = combine(##0x7ff00000,#0) // positive infinity
+ }
+ {
+ USR = TMP
+ EXPB ^= lsr(AH,#31) // Does sign match rounding?
+ TMP = EXPB // unmodified rounding mode
+ }
+ {
+ p0 = !cmp.eq(TMP,#1) // If not round-to-zero and
+ p0 = !cmp.eq(EXPB,#2) // Not rounding the other way,
+ if (p0.new) ATMP = BTMP // we should get infinity
+ }
+ {
+ A = insert(ATMP,#63,#0) // insert inf/maxfinite, leave sign
+ }
+ {
+ p0 = dfcmp.eq(A,A)
+ jumpr r31
+ }
+
+.Ladd_abnormal:
+ {
+ ATMP = extractu(A,#63,#0) // strip off sign
+ BTMP = extractu(B,#63,#0) // strip off sign
+ }
+ {
+ p3 = cmp.gtu(ATMP,BTMP)
+ if (!p3.new) A = B // sort values
+ if (!p3.new) B = A // sort values
+ }
+ {
+ // Any NaN --> NaN, possibly raise invalid if sNaN
+ p0 = dfclass(A,#0x0f) // A not NaN?
+ if (!p0.new) jump:nt .Linvalid_nan_add
+ if (!p3) ATMP = BTMP
+ if (!p3) BTMP = ATMP
+ }
+ {
+ // Infinity + non-infinity number is infinity
+ // Infinity + infinity --> inf or nan
+ p1 = dfclass(A,#0x08) // A is infinity
+ if (p1.new) jump:nt .Linf_add
+ }
+ {
+ p2 = dfclass(B,#0x01) // B is zero
+ if (p2.new) jump:nt .LB_zero // so return A or special 0+0
+ ATMP = #0
+ }
+ // We are left with adding one or more subnormals
+ {
+ p0 = dfclass(A,#4)
+ if (p0.new) jump:nt .Ladd_two_subnormal
+ ATMP = combine(##0x20000000,#0)
+ }
+ {
+ EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
+ EXPB = #1
+ // BTMP already ABS(B)
+ BTMP = asl(BTMP,#EXPBITS-2)
+ }
+#undef ZERO
+#define EXTRACTOFF r14
+#define EXPDIFF r15
+ {
+ ATMP = insert(A,#MANTBITS,#EXPBITS-2)
+ EXPDIFF = sub(EXPA,EXPB)
+ ZTMP = combine(#62,#1)
+ jump .Ladd_continue
+ }
+
+.Ladd_two_subnormal:
+ {
+ ATMP = extractu(A,#63,#0)
+ BTMP = extractu(B,#63,#0)
+ }
+ {
+ ATMP = neg(ATMP)
+ BTMP = neg(BTMP)
+ p0 = cmp.gt(AH,#-1)
+ p1 = cmp.gt(BH,#-1)
+ }
+ {
+ if (p0) ATMP = A
+ if (p1) BTMP = B
+ }
+ {
+ ATMP = add(ATMP,BTMP)
+ }
+ {
+ BTMP = neg(ATMP)
+ p0 = cmp.gt(ATMPH,#-1)
+ B = #0
+ }
+ {
+ if (!p0) A = BTMP
+ if (p0) A = ATMP
+ BH = ##0x80000000
+ }
+ {
+ if (!p0) AH = or(AH,BH)
+ p0 = dfcmp.eq(A,B)
+ if (p0.new) jump:nt .Lzero_plus_zero
+ }
+ {
+ jumpr r31
+ }
+
+.Linvalid_nan_add:
+ {
+ TMP = convert_df2sf(A) // will generate invalid if sNaN
+ p0 = dfclass(B,#0x0f) // if B is not NaN
+ if (p0.new) B = A // make it whatever A is
+ }
+ {
+ BL = convert_df2sf(B) // will generate invalid if sNaN
+ A = #-1
+ jumpr r31
+ }
+ .falign
+.LB_zero:
+ {
+ p0 = dfcmp.eq(ATMP,A) // is A also zero?
+ if (!p0.new) jumpr:t r31 // If not, just return A
+ }
+ // 0 + 0 is special
+ // if equal integral values, they have the same sign, which is fine for all rounding
+ // modes.
+ // If unequal in sign, we get +0 for all rounding modes except round down
+.Lzero_plus_zero:
+ {
+ p0 = cmp.eq(A,B)
+ if (p0.new) jumpr:t r31
+ }
+ {
+ TMP = USR
+ }
+ {
+ TMP = extractu(TMP,#2,#SR_ROUND_OFF)
+ A = #0
+ }
+ {
+ p0 = cmp.eq(TMP,#2)
+ if (p0.new) AH = ##0x80000000
+ jumpr r31
+ }
+.Linf_add:
+ // adding infinities is only OK if they are equal
+ {
+ p0 = !cmp.eq(AH,BH) // Do they have different signs
+ p0 = dfclass(B,#8) // And is B also infinite?
+ if (!p0.new) jumpr:t r31 // If not, just a normal inf
+ }
+ {
+ BL = ##0x7f800001 // sNAN
+ }
+ {
+ A = convert_sf2df(BL) // trigger invalid, set NaN
+ jumpr r31
+ }
+END(__hexagon_adddf3)
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/dfdiv.S b/contrib/compiler-rt/lib/builtins/hexagon/dfdiv.S
new file mode 100644
index 000000000000..0c5dbe272c89
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/dfdiv.S
@@ -0,0 +1,492 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+/* Double Precision Divide */
+
+#define A r1:0
+#define AH r1
+#define AL r0
+
+#define B r3:2
+#define BH r3
+#define BL r2
+
+#define Q r5:4
+#define QH r5
+#define QL r4
+
+#define PROD r7:6
+#define PRODHI r7
+#define PRODLO r6
+
+#define SFONE r8
+#define SFDEN r9
+#define SFERROR r10
+#define SFRECIP r11
+
+#define EXPBA r13:12
+#define EXPB r13
+#define EXPA r12
+
+#define REMSUB2 r15:14
+
+
+
+#define SIGN r28
+
+#define Q_POSITIVE p3
+#define NORMAL p2
+#define NO_OVF_UNF p1
+#define P_TMP p0
+
+#define RECIPEST_SHIFT 3
+#define QADJ 61
+
+#define DFCLASS_NORMAL 0x02
+#define DFCLASS_NUMBER 0x0F
+#define DFCLASS_INFINITE 0x08
+#define DFCLASS_ZERO 0x01
+#define DFCLASS_NONZERO (DFCLASS_NUMBER ^ DFCLASS_ZERO)
+#define DFCLASS_NONINFINITE (DFCLASS_NUMBER ^ DFCLASS_INFINITE)
+
+#define DF_MANTBITS 52
+#define DF_EXPBITS 11
+#define SF_MANTBITS 23
+#define SF_EXPBITS 8
+#define DF_BIAS 0x3ff
+
+#define SR_ROUND_OFF 22
+
+#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
+#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
+#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
+#define END(TAG) .size TAG,.-TAG
+
+ .text
+ .global __hexagon_divdf3
+ .type __hexagon_divdf3,@function
+ Q6_ALIAS(divdf3)
+ FAST_ALIAS(divdf3)
+ FAST2_ALIAS(divdf3)
+ .p2align 5
+__hexagon_divdf3:
+ {
+ NORMAL = dfclass(A,#DFCLASS_NORMAL)
+ NORMAL = dfclass(B,#DFCLASS_NORMAL)
+ EXPBA = combine(BH,AH)
+ SIGN = xor(AH,BH)
+ }
+#undef A
+#undef AH
+#undef AL
+#undef B
+#undef BH
+#undef BL
+#define REM r1:0
+#define REMHI r1
+#define REMLO r0
+#define DENOM r3:2
+#define DENOMHI r3
+#define DENOMLO r2
+ {
+ if (!NORMAL) jump .Ldiv_abnormal
+ PROD = extractu(DENOM,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
+ SFONE = ##0x3f800001
+ }
+ {
+ SFDEN = or(SFONE,PRODLO)
+ EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
+ EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
+ Q_POSITIVE = cmp.gt(SIGN,#-1)
+ }
+#undef SIGN
+#define ONE r28
+.Ldenorm_continue:
+ {
+ SFRECIP,P_TMP = sfrecipa(SFONE,SFDEN)
+ SFERROR = and(SFONE,#-2)
+ ONE = #1
+ EXPA = sub(EXPA,EXPB)
+ }
+#undef EXPB
+#define RECIPEST r13
+ {
+ SFERROR -= sfmpy(SFRECIP,SFDEN):lib
+ REMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)
+ RECIPEST = ##0x00800000 << RECIPEST_SHIFT
+ }
+ {
+ SFRECIP += sfmpy(SFRECIP,SFERROR):lib
+ DENOMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)
+ SFERROR = and(SFONE,#-2)
+ }
+ {
+ SFERROR -= sfmpy(SFRECIP,SFDEN):lib
+ QH = #-DF_BIAS+1
+ QL = #DF_BIAS-1
+ }
+ {
+ SFRECIP += sfmpy(SFRECIP,SFERROR):lib
+ NO_OVF_UNF = cmp.gt(EXPA,QH)
+ NO_OVF_UNF = !cmp.gt(EXPA,QL)
+ }
+ {
+ RECIPEST = insert(SFRECIP,#SF_MANTBITS,#RECIPEST_SHIFT)
+ Q = #0
+ EXPA = add(EXPA,#-QADJ)
+ }
+#undef SFERROR
+#undef SFRECIP
+#define TMP r10
+#define TMP1 r11
+ {
+ RECIPEST = add(RECIPEST,#((-3) << RECIPEST_SHIFT))
+ }
+
+#define DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA) \
+ { \
+ PROD = mpyu(RECIPEST,REMHI); \
+ REM = asl(REM,# ## ( REMSHIFT )); \
+ }; \
+ { \
+ PRODLO = # ## 0; \
+ REM -= mpyu(PRODHI,DENOMLO); \
+ REMSUB2 = mpyu(PRODHI,DENOMHI); \
+ }; \
+ { \
+ Q += QSHIFTINSN(PROD, # ## ( QSHIFT )); \
+ REM -= asl(REMSUB2, # ## 32); \
+ EXTRA \
+ }
+
+
+ DIV_ITER1B(ASL,14,15,)
+ DIV_ITER1B(ASR,1,15,)
+ DIV_ITER1B(ASR,16,15,)
+ DIV_ITER1B(ASR,31,15,PROD=# ( 0 );)
+
+#undef REMSUB2
+#define TMPPAIR r15:14
+#define TMPPAIRHI r15
+#define TMPPAIRLO r14
+#undef RECIPEST
+#define EXPB r13
+ {
+ // compare or sub with carry
+ TMPPAIR = sub(REM,DENOM)
+ P_TMP = cmp.gtu(DENOM,REM)
+ // set up amt to add to q
+ if (!P_TMP.new) PRODLO = #2
+ }
+ {
+ Q = add(Q,PROD)
+ if (!P_TMP) REM = TMPPAIR
+ TMPPAIR = #0
+ }
+ {
+ P_TMP = cmp.eq(REM,TMPPAIR)
+ if (!P_TMP.new) QL = or(QL,ONE)
+ }
+ {
+ PROD = neg(Q)
+ }
+ {
+ if (!Q_POSITIVE) Q = PROD
+ }
+#undef REM
+#undef REMHI
+#undef REMLO
+#undef DENOM
+#undef DENOMLO
+#undef DENOMHI
+#define A r1:0
+#define AH r1
+#define AL r0
+#define B r3:2
+#define BH r3
+#define BL r2
+ {
+ A = convert_d2df(Q)
+ if (!NO_OVF_UNF) jump .Ldiv_ovf_unf
+ }
+ {
+ AH += asl(EXPA,#DF_MANTBITS-32)
+ jumpr r31
+ }
+
+.Ldiv_ovf_unf:
+ {
+ AH += asl(EXPA,#DF_MANTBITS-32)
+ EXPB = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32)
+ }
+ {
+ PROD = abs(Q)
+ EXPA = add(EXPA,EXPB)
+ }
+ {
+ P_TMP = cmp.gt(EXPA,##DF_BIAS+DF_BIAS) // overflow
+ if (P_TMP.new) jump:nt .Ldiv_ovf
+ }
+ {
+ P_TMP = cmp.gt(EXPA,#0)
+ if (P_TMP.new) jump:nt .Lpossible_unf // round up to normal possible...
+ }
+ /* Underflow */
+ /* We know what the infinite range exponent should be (EXPA) */
+ /* Q is 2's complement, PROD is abs(Q) */
+ /* Normalize Q, shift right, add a high bit, convert, change exponent */
+
+#define FUDGE1 7 // how much to shift right
+#define FUDGE2 4 // how many guard/round to keep at lsbs
+
+ {
+ EXPB = add(clb(PROD),#-1) // doesn't need to be added in since
+ EXPA = sub(#FUDGE1,EXPA) // we extract post-converted exponent
+ TMP = USR
+ TMP1 = #63
+ }
+ {
+ EXPB = min(EXPA,TMP1)
+ TMP1 = or(TMP,#0x030)
+ PROD = asl(PROD,EXPB)
+ EXPA = #0
+ }
+ {
+ TMPPAIR = extractu(PROD,EXPBA) // bits that will get shifted out
+ PROD = lsr(PROD,EXPB) // shift out bits
+ B = #1
+ }
+ {
+ P_TMP = cmp.gtu(B,TMPPAIR)
+ if (!P_TMP.new) PRODLO = or(BL,PRODLO)
+ PRODHI = setbit(PRODHI,#DF_MANTBITS-32+FUDGE2)
+ }
+ {
+ Q = neg(PROD)
+ P_TMP = bitsclr(PRODLO,#(1<<FUDGE2)-1)
+ if (!P_TMP.new) TMP = TMP1
+ }
+ {
+ USR = TMP
+ if (Q_POSITIVE) Q = PROD
+ TMP = #-DF_BIAS-(DF_MANTBITS+FUDGE2)
+ }
+ {
+ A = convert_d2df(Q)
+ }
+ {
+ AH += asl(TMP,#DF_MANTBITS-32)
+ jumpr r31
+ }
+
+
+.Lpossible_unf:
+ /* If upper parts of Q were all F's, but abs(A) == 0x00100000_00000000, we rounded up to min_normal */
+ /* The answer is correct, but we need to raise Underflow */
+ {
+ B = extractu(A,#63,#0)
+ TMPPAIR = combine(##0x00100000,#0) // min normal
+ TMP = #0x7FFF
+ }
+ {
+ P_TMP = dfcmp.eq(TMPPAIR,B) // Is everything zero in the rounded value...
+ P_TMP = bitsset(PRODHI,TMP) // but a bunch of bits set in the unrounded abs(quotient)?
+ }
+
+#if (__HEXAGON_ARCH__ == 60)
+ TMP = USR // If not, just return
+ if (!P_TMP) jumpr r31 // Else, we want to set Unf+Inexact
+ // Note that inexact is already set...
+#else
+ {
+ if (!P_TMP) jumpr r31 // If not, just return
+ TMP = USR // Else, we want to set Unf+Inexact
+ } // Note that inexact is already set...
+#endif
+ {
+ TMP = or(TMP,#0x30)
+ }
+ {
+ USR = TMP
+ }
+ {
+ p0 = dfcmp.eq(A,A)
+ jumpr r31
+ }
+
+.Ldiv_ovf:
+ /*
+ * Raise Overflow, and choose the correct overflow value (saturated normal or infinity)
+ */
+ {
+ TMP = USR
+ B = combine(##0x7fefffff,#-1)
+ AH = mux(Q_POSITIVE,#0,#-1)
+ }
+ {
+ PROD = combine(##0x7ff00000,#0)
+ QH = extractu(TMP,#2,#SR_ROUND_OFF)
+ TMP = or(TMP,#0x28)
+ }
+ {
+ USR = TMP
+ QH ^= lsr(AH,#31)
+ QL = QH
+ }
+ {
+ p0 = !cmp.eq(QL,#1) // if not round-to-zero
+ p0 = !cmp.eq(QH,#2) // and not rounding the other way
+ if (p0.new) B = PROD // go to inf
+ p0 = dfcmp.eq(B,B) // get exceptions
+ }
+ {
+ A = insert(B,#63,#0)
+ jumpr r31
+ }
+
+#undef ONE
+#define SIGN r28
+#undef NORMAL
+#undef NO_OVF_UNF
+#define P_INF p1
+#define P_ZERO p2
+.Ldiv_abnormal:
+ {
+ P_TMP = dfclass(A,#DFCLASS_NUMBER)
+ P_TMP = dfclass(B,#DFCLASS_NUMBER)
+ Q_POSITIVE = cmp.gt(SIGN,#-1)
+ }
+ {
+ P_INF = dfclass(A,#DFCLASS_INFINITE)
+ P_INF = dfclass(B,#DFCLASS_INFINITE)
+ }
+ {
+ P_ZERO = dfclass(A,#DFCLASS_ZERO)
+ P_ZERO = dfclass(B,#DFCLASS_ZERO)
+ }
+ {
+ if (!P_TMP) jump .Ldiv_nan
+ if (P_INF) jump .Ldiv_invalid
+ }
+ {
+ if (P_ZERO) jump .Ldiv_invalid
+ }
+ {
+ P_ZERO = dfclass(A,#DFCLASS_NONZERO) // nonzero
+ P_ZERO = dfclass(B,#DFCLASS_NONINFINITE) // non-infinite
+ }
+ {
+ P_INF = dfclass(A,#DFCLASS_NONINFINITE) // non-infinite
+ P_INF = dfclass(B,#DFCLASS_NONZERO) // nonzero
+ }
+ {
+ if (!P_ZERO) jump .Ldiv_zero_result
+ if (!P_INF) jump .Ldiv_inf_result
+ }
+ /* Now we've narrowed it down to (de)normal / (de)normal */
+ /* Set up A/EXPA B/EXPB and go back */
+#undef P_ZERO
+#undef P_INF
+#define P_TMP2 p1
+ {
+ P_TMP = dfclass(A,#DFCLASS_NORMAL)
+ P_TMP2 = dfclass(B,#DFCLASS_NORMAL)
+ TMP = ##0x00100000
+ }
+ {
+ EXPBA = combine(BH,AH)
+ AH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32) // clear out hidden bit, sign bit
+ BH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32) // clear out hidden bit, sign bit
+ }
+ {
+ if (P_TMP) AH = or(AH,TMP) // if normal, add back in hidden bit
+ if (P_TMP2) BH = or(BH,TMP) // if normal, add back in hidden bit
+ }
+ {
+ QH = add(clb(A),#-DF_EXPBITS)
+ QL = add(clb(B),#-DF_EXPBITS)
+ TMP = #1
+ }
+ {
+ EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
+ EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
+ }
+ {
+ A = asl(A,QH)
+ B = asl(B,QL)
+ if (!P_TMP) EXPA = sub(TMP,QH)
+ if (!P_TMP2) EXPB = sub(TMP,QL)
+ } // recreate values needed by resume coke
+ {
+ PROD = extractu(B,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
+ }
+ {
+ SFDEN = or(SFONE,PRODLO)
+ jump .Ldenorm_continue
+ }
+
+.Ldiv_zero_result:
+ {
+ AH = xor(AH,BH)
+ B = #0
+ }
+ {
+ A = insert(B,#63,#0)
+ jumpr r31
+ }
+.Ldiv_inf_result:
+ {
+ p2 = dfclass(B,#DFCLASS_ZERO)
+ p2 = dfclass(A,#DFCLASS_NONINFINITE)
+ }
+ {
+ TMP = USR
+ if (!p2) jump 1f
+ AH = xor(AH,BH)
+ }
+ {
+ TMP = or(TMP,#0x04) // DBZ
+ }
+ {
+ USR = TMP
+ }
+1:
+ {
+ B = combine(##0x7ff00000,#0)
+ p0 = dfcmp.uo(B,B) // take possible exception
+ }
+ {
+ A = insert(B,#63,#0)
+ jumpr r31
+ }
+.Ldiv_nan:
+ {
+ p0 = dfclass(A,#0x10)
+ p1 = dfclass(B,#0x10)
+ if (!p0.new) A = B
+ if (!p1.new) B = A
+ }
+ {
+ QH = convert_df2sf(A) // get possible invalid exceptions
+ QL = convert_df2sf(B)
+ }
+ {
+ A = #-1
+ jumpr r31
+ }
+
+.Ldiv_invalid:
+ {
+ TMP = ##0x7f800001
+ }
+ {
+ A = convert_sf2df(TMP) // get invalid, get DF qNaN
+ jumpr r31
+ }
+END(__hexagon_divdf3)
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/dffma.S b/contrib/compiler-rt/lib/builtins/hexagon/dffma.S
new file mode 100644
index 000000000000..97b885a3bf27
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/dffma.S
@@ -0,0 +1,705 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
+#define END(TAG) .size TAG,.-TAG
+
+/* Double Precision Multiply */
+
+
+#define A r1:0
+#define AH r1
+#define AL r0
+#define B r3:2
+#define BH r3
+#define BL r2
+#define C r5:4
+#define CH r5
+#define CL r4
+
+
+
+#define BTMP r15:14
+#define BTMPH r15
+#define BTMPL r14
+
+#define ATMP r13:12
+#define ATMPH r13
+#define ATMPL r12
+
+#define CTMP r11:10
+#define CTMPH r11
+#define CTMPL r10
+
+#define PP_LL r9:8
+#define PP_LL_H r9
+#define PP_LL_L r8
+
+#define PP_ODD r7:6
+#define PP_ODD_H r7
+#define PP_ODD_L r6
+
+
+#define PP_HH r17:16
+#define PP_HH_H r17
+#define PP_HH_L r16
+
+#define EXPA r18
+#define EXPB r19
+#define EXPBA r19:18
+
+#define TMP r28
+
+#define P_TMP p0
+#define PROD_NEG p3
+#define EXACT p2
+#define SWAP p1
+
+#define MANTBITS 52
+#define HI_MANTBITS 20
+#define EXPBITS 11
+#define BIAS 1023
+#define STACKSPACE 32
+
+#define ADJUST 4
+
+#define FUDGE 7
+#define FUDGE2 3
+
+#ifndef SR_ROUND_OFF
+#define SR_ROUND_OFF 22
+#endif
+
+ /*
+ * First, classify for normal values, and abort if abnormal
+ *
+ * Next, unpack mantissa into 0x1000_0000_0000_0000 + mant<<8
+ *
+ * Since we know that the 2 MSBs of the H registers is zero, we should never carry
+ * the partial products that involve the H registers
+ *
+ * Try to buy X slots, at the expense of latency if needed
+ *
+ * We will have PP_HH with the upper bits of the product, PP_LL with the lower
+ * PP_HH can have a maximum of 0x03FF_FFFF_FFFF_FFFF or thereabouts
+ * PP_HH can have a minimum of 0x0100_0000_0000_0000
+ *
+ * 0x0100_0000_0000_0000 has EXP of EXPA+EXPB-BIAS
+ *
+ * We need to align CTMP.
+ * If CTMP >> PP, convert PP to 64 bit with sticky, align CTMP, and follow normal add
+ * If CTMP << PP align CTMP and add 128 bits. Then compute sticky
+ * If CTMP ~= PP, align CTMP and add 128 bits. May have massive cancellation.
+ *
+ * Convert partial product and CTMP to 2's complement prior to addition
+ *
+ * After we add, we need to normalize into upper 64 bits, then compute sticky.
+ *
+ *
+ */
+
+ .text
+ .global __hexagon_fmadf4
+ .type __hexagon_fmadf4,@function
+ .global __hexagon_fmadf5
+ .type __hexagon_fmadf5,@function
+ .global fma
+ .type fma,@function
+ Q6_ALIAS(fmadf5)
+ .p2align 5
+__hexagon_fmadf4:
+__hexagon_fmadf5:
+fma:
+ {
+ P_TMP = dfclass(A,#2)
+ P_TMP = dfclass(B,#2)
+ ATMP = #0
+ BTMP = #0
+ }
+ {
+ ATMP = insert(A,#MANTBITS,#EXPBITS-3)
+ BTMP = insert(B,#MANTBITS,#EXPBITS-3)
+ PP_ODD_H = ##0x10000000
+ allocframe(#STACKSPACE)
+ }
+ {
+ PP_LL = mpyu(ATMPL,BTMPL)
+ if (!P_TMP) jump .Lfma_abnormal_ab
+ ATMPH = or(ATMPH,PP_ODD_H)
+ BTMPH = or(BTMPH,PP_ODD_H)
+ }
+ {
+ P_TMP = dfclass(C,#2)
+ if (!P_TMP.new) jump:nt .Lfma_abnormal_c
+ CTMP = combine(PP_ODD_H,#0)
+ PP_ODD = combine(#0,PP_LL_H)
+ }
+.Lfma_abnormal_c_restart:
+ {
+ PP_ODD += mpyu(BTMPL,ATMPH)
+ CTMP = insert(C,#MANTBITS,#EXPBITS-3)
+ memd(r29+#0) = PP_HH
+ memd(r29+#8) = EXPBA
+ }
+ {
+ PP_ODD += mpyu(ATMPL,BTMPH)
+ EXPBA = neg(CTMP)
+ P_TMP = cmp.gt(CH,#-1)
+ TMP = xor(AH,BH)
+ }
+ {
+ EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
+ EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)
+ PP_HH = combine(#0,PP_ODD_H)
+ if (!P_TMP) CTMP = EXPBA
+ }
+ {
+ PP_HH += mpyu(ATMPH,BTMPH)
+ PP_LL = combine(PP_ODD_L,PP_LL_L)
+#undef PP_ODD
+#undef PP_ODD_H
+#undef PP_ODD_L
+#undef ATMP
+#undef ATMPL
+#undef ATMPH
+#undef BTMP
+#undef BTMPL
+#undef BTMPH
+#define RIGHTLEFTSHIFT r13:12
+#define RIGHTSHIFT r13
+#define LEFTSHIFT r12
+
+ EXPA = add(EXPA,EXPB)
+#undef EXPB
+#undef EXPBA
+#define EXPC r19
+#define EXPCA r19:18
+ EXPC = extractu(CH,#EXPBITS,#HI_MANTBITS)
+ }
+ /* PP_HH:PP_LL now has product */
+ /* CTMP is negated */
+ /* EXPA,B,C are extracted */
+ /*
+ * We need to negate PP
+ * Since we will be adding with carry later, if we need to negate,
+ * just invert all bits now, which we can do conditionally and in parallel
+ */
+#define PP_HH_TMP r15:14
+#define PP_LL_TMP r7:6
+ {
+ EXPA = add(EXPA,#-BIAS+(ADJUST))
+ PROD_NEG = !cmp.gt(TMP,#-1)
+ PP_LL_TMP = #0
+ PP_HH_TMP = #0
+ }
+ {
+ PP_LL_TMP = sub(PP_LL_TMP,PP_LL,PROD_NEG):carry
+ P_TMP = !cmp.gt(TMP,#-1)
+ SWAP = cmp.gt(EXPC,EXPA) // If C >> PP
+ if (SWAP.new) EXPCA = combine(EXPA,EXPC)
+ }
+ {
+ PP_HH_TMP = sub(PP_HH_TMP,PP_HH,PROD_NEG):carry
+ if (P_TMP) PP_LL = PP_LL_TMP
+#undef PP_LL_TMP
+#define CTMP2 r7:6
+#define CTMP2H r7
+#define CTMP2L r6
+ CTMP2 = #0
+ EXPC = sub(EXPA,EXPC)
+ }
+ {
+ if (P_TMP) PP_HH = PP_HH_TMP
+ P_TMP = cmp.gt(EXPC,#63)
+ if (SWAP) PP_LL = CTMP2
+ if (SWAP) CTMP2 = PP_LL
+ }
+#undef PP_HH_TMP
+//#define ONE r15:14
+//#define S_ONE r14
+#define ZERO r15:14
+#define S_ZERO r15
+#undef PROD_NEG
+#define P_CARRY p3
+ {
+ if (SWAP) PP_HH = CTMP // Swap C and PP
+ if (SWAP) CTMP = PP_HH
+ if (P_TMP) EXPC = add(EXPC,#-64)
+ TMP = #63
+ }
+ {
+ // If diff > 63, pre-shift-right by 64...
+ if (P_TMP) CTMP2 = CTMP
+ TMP = asr(CTMPH,#31)
+ RIGHTSHIFT = min(EXPC,TMP)
+ LEFTSHIFT = #0
+ }
+#undef C
+#undef CH
+#undef CL
+#define STICKIES r5:4
+#define STICKIESH r5
+#define STICKIESL r4
+ {
+ if (P_TMP) CTMP = combine(TMP,TMP) // sign extension of pre-shift-right-64
+ STICKIES = extract(CTMP2,RIGHTLEFTSHIFT)
+ CTMP2 = lsr(CTMP2,RIGHTSHIFT)
+ LEFTSHIFT = sub(#64,RIGHTSHIFT)
+ }
+ {
+ ZERO = #0
+ TMP = #-2
+ CTMP2 |= lsl(CTMP,LEFTSHIFT)
+ CTMP = asr(CTMP,RIGHTSHIFT)
+ }
+ {
+ P_CARRY = cmp.gtu(STICKIES,ZERO) // If we have sticky bits from C shift
+ if (P_CARRY.new) CTMP2L = and(CTMP2L,TMP) // make sure adding 1 == OR
+#undef ZERO
+#define ONE r15:14
+#define S_ONE r14
+ ONE = #1
+ STICKIES = #0
+ }
+ {
+ PP_LL = add(CTMP2,PP_LL,P_CARRY):carry // use the carry to add the sticky
+ }
+ {
+ PP_HH = add(CTMP,PP_HH,P_CARRY):carry
+ TMP = #62
+ }
+ /*
+ * PP_HH:PP_LL now holds the sum
+ * We may need to normalize left, up to ??? bits.
+ *
+ * I think that if we have massive cancellation, the range we normalize by
+ * is still limited
+ */
+ {
+ LEFTSHIFT = add(clb(PP_HH),#-2)
+ if (!cmp.eq(LEFTSHIFT.new,TMP)) jump:t 1f // all sign bits?
+ }
+ /* We had all sign bits, shift left by 62. */
+ {
+ CTMP = extractu(PP_LL,#62,#2)
+ PP_LL = asl(PP_LL,#62)
+ EXPA = add(EXPA,#-62) // And adjust exponent of result
+ }
+ {
+ PP_HH = insert(CTMP,#62,#0) // Then shift 63
+ }
+ {
+ LEFTSHIFT = add(clb(PP_HH),#-2)
+ }
+ .falign
+1:
+ {
+ CTMP = asl(PP_HH,LEFTSHIFT)
+ STICKIES |= asl(PP_LL,LEFTSHIFT)
+ RIGHTSHIFT = sub(#64,LEFTSHIFT)
+ EXPA = sub(EXPA,LEFTSHIFT)
+ }
+ {
+ CTMP |= lsr(PP_LL,RIGHTSHIFT)
+ EXACT = cmp.gtu(ONE,STICKIES)
+ TMP = #BIAS+BIAS-2
+ }
+ {
+ if (!EXACT) CTMPL = or(CTMPL,S_ONE)
+ // If EXPA is overflow/underflow, jump to ovf_unf
+ P_TMP = !cmp.gt(EXPA,TMP)
+ P_TMP = cmp.gt(EXPA,#1)
+ if (!P_TMP.new) jump:nt .Lfma_ovf_unf
+ }
+ {
+ // XXX: FIXME: should PP_HH for check of zero be CTMP?
+ P_TMP = cmp.gtu(ONE,CTMP) // is result true zero?
+ A = convert_d2df(CTMP)
+ EXPA = add(EXPA,#-BIAS-60)
+ PP_HH = memd(r29+#0)
+ }
+ {
+ AH += asl(EXPA,#HI_MANTBITS)
+ EXPCA = memd(r29+#8)
+ if (!P_TMP) dealloc_return // not zero, return
+ }
+.Ladd_yields_zero:
+ /* We had full cancellation. Return +/- zero (-0 when round-down) */
+ {
+ TMP = USR
+ A = #0
+ }
+ {
+ TMP = extractu(TMP,#2,#SR_ROUND_OFF)
+ PP_HH = memd(r29+#0)
+ EXPCA = memd(r29+#8)
+ }
+ {
+ p0 = cmp.eq(TMP,#2)
+ if (p0.new) AH = ##0x80000000
+ dealloc_return
+ }
+
+#undef RIGHTLEFTSHIFT
+#undef RIGHTSHIFT
+#undef LEFTSHIFT
+#undef CTMP2
+#undef CTMP2H
+#undef CTMP2L
+
+.Lfma_ovf_unf:
+ {
+ p0 = cmp.gtu(ONE,CTMP)
+ if (p0.new) jump:nt .Ladd_yields_zero
+ }
+ {
+ A = convert_d2df(CTMP)
+ EXPA = add(EXPA,#-BIAS-60)
+ TMP = EXPA
+ }
+#define NEW_EXPB r7
+#define NEW_EXPA r6
+ {
+ AH += asl(EXPA,#HI_MANTBITS)
+ NEW_EXPB = extractu(AH,#EXPBITS,#HI_MANTBITS)
+ }
+ {
+ NEW_EXPA = add(EXPA,NEW_EXPB)
+ PP_HH = memd(r29+#0)
+ EXPCA = memd(r29+#8)
+#undef PP_HH
+#undef PP_HH_H
+#undef PP_HH_L
+#undef EXPCA
+#undef EXPC
+#undef EXPA
+#undef PP_LL
+#undef PP_LL_H
+#undef PP_LL_L
+#define EXPA r6
+#define EXPB r7
+#define EXPBA r7:6
+#define ATMP r9:8
+#define ATMPH r9
+#define ATMPL r8
+#undef NEW_EXPB
+#undef NEW_EXPA
+ ATMP = abs(CTMP)
+ }
+ {
+ p0 = cmp.gt(EXPA,##BIAS+BIAS)
+ if (p0.new) jump:nt .Lfma_ovf
+ }
+ {
+ p0 = cmp.gt(EXPA,#0)
+ if (p0.new) jump:nt .Lpossible_unf
+ }
+ {
+ // TMP has original EXPA.
+ // ATMP is corresponding value
+ // Normalize ATMP and shift right to correct location
+ EXPB = add(clb(ATMP),#-2) // Amount to left shift to normalize
+ EXPA = sub(#1+5,TMP) // Amount to right shift to denormalize
+ p3 = cmp.gt(CTMPH,#-1)
+ }
+ /* Underflow */
+ /* We know that the infinte range exponent should be EXPA */
+ /* CTMP is 2's complement, ATMP is abs(CTMP) */
+ {
+ EXPA = add(EXPA,EXPB) // how much to shift back right
+ ATMP = asl(ATMP,EXPB) // shift left
+ AH = USR
+ TMP = #63
+ }
+ {
+ EXPB = min(EXPA,TMP)
+ EXPA = #0
+ AL = #0x0030
+ }
+ {
+ B = extractu(ATMP,EXPBA)
+ ATMP = asr(ATMP,EXPB)
+ }
+ {
+ p0 = cmp.gtu(ONE,B)
+ if (!p0.new) ATMPL = or(ATMPL,S_ONE)
+ ATMPH = setbit(ATMPH,#HI_MANTBITS+FUDGE2)
+ }
+ {
+ CTMP = neg(ATMP)
+ p1 = bitsclr(ATMPL,#(1<<FUDGE2)-1)
+ if (!p1.new) AH = or(AH,AL)
+ B = #0
+ }
+ {
+ if (p3) CTMP = ATMP
+ USR = AH
+ TMP = #-BIAS-(MANTBITS+FUDGE2)
+ }
+ {
+ A = convert_d2df(CTMP)
+ }
+ {
+ AH += asl(TMP,#HI_MANTBITS)
+ dealloc_return
+ }
+.Lpossible_unf:
+ {
+ TMP = ##0x7fefffff
+ ATMP = abs(CTMP)
+ }
+ {
+ p0 = cmp.eq(AL,#0)
+ p0 = bitsclr(AH,TMP)
+ if (!p0.new) dealloc_return:t
+ TMP = #0x7fff
+ }
+ {
+ p0 = bitsset(ATMPH,TMP)
+ BH = USR
+ BL = #0x0030
+ }
+ {
+ if (p0) BH = or(BH,BL)
+ }
+ {
+ USR = BH
+ }
+ {
+ p0 = dfcmp.eq(A,A)
+ dealloc_return
+ }
+.Lfma_ovf:
+ {
+ TMP = USR
+ CTMP = combine(##0x7fefffff,#-1)
+ A = CTMP
+ }
+ {
+ ATMP = combine(##0x7ff00000,#0)
+ BH = extractu(TMP,#2,#SR_ROUND_OFF)
+ TMP = or(TMP,#0x28)
+ }
+ {
+ USR = TMP
+ BH ^= lsr(AH,#31)
+ BL = BH
+ }
+ {
+ p0 = !cmp.eq(BL,#1)
+ p0 = !cmp.eq(BH,#2)
+ }
+ {
+ p0 = dfcmp.eq(ATMP,ATMP)
+ if (p0.new) CTMP = ATMP
+ }
+ {
+ A = insert(CTMP,#63,#0)
+ dealloc_return
+ }
+#undef CTMP
+#undef CTMPH
+#undef CTMPL
+#define BTMP r11:10
+#define BTMPH r11
+#define BTMPL r10
+
+#undef STICKIES
+#undef STICKIESH
+#undef STICKIESL
+#define C r5:4
+#define CH r5
+#define CL r4
+
+.Lfma_abnormal_ab:
+ {
+ ATMP = extractu(A,#63,#0)
+ BTMP = extractu(B,#63,#0)
+ deallocframe
+ }
+ {
+ p3 = cmp.gtu(ATMP,BTMP)
+ if (!p3.new) A = B // sort values
+ if (!p3.new) B = A
+ }
+ {
+ p0 = dfclass(A,#0x0f) // A NaN?
+ if (!p0.new) jump:nt .Lnan
+ if (!p3) ATMP = BTMP
+ if (!p3) BTMP = ATMP
+ }
+ {
+ p1 = dfclass(A,#0x08) // A is infinity
+ p1 = dfclass(B,#0x0e) // B is nonzero
+ }
+ {
+ p0 = dfclass(A,#0x08) // a is inf
+ p0 = dfclass(B,#0x01) // b is zero
+ }
+ {
+ if (p1) jump .Lab_inf
+ p2 = dfclass(B,#0x01)
+ }
+ {
+ if (p0) jump .Linvalid
+ if (p2) jump .Lab_true_zero
+ TMP = ##0x7c000000
+ }
+ // We are left with a normal or subnormal times a subnormal, A > B
+ // If A and B are both very small, we will go to a single sticky bit; replace
+ // A and B lower 63 bits with 0x0010_0000_0000_0000, which yields equivalent results
+ // if A and B might multiply to something bigger, decrease A exp and increase B exp
+ // and start over
+ {
+ p0 = bitsclr(AH,TMP)
+ if (p0.new) jump:nt .Lfma_ab_tiny
+ }
+ {
+ TMP = add(clb(BTMP),#-EXPBITS)
+ }
+ {
+ BTMP = asl(BTMP,TMP)
+ }
+ {
+ B = insert(BTMP,#63,#0)
+ AH -= asl(TMP,#HI_MANTBITS)
+ }
+ jump fma
+
+.Lfma_ab_tiny:
+ ATMP = combine(##0x00100000,#0)
+ {
+ A = insert(ATMP,#63,#0)
+ B = insert(ATMP,#63,#0)
+ }
+ jump fma
+
+.Lab_inf:
+ {
+ B = lsr(B,#63)
+ p0 = dfclass(C,#0x10)
+ }
+ {
+ A ^= asl(B,#63)
+ if (p0) jump .Lnan
+ }
+ {
+ p1 = dfclass(C,#0x08)
+ if (p1.new) jump:nt .Lfma_inf_plus_inf
+ }
+ /* A*B is +/- inf, C is finite. Return A */
+ {
+ jumpr r31
+ }
+ .falign
+.Lfma_inf_plus_inf:
+ { // adding infinities of different signs is invalid
+ p0 = dfcmp.eq(A,C)
+ if (!p0.new) jump:nt .Linvalid
+ }
+ {
+ jumpr r31
+ }
+
+.Lnan:
+ {
+ p0 = dfclass(B,#0x10)
+ p1 = dfclass(C,#0x10)
+ if (!p0.new) B = A
+ if (!p1.new) C = A
+ }
+ { // find sNaNs
+ BH = convert_df2sf(B)
+ BL = convert_df2sf(C)
+ }
+ {
+ BH = convert_df2sf(A)
+ A = #-1
+ jumpr r31
+ }
+
+.Linvalid:
+ {
+ TMP = ##0x7f800001 // sp snan
+ }
+ {
+ A = convert_sf2df(TMP)
+ jumpr r31
+ }
+
+.Lab_true_zero:
+ // B is zero, A is finite number
+ {
+ p0 = dfclass(C,#0x10)
+ if (p0.new) jump:nt .Lnan
+ if (p0.new) A = C
+ }
+ {
+ p0 = dfcmp.eq(B,C) // is C also zero?
+ AH = lsr(AH,#31) // get sign
+ }
+ {
+ BH ^= asl(AH,#31) // form correctly signed zero in B
+ if (!p0) A = C // If C is not zero, return C
+ if (!p0) jumpr r31
+ }
+ /* B has correctly signed zero, C is also zero */
+.Lzero_plus_zero:
+ {
+ p0 = cmp.eq(B,C) // yes, scalar equals. +0++0 or -0+-0
+ if (p0.new) jumpr:t r31
+ A = B
+ }
+ {
+ TMP = USR
+ }
+ {
+ TMP = extractu(TMP,#2,#SR_ROUND_OFF)
+ A = #0
+ }
+ {
+ p0 = cmp.eq(TMP,#2)
+ if (p0.new) AH = ##0x80000000
+ jumpr r31
+ }
+#undef BTMP
+#undef BTMPH
+#undef BTMPL
+#define CTMP r11:10
+ .falign
+.Lfma_abnormal_c:
+ /* We know that AB is normal * normal */
+ /* C is not normal: zero, subnormal, inf, or NaN. */
+ {
+ p0 = dfclass(C,#0x10) // is C NaN?
+ if (p0.new) jump:nt .Lnan
+ if (p0.new) A = C // move NaN to A
+ deallocframe
+ }
+ {
+ p0 = dfclass(C,#0x08) // is C inf?
+ if (p0.new) A = C // return C
+ if (p0.new) jumpr:nt r31
+ }
+ // zero or subnormal
+ // If we have a zero, and we know AB is normal*normal, we can just call normal multiply
+ {
+ p0 = dfclass(C,#0x01) // is C zero?
+ if (p0.new) jump:nt __hexagon_muldf3
+ TMP = #1
+ }
+ // Left with: subnormal
+ // Adjust C and jump back to restart
+ {
+ allocframe(#STACKSPACE) // oops, deallocated above, re-allocate frame
+ CTMP = #0
+ CH = insert(TMP,#EXPBITS,#HI_MANTBITS)
+ jump .Lfma_abnormal_c_restart
+ }
+END(fma)
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/dfminmax.S b/contrib/compiler-rt/lib/builtins/hexagon/dfminmax.S
new file mode 100644
index 000000000000..41122911f183
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/dfminmax.S
@@ -0,0 +1,79 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define A r1:0
+#define B r3:2
+#define ATMP r5:4
+
+
+#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
+#define END(TAG) .size TAG,.-TAG
+
+/*
+ * Min and Max return A if B is NaN, or B if A is NaN
+ * Otherwise, they return the smaller or bigger value
+ *
+ * If values are equal, we want to favor -0.0 for min and +0.0 for max.
+ */
+
+/*
+ * Compares always return false for NaN
+ * if (isnan(A)) A = B; if (A > B) A = B will only trigger at most one of those options.
+ */
+ .text
+ .global __hexagon_mindf3
+ .global __hexagon_maxdf3
+ .global fmin
+ .type fmin,@function
+ .global fmax
+ .type fmax,@function
+ .type __hexagon_mindf3,@function
+ .type __hexagon_maxdf3,@function
+ Q6_ALIAS(mindf3)
+ Q6_ALIAS(maxdf3)
+ .p2align 5
+__hexagon_mindf3:
+fmin:
+ {
+ p0 = dfclass(A,#0x10) // If A is a number
+ p1 = dfcmp.gt(A,B) // AND B > A, don't swap
+ ATMP = A
+ }
+ {
+ if (p0) A = B // if A is NaN use B
+ if (p1) A = B // gt is always false if either is NaN
+ p2 = dfcmp.eq(A,B) // if A == B
+ if (!p2.new) jumpr:t r31
+ }
+ /* A == B, return A|B to select -0.0 over 0.0 */
+ {
+ A = or(ATMP,B)
+ jumpr r31
+ }
+END(__hexagon_mindf3)
+ .falign
+__hexagon_maxdf3:
+fmax:
+ {
+ p0 = dfclass(A,#0x10)
+ p1 = dfcmp.gt(B,A)
+ ATMP = A
+ }
+ {
+ if (p0) A = B
+ if (p1) A = B
+ p2 = dfcmp.eq(A,B)
+ if (!p2.new) jumpr:t r31
+ }
+ /* A == B, return A&B to select 0.0 over -0.0 */
+ {
+ A = and(ATMP,B)
+ jumpr r31
+ }
+END(__hexagon_maxdf3)
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/dfmul.S b/contrib/compiler-rt/lib/builtins/hexagon/dfmul.S
new file mode 100644
index 000000000000..fde6d77bdcff
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/dfmul.S
@@ -0,0 +1,418 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+/* Double Precision Multiply */
+#define A r1:0
+#define AH r1
+#define AL r0
+#define B r3:2
+#define BH r3
+#define BL r2
+
+#define BTMP r5:4
+#define BTMPH r5
+#define BTMPL r4
+
+#define PP_ODD r7:6
+#define PP_ODD_H r7
+#define PP_ODD_L r6
+
+#define ONE r9:8
+#define S_ONE r8
+#define S_ZERO r9
+
+#define PP_HH r11:10
+#define PP_HH_H r11
+#define PP_HH_L r10
+
+#define ATMP r13:12
+#define ATMPH r13
+#define ATMPL r12
+
+#define PP_LL r15:14
+#define PP_LL_H r15
+#define PP_LL_L r14
+
+#define TMP r28
+
+#define MANTBITS 52
+#define HI_MANTBITS 20
+#define EXPBITS 11
+#define BIAS 1024
+#define MANTISSA_TO_INT_BIAS 52
+
+/* Some constant to adjust normalization amount in error code */
+/* Amount to right shift the partial product to get to a denorm */
+#define FUDGE 5
+
+#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
+#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
+#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
+#define END(TAG) .size TAG,.-TAG
+
+#define SR_ROUND_OFF 22
+ .text
+ .global __hexagon_muldf3
+ .type __hexagon_muldf3,@function
+ Q6_ALIAS(muldf3)
+ FAST_ALIAS(muldf3)
+ FAST2_ALIAS(muldf3)
+ .p2align 5
+__hexagon_muldf3:
+ {
+ p0 = dfclass(A,#2)
+ p0 = dfclass(B,#2)
+ ATMP = combine(##0x40000000,#0)
+ }
+ {
+ ATMP = insert(A,#MANTBITS,#EXPBITS-1)
+ BTMP = asl(B,#EXPBITS-1)
+ TMP = #-BIAS
+ ONE = #1
+ }
+ {
+ PP_ODD = mpyu(BTMPL,ATMPH)
+ BTMP = insert(ONE,#2,#62)
+ }
+ /* since we know that the MSB of the H registers is zero, we should never carry */
+ /* H <= 2^31-1. L <= 2^32-1. Therefore, HL <= 2^63-2^32-2^31+1 */
+ /* Adding 2 HLs, we get 2^64-3*2^32+2 maximum. */
+ /* Therefore, we can add 3 2^32-1 values safely without carry. We only need one. */
+ {
+ PP_LL = mpyu(ATMPL,BTMPL)
+ PP_ODD += mpyu(ATMPL,BTMPH)
+ }
+ {
+ PP_ODD += lsr(PP_LL,#32)
+ PP_HH = mpyu(ATMPH,BTMPH)
+ BTMP = combine(##BIAS+BIAS-4,#0)
+ }
+ {
+ PP_HH += lsr(PP_ODD,#32)
+ if (!p0) jump .Lmul_abnormal
+ p1 = cmp.eq(PP_LL_L,#0) // 64 lsb's 0?
+ p1 = cmp.eq(PP_ODD_L,#0) // 64 lsb's 0?
+ }
+ /*
+ * PP_HH can have a maximum of 0x3FFF_FFFF_FFFF_FFFF or thereabouts
+ * PP_HH can have a minimum of 0x1000_0000_0000_0000 or so
+ */
+#undef PP_ODD
+#undef PP_ODD_H
+#undef PP_ODD_L
+#define EXP10 r7:6
+#define EXP1 r7
+#define EXP0 r6
+ {
+ if (!p1) PP_HH_L = or(PP_HH_L,S_ONE)
+ EXP0 = extractu(AH,#EXPBITS,#HI_MANTBITS)
+ EXP1 = extractu(BH,#EXPBITS,#HI_MANTBITS)
+ }
+ {
+ PP_LL = neg(PP_HH)
+ EXP0 += add(TMP,EXP1)
+ TMP = xor(AH,BH)
+ }
+ {
+ if (!p2.new) PP_HH = PP_LL
+ p2 = cmp.gt(TMP,#-1)
+ p0 = !cmp.gt(EXP0,BTMPH)
+ p0 = cmp.gt(EXP0,BTMPL)
+ if (!p0.new) jump:nt .Lmul_ovf_unf
+ }
+ {
+ A = convert_d2df(PP_HH)
+ EXP0 = add(EXP0,#-BIAS-58)
+ }
+ {
+ AH += asl(EXP0,#HI_MANTBITS)
+ jumpr r31
+ }
+
+ .falign
+.Lpossible_unf:
+ /* We end up with a positive exponent */
+ /* But we may have rounded up to an exponent of 1. */
+ /* If the exponent is 1, if we rounded up to it
+ * we need to also raise underflow
+ * Fortunately, this is pretty easy to detect, we must have +/- 0x0010_0000_0000_0000
+ * And the PP should also have more than one bit set
+ */
+ /* Note: ATMP should have abs(PP_HH) */
+ /* Note: BTMPL should have 0x7FEFFFFF */
+ {
+ p0 = cmp.eq(AL,#0)
+ p0 = bitsclr(AH,BTMPL)
+ if (!p0.new) jumpr:t r31
+ BTMPH = #0x7fff
+ }
+ {
+ p0 = bitsset(ATMPH,BTMPH)
+ BTMPL = USR
+ BTMPH = #0x030
+ }
+ {
+ if (p0) BTMPL = or(BTMPL,BTMPH)
+ }
+ {
+ USR = BTMPL
+ }
+ {
+ p0 = dfcmp.eq(A,A)
+ jumpr r31
+ }
+ .falign
+.Lmul_ovf_unf:
+ {
+ A = convert_d2df(PP_HH)
+ ATMP = abs(PP_HH) // take absolute value
+ EXP1 = add(EXP0,#-BIAS-58)
+ }
+ {
+ AH += asl(EXP1,#HI_MANTBITS)
+ EXP1 = extractu(AH,#EXPBITS,#HI_MANTBITS)
+ BTMPL = ##0x7FEFFFFF
+ }
+ {
+ EXP1 += add(EXP0,##-BIAS-58)
+ //BTMPH = add(clb(ATMP),#-2)
+ BTMPH = #0
+ }
+ {
+ p0 = cmp.gt(EXP1,##BIAS+BIAS-2) // overflow
+ if (p0.new) jump:nt .Lmul_ovf
+ }
+ {
+ p0 = cmp.gt(EXP1,#0)
+ if (p0.new) jump:nt .Lpossible_unf
+ BTMPH = sub(EXP0,BTMPH)
+ TMP = #63 // max amount to shift
+ }
+ /* Underflow */
+ /*
+ * PP_HH has the partial product with sticky LSB.
+ * PP_HH can have a maximum of 0x3FFF_FFFF_FFFF_FFFF or thereabouts
+ * PP_HH can have a minimum of 0x1000_0000_0000_0000 or so
+ * The exponent of PP_HH is in EXP1, which is non-positive (0 or negative)
+ * That's the exponent that happens after the normalization
+ *
+ * EXP0 has the exponent that, when added to the normalized value, is out of range.
+ *
+ * Strategy:
+ *
+ * * Shift down bits, with sticky bit, such that the bits are aligned according
+ * to the LZ count and appropriate exponent, but not all the way to mantissa
+ * field, keep around the last few bits.
+ * * Put a 1 near the MSB
+ * * Check the LSBs for inexact; if inexact also set underflow
+ * * Convert [u]d2df -- will correctly round according to rounding mode
+ * * Replace exponent field with zero
+ *
+ *
+ */
+
+
+ {
+ BTMPL = #0 // offset for extract
+ BTMPH = sub(#FUDGE,BTMPH) // amount to right shift
+ }
+ {
+ p3 = cmp.gt(PP_HH_H,#-1) // is it positive?
+ BTMPH = min(BTMPH,TMP) // Don't shift more than 63
+ PP_HH = ATMP
+ }
+ {
+ TMP = USR
+ PP_LL = extractu(PP_HH,BTMP)
+ }
+ {
+ PP_HH = asr(PP_HH,BTMPH)
+ BTMPL = #0x0030 // underflow flag
+ AH = insert(S_ZERO,#EXPBITS,#HI_MANTBITS)
+ }
+ {
+ p0 = cmp.gtu(ONE,PP_LL) // Did we extract all zeros?
+ if (!p0.new) PP_HH_L = or(PP_HH_L,S_ONE) // add sticky bit
+ PP_HH_H = setbit(PP_HH_H,#HI_MANTBITS+3) // Add back in a bit so we can use convert instruction
+ }
+ {
+ PP_LL = neg(PP_HH)
+ p1 = bitsclr(PP_HH_L,#0x7) // Are the LSB's clear?
+ if (!p1.new) TMP = or(BTMPL,TMP) // If not, Inexact+Underflow
+ }
+ {
+ if (!p3) PP_HH = PP_LL
+ USR = TMP
+ }
+ {
+ A = convert_d2df(PP_HH) // Do rounding
+ p0 = dfcmp.eq(A,A) // realize exception
+ }
+ {
+ AH = insert(S_ZERO,#EXPBITS-1,#HI_MANTBITS+1) // Insert correct exponent
+ jumpr r31
+ }
+ .falign
+.Lmul_ovf:
+ // We get either max finite value or infinity. Either way, overflow+inexact
+ {
+ TMP = USR
+ ATMP = combine(##0x7fefffff,#-1) // positive max finite
+ A = PP_HH
+ }
+ {
+ PP_LL_L = extractu(TMP,#2,#SR_ROUND_OFF) // rounding bits
+ TMP = or(TMP,#0x28) // inexact + overflow
+ BTMP = combine(##0x7ff00000,#0) // positive infinity
+ }
+ {
+ USR = TMP
+ PP_LL_L ^= lsr(AH,#31) // Does sign match rounding?
+ TMP = PP_LL_L // unmodified rounding mode
+ }
+ {
+ p0 = !cmp.eq(TMP,#1) // If not round-to-zero and
+ p0 = !cmp.eq(PP_LL_L,#2) // Not rounding the other way,
+ if (p0.new) ATMP = BTMP // we should get infinity
+ p0 = dfcmp.eq(A,A) // Realize FP exception if enabled
+ }
+ {
+ A = insert(ATMP,#63,#0) // insert inf/maxfinite, leave sign
+ jumpr r31
+ }
+
+.Lmul_abnormal:
+ {
+ ATMP = extractu(A,#63,#0) // strip off sign
+ BTMP = extractu(B,#63,#0) // strip off sign
+ }
+ {
+ p3 = cmp.gtu(ATMP,BTMP)
+ if (!p3.new) A = B // sort values
+ if (!p3.new) B = A // sort values
+ }
+ {
+ // Any NaN --> NaN, possibly raise invalid if sNaN
+ p0 = dfclass(A,#0x0f) // A not NaN?
+ if (!p0.new) jump:nt .Linvalid_nan
+ if (!p3) ATMP = BTMP
+ if (!p3) BTMP = ATMP
+ }
+ {
+ // Infinity * nonzero number is infinity
+ p1 = dfclass(A,#0x08) // A is infinity
+ p1 = dfclass(B,#0x0e) // B is nonzero
+ }
+ {
+ // Infinity * zero --> NaN, raise invalid
+ // Other zeros return zero
+ p0 = dfclass(A,#0x08) // A is infinity
+ p0 = dfclass(B,#0x01) // B is zero
+ }
+ {
+ if (p1) jump .Ltrue_inf
+ p2 = dfclass(B,#0x01)
+ }
+ {
+ if (p0) jump .Linvalid_zeroinf
+ if (p2) jump .Ltrue_zero // so return zero
+ TMP = ##0x7c000000
+ }
+ // We are left with a normal or subnormal times a subnormal. A > B
+ // If A and B are both very small (exp(a) < BIAS-MANTBITS),
+ // we go to a single sticky bit, which we can round easily.
+ // If A and B might multiply to something bigger, decrease A exponent and increase
+ // B exponent and try again
+ {
+ p0 = bitsclr(AH,TMP)
+ if (p0.new) jump:nt .Lmul_tiny
+ }
+ {
+ TMP = cl0(BTMP)
+ }
+ {
+ TMP = add(TMP,#-EXPBITS)
+ }
+ {
+ BTMP = asl(BTMP,TMP)
+ }
+ {
+ B = insert(BTMP,#63,#0)
+ AH -= asl(TMP,#HI_MANTBITS)
+ }
+ jump __hexagon_muldf3
+.Lmul_tiny:
+ {
+ TMP = USR
+ A = xor(A,B) // get sign bit
+ }
+ {
+ TMP = or(TMP,#0x30) // Inexact + Underflow
+ A = insert(ONE,#63,#0) // put in rounded up value
+ BTMPH = extractu(TMP,#2,#SR_ROUND_OFF) // get rounding mode
+ }
+ {
+ USR = TMP
+ p0 = cmp.gt(BTMPH,#1) // Round towards pos/neg inf?
+ if (!p0.new) AL = #0 // If not, zero
+ BTMPH ^= lsr(AH,#31) // rounding my way --> set LSB
+ }
+ {
+ p0 = cmp.eq(BTMPH,#3) // if rounding towards right inf
+ if (!p0.new) AL = #0 // don't go to zero
+ jumpr r31
+ }
+.Linvalid_zeroinf:
+ {
+ TMP = USR
+ }
+ {
+ A = #-1
+ TMP = or(TMP,#2)
+ }
+ {
+ USR = TMP
+ }
+ {
+ p0 = dfcmp.uo(A,A) // force exception if enabled
+ jumpr r31
+ }
+.Linvalid_nan:
+ {
+ p0 = dfclass(B,#0x0f) // if B is not NaN
+ TMP = convert_df2sf(A) // will generate invalid if sNaN
+ if (p0.new) B = A // make it whatever A is
+ }
+ {
+ BL = convert_df2sf(B) // will generate invalid if sNaN
+ A = #-1
+ jumpr r31
+ }
+ .falign
+.Ltrue_zero:
+ {
+ A = B
+ B = A
+ }
+.Ltrue_inf:
+ {
+ BH = extract(BH,#1,#31)
+ }
+ {
+ AH ^= asl(BH,#31)
+ jumpr r31
+ }
+END(__hexagon_muldf3)
+
+#undef ATMP
+#undef ATMPL
+#undef ATMPH
+#undef BTMP
+#undef BTMPL
+#undef BTMPH
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/dfsqrt.S b/contrib/compiler-rt/lib/builtins/hexagon/dfsqrt.S
new file mode 100644
index 000000000000..027d9e1fde43
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/dfsqrt.S
@@ -0,0 +1,406 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+/* Double Precision square root */
+
+#define EXP r28
+
+#define A r1:0
+#define AH r1
+#define AL r0
+
+#define SFSH r3:2
+#define SF_S r3
+#define SF_H r2
+
+#define SFHALF_SONE r5:4
+#define S_ONE r4
+#define SFHALF r5
+#define SF_D r6
+#define SF_E r7
+#define RECIPEST r8
+#define SFRAD r9
+
+#define FRACRAD r11:10
+#define FRACRADH r11
+#define FRACRADL r10
+
+#define ROOT r13:12
+#define ROOTHI r13
+#define ROOTLO r12
+
+#define PROD r15:14
+#define PRODHI r15
+#define PRODLO r14
+
+#define P_TMP p0
+#define P_EXP1 p1
+#define NORMAL p2
+
+#define SF_EXPBITS 8
+#define SF_MANTBITS 23
+
+#define DF_EXPBITS 11
+#define DF_MANTBITS 52
+
+#define DF_BIAS 0x3ff
+
+#define DFCLASS_ZERO 0x01
+#define DFCLASS_NORMAL 0x02
+#define DFCLASS_DENORMAL 0x02
+#define DFCLASS_INFINITE 0x08
+#define DFCLASS_NAN 0x10
+
+#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG; .type __qdsp_##TAG,@function
+#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG; .type __hexagon_fast_##TAG,@function
+#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG; .type __hexagon_fast2_##TAG,@function
+#define END(TAG) .size TAG,.-TAG
+
+ .text
+ .global __hexagon_sqrtdf2
+ .type __hexagon_sqrtdf2,@function
+ .global __hexagon_sqrt
+ .type __hexagon_sqrt,@function
+ Q6_ALIAS(sqrtdf2)
+ Q6_ALIAS(sqrt)
+ FAST_ALIAS(sqrtdf2)
+ FAST_ALIAS(sqrt)
+ FAST2_ALIAS(sqrtdf2)
+ FAST2_ALIAS(sqrt)
+ .type sqrt,@function
+ .p2align 5
+__hexagon_sqrtdf2:
+__hexagon_sqrt:
+ {
+ PROD = extractu(A,#SF_MANTBITS+1,#DF_MANTBITS-SF_MANTBITS)
+ EXP = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32)
+ SFHALF_SONE = combine(##0x3f000004,#1)
+ }
+ {
+ NORMAL = dfclass(A,#DFCLASS_NORMAL) // Is it normal
+ NORMAL = cmp.gt(AH,#-1) // and positive?
+ if (!NORMAL.new) jump:nt .Lsqrt_abnormal
+ SFRAD = or(SFHALF,PRODLO)
+ }
+#undef NORMAL
+.Ldenormal_restart:
+ {
+ FRACRAD = A
+ SF_E,P_TMP = sfinvsqrta(SFRAD)
+ SFHALF = and(SFHALF,#-16)
+ SFSH = #0
+ }
+#undef A
+#undef AH
+#undef AL
+#define ERROR r1:0
+#define ERRORHI r1
+#define ERRORLO r0
+ // SF_E : reciprocal square root
+ // SF_H : half rsqrt
+ // sf_S : square root
+ // SF_D : error term
+ // SFHALF: 0.5
+ {
+ SF_S += sfmpy(SF_E,SFRAD):lib // s0: root
+ SF_H += sfmpy(SF_E,SFHALF):lib // h0: 0.5*y0. Could also decrement exponent...
+ SF_D = SFHALF
+#undef SFRAD
+#define SHIFTAMT r9
+ SHIFTAMT = and(EXP,#1)
+ }
+ {
+ SF_D -= sfmpy(SF_S,SF_H):lib // d0: 0.5-H*S = 0.5-0.5*~1
+ FRACRADH = insert(S_ONE,#DF_EXPBITS+1,#DF_MANTBITS-32) // replace upper bits with hidden
+ P_EXP1 = cmp.gtu(SHIFTAMT,#0)
+ }
+ {
+ SF_S += sfmpy(SF_S,SF_D):lib // s1: refine sqrt
+ SF_H += sfmpy(SF_H,SF_D):lib // h1: refine half-recip
+ SF_D = SFHALF
+ SHIFTAMT = mux(P_EXP1,#8,#9)
+ }
+ {
+ SF_D -= sfmpy(SF_S,SF_H):lib // d1: error term
+ FRACRAD = asl(FRACRAD,SHIFTAMT) // Move fracrad bits to right place
+ SHIFTAMT = mux(P_EXP1,#3,#2)
+ }
+ {
+ SF_H += sfmpy(SF_H,SF_D):lib // d2: rsqrt
+ // cool trick: half of 1/sqrt(x) has same mantissa as 1/sqrt(x).
+ PROD = asl(FRACRAD,SHIFTAMT) // fracrad<<(2+exp1)
+ }
+ {
+ SF_H = and(SF_H,##0x007fffff)
+ }
+ {
+ SF_H = add(SF_H,##0x00800000 - 3)
+ SHIFTAMT = mux(P_EXP1,#7,#8)
+ }
+ {
+ RECIPEST = asl(SF_H,SHIFTAMT)
+ SHIFTAMT = mux(P_EXP1,#15-(1+1),#15-(1+0))
+ }
+ {
+ ROOT = mpyu(RECIPEST,PRODHI) // root = mpyu_full(recipest,hi(fracrad<<(2+exp1)))
+ }
+
+#undef SFSH // r3:2
+#undef SF_H // r2
+#undef SF_S // r3
+#undef S_ONE // r4
+#undef SFHALF // r5
+#undef SFHALF_SONE // r5:4
+#undef SF_D // r6
+#undef SF_E // r7
+
+#define HL r3:2
+#define LL r5:4
+#define HH r7:6
+
+#undef P_EXP1
+#define P_CARRY0 p1
+#define P_CARRY1 p2
+#define P_CARRY2 p3
+
+ /* Iteration 0 */
+ /* Maybe we can save a cycle by starting with ERROR=asl(fracrad), then as we multiply */
+ /* We can shift and subtract instead of shift and add? */
+ {
+ ERROR = asl(FRACRAD,#15)
+ PROD = mpyu(ROOTHI,ROOTHI)
+ P_CARRY0 = cmp.eq(r0,r0)
+ }
+ {
+ ERROR -= asl(PROD,#15)
+ PROD = mpyu(ROOTHI,ROOTLO)
+ P_CARRY1 = cmp.eq(r0,r0)
+ }
+ {
+ ERROR -= lsr(PROD,#16)
+ P_CARRY2 = cmp.eq(r0,r0)
+ }
+ {
+ ERROR = mpyu(ERRORHI,RECIPEST)
+ }
+ {
+ ROOT += lsr(ERROR,SHIFTAMT)
+ SHIFTAMT = add(SHIFTAMT,#16)
+ ERROR = asl(FRACRAD,#31) // for next iter
+ }
+ /* Iteration 1 */
+ {
+ PROD = mpyu(ROOTHI,ROOTHI)
+ ERROR -= mpyu(ROOTHI,ROOTLO) // amount is 31, no shift needed
+ }
+ {
+ ERROR -= asl(PROD,#31)
+ PROD = mpyu(ROOTLO,ROOTLO)
+ }
+ {
+ ERROR -= lsr(PROD,#33)
+ }
+ {
+ ERROR = mpyu(ERRORHI,RECIPEST)
+ }
+ {
+ ROOT += lsr(ERROR,SHIFTAMT)
+ SHIFTAMT = add(SHIFTAMT,#16)
+ ERROR = asl(FRACRAD,#47) // for next iter
+ }
+ /* Iteration 2 */
+ {
+ PROD = mpyu(ROOTHI,ROOTHI)
+ }
+ {
+ ERROR -= asl(PROD,#47)
+ PROD = mpyu(ROOTHI,ROOTLO)
+ }
+ {
+ ERROR -= asl(PROD,#16) // bidir shr 31-47
+ PROD = mpyu(ROOTLO,ROOTLO)
+ }
+ {
+ ERROR -= lsr(PROD,#17) // 64-47
+ }
+ {
+ ERROR = mpyu(ERRORHI,RECIPEST)
+ }
+ {
+ ROOT += lsr(ERROR,SHIFTAMT)
+ }
+#undef ERROR
+#undef PROD
+#undef PRODHI
+#undef PRODLO
+#define REM_HI r15:14
+#define REM_HI_HI r15
+#define REM_LO r1:0
+#undef RECIPEST
+#undef SHIFTAMT
+#define TWOROOT_LO r9:8
+ /* Adjust Root */
+ {
+ HL = mpyu(ROOTHI,ROOTLO)
+ LL = mpyu(ROOTLO,ROOTLO)
+ REM_HI = #0
+ REM_LO = #0
+ }
+ {
+ HL += lsr(LL,#33)
+ LL += asl(HL,#33)
+ P_CARRY0 = cmp.eq(r0,r0)
+ }
+ {
+ HH = mpyu(ROOTHI,ROOTHI)
+ REM_LO = sub(REM_LO,LL,P_CARRY0):carry
+ TWOROOT_LO = #1
+ }
+ {
+ HH += lsr(HL,#31)
+ TWOROOT_LO += asl(ROOT,#1)
+ }
+#undef HL
+#undef LL
+#define REM_HI_TMP r3:2
+#define REM_HI_TMP_HI r3
+#define REM_LO_TMP r5:4
+ {
+ REM_HI = sub(FRACRAD,HH,P_CARRY0):carry
+ REM_LO_TMP = sub(REM_LO,TWOROOT_LO,P_CARRY1):carry
+#undef FRACRAD
+#undef HH
+#define ZERO r11:10
+#define ONE r7:6
+ ONE = #1
+ ZERO = #0
+ }
+ {
+ REM_HI_TMP = sub(REM_HI,ZERO,P_CARRY1):carry
+ ONE = add(ROOT,ONE)
+ EXP = add(EXP,#-DF_BIAS) // subtract bias --> signed exp
+ }
+ {
+ // If carry set, no borrow: result was still positive
+ if (P_CARRY1) ROOT = ONE
+ if (P_CARRY1) REM_LO = REM_LO_TMP
+ if (P_CARRY1) REM_HI = REM_HI_TMP
+ }
+ {
+ REM_LO_TMP = sub(REM_LO,TWOROOT_LO,P_CARRY2):carry
+ ONE = #1
+ EXP = asr(EXP,#1) // divide signed exp by 2
+ }
+ {
+ REM_HI_TMP = sub(REM_HI,ZERO,P_CARRY2):carry
+ ONE = add(ROOT,ONE)
+ }
+ {
+ if (P_CARRY2) ROOT = ONE
+ if (P_CARRY2) REM_LO = REM_LO_TMP
+ // since tworoot <= 2^32, remhi must be zero
+#undef REM_HI_TMP
+#undef REM_HI_TMP_HI
+#define S_ONE r2
+#define ADJ r3
+ S_ONE = #1
+ }
+ {
+ P_TMP = cmp.eq(REM_LO,ZERO) // is the low part zero
+ if (!P_TMP.new) ROOTLO = or(ROOTLO,S_ONE) // if so, it's exact... hopefully
+ ADJ = cl0(ROOT)
+ EXP = add(EXP,#-63)
+ }
+#undef REM_LO
+#define RET r1:0
+#define RETHI r1
+ {
+ RET = convert_ud2df(ROOT) // set up mantissa, maybe set inexact flag
+ EXP = add(EXP,ADJ) // add back bias
+ }
+ {
+ RETHI += asl(EXP,#DF_MANTBITS-32) // add exponent adjust
+ jumpr r31
+ }
+#undef REM_LO_TMP
+#undef REM_HI_TMP
+#undef REM_HI_TMP_HI
+#undef REM_LO
+#undef REM_HI
+#undef TWOROOT_LO
+
+#undef RET
+#define A r1:0
+#define AH r1
+#define AL r1
+#undef S_ONE
+#define TMP r3:2
+#define TMPHI r3
+#define TMPLO r2
+#undef P_CARRY0
+#define P_NEG p1
+
+
+#define SFHALF r5
+#define SFRAD r9
+.Lsqrt_abnormal:
+ {
+ P_TMP = dfclass(A,#DFCLASS_ZERO) // zero?
+ if (P_TMP.new) jumpr:t r31
+ }
+ {
+ P_TMP = dfclass(A,#DFCLASS_NAN)
+ if (P_TMP.new) jump:nt .Lsqrt_nan
+ }
+ {
+ P_TMP = cmp.gt(AH,#-1)
+ if (!P_TMP.new) jump:nt .Lsqrt_invalid_neg
+ if (!P_TMP.new) EXP = ##0x7F800001 // sNaN
+ }
+ {
+ P_TMP = dfclass(A,#DFCLASS_INFINITE)
+ if (P_TMP.new) jumpr:nt r31
+ }
+ // If we got here, we're denormal
+ // prepare to restart
+ {
+ A = extractu(A,#DF_MANTBITS,#0) // Extract mantissa
+ }
+ {
+ EXP = add(clb(A),#-DF_EXPBITS) // how much to normalize?
+ }
+ {
+ A = asl(A,EXP) // Shift mantissa
+ EXP = sub(#1,EXP) // Form exponent
+ }
+ {
+ AH = insert(EXP,#1,#DF_MANTBITS-32) // insert lsb of exponent
+ }
+ {
+ TMP = extractu(A,#SF_MANTBITS+1,#DF_MANTBITS-SF_MANTBITS) // get sf value (mant+exp1)
+ SFHALF = ##0x3f000004 // form half constant
+ }
+ {
+ SFRAD = or(SFHALF,TMPLO) // form sf value
+ SFHALF = and(SFHALF,#-16)
+ jump .Ldenormal_restart // restart
+ }
+.Lsqrt_nan:
+ {
+ EXP = convert_df2sf(A) // if sNaN, get invalid
+ A = #-1 // qNaN
+ jumpr r31
+ }
+.Lsqrt_invalid_neg:
+ {
+ A = convert_sf2df(EXP) // Invalid,NaNval
+ jumpr r31
+ }
+END(__hexagon_sqrt)
+END(__hexagon_sqrtdf2)
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/divdi3.S b/contrib/compiler-rt/lib/builtins/hexagon/divdi3.S
new file mode 100644
index 000000000000..49ee8104f305
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/divdi3.S
@@ -0,0 +1,85 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+ .macro FUNCTION_BEGIN name
+ .text
+ .p2align 5
+ .globl \name
+ .type \name, @function
+\name:
+ .endm
+
+ .macro FUNCTION_END name
+ .size \name, . - \name
+ .endm
+
+
+FUNCTION_BEGIN __hexagon_divdi3
+ {
+ p2 = tstbit(r1,#31)
+ p3 = tstbit(r3,#31)
+ }
+ {
+ r1:0 = abs(r1:0)
+ r3:2 = abs(r3:2)
+ }
+ {
+ r6 = cl0(r1:0) // count leading 0's of dividend (numerator)
+ r7 = cl0(r3:2) // count leading 0's of divisor (denominator)
+ r5:4 = r3:2 // divisor moved into working registers
+ r3:2 = r1:0 // dividend is the initial remainder, r3:2 contains remainder
+ }
+ {
+ p3 = xor(p2,p3)
+ r10 = sub(r7,r6) // left shift count for bit & divisor
+ r1:0 = #0 // initialize quotient to 0
+ r15:14 = #1 // initialize bit to 1
+ }
+ {
+ r11 = add(r10,#1) // loop count is 1 more than shift count
+ r13:12 = lsl(r5:4,r10) // shift divisor msb into same bit position as dividend msb
+ r15:14 = lsl(r15:14,r10) // shift the bit left by same amount as divisor
+ }
+ {
+ p0 = cmp.gtu(r5:4,r3:2) // check if divisor > dividend
+ loop0(1f,r11) // register loop
+ }
+ {
+ if (p0) jump .hexagon_divdi3_return // if divisor > dividend, we're done, so return
+ }
+ .falign
+1:
+ {
+ p0 = cmp.gtu(r13:12,r3:2) // set predicate reg if shifted divisor > current remainder
+ }
+ {
+ r7:6 = sub(r3:2, r13:12) // subtract shifted divisor from current remainder
+ r9:8 = add(r1:0, r15:14) // save current quotient to temp (r9:8)
+ }
+ {
+ r1:0 = vmux(p0, r1:0, r9:8) // choose either current quotient or new quotient (r9:8)
+ r3:2 = vmux(p0, r3:2, r7:6) // choose either current remainder or new remainder (r7:6)
+ }
+ {
+ r15:14 = lsr(r15:14, #1) // shift bit right by 1 for next iteration
+ r13:12 = lsr(r13:12, #1) // shift "shifted divisor" right by 1 for next iteration
+ }:endloop0
+
+.hexagon_divdi3_return:
+ {
+ r3:2 = neg(r1:0)
+ }
+ {
+ r1:0 = vmux(p3,r3:2,r1:0)
+ jumpr r31
+ }
+FUNCTION_END __hexagon_divdi3
+
+ .globl __qdsp_divdi3
+ .set __qdsp_divdi3, __hexagon_divdi3
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/divsi3.S b/contrib/compiler-rt/lib/builtins/hexagon/divsi3.S
new file mode 100644
index 000000000000..8e159baa192f
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/divsi3.S
@@ -0,0 +1,84 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+
+ .macro FUNCTION_BEGIN name
+ .text
+ .p2align 5
+ .globl \name
+ .type \name, @function
+\name:
+ .endm
+
+ .macro FUNCTION_END name
+ .size \name, . - \name
+ .endm
+
+
+FUNCTION_BEGIN __hexagon_divsi3
+ {
+ p0 = cmp.ge(r0,#0)
+ p1 = cmp.ge(r1,#0)
+ r1 = abs(r0)
+ r2 = abs(r1)
+ }
+ {
+ r3 = cl0(r1)
+ r4 = cl0(r2)
+ r5 = sub(r1,r2)
+ p2 = cmp.gtu(r2,r1)
+ }
+#if (__HEXAGON_ARCH__ == 60)
+ {
+ r0 = #0
+ p1 = xor(p0,p1)
+ p0 = cmp.gtu(r2,r5)
+ }
+ if (p2) jumpr r31
+#else
+ {
+ r0 = #0
+ p1 = xor(p0,p1)
+ p0 = cmp.gtu(r2,r5)
+ if (p2) jumpr r31
+ }
+#endif
+ {
+ r0 = mux(p1,#-1,#1)
+ if (p0) jumpr r31
+ r4 = sub(r4,r3)
+ r3 = #1
+ }
+ {
+ r0 = #0
+ r3:2 = vlslw(r3:2,r4)
+ loop0(1f,r4)
+ }
+ .falign
+1:
+ {
+ p0 = cmp.gtu(r2,r1)
+ if (!p0.new) r1 = sub(r1,r2)
+ if (!p0.new) r0 = add(r0,r3)
+ r3:2 = vlsrw(r3:2,#1)
+ }:endloop0
+ {
+ p0 = cmp.gtu(r2,r1)
+ if (!p0.new) r0 = add(r0,r3)
+ if (!p1) jumpr r31
+ }
+ {
+ r0 = neg(r0)
+ jumpr r31
+ }
+FUNCTION_END __hexagon_divsi3
+
+ .globl __qdsp_divsi3
+ .set __qdsp_divsi3, __hexagon_divsi3
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/fabs_opt.S b/contrib/compiler-rt/lib/builtins/hexagon/fabs_opt.S
new file mode 100644
index 000000000000..b09b00734d98
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/fabs_opt.S
@@ -0,0 +1,37 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+.macro FUNCTION_BEGIN name
+.text
+.p2align 5
+.globl \name
+.type \name, @function
+\name:
+.endm
+
+.macro FUNCTION_END name
+.size \name, . - \name
+.endm
+
+FUNCTION_BEGIN fabs
+ {
+ r1 = clrbit(r1, #31)
+ jumpr r31
+ }
+FUNCTION_END fabs
+
+FUNCTION_BEGIN fabsf
+ {
+ r0 = clrbit(r0, #31)
+ jumpr r31
+ }
+FUNCTION_END fabsf
+
+ .globl fabsl
+ .set fabsl, fabs
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/fastmath2_dlib_asm.S b/contrib/compiler-rt/lib/builtins/hexagon/fastmath2_dlib_asm.S
new file mode 100644
index 000000000000..9286df06c26d
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/fastmath2_dlib_asm.S
@@ -0,0 +1,491 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/* ==================================================================== */
+/* FUNCTIONS Optimized double floating point operators */
+/* ==================================================================== */
+/* c = dadd_asm(a, b) */
+/* ==================================================================== *
+fast2_QDOUBLE fast2_dadd(fast2_QDOUBLE a,fast2_QDOUBLE b) {
+ fast2_QDOUBLE c;
+ lint manta = a & MANTMASK;
+ int expa = Q6_R_sxth_R(a) ;
+ lint mantb = b & MANTMASK;
+ int expb = Q6_R_sxth_R(b) ;
+ int exp, expdiff, j, k, hi, lo, cn;
+ lint mant;
+
+ expdiff = (int) Q6_P_vabsdiffh_PP(a, b);
+ expdiff = Q6_R_sxth_R(expdiff) ;
+ if (expdiff > 63) { expdiff = 62;}
+ if (expa > expb) {
+ exp = expa + 1;
+ expa = 1;
+ expb = expdiff + 1;
+ } else {
+ exp = expb + 1;
+ expb = 1;
+ expa = expdiff + 1;
+ }
+ mant = (manta>>expa) + (mantb>>expb);
+
+ hi = (int) (mant>>32);
+ lo = (int) (mant);
+
+ k = Q6_R_normamt_R(hi);
+ if(hi == 0 || hi == -1) k = 31+Q6_R_normamt_R(lo);
+
+ mant = (mant << k);
+ cn = (mant == 0x8000000000000000LL);
+ exp = exp - k + cn;
+
+ if (mant == 0 || mant == -1) exp = 0x8001;
+ c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
+ return(c);
+ }
+ * ==================================================================== */
+ .text
+ .global fast2_dadd_asm
+ .type fast2_dadd_asm, @function
+fast2_dadd_asm:
+#define manta R0
+#define mantexpa R1:0
+#define lmanta R1:0
+#define mantb R2
+#define mantexpb R3:2
+#define lmantb R3:2
+#define expa R4
+#define expb R5
+#define mantexpd R7:6
+#define expd R6
+#define exp R8
+#define c63 R9
+#define lmant R1:0
+#define manth R1
+#define mantl R0
+#define minmin R11:10 // exactly 0x000000000000008001LL
+#define minminl R10
+#define k R4
+#define ce P0
+ .falign
+ {
+ mantexpd = VABSDIFFH(mantexpa, mantexpb) //represented as 0x08001LL
+ c63 = #62
+ expa = SXTH(manta)
+ expb = SXTH(mantb)
+ } {
+ expd = SXTH(expd)
+ ce = CMP.GT(expa, expb);
+ if ( ce.new) exp = add(expa, #1)
+ if (!ce.new) exp = add(expb, #1)
+ } {
+ if ( ce) expa = #1
+ if (!ce) expb = #1
+ manta.L = #0
+ expd = MIN(expd, c63)
+ } {
+ if (!ce) expa = add(expd, #1)
+ if ( ce) expb = add(expd, #1)
+ mantb.L = #0
+ minmin = #0
+ } {
+ lmanta = ASR(lmanta, expa)
+ lmantb = ASR(lmantb, expb)
+ } {
+ lmant = add(lmanta, lmantb)
+ minminl.L = #0x8001
+ } {
+ k = clb(lmant)
+ c63 = #58
+ } {
+ k = add(k, #-1)
+ p0 = cmp.gt(k, c63)
+ } {
+ mantexpa = ASL(lmant, k)
+ exp = SUB(exp, k)
+ if(p0) jump .Ldenorma
+ } {
+ manta = insert(exp, #16, #0)
+ jumpr r31
+ }
+.Ldenorma:
+ {
+ mantexpa = minmin
+ jumpr r31
+ }
+/* =================================================================== *
+ fast2_QDOUBLE fast2_dsub(fast2_QDOUBLE a,fast2_QDOUBLE b) {
+ fast2_QDOUBLE c;
+ lint manta = a & MANTMASK;
+ int expa = Q6_R_sxth_R(a) ;
+ lint mantb = b & MANTMASK;
+ int expb = Q6_R_sxth_R(b) ;
+ int exp, expdiff, j, k;
+ lint mant;
+
+ expdiff = (int) Q6_P_vabsdiffh_PP(a, b);
+ expdiff = Q6_R_sxth_R(expdiff) ;
+ if (expdiff > 63) { expdiff = 62;}
+ if (expa > expb) {
+ exp = expa + 1;
+ expa = 1;
+ expb = expdiff + 1;
+ } else {
+ exp = expb + 1;
+ expb = 1;
+ expa = expdiff + 1;
+ }
+ mant = (manta>>expa) - (mantb>>expb);
+ k = Q6_R_clb_P(mant)-1;
+ mant = (mant << k);
+ exp = exp - k;
+ if (mant == 0 || mant == -1) exp = 0x8001;
+ c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
+ return(c);
+ }
+ * ==================================================================== */
+ .text
+ .global fast2_dsub_asm
+ .type fast2_dsub_asm, @function
+fast2_dsub_asm:
+
+#define manta R0
+#define mantexpa R1:0
+#define lmanta R1:0
+#define mantb R2
+#define mantexpb R3:2
+#define lmantb R3:2
+#define expa R4
+#define expb R5
+#define mantexpd R7:6
+#define expd R6
+#define exp R8
+#define c63 R9
+#define lmant R1:0
+#define manth R1
+#define mantl R0
+#define minmin R11:10 // exactly 0x000000000000008001LL
+#define minminl R10
+#define k R4
+#define ce P0
+ .falign
+ {
+ mantexpd = VABSDIFFH(mantexpa, mantexpb) //represented as 0x08001LL
+ c63 = #62
+ expa = SXTH(manta)
+ expb = SXTH(mantb)
+ } {
+ expd = SXTH(expd)
+ ce = CMP.GT(expa, expb);
+ if ( ce.new) exp = add(expa, #1)
+ if (!ce.new) exp = add(expb, #1)
+ } {
+ if ( ce) expa = #1
+ if (!ce) expb = #1
+ manta.L = #0
+ expd = MIN(expd, c63)
+ } {
+ if (!ce) expa = add(expd, #1)
+ if ( ce) expb = add(expd, #1)
+ mantb.L = #0
+ minmin = #0
+ } {
+ lmanta = ASR(lmanta, expa)
+ lmantb = ASR(lmantb, expb)
+ } {
+ lmant = sub(lmanta, lmantb)
+ minminl.L = #0x8001
+ } {
+ k = clb(lmant)
+ c63 = #58
+ } {
+ k = add(k, #-1)
+ p0 = cmp.gt(k, c63)
+ } {
+ mantexpa = ASL(lmant, k)
+ exp = SUB(exp, k)
+ if(p0) jump .Ldenorm
+ } {
+ manta = insert(exp, #16, #0)
+ jumpr r31
+ }
+.Ldenorm:
+ {
+ mantexpa = minmin
+ jumpr r31
+ }
+/* ==================================================================== *
+ fast2_QDOUBLE fast2_dmpy(fast2_QDOUBLE a,fast2_QDOUBLE b) {
+ fast2_QDOUBLE c;
+ lint manta = a & MANTMASK;
+ int expa = Q6_R_sxth_R(a) ;
+ lint mantb = b & MANTMASK;
+ int expb = Q6_R_sxth_R(b) ;
+ int exp, k;
+ lint mant;
+ int hia, hib, hi, lo;
+ unsigned int loa, lob;
+
+ hia = (int)(a >> 32);
+ loa = Q6_R_extractu_RII((int)manta, 31, 1);
+ hib = (int)(b >> 32);
+ lob = Q6_R_extractu_RII((int)mantb, 31, 1);
+
+ mant = Q6_P_mpy_RR(hia, lob);
+ mant = Q6_P_mpyacc_RR(mant,hib, loa);
+ mant = (mant >> 30) + (Q6_P_mpy_RR(hia, hib)<<1);
+
+ hi = (int) (mant>>32);
+
+ k = Q6_R_normamt_R(hi);
+ mant = mant << k;
+ exp = expa + expb - k;
+ if (mant == 0 || mant == -1) exp = 0x8001;
+ c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
+ return(c);
+ }
+ * ==================================================================== */
+ .text
+ .global fast2_dmpy_asm
+ .type fast2_dmpy_asm, @function
+fast2_dmpy_asm:
+
+#define mantal R0
+#define mantah R1
+#define mantexpa R1:0
+#define mantbl R2
+#define mantbh R3
+#define mantexpb R3:2
+#define expa R4
+#define expb R5
+#define c8001 R12
+#define mantexpd R7:6
+#define mantdh R7
+#define exp R8
+#define lmantc R11:10
+#define kb R9
+#define guard R11
+#define mantal_ R12
+#define mantbl_ R13
+#define min R15:14
+#define minh R15
+
+ .falign
+ {
+ mantbl_= lsr(mantbl, #16)
+ expb = sxth(mantbl)
+ expa = sxth(mantal)
+ mantal_= lsr(mantal, #16)
+ }
+ {
+ lmantc = mpy(mantah, mantbh)
+ mantexpd = mpy(mantah, mantbl_)
+ mantal.L = #0x0
+ min = #0
+ }
+ {
+ lmantc = add(lmantc, lmantc)
+ mantexpd+= mpy(mantbh, mantal_)
+ mantbl.L = #0x0
+ minh.H = #0x8000
+ }
+ {
+ mantexpd = asr(mantexpd, #15)
+ c8001.L = #0x8001
+ p1 = cmp.eq(mantexpa, mantexpb)
+ }
+ {
+ mantexpd = add(mantexpd, lmantc)
+ exp = add(expa, expb)
+ p2 = cmp.eq(mantexpa, min)
+ }
+ {
+ kb = clb(mantexpd)
+ mantexpb = abs(mantexpd)
+ guard = #58
+ }
+ {
+ p1 = and(p1, p2)
+ exp = sub(exp, kb)
+ kb = add(kb, #-1)
+ p0 = cmp.gt(kb, guard)
+ }
+ {
+ exp = add(exp, #1)
+ mantexpa = asl(mantexpd, kb)
+ if(p1) jump .Lsat //rarely happens
+ }
+ {
+ mantal = insert(exp,#16, #0)
+ if(!p0) jumpr r31
+ }
+ {
+ mantal = insert(c8001,#16, #0)
+ jumpr r31
+ }
+.Lsat:
+ {
+ mantexpa = #-1
+ }
+ {
+ mantexpa = lsr(mantexpa, #1)
+ }
+ {
+ mantal = insert(exp,#16, #0)
+ jumpr r31
+ }
+
+/* ==================================================================== *
+ int fast2_qd2f(fast2_QDOUBLE a) {
+ int exp;
+ long long int manta;
+ int ic, rnd, mantb;
+
+ manta = a>>32;
+ exp = Q6_R_sxth_R(a) ;
+ ic = 0x80000000 & manta;
+ manta = Q6_R_abs_R_sat(manta);
+ mantb = (manta + rnd)>>7;
+ rnd = 0x40
+ exp = (exp + 126);
+ if((manta & 0xff) == rnd) rnd = 0x00;
+ if((manta & 0x7fffffc0) == 0x7fffffc0) {
+ manta = 0x0; exp++;
+ } else {
+ manta= mantb & 0x007fffff;
+ }
+ exp = (exp << 23) & 0x7fffffc0;
+ ic = Q6_R_addacc_RR(ic, exp, manta);
+ return (ic);
+ }
+ * ==================================================================== */
+
+ .text
+ .global fast2_qd2f_asm
+ .type fast2_qd2f_asm, @function
+fast2_qd2f_asm:
+#define mantah R1
+#define mantal R0
+#define cff R0
+#define mant R3
+#define expo R4
+#define rnd R5
+#define mask R6
+#define c07f R7
+#define c80 R0
+#define mantb R2
+#define ic R0
+
+ .falign
+ {
+ mant = abs(mantah):sat
+ expo = sxth(mantal)
+ rnd = #0x40
+ mask.L = #0xffc0
+ }
+ {
+ cff = extractu(mant, #8, #0)
+ p2 = cmp.gt(expo, #126)
+ p3 = cmp.ge(expo, #-126)
+ mask.H = #0x7fff
+ }
+ {
+ p1 = cmp.eq(cff,#0x40)
+ if(p1.new) rnd = #0
+ expo = add(expo, #126)
+ if(!p3) jump .Lmin
+ }
+ {
+ p0 = bitsset(mant, mask)
+ c80.L = #0x0000
+ mantb = add(mant, rnd)
+ c07f = lsr(mask, #8)
+ }
+ {
+ if(p0) expo = add(expo, #1)
+ if(p0) mant = #0
+ mantb = lsr(mantb, #7)
+ c80.H = #0x8000
+ }
+ {
+ ic = and(c80, mantah)
+ mask &= asl(expo, #23)
+ if(!p0) mant = and(mantb, c07f)
+ if(p2) jump .Lmax
+ }
+ {
+ ic += add(mask, mant)
+ jumpr r31
+ }
+.Lmax:
+ {
+ ic.L = #0xffff;
+ }
+ {
+ ic.H = #0x7f7f;
+ jumpr r31
+ }
+.Lmin:
+ {
+ ic = #0x0
+ jumpr r31
+ }
+
+/* ==================================================================== *
+fast2_QDOUBLE fast2_f2qd(int ia) {
+ lint exp;
+ lint mant;
+ fast2_QDOUBLE c;
+
+ mant = ((ia << 7) | 0x40000000)&0x7fffff80 ;
+ if (ia & 0x80000000) mant = -mant;
+ exp = ((ia >> 23) & 0xFFLL) - 126;
+ c = (mant<<32) | Q6_R_zxth_R(exp);;
+ return(c);
+}
+ * ==================================================================== */
+ .text
+ .global fast2_f2qd_asm
+ .type fast2_f2qd_asm, @function
+fast2_f2qd_asm:
+#define ia R0
+#define mag R3
+#define mantr R1
+#define expr R0
+#define zero R2
+#define maxneg R5:4
+#define maxnegl R4
+ .falign
+ {
+ mantr = asl(ia, #7)
+ p0 = tstbit(ia, #31)
+ maxneg = #0
+ mag = add(ia,ia)
+ }
+ {
+ mantr = setbit(mantr, #30)
+ expr= extractu(ia,#8,#23)
+ maxnegl.L = #0x8001
+ p1 = cmp.eq(mag, #0)
+ }
+ {
+ mantr= extractu(mantr, #31, #0)
+ expr= add(expr, #-126)
+ zero = #0
+ if(p1) jump .Lminqd
+ }
+ {
+ expr = zxth(expr)
+ if(p0) mantr= sub(zero, mantr)
+ jumpr r31
+ }
+.Lminqd:
+ {
+ R1:0 = maxneg
+ jumpr r31
+ }
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/fastmath2_ldlib_asm.S b/contrib/compiler-rt/lib/builtins/hexagon/fastmath2_ldlib_asm.S
new file mode 100644
index 000000000000..4192555351a6
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/fastmath2_ldlib_asm.S
@@ -0,0 +1,345 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/* ==================================================================== *
+
+fast2_QLDOUBLE fast2_ldadd(fast2_QLDOUBLE a,fast2_QLDOUBLE b) {
+ fast2_QLDOUBLE c;
+ lint manta = a & MANTMASK;
+ int expa = Q6_R_sxth_R(a) ;
+ lint mantb = b & MANTMASK;
+ int expb = Q6_R_sxth_R(b) ;
+ int exp, expdiff, j, k, hi, lo, cn;
+ lint mant;
+
+ expdiff = (int) Q6_P_vabsdiffh_PP(a, b);
+ expdiff = Q6_R_sxth_R(expdiff) ;
+ if (expdiff > 63) { expdiff = 62;}
+ if (expa > expb) {
+ exp = expa + 1;
+ expa = 1;
+ expb = expdiff + 1;
+ } else {
+ exp = expb + 1;
+ expb = 1;
+ expa = expdiff + 1;
+ }
+ mant = (manta>>expa) + (mantb>>expb);
+
+ hi = (int) (mant>>32);
+ lo = (int) (mant);
+
+ k = Q6_R_normamt_R(hi);
+ if(hi == 0 || hi == -1) k = 31+Q6_R_normamt_R(lo);
+
+ mant = (mant << k);
+ cn = (mant == 0x8000000000000000LL);
+ exp = exp - k + cn;
+
+ if (mant == 0 || mant == -1) exp = 0x8001;
+ c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
+ return(c);
+ }
+ * ==================================================================== */
+ .text
+ .global fast2_ldadd_asm
+ .type fast2_ldadd_asm, @function
+fast2_ldadd_asm:
+#define manta R1:0
+#define lmanta R1:0
+#define mantb R3:2
+#define lmantb R3:2
+#define expa R4
+#define expb R5
+#define expd R6
+#define exp R8
+#define c63 R9
+#define lmant R1:0
+#define k R4
+#define ce P0
+#define zero R3:2
+ .falign
+ {
+ expa = memw(r29+#8)
+ expb = memw(r29+#24)
+ r7 = r0
+ }
+ {
+ expd = sub(expa, expb):sat
+ ce = CMP.GT(expa, expb);
+ if ( ce.new) exp = add(expa, #1)
+ if (!ce.new) exp = add(expb, #1)
+ } {
+ expd = abs(expd):sat
+ if ( ce) expa = #1
+ if (!ce) expb = #1
+ c63 = #62
+ } {
+ expd = MIN(expd, c63)
+ manta = memd(r29+#0)
+ mantb = memd(r29+#16)
+ } {
+ if (!ce) expa = add(expd, #1)
+ if ( ce) expb = add(expd, #1)
+ } {
+ lmanta = ASR(lmanta, expa)
+ lmantb = ASR(lmantb, expb)
+ } {
+ lmant = add(lmanta, lmantb)
+ zero = #0
+ } {
+ k = clb(lmant)
+ c63.L =#0x0001
+ } {
+ exp -= add(k, #-1) //exp = exp - (k-1)
+ k = add(k, #-1)
+ p0 = cmp.gt(k, #58)
+ c63.H =#0x8000
+ } {
+ if(!p0)memw(r7+#8) = exp
+ lmant = ASL(lmant, k)
+ if(p0) jump .Ldenorma
+ } {
+ memd(r7+#0) = lmant
+ jumpr r31
+ }
+.Ldenorma:
+ memd(r7+#0) = zero
+ {
+ memw(r7+#8) = c63
+ jumpr r31
+ }
+/* =================================================================== *
+ fast2_QLDOUBLE fast2_ldsub(fast2_QLDOUBLE a,fast2_QLDOUBLE b) {
+ fast2_QLDOUBLE c;
+ lint manta = a & MANTMASK;
+ int expa = Q6_R_sxth_R(a) ;
+ lint mantb = b & MANTMASK;
+ int expb = Q6_R_sxth_R(b) ;
+ int exp, expdiff, j, k;
+ lint mant;
+
+ expdiff = (int) Q6_P_vabsdiffh_PP(a, b);
+ expdiff = Q6_R_sxth_R(expdiff) ;
+ if (expdiff > 63) { expdiff = 62;}
+ if (expa > expb) {
+ exp = expa + 1;
+ expa = 1;
+ expb = expdiff + 1;
+ } else {
+ exp = expb + 1;
+ expb = 1;
+ expa = expdiff + 1;
+ }
+ mant = (manta>>expa) - (mantb>>expb);
+ k = Q6_R_clb_P(mant)-1;
+ mant = (mant << k);
+ exp = exp - k;
+ if (mant == 0 || mant == -1) exp = 0x8001;
+ c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
+ return(c);
+ }
+ * ==================================================================== */
+ .text
+ .global fast2_ldsub_asm
+ .type fast2_ldsub_asm, @function
+fast2_ldsub_asm:
+#define manta R1:0
+#define lmanta R1:0
+#define mantb R3:2
+#define lmantb R3:2
+#define expa R4
+#define expb R5
+#define expd R6
+#define exp R8
+#define c63 R9
+#define lmant R1:0
+#define k R4
+#define ce P0
+#define zero R3:2
+ .falign
+ {
+ expa = memw(r29+#8)
+ expb = memw(r29+#24)
+ r7 = r0
+ }
+ {
+ expd = sub(expa, expb):sat
+ ce = CMP.GT(expa, expb);
+ if ( ce.new) exp = add(expa, #1)
+ if (!ce.new) exp = add(expb, #1)
+ } {
+ expd = abs(expd):sat
+ if ( ce) expa = #1
+ if (!ce) expb = #1
+ c63 = #62
+ } {
+ expd = min(expd, c63)
+ manta = memd(r29+#0)
+ mantb = memd(r29+#16)
+ } {
+ if (!ce) expa = add(expd, #1)
+ if ( ce) expb = add(expd, #1)
+ } {
+ lmanta = ASR(lmanta, expa)
+ lmantb = ASR(lmantb, expb)
+ } {
+ lmant = sub(lmanta, lmantb)
+ zero = #0
+ } {
+ k = clb(lmant)
+ c63.L =#0x0001
+ } {
+ exp -= add(k, #-1) //exp = exp - (k+1)
+ k = add(k, #-1)
+ p0 = cmp.gt(k, #58)
+ c63.H =#0x8000
+ } {
+ if(!p0)memw(r7+#8) = exp
+ lmant = asl(lmant, k)
+ if(p0) jump .Ldenorma_s
+ } {
+ memd(r7+#0) = lmant
+ jumpr r31
+ }
+.Ldenorma_s:
+ memd(r7+#0) = zero
+ {
+ memw(r7+#8) = c63
+ jumpr r31
+ }
+
+/* ==================================================================== *
+ fast2_QLDOUBLE fast2_ldmpy(fast2_QLDOUBLE a,fast2_QLDOUBLE b) {
+ fast2_QLDOUBLE c;
+ lint manta = a & MANTMASK;
+ int expa = Q6_R_sxth_R(a) ;
+ lint mantb = b & MANTMASK;
+ int expb = Q6_R_sxth_R(b) ;
+ int exp, k;
+ lint mant;
+ int hia, hib, hi, lo;
+ unsigned int loa, lob;
+
+ hia = (int)(a >> 32);
+ loa = Q6_R_extractu_RII((int)manta, 31, 1);
+ hib = (int)(b >> 32);
+ lob = Q6_R_extractu_RII((int)mantb, 31, 1);
+
+ mant = Q6_P_mpy_RR(hia, lob);
+ mant = Q6_P_mpyacc_RR(mant,hib, loa);
+ mant = (mant >> 30) + (Q6_P_mpy_RR(hia, hib)<<1);
+
+ hi = (int) (mant>>32);
+
+ k = Q6_R_normamt_R(hi);
+ mant = mant << k;
+ exp = expa + expb - k;
+ if (mant == 0 || mant == -1) exp = 0x8001;
+ c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
+ return(c);
+ }
+ * ==================================================================== */
+ .text
+ .global fast2_ldmpy_asm
+ .type fast2_ldmpy_asm, @function
+fast2_ldmpy_asm:
+
+#define mantxl_ R9
+#define mantxl R14
+#define mantxh R15
+#define mantx R15:14
+#define mantbl R2
+#define mantbl_ R8
+#define mantbh R3
+#define mantb R3:2
+#define expa R4
+#define expb R5
+#define c8001 R8
+#define mantd R7:6
+#define lmantc R11:10
+#define kp R9
+#define min R13:12
+#define minh R13
+#define max R13:12
+#define maxh R13
+#define ret R0
+
+ .falign
+ {
+ mantx = memd(r29+#0)
+ mantb = memd(r29+#16)
+ min = #0
+ }
+ {
+ mantbl_= extractu(mantbl, #31, #1)
+ mantxl_= extractu(mantxl, #31, #1)
+ minh.H = #0x8000
+ }
+ {
+ lmantc = mpy(mantxh, mantbh)
+ mantd = mpy(mantxh, mantbl_)
+ expa = memw(r29+#8)
+ expb = memw(r29+#24)
+ }
+ {
+ lmantc = add(lmantc, lmantc)
+ mantd += mpy(mantbh, mantxl_)
+ }
+ {
+ mantd = asr(mantd, #30)
+ c8001.L = #0x0001
+ p1 = cmp.eq(mantx, mantb)
+ }
+ {
+ mantd = add(mantd, lmantc)
+ expa= add(expa, expb)
+ p2 = cmp.eq(mantb, min)
+ }
+ {
+ kp = clb(mantd)
+ c8001.H = #0x8000
+ p1 = and(p1, p2)
+ }
+ {
+ expa-= add(kp, #-1)
+ kp = add(kp, #-1)
+ if(p1) jump .Lsat
+ }
+ {
+ mantd = asl(mantd, kp)
+ memw(ret+#8) = expa
+ p0 = cmp.gt(kp, #58)
+ if(p0.new) jump:NT .Ldenorm //rarely happens
+ }
+ {
+ memd(ret+#0) = mantd
+ jumpr r31
+ }
+.Lsat:
+ {
+ max = #0
+ expa+= add(kp, #1)
+ }
+ {
+ maxh.H = #0x4000
+ memw(ret+#8) = expa
+ }
+ {
+ memd(ret+#0) = max
+ jumpr r31
+ }
+.Ldenorm:
+ {
+ memw(ret+#8) = c8001
+ mantx = #0
+ }
+ {
+ memd(ret+#0) = mantx
+ jumpr r31
+ }
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/fastmath_dlib_asm.S b/contrib/compiler-rt/lib/builtins/hexagon/fastmath_dlib_asm.S
new file mode 100644
index 000000000000..215936b783ca
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/fastmath_dlib_asm.S
@@ -0,0 +1,400 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/* ==================================================================== */
+/* FUNCTIONS Optimized double floating point operators */
+/* ==================================================================== */
+/* c = dadd_asm(a, b) */
+/* ====================================================================
+
+QDOUBLE dadd(QDOUBLE a,QDOUBLE b) {
+ QDOUBLE c;
+ lint manta = a & MANTMASK;
+ int expa = HEXAGON_R_sxth_R(a) ;
+ lint mantb = b & MANTMASK;
+ int expb = HEXAGON_R_sxth_R(b) ;
+ int exp, expdiff, j, k, hi, lo, cn;
+ lint mant;
+
+ expdiff = (int) HEXAGON_P_vabsdiffh_PP(a, b);
+ expdiff = HEXAGON_R_sxth_R(expdiff) ;
+ if (expdiff > 63) { expdiff = 62;}
+ if (expa > expb) {
+ exp = expa + 1;
+ expa = 1;
+ expb = expdiff + 1;
+ } else {
+ exp = expb + 1;
+ expb = 1;
+ expa = expdiff + 1;
+ }
+ mant = (manta>>expa) + (mantb>>expb);
+
+ hi = (int) (mant>>32);
+ lo = (int) (mant);
+
+ k = HEXAGON_R_normamt_R(hi);
+ if(hi == 0 || hi == -1) k = 31+HEXAGON_R_normamt_R(lo);
+
+ mant = (mant << k);
+ cn = (mant == 0x8000000000000000LL);
+ exp = exp - k + cn;
+
+ if (mant == 0 || mant == -1) exp = 0x8001;
+ c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
+ return(c);
+ }
+ * ==================================================================== */
+ .text
+ .global dadd_asm
+ .type dadd_asm, @function
+dadd_asm:
+
+#define manta R0
+#define mantexpa R1:0
+#define lmanta R1:0
+#define mantb R2
+#define mantexpb R3:2
+#define lmantb R3:2
+#define expa R4
+#define expb R5
+#define mantexpd R7:6
+#define expd R6
+#define exp R8
+#define c63 R9
+#define lmant R1:0
+#define manth R1
+#define mantl R0
+#define zero R7:6
+#define zerol R6
+#define minus R3:2
+#define minusl R2
+#define maxneg R9
+#define minmin R11:10 // exactly 0x800000000000000000LL
+#define minminh R11
+#define k R4
+#define kl R5
+#define ce P0
+ .falign
+ {
+ mantexpd = VABSDIFFH(mantexpa, mantexpb) //represented as 0x08001LL
+ c63 = #62
+ expa = SXTH(manta)
+ expb = SXTH(mantb)
+ } {
+ expd = SXTH(expd)
+ ce = CMP.GT(expa, expb);
+ if ( ce.new) exp = add(expa, #1)
+ if (!ce.new) exp = add(expb, #1)
+ } {
+ if ( ce) expa = #1
+ if (!ce) expb = #1
+ manta.L = #0
+ expd = MIN(expd, c63)
+ } {
+ if (!ce) expa = add(expd, #1)
+ if ( ce) expb = add(expd, #1)
+ mantb.L = #0
+ zero = #0
+ } {
+ lmanta = ASR(lmanta, expa)
+ lmantb = ASR(lmantb, expb)
+ minmin = #0
+ } {
+ lmant = add(lmanta, lmantb)
+ minus = #-1
+ minminh.H = #0x8000
+ } {
+ k = NORMAMT(manth)
+ kl = NORMAMT(mantl)
+ p0 = cmp.eq(manth, zerol)
+ p1 = cmp.eq(manth, minusl)
+ } {
+ p0 = OR(p0, p1)
+ if(p0.new) k = add(kl, #31)
+ maxneg.H = #0
+ } {
+ mantexpa = ASL(lmant, k)
+ exp = SUB(exp, k)
+ maxneg.L = #0x8001
+ } {
+ p0 = cmp.eq(mantexpa, zero)
+ p1 = cmp.eq(mantexpa, minus)
+ manta.L = #0
+ exp = ZXTH(exp)
+ } {
+ p2 = cmp.eq(mantexpa, minmin) //is result 0x80....0
+ if(p2.new) exp = add(exp, #1)
+ }
+#if (__HEXAGON_ARCH__ == 60)
+ {
+ p0 = OR(p0, p1)
+ if( p0.new) manta = OR(manta,maxneg)
+ if(!p0.new) manta = OR(manta,exp)
+ }
+ jumpr r31
+#else
+ {
+ p0 = OR(p0, p1)
+ if( p0.new) manta = OR(manta,maxneg)
+ if(!p0.new) manta = OR(manta,exp)
+ jumpr r31
+ }
+#endif
+/* =================================================================== *
+ QDOUBLE dsub(QDOUBLE a,QDOUBLE b) {
+ QDOUBLE c;
+ lint manta = a & MANTMASK;
+ int expa = HEXAGON_R_sxth_R(a) ;
+ lint mantb = b & MANTMASK;
+ int expb = HEXAGON_R_sxth_R(b) ;
+ int exp, expdiff, j, k, hi, lo, cn;
+ lint mant;
+
+ expdiff = (int) HEXAGON_P_vabsdiffh_PP(a, b);
+ expdiff = HEXAGON_R_sxth_R(expdiff) ;
+ if (expdiff > 63) { expdiff = 62;}
+ if (expa > expb) {
+ exp = expa + 1;
+ expa = 1;
+ expb = expdiff + 1;
+ } else {
+ exp = expb + 1;
+ expb = 1;
+ expa = expdiff + 1;
+ }
+ mant = (manta>>expa) - (mantb>>expb);
+
+ hi = (int) (mant>>32);
+ lo = (int) (mant);
+
+ k = HEXAGON_R_normamt_R(hi);
+ if(hi == 0 || hi == -1) k = 31+HEXAGON_R_normamt_R(lo);
+
+ mant = (mant << k);
+ cn = (mant == 0x8000000000000000LL);
+ exp = exp - k + cn;
+
+ if (mant == 0 || mant == -1) exp = 0x8001;
+ c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
+ return(c);
+ }
+ * ==================================================================== */
+ .text
+ .global dsub_asm
+ .type dsub_asm, @function
+dsub_asm:
+
+#define manta R0
+#define mantexpa R1:0
+#define lmanta R1:0
+#define mantb R2
+#define mantexpb R3:2
+#define lmantb R3:2
+#define expa R4
+#define expb R5
+#define mantexpd R7:6
+#define expd R6
+#define exp R8
+#define c63 R9
+#define lmant R1:0
+#define manth R1
+#define mantl R0
+#define zero R7:6
+#define zerol R6
+#define minus R3:2
+#define minusl R2
+#define maxneg R9
+#define minmin R11:10 // exactly 0x800000000000000000LL
+#define minminh R11
+#define k R4
+#define kl R5
+#define ce P0
+ .falign
+ {
+ mantexpd = VABSDIFFH(mantexpa, mantexpb) //represented as 0x08001LL
+ c63 = #62
+ expa = SXTH(manta)
+ expb = SXTH(mantb)
+ } {
+ expd = SXTH(expd)
+ ce = CMP.GT(expa, expb);
+ if ( ce.new) exp = add(expa, #1)
+ if (!ce.new) exp = add(expb, #1)
+ } {
+ if ( ce) expa = #1
+ if (!ce) expb = #1
+ manta.L = #0
+ expd = MIN(expd, c63)
+ } {
+ if (!ce) expa = add(expd, #1)
+ if ( ce) expb = add(expd, #1)
+ mantb.L = #0
+ zero = #0
+ } {
+ lmanta = ASR(lmanta, expa)
+ lmantb = ASR(lmantb, expb)
+ minmin = #0
+ } {
+ lmant = sub(lmanta, lmantb)
+ minus = #-1
+ minminh.H = #0x8000
+ } {
+ k = NORMAMT(manth)
+ kl = NORMAMT(mantl)
+ p0 = cmp.eq(manth, zerol)
+ p1 = cmp.eq(manth, minusl)
+ } {
+ p0 = OR(p0, p1)
+ if(p0.new) k = add(kl, #31)
+ maxneg.H = #0
+ } {
+ mantexpa = ASL(lmant, k)
+ exp = SUB(exp, k)
+ maxneg.L = #0x8001
+ } {
+ p0 = cmp.eq(mantexpa, zero)
+ p1 = cmp.eq(mantexpa, minus)
+ manta.L = #0
+ exp = ZXTH(exp)
+ } {
+ p2 = cmp.eq(mantexpa, minmin) //is result 0x80....0
+ if(p2.new) exp = add(exp, #1)
+ }
+#if (__HEXAGON_ARCH__ == 60)
+ {
+ p0 = OR(p0, p1)
+ if( p0.new) manta = OR(manta,maxneg)
+ if(!p0.new) manta = OR(manta,exp)
+ }
+ jumpr r31
+#else
+ {
+ p0 = OR(p0, p1)
+ if( p0.new) manta = OR(manta,maxneg)
+ if(!p0.new) manta = OR(manta,exp)
+ jumpr r31
+ }
+#endif
+/* ==================================================================== *
+ QDOUBLE dmpy(QDOUBLE a,QDOUBLE b) {
+ QDOUBLE c;
+ lint manta = a & MANTMASK;
+ int expa = HEXAGON_R_sxth_R(a) ;
+ lint mantb = b & MANTMASK;
+ int expb = HEXAGON_R_sxth_R(b) ;
+ int exp, k;
+ lint mant;
+ int hia, hib, hi, lo;
+ unsigned int loa, lob;
+
+ hia = (int)(a >> 32);
+ loa = HEXAGON_R_extractu_RII((int)manta, 31, 1);
+ hib = (int)(b >> 32);
+ lob = HEXAGON_R_extractu_RII((int)mantb, 31, 1);
+
+ mant = HEXAGON_P_mpy_RR(hia, lob);
+ mant = HEXAGON_P_mpyacc_RR(mant,hib, loa);
+ mant = (mant >> 30) + (HEXAGON_P_mpy_RR(hia, hib)<<1);
+
+ hi = (int) (mant>>32);
+ lo = (int) (mant);
+
+ k = HEXAGON_R_normamt_R(hi);
+ if(hi == 0 || hi == -1) k = 31+HEXAGON_R_normamt_R(lo);
+ mant = mant << k;
+ exp = expa + expb - k;
+ if (mant == 0 || mant == -1) exp = 0x8001;
+ c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
+ return(c);
+ }
+ * ==================================================================== */
+ .text
+ .global dmpy_asm
+ .type dmpy_asm, @function
+dmpy_asm:
+
+#define mantal R0
+#define mantah R1
+#define mantexpa R1:0
+#define mantbl R2
+#define mantbh R3
+#define mantexpb R3:2
+#define expa R4
+#define expb R5
+#define mantexpd R7:6
+#define exp R8
+#define lmantc R11:10
+#define mantch R11
+#define mantcl R10
+#define zero0 R7:6
+#define zero0l R6
+#define minus1 R3:2
+#define minus1l R2
+#define maxneg R9
+#define k R4
+#define kl R5
+
+ .falign
+ {
+ mantbl = lsr(mantbl, #16)
+ mantal = lsr(mantal, #16)
+ expa = sxth(mantal)
+ expb = sxth(mantbl)
+ }
+ {
+ lmantc = mpy(mantah, mantbh)
+ mantexpd = mpy(mantah, mantbl)
+ }
+ {
+ lmantc = add(lmantc, lmantc) //<<1
+ mantexpd+= mpy(mantbh, mantal)
+ }
+ {
+ lmantc += asr(mantexpd, #15)
+ exp = add(expa, expb)
+ zero0 = #0
+ minus1 = #-1
+ }
+ {
+ k = normamt(mantch)
+ kl = normamt(mantcl)
+ p0 = cmp.eq(mantch, zero0l)
+ p1 = cmp.eq(mantch, minus1l)
+ }
+ {
+ p0 = or(p0, p1)
+ if(p0.new) k = add(kl, #31)
+ maxneg.H = #0
+ }
+ {
+ mantexpa = asl(lmantc, k)
+ exp = sub(exp, k)
+ maxneg.L = #0x8001
+ }
+ {
+ p0 = cmp.eq(mantexpa, zero0)
+ p1 = cmp.eq(mantexpa, minus1)
+ mantal.L = #0
+ exp = zxth(exp)
+ }
+#if (__HEXAGON_ARCH__ == 60)
+ {
+ p0 = or(p0, p1)
+ if( p0.new) mantal = or(mantal,maxneg)
+ if(!p0.new) mantal = or(mantal,exp)
+ }
+ jumpr r31
+#else
+ {
+ p0 = or(p0, p1)
+ if( p0.new) mantal = or(mantal,maxneg)
+ if(!p0.new) mantal = or(mantal,exp)
+ jumpr r31
+ }
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/fma_opt.S b/contrib/compiler-rt/lib/builtins/hexagon/fma_opt.S
new file mode 100644
index 000000000000..12378f0da04e
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/fma_opt.S
@@ -0,0 +1,31 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+.macro FUNCTION_BEGIN name
+.text
+.p2align 5
+.globl \name
+.type \name, @function
+\name:
+.endm
+
+.macro FUNCTION_END name
+.size \name, . - \name
+.endm
+
+FUNCTION_BEGIN fmaf
+ r2 += sfmpy(r0, r1)
+ {
+ r0 = r2
+ jumpr r31
+ }
+FUNCTION_END fmaf
+
+ .globl fmal
+ .set fmal, fma
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/fmax_opt.S b/contrib/compiler-rt/lib/builtins/hexagon/fmax_opt.S
new file mode 100644
index 000000000000..f3a218c9769b
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/fmax_opt.S
@@ -0,0 +1,30 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+.macro FUNCTION_BEGIN name
+.text
+.p2align 5
+.globl \name
+.type \name, @function
+\name:
+.endm
+
+.macro FUNCTION_END name
+.size \name, . - \name
+.endm
+
+FUNCTION_BEGIN fmaxf
+ {
+ r0 = sfmax(r0, r1)
+ jumpr r31
+ }
+FUNCTION_END fmaxf
+
+ .globl fmaxl
+ .set fmaxl, fmax
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/fmin_opt.S b/contrib/compiler-rt/lib/builtins/hexagon/fmin_opt.S
new file mode 100644
index 000000000000..ef9b0ff854a2
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/fmin_opt.S
@@ -0,0 +1,30 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+.macro FUNCTION_BEGIN name
+.text
+.p2align 5
+.globl \name
+.type \name, @function
+\name:
+.endm
+
+.macro FUNCTION_END name
+.size \name, . - \name
+.endm
+
+FUNCTION_BEGIN fminf
+ {
+ r0 = sfmin(r0, r1)
+ jumpr r31
+ }
+FUNCTION_END fminf
+
+ .globl fminl
+ .set fminl, fmin
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/memcpy_forward_vp4cp4n2.S b/contrib/compiler-rt/lib/builtins/hexagon/memcpy_forward_vp4cp4n2.S
new file mode 100644
index 000000000000..fbe09086cd33
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/memcpy_forward_vp4cp4n2.S
@@ -0,0 +1,125 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// An optimized version of a memcpy which is equivalent to the following loop:
+//
+// volatile unsigned *dest;
+// unsigned *src;
+//
+// for (i = 0; i < num_words; ++i)
+// *dest++ = *src++;
+//
+// The corresponding C prototype for this function would be
+// void hexagon_memcpy_forward_vp4cp4n2(volatile unsigned *dest,
+// const unsigned *src,
+// unsigned num_words);
+//
+// *** Both dest and src must be aligned to 32-bit boundaries. ***
+// The code does not perform any runtime checks for this, and will fail
+// in bad ways if this requirement is not met.
+//
+// The "forward" in the name refers to the fact that the function copies
+// the words going forward in memory. It is incorrect to use this function
+// for cases where the original code copied words in any other order.
+//
+// *** This function is only for the use by the compiler. ***
+// The only indended use is for the LLVM compiler to generate calls to
+// this function, when a mem-copy loop, like the one above, is detected.
+
+ .text
+
+// Inputs:
+// r0: dest
+// r1: src
+// r2: num_words
+
+ .globl hexagon_memcpy_forward_vp4cp4n2
+ .balign 32
+ .type hexagon_memcpy_forward_vp4cp4n2,@function
+hexagon_memcpy_forward_vp4cp4n2:
+
+ // Compute r3 to be the number of words remaining in the current page.
+ // At the same time, compute r4 to be the number of 32-byte blocks
+ // remaining in the page (for prefetch).
+ {
+ r3 = sub(##4096, r1)
+ r5 = lsr(r2, #3)
+ }
+ {
+ // The word count before end-of-page is in the 12 lowest bits of r3.
+ // (If the address in r1 was already page-aligned, the bits are 0.)
+ r3 = extractu(r3, #10, #2)
+ r4 = extractu(r3, #7, #5)
+ }
+ {
+ r3 = minu(r2, r3)
+ r4 = minu(r5, r4)
+ }
+ {
+ r4 = or(r4, ##2105344) // 2105344 = 0x202000
+ p0 = cmp.eq(r3, #0)
+ if (p0.new) jump:nt .Lskipprolog
+ }
+ l2fetch(r1, r4)
+ {
+ loop0(.Lprolog, r3)
+ r2 = sub(r2, r3) // r2 = number of words left after the prolog.
+ }
+ .falign
+.Lprolog:
+ {
+ r4 = memw(r1++#4)
+ memw(r0++#4) = r4.new
+ } :endloop0
+.Lskipprolog:
+ {
+ // Let r3 = number of whole pages left (page = 1024 words).
+ r3 = lsr(r2, #10)
+ if (cmp.eq(r3.new, #0)) jump:nt .Lskipmain
+ }
+ {
+ loop1(.Lout, r3)
+ r2 = extractu(r2, #10, #0) // r2 = r2 & 1023
+ r3 = ##2105472 // r3 = 0x202080 (prefetch info)
+ }
+ // Iterate over pages.
+ .falign
+.Lout:
+ // Prefetch each individual page.
+ l2fetch(r1, r3)
+ loop0(.Lpage, #512)
+ .falign
+.Lpage:
+ r5:4 = memd(r1++#8)
+ {
+ memw(r0++#8) = r4
+ memw(r0+#4) = r5
+ } :endloop0:endloop1
+.Lskipmain:
+ {
+ r3 = ##2105344 // r3 = 0x202000 (prefetch info)
+ r4 = lsr(r2, #3) // r4 = number of 32-byte blocks remaining.
+ p0 = cmp.eq(r2, #0)
+ if (p0.new) jumpr:nt r31
+ }
+ {
+ r3 = or(r3, r4)
+ loop0(.Lepilog, r2)
+ }
+ l2fetch(r1, r3)
+ .falign
+.Lepilog:
+ {
+ r4 = memw(r1++#4)
+ memw(r0++#4) = r4.new
+ } :endloop0
+
+ jumpr r31
+
+.size hexagon_memcpy_forward_vp4cp4n2, . - hexagon_memcpy_forward_vp4cp4n2
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/memcpy_likely_aligned.S b/contrib/compiler-rt/lib/builtins/hexagon/memcpy_likely_aligned.S
new file mode 100644
index 000000000000..bbc85c22db08
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/memcpy_likely_aligned.S
@@ -0,0 +1,64 @@
+//===------------------------- memcopy routines ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+ .macro FUNCTION_BEGIN name
+ .text
+ .p2align 5
+ .globl \name
+ .type \name, @function
+\name:
+ .endm
+
+ .macro FUNCTION_END name
+ .size \name, . - \name
+ .endm
+
+FUNCTION_BEGIN __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes
+ {
+ p0 = bitsclr(r1,#7)
+ p0 = bitsclr(r0,#7)
+ if (p0.new) r5:4 = memd(r1)
+ r3 = #-3
+ }
+ {
+ if (!p0) jump .Lmemcpy_call
+ if (p0) memd(r0++#8) = r5:4
+ if (p0) r5:4 = memd(r1+#8)
+ r3 += lsr(r2,#3)
+ }
+ {
+ memd(r0++#8) = r5:4
+ r5:4 = memd(r1+#16)
+ r1 = add(r1,#24)
+ loop0(1f,r3)
+ }
+ .falign
+1:
+ {
+ memd(r0++#8) = r5:4
+ r5:4 = memd(r1++#8)
+ }:endloop0
+ {
+ memd(r0) = r5:4
+ r0 -= add(r2,#-8)
+ jumpr r31
+ }
+FUNCTION_END __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes
+
+.Lmemcpy_call:
+#ifdef __PIC__
+ jump memcpy@PLT
+#else
+ jump memcpy
+#endif
+
+ .globl __qdsp_memcpy_likely_aligned_min32bytes_mult8bytes
+ .set __qdsp_memcpy_likely_aligned_min32bytes_mult8bytes, \
+ __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/moddi3.S b/contrib/compiler-rt/lib/builtins/hexagon/moddi3.S
new file mode 100644
index 000000000000..12a0595fe465
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/moddi3.S
@@ -0,0 +1,83 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+ .macro FUNCTION_BEGIN name
+ .text
+ .p2align 5
+ .globl \name
+ .type \name, @function
+\name:
+ .endm
+
+ .macro FUNCTION_END name
+ .size \name, . - \name
+ .endm
+
+
+FUNCTION_BEGIN __hexagon_moddi3
+ {
+ p3 = tstbit(r1,#31)
+ }
+ {
+ r1:0 = abs(r1:0)
+ r3:2 = abs(r3:2)
+ }
+ {
+ r6 = cl0(r1:0) // count leading 0's of dividend (numerator)
+ r7 = cl0(r3:2) // count leading 0's of divisor (denominator)
+ r5:4 = r3:2 // divisor moved into working registers
+ r3:2 = r1:0 // dividend is the initial remainder, r3:2 contains remainder
+ }
+ {
+ r10 = sub(r7,r6) // left shift count for bit & divisor
+ r1:0 = #0 // initialize quotient to 0
+ r15:14 = #1 // initialize bit to 1
+ }
+ {
+ r11 = add(r10,#1) // loop count is 1 more than shift count
+ r13:12 = lsl(r5:4,r10) // shift divisor msb into same bit position as dividend msb
+ r15:14 = lsl(r15:14,r10) // shift the bit left by same amount as divisor
+ }
+ {
+ p0 = cmp.gtu(r5:4,r3:2) // check if divisor > dividend
+ loop0(1f,r11) // register loop
+ }
+ {
+ if (p0) jump .hexagon_moddi3_return // if divisor > dividend, we're done, so return
+ }
+ .falign
+1:
+ {
+ p0 = cmp.gtu(r13:12,r3:2) // set predicate reg if shifted divisor > current remainder
+ }
+ {
+ r7:6 = sub(r3:2, r13:12) // subtract shifted divisor from current remainder
+ r9:8 = add(r1:0, r15:14) // save current quotient to temp (r9:8)
+ }
+ {
+ r1:0 = vmux(p0, r1:0, r9:8) // choose either current quotient or new quotient (r9:8)
+ r3:2 = vmux(p0, r3:2, r7:6) // choose either current remainder or new remainder (r7:6)
+ }
+ {
+ r15:14 = lsr(r15:14, #1) // shift bit right by 1 for next iteration
+ r13:12 = lsr(r13:12, #1) // shift "shifted divisor" right by 1 for next iteration
+ }:endloop0
+
+.hexagon_moddi3_return:
+ {
+ r1:0 = neg(r3:2)
+ }
+ {
+ r1:0 = vmux(p3,r1:0,r3:2)
+ jumpr r31
+ }
+FUNCTION_END __hexagon_moddi3
+
+ .globl __qdsp_moddi3
+ .set __qdsp_moddi3, __hexagon_moddi3
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/modsi3.S b/contrib/compiler-rt/lib/builtins/hexagon/modsi3.S
new file mode 100644
index 000000000000..5afda9e2978b
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/modsi3.S
@@ -0,0 +1,66 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+
+ .macro FUNCTION_BEGIN name
+ .text
+ .p2align 5
+ .globl \name
+ .type \name, @function
+\name:
+ .endm
+
+ .macro FUNCTION_END name
+ .size \name, . - \name
+ .endm
+
+
+FUNCTION_BEGIN __hexagon_modsi3
+ {
+ p2 = cmp.ge(r0,#0)
+ r2 = abs(r0)
+ r1 = abs(r1)
+ }
+ {
+ r3 = cl0(r2)
+ r4 = cl0(r1)
+ p0 = cmp.gtu(r1,r2)
+ }
+ {
+ r3 = sub(r4,r3)
+ if (p0) jumpr r31
+ }
+ {
+ p1 = cmp.eq(r3,#0)
+ loop0(1f,r3)
+ r0 = r2
+ r2 = lsl(r1,r3)
+ }
+ .falign
+1:
+ {
+ p0 = cmp.gtu(r2,r0)
+ if (!p0.new) r0 = sub(r0,r2)
+ r2 = lsr(r2,#1)
+ if (p1) r1 = #0
+ }:endloop0
+ {
+ p0 = cmp.gtu(r2,r0)
+ if (!p0.new) r0 = sub(r0,r1)
+ if (p2) jumpr r31
+ }
+ {
+ r0 = neg(r0)
+ jumpr r31
+ }
+FUNCTION_END __hexagon_modsi3
+
+ .globl __qdsp_modsi3
+ .set __qdsp_modsi3, __hexagon_modsi3
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/sfdiv_opt.S b/contrib/compiler-rt/lib/builtins/hexagon/sfdiv_opt.S
new file mode 100644
index 000000000000..6bdd4808c2b8
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/sfdiv_opt.S
@@ -0,0 +1,66 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+ .macro FUNCTION_BEGIN name
+ .text
+ .p2align 5
+ .globl \name
+ .type \name, @function
+\name:
+ .endm
+
+ .macro FUNCTION_END name
+ .size \name, . - \name
+ .endm
+
+#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
+#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
+#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
+
+FUNCTION_BEGIN __hexagon_divsf3
+ {
+ r2,p0 = sfrecipa(r0,r1)
+ r4 = sffixupd(r0,r1)
+ r3 = ##0x3f800000 // 1.0
+ }
+ {
+ r5 = sffixupn(r0,r1)
+ r3 -= sfmpy(r4,r2):lib // 1-(den/recip) yields error?
+ r6 = ##0x80000000
+ r7 = r3
+ }
+ {
+ r2 += sfmpy(r3,r2):lib
+ r3 = r7
+ r6 = r5
+ r0 = and(r6,r5)
+ }
+ {
+ r3 -= sfmpy(r4,r2):lib
+ r0 += sfmpy(r5,r2):lib
+ }
+ {
+ r2 += sfmpy(r3,r2):lib
+ r6 -= sfmpy(r0,r4):lib
+ }
+ {
+ r0 += sfmpy(r6,r2):lib
+ }
+ {
+ r5 -= sfmpy(r0,r4):lib
+ }
+ {
+ r0 += sfmpy(r5,r2,p0):scale
+ jumpr r31
+ }
+FUNCTION_END __hexagon_divsf3
+
+Q6_ALIAS(divsf3)
+FAST_ALIAS(divsf3)
+FAST2_ALIAS(divsf3)
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/sfsqrt_opt.S b/contrib/compiler-rt/lib/builtins/hexagon/sfsqrt_opt.S
new file mode 100644
index 000000000000..7f619002774f
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/sfsqrt_opt.S
@@ -0,0 +1,82 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+ .macro FUNCTION_BEGIN name
+ .text
+ .p2align 5
+ .globl \name
+ .type \name, @function
+\name:
+ .endm
+
+ .macro FUNCTION_END name
+ .size \name, . - \name
+ .endm
+
+#define RIN r0
+#define S r0
+#define H r1
+#define D r2
+#define E r3
+#define HALF r4
+#define R r5
+
+#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
+#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
+#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
+
+FUNCTION_BEGIN __hexagon_sqrtf
+ {
+ E,p0 = sfinvsqrta(RIN)
+ R = sffixupr(RIN)
+ HALF = ##0x3f000000 // 0.5
+ r1:0 = combine(#0,#0) // clear S/H
+ }
+ {
+ S += sfmpy(E,R):lib // S0
+ H += sfmpy(E,HALF):lib // H0
+ D = HALF
+ E = R
+ }
+ {
+ D -= sfmpy(S,H):lib // d0
+ p1 = sfclass(R,#1) // is zero?
+ //E -= sfmpy(S,S):lib // e0
+ }
+ {
+ S += sfmpy(S,D):lib // S1
+ H += sfmpy(H,D):lib // H1
+ D = HALF
+ E = R
+ }
+ {
+ D -= sfmpy(S,H):lib // d0
+ E -= sfmpy(S,S):lib // e0
+ }
+ {
+ S += sfmpy(H,E):lib // S2
+ H += sfmpy(H,D):lib // H2
+ D = HALF
+ E = R
+ }
+ {
+ //D -= sfmpy(S,H):lib // d2
+ E -= sfmpy(S,S):lib // e2
+ if (p1) r0 = or(r0,R) // sqrt(-0.0) = -0.0
+ }
+ {
+ S += sfmpy(H,E,p0):scale // S3
+ jumpr r31
+ }
+
+FUNCTION_END __hexagon_sqrtf
+
+Q6_ALIAS(sqrtf)
+FAST_ALIAS(sqrtf)
+FAST2_ALIAS(sqrtf)
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/udivdi3.S b/contrib/compiler-rt/lib/builtins/hexagon/udivdi3.S
new file mode 100644
index 000000000000..1ca326b75208
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/udivdi3.S
@@ -0,0 +1,71 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+ .macro FUNCTION_BEGIN name
+ .text
+ .p2align 5
+ .globl \name
+ .type \name, @function
+\name:
+ .endm
+
+ .macro FUNCTION_END name
+ .size \name, . - \name
+ .endm
+
+
+FUNCTION_BEGIN __hexagon_udivdi3
+ {
+ r6 = cl0(r1:0) // count leading 0's of dividend (numerator)
+ r7 = cl0(r3:2) // count leading 0's of divisor (denominator)
+ r5:4 = r3:2 // divisor moved into working registers
+ r3:2 = r1:0 // dividend is the initial remainder, r3:2 contains remainder
+ }
+ {
+ r10 = sub(r7,r6) // left shift count for bit & divisor
+ r1:0 = #0 // initialize quotient to 0
+ r15:14 = #1 // initialize bit to 1
+ }
+ {
+ r11 = add(r10,#1) // loop count is 1 more than shift count
+ r13:12 = lsl(r5:4,r10) // shift divisor msb into same bit position as dividend msb
+ r15:14 = lsl(r15:14,r10) // shift the bit left by same amount as divisor
+ }
+ {
+ p0 = cmp.gtu(r5:4,r3:2) // check if divisor > dividend
+ loop0(1f,r11) // register loop
+ }
+ {
+ if (p0) jumpr r31 // if divisor > dividend, we're done, so return
+ }
+ .falign
+1:
+ {
+ p0 = cmp.gtu(r13:12,r3:2) // set predicate reg if shifted divisor > current remainder
+ }
+ {
+ r7:6 = sub(r3:2, r13:12) // subtract shifted divisor from current remainder
+ r9:8 = add(r1:0, r15:14) // save current quotient to temp (r9:8)
+ }
+ {
+ r1:0 = vmux(p0, r1:0, r9:8) // choose either current quotient or new quotient (r9:8)
+ r3:2 = vmux(p0, r3:2, r7:6) // choose either current remainder or new remainder (r7:6)
+ }
+ {
+ r15:14 = lsr(r15:14, #1) // shift bit right by 1 for next iteration
+ r13:12 = lsr(r13:12, #1) // shift "shifted divisor" right by 1 for next iteration
+ }:endloop0
+ {
+ jumpr r31 // return
+ }
+FUNCTION_END __hexagon_udivdi3
+
+ .globl __qdsp_udivdi3
+ .set __qdsp_udivdi3, __hexagon_udivdi3
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/udivmoddi4.S b/contrib/compiler-rt/lib/builtins/hexagon/udivmoddi4.S
new file mode 100644
index 000000000000..deb5aae0924d
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/udivmoddi4.S
@@ -0,0 +1,71 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+ .macro FUNCTION_BEGIN name
+ .text
+ .p2align 5
+ .globl \name
+ .type \name, @function
+\name:
+ .endm
+
+ .macro FUNCTION_END name
+ .size \name, . - \name
+ .endm
+
+
+FUNCTION_BEGIN __hexagon_udivmoddi4
+ {
+ r6 = cl0(r1:0) // count leading 0's of dividend (numerator)
+ r7 = cl0(r3:2) // count leading 0's of divisor (denominator)
+ r5:4 = r3:2 // divisor moved into working registers
+ r3:2 = r1:0 // dividend is the initial remainder, r3:2 contains remainder
+ }
+ {
+ r10 = sub(r7,r6) // left shift count for bit & divisor
+ r1:0 = #0 // initialize quotient to 0
+ r15:14 = #1 // initialize bit to 1
+ }
+ {
+ r11 = add(r10,#1) // loop count is 1 more than shift count
+ r13:12 = lsl(r5:4,r10) // shift divisor msb into same bit position as dividend msb
+ r15:14 = lsl(r15:14,r10) // shift the bit left by same amount as divisor
+ }
+ {
+ p0 = cmp.gtu(r5:4,r3:2) // check if divisor > dividend
+ loop0(1f,r11) // register loop
+ }
+ {
+ if (p0) jumpr r31 // if divisor > dividend, we're done, so return
+ }
+ .falign
+1:
+ {
+ p0 = cmp.gtu(r13:12,r3:2) // set predicate reg if shifted divisor > current remainder
+ }
+ {
+ r7:6 = sub(r3:2, r13:12) // subtract shifted divisor from current remainder
+ r9:8 = add(r1:0, r15:14) // save current quotient to temp (r9:8)
+ }
+ {
+ r1:0 = vmux(p0, r1:0, r9:8) // choose either current quotient or new quotient (r9:8)
+ r3:2 = vmux(p0, r3:2, r7:6) // choose either current remainder or new remainder (r7:6)
+ }
+ {
+ r15:14 = lsr(r15:14, #1) // shift bit right by 1 for next iteration
+ r13:12 = lsr(r13:12, #1) // shift "shifted divisor" right by 1 for next iteration
+ }:endloop0
+ {
+ jumpr r31 // return
+ }
+FUNCTION_END __hexagon_udivmoddi4
+
+ .globl __qdsp_udivmoddi4
+ .set __qdsp_udivmoddi4, __hexagon_udivmoddi4
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/udivmodsi4.S b/contrib/compiler-rt/lib/builtins/hexagon/udivmodsi4.S
new file mode 100644
index 000000000000..25bbe7cd5925
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/udivmodsi4.S
@@ -0,0 +1,60 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+ .macro FUNCTION_BEGIN name
+ .text
+ .p2align 5
+ .globl \name
+ .type \name, @function
+\name:
+ .endm
+
+ .macro FUNCTION_END name
+ .size \name, . - \name
+ .endm
+
+
+FUNCTION_BEGIN __hexagon_udivmodsi4
+ {
+ r2 = cl0(r0)
+ r3 = cl0(r1)
+ r5:4 = combine(#1,#0)
+ p0 = cmp.gtu(r1,r0)
+ }
+ {
+ r6 = sub(r3,r2)
+ r4 = r1
+ r1:0 = combine(r0,r4)
+ if (p0) jumpr r31
+ }
+ {
+ r3:2 = vlslw(r5:4,r6)
+ loop0(1f,r6)
+ p0 = cmp.eq(r6,#0)
+ if (p0.new) r4 = #0
+ }
+ .falign
+1:
+ {
+ p0 = cmp.gtu(r2,r1)
+ if (!p0.new) r1 = sub(r1,r2)
+ if (!p0.new) r0 = add(r0,r3)
+ r3:2 = vlsrw(r3:2,#1)
+ }:endloop0
+ {
+ p0 = cmp.gtu(r2,r1)
+ if (!p0.new) r1 = sub(r1,r4)
+ if (!p0.new) r0 = add(r0,r3)
+ jumpr r31
+ }
+FUNCTION_END __hexagon_udivmodsi4
+
+ .globl __qdsp_udivmodsi4
+ .set __qdsp_udivmodsi4, __hexagon_udivmodsi4
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/udivsi3.S b/contrib/compiler-rt/lib/builtins/hexagon/udivsi3.S
new file mode 100644
index 000000000000..54f0aa409f93
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/udivsi3.S
@@ -0,0 +1,56 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+ .macro FUNCTION_BEGIN name
+ .text
+ .p2align 5
+ .globl \name
+ .type \name, @function
+\name:
+ .endm
+
+ .macro FUNCTION_END name
+ .size \name, . - \name
+ .endm
+
+
+FUNCTION_BEGIN __hexagon_udivsi3
+ {
+ r2 = cl0(r0)
+ r3 = cl0(r1)
+ r5:4 = combine(#1,#0)
+ p0 = cmp.gtu(r1,r0)
+ }
+ {
+ r6 = sub(r3,r2)
+ r4 = r1
+ r1:0 = combine(r0,r4)
+ if (p0) jumpr r31
+ }
+ {
+ r3:2 = vlslw(r5:4,r6)
+ loop0(1f,r6)
+ }
+ .falign
+1:
+ {
+ p0 = cmp.gtu(r2,r1)
+ if (!p0.new) r1 = sub(r1,r2)
+ if (!p0.new) r0 = add(r0,r3)
+ r3:2 = vlsrw(r3:2,#1)
+ }:endloop0
+ {
+ p0 = cmp.gtu(r2,r1)
+ if (!p0.new) r0 = add(r0,r3)
+ jumpr r31
+ }
+FUNCTION_END __hexagon_udivsi3
+
+ .globl __qdsp_udivsi3
+ .set __qdsp_udivsi3, __hexagon_udivsi3
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/umoddi3.S b/contrib/compiler-rt/lib/builtins/hexagon/umoddi3.S
new file mode 100644
index 000000000000..f091521414af
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/umoddi3.S
@@ -0,0 +1,74 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+ .macro FUNCTION_BEGIN name
+ .text
+ .p2align 5
+ .globl \name
+ .type \name, @function
+\name:
+ .endm
+
+ .macro FUNCTION_END name
+ .size \name, . - \name
+ .endm
+
+
+FUNCTION_BEGIN __hexagon_umoddi3
+ {
+ r6 = cl0(r1:0) // count leading 0's of dividend (numerator)
+ r7 = cl0(r3:2) // count leading 0's of divisor (denominator)
+ r5:4 = r3:2 // divisor moved into working registers
+ r3:2 = r1:0 // dividend is the initial remainder, r3:2 contains remainder
+ }
+ {
+ r10 = sub(r7,r6) // left shift count for bit & divisor
+ r1:0 = #0 // initialize quotient to 0
+ r15:14 = #1 // initialize bit to 1
+ }
+ {
+ r11 = add(r10,#1) // loop count is 1 more than shift count
+ r13:12 = lsl(r5:4,r10) // shift divisor msb into same bit position as dividend msb
+ r15:14 = lsl(r15:14,r10) // shift the bit left by same amount as divisor
+ }
+ {
+ p0 = cmp.gtu(r5:4,r3:2) // check if divisor > dividend
+ loop0(1f,r11) // register loop
+ }
+ {
+ if (p0) jump .hexagon_umoddi3_return // if divisor > dividend, we're done, so return
+ }
+ .falign
+1:
+ {
+ p0 = cmp.gtu(r13:12,r3:2) // set predicate reg if shifted divisor > current remainder
+ }
+ {
+ r7:6 = sub(r3:2, r13:12) // subtract shifted divisor from current remainder
+ r9:8 = add(r1:0, r15:14) // save current quotient to temp (r9:8)
+ }
+ {
+ r1:0 = vmux(p0, r1:0, r9:8) // choose either current quotient or new quotient (r9:8)
+ r3:2 = vmux(p0, r3:2, r7:6) // choose either current remainder or new remainder (r7:6)
+ }
+ {
+ r15:14 = lsr(r15:14, #1) // shift bit right by 1 for next iteration
+ r13:12 = lsr(r13:12, #1) // shift "shifted divisor" right by 1 for next iteration
+ }:endloop0
+
+.hexagon_umoddi3_return:
+ {
+ r1:0 = r3:2
+ jumpr r31
+ }
+FUNCTION_END __hexagon_umoddi3
+
+ .globl __qdsp_umoddi3
+ .set __qdsp_umoddi3, __hexagon_umoddi3
diff --git a/contrib/compiler-rt/lib/builtins/hexagon/umodsi3.S b/contrib/compiler-rt/lib/builtins/hexagon/umodsi3.S
new file mode 100644
index 000000000000..a8270c2030d5
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/hexagon/umodsi3.S
@@ -0,0 +1,55 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+ .macro FUNCTION_BEGIN name
+ .text
+ .p2align 5
+ .globl \name
+ .type \name, @function
+\name:
+ .endm
+
+ .macro FUNCTION_END name
+ .size \name, . - \name
+ .endm
+
+
+FUNCTION_BEGIN __hexagon_umodsi3
+ {
+ r2 = cl0(r0)
+ r3 = cl0(r1)
+ p0 = cmp.gtu(r1,r0)
+ }
+ {
+ r2 = sub(r3,r2)
+ if (p0) jumpr r31
+ }
+ {
+ loop0(1f,r2)
+ p1 = cmp.eq(r2,#0)
+ r2 = lsl(r1,r2)
+ }
+ .falign
+1:
+ {
+ p0 = cmp.gtu(r2,r0)
+ if (!p0.new) r0 = sub(r0,r2)
+ r2 = lsr(r2,#1)
+ if (p1) r1 = #0
+ }:endloop0
+ {
+ p0 = cmp.gtu(r2,r0)
+ if (!p0.new) r0 = sub(r0,r1)
+ jumpr r31
+ }
+FUNCTION_END __hexagon_umodsi3
+
+ .globl __qdsp_umodsi3
+ .set __qdsp_umodsi3, __hexagon_umodsi3
diff --git a/contrib/compiler-rt/lib/builtins/i386/ashldi3.S b/contrib/compiler-rt/lib/builtins/i386/ashldi3.S
new file mode 100644
index 000000000000..6f05dcf74443
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/i386/ashldi3.S
@@ -0,0 +1,61 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// di_int __ashldi3(di_int input, int count);
+
+// This routine has some extra memory traffic, loading the 64-bit input via two
+// 32-bit loads, then immediately storing it back to the stack via a single 64-bit
+// store. This is to avoid a write-small, read-large stall.
+// However, if callers of this routine can be safely assumed to store the argument
+// via a 64-bt store, this is unnecessary memory traffic, and should be avoided.
+// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro.
+
+#ifdef __i386__
+#ifdef __SSE2__
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__ashldi3)
+ movd 12(%esp), %xmm2 // Load count
+#ifndef TRUST_CALLERS_USE_64_BIT_STORES
+ movd 4(%esp), %xmm0
+ movd 8(%esp), %xmm1
+ punpckldq %xmm1, %xmm0 // Load input
+#else
+ movq 4(%esp), %xmm0 // Load input
+#endif
+ psllq %xmm2, %xmm0 // shift input by count
+ movd %xmm0, %eax
+ psrlq $32, %xmm0
+ movd %xmm0, %edx
+ ret
+END_COMPILERRT_FUNCTION(__ashldi3)
+
+#else // Use GPRs instead of SSE2 instructions, if they aren't available.
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__ashldi3)
+ movl 12(%esp), %ecx // Load count
+ movl 8(%esp), %edx // Load high
+ movl 4(%esp), %eax // Load low
+
+ testl $0x20, %ecx // If count >= 32
+ jnz 1f // goto 1
+ shldl %cl, %eax, %edx // left shift high by count
+ shll %cl, %eax // left shift low by count
+ ret
+
+1: movl %eax, %edx // Move low to high
+ xorl %eax, %eax // clear low
+ shll %cl, %edx // shift high by count - 32
+ ret
+END_COMPILERRT_FUNCTION(__ashldi3)
+
+#endif // __SSE2__
+#endif // __i386__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/i386/ashrdi3.S b/contrib/compiler-rt/lib/builtins/i386/ashrdi3.S
new file mode 100644
index 000000000000..206369f360aa
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/i386/ashrdi3.S
@@ -0,0 +1,72 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// di_int __ashrdi3(di_int input, int count);
+
+#ifdef __i386__
+#ifdef __SSE2__
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__ashrdi3)
+ movd 12(%esp), %xmm2 // Load count
+ movl 8(%esp), %eax
+#ifndef TRUST_CALLERS_USE_64_BIT_STORES
+ movd 4(%esp), %xmm0
+ movd 8(%esp), %xmm1
+ punpckldq %xmm1, %xmm0 // Load input
+#else
+ movq 4(%esp), %xmm0 // Load input
+#endif
+
+ psrlq %xmm2, %xmm0 // unsigned shift input by count
+
+ testl %eax, %eax // check the sign-bit of the input
+ jns 1f // early out for positive inputs
+
+ // If the input is negative, we need to construct the shifted sign bit
+ // to or into the result, as xmm does not have a signed right shift.
+ pcmpeqb %xmm1, %xmm1 // -1ULL
+ psrlq $58, %xmm1 // 0x3f
+ pandn %xmm1, %xmm2 // 63 - count
+ pcmpeqb %xmm1, %xmm1 // -1ULL
+ psubq %xmm1, %xmm2 // 64 - count
+ psllq %xmm2, %xmm1 // -1 << (64 - count) = leading sign bits
+ por %xmm1, %xmm0
+
+ // Move the result back to the general purpose registers and return
+1: movd %xmm0, %eax
+ psrlq $32, %xmm0
+ movd %xmm0, %edx
+ ret
+END_COMPILERRT_FUNCTION(__ashrdi3)
+
+#else // Use GPRs instead of SSE2 instructions, if they aren't available.
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__ashrdi3)
+ movl 12(%esp), %ecx // Load count
+ movl 8(%esp), %edx // Load high
+ movl 4(%esp), %eax // Load low
+
+ testl $0x20, %ecx // If count >= 32
+ jnz 1f // goto 1
+
+ shrdl %cl, %edx, %eax // right shift low by count
+ sarl %cl, %edx // right shift high by count
+ ret
+
+1: movl %edx, %eax // Move high to low
+ sarl $31, %edx // clear high
+ sarl %cl, %eax // shift low by count - 32
+ ret
+END_COMPILERRT_FUNCTION(__ashrdi3)
+
+#endif // __SSE2__
+#endif // __i386__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/i386/chkstk.S b/contrib/compiler-rt/lib/builtins/i386/chkstk.S
new file mode 100644
index 000000000000..b59974868f21
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/i386/chkstk.S
@@ -0,0 +1,34 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// _chkstk routine
+// This routine is windows specific
+// http://msdn.microsoft.com/en-us/library/ms648426.aspx
+
+#ifdef __i386__
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__chkstk_ms)
+ push %ecx
+ push %eax
+ cmp $0x1000,%eax
+ lea 12(%esp),%ecx
+ jb 1f
+2:
+ sub $0x1000,%ecx
+ test %ecx,(%ecx)
+ sub $0x1000,%eax
+ cmp $0x1000,%eax
+ ja 2b
+1:
+ sub %eax,%ecx
+ test %ecx,(%ecx)
+ pop %eax
+ pop %ecx
+ ret
+END_COMPILERRT_FUNCTION(__chkstk_ms)
+
+#endif // __i386__
diff --git a/contrib/compiler-rt/lib/builtins/i386/chkstk2.S b/contrib/compiler-rt/lib/builtins/i386/chkstk2.S
new file mode 100644
index 000000000000..7d65bb088928
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/i386/chkstk2.S
@@ -0,0 +1,40 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+#ifdef __i386__
+
+// _chkstk (_alloca) routine - probe stack between %esp and (%esp-%eax) in 4k increments,
+// then decrement %esp by %eax. Preserves all registers except %esp and flags.
+// This routine is windows specific
+// http://msdn.microsoft.com/en-us/library/ms648426.aspx
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(_alloca) // _chkstk and _alloca are the same function
+DEFINE_COMPILERRT_FUNCTION(__chkstk)
+ push %ecx
+ cmp $0x1000,%eax
+ lea 8(%esp),%ecx // esp before calling this routine -> ecx
+ jb 1f
+2:
+ sub $0x1000,%ecx
+ test %ecx,(%ecx)
+ sub $0x1000,%eax
+ cmp $0x1000,%eax
+ ja 2b
+1:
+ sub %eax,%ecx
+ test %ecx,(%ecx)
+
+ lea 4(%esp),%eax // load pointer to the return address into eax
+ mov %ecx,%esp // install the new top of stack pointer into esp
+ mov -4(%eax),%ecx // restore ecx
+ push (%eax) // push return address onto the stack
+ sub %esp,%eax // restore the original value in eax
+ ret
+END_COMPILERRT_FUNCTION(__chkstk)
+END_COMPILERRT_FUNCTION(_alloca)
+
+#endif // __i386__
diff --git a/contrib/compiler-rt/lib/builtins/i386/divdi3.S b/contrib/compiler-rt/lib/builtins/i386/divdi3.S
new file mode 100644
index 000000000000..2fb4bdcad90d
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/i386/divdi3.S
@@ -0,0 +1,165 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// di_int __divdi3(di_int a, di_int b);
+
+// result = a / b.
+// both inputs and the output are 64-bit signed integers.
+// This will do whatever the underlying hardware is set to do on division by zero.
+// No other exceptions are generated, as the divide cannot overflow.
+//
+// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware
+// on x86_64. The performance goal is ~40 cycles per divide, which is faster than
+// currently possible via simulation of integer divides on the x87 unit.
+//
+// Stephen Canon, December 2008
+
+#ifdef __i386__
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__divdi3)
+
+/* This is currently implemented by wrapping the unsigned divide up in an absolute
+ value, then restoring the correct sign at the end of the computation. This could
+ certainly be improved upon. */
+
+ pushl %esi
+ movl 20(%esp), %edx // high word of b
+ movl 16(%esp), %eax // low word of b
+ movl %edx, %ecx
+ sarl $31, %ecx // (b < 0) ? -1 : 0
+ xorl %ecx, %eax
+ xorl %ecx, %edx // EDX:EAX = (b < 0) ? not(b) : b
+ subl %ecx, %eax
+ sbbl %ecx, %edx // EDX:EAX = abs(b)
+ movl %edx, 20(%esp)
+ movl %eax, 16(%esp) // store abs(b) back to stack
+ movl %ecx, %esi // set aside sign of b
+
+ movl 12(%esp), %edx // high word of b
+ movl 8(%esp), %eax // low word of b
+ movl %edx, %ecx
+ sarl $31, %ecx // (a < 0) ? -1 : 0
+ xorl %ecx, %eax
+ xorl %ecx, %edx // EDX:EAX = (a < 0) ? not(a) : a
+ subl %ecx, %eax
+ sbbl %ecx, %edx // EDX:EAX = abs(a)
+ movl %edx, 12(%esp)
+ movl %eax, 8(%esp) // store abs(a) back to stack
+ xorl %ecx, %esi // sign of result = (sign of a) ^ (sign of b)
+
+ pushl %ebx
+ movl 24(%esp), %ebx // Find the index i of the leading bit in b.
+ bsrl %ebx, %ecx // If the high word of b is zero, jump to
+ jz 9f // the code to handle that special case [9].
+
+ /* High word of b is known to be non-zero on this branch */
+
+ movl 20(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b
+
+ shrl %cl, %eax // Practically, this means that bhi is given by:
+ shrl %eax //
+ notl %ecx // bhi = (high word of b) << (31 - i) |
+ shll %cl, %ebx // (low word of b) >> (1 + i)
+ orl %eax, %ebx //
+ movl 16(%esp), %edx // Load the high and low words of a, and jump
+ movl 12(%esp), %eax // to [1] if the high word is larger than bhi
+ cmpl %ebx, %edx // to avoid overflowing the upcoming divide.
+ jae 1f
+
+ /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
+
+ divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r
+
+ pushl %edi
+ notl %ecx
+ shrl %eax
+ shrl %cl, %eax // q = qs >> (1 + i)
+ movl %eax, %edi
+ mull 24(%esp) // q*blo
+ movl 16(%esp), %ebx
+ movl 20(%esp), %ecx // ECX:EBX = a
+ subl %eax, %ebx
+ sbbl %edx, %ecx // ECX:EBX = a - q*blo
+ movl 28(%esp), %eax
+ imull %edi, %eax // q*bhi
+ subl %eax, %ecx // ECX:EBX = a - q*b
+ sbbl $0, %edi // decrement q if remainder is negative
+ xorl %edx, %edx
+ movl %edi, %eax
+
+ addl %esi, %eax // Restore correct sign to result
+ adcl %esi, %edx
+ xorl %esi, %eax
+ xorl %esi, %edx
+ popl %edi // Restore callee-save registers
+ popl %ebx
+ popl %esi
+ retl // Return
+
+
+1: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
+
+ subl %ebx, %edx // subtract bhi from ahi so that divide will not
+ divl %ebx // overflow, and find q and r such that
+ //
+ // ahi:alo = (1:q)*bhi + r
+ //
+ // Note that q is a number in (31-i).(1+i)
+ // fix point.
+
+ pushl %edi
+ notl %ecx
+ shrl %eax
+ orl $0x80000000, %eax
+ shrl %cl, %eax // q = (1:qs) >> (1 + i)
+ movl %eax, %edi
+ mull 24(%esp) // q*blo
+ movl 16(%esp), %ebx
+ movl 20(%esp), %ecx // ECX:EBX = a
+ subl %eax, %ebx
+ sbbl %edx, %ecx // ECX:EBX = a - q*blo
+ movl 28(%esp), %eax
+ imull %edi, %eax // q*bhi
+ subl %eax, %ecx // ECX:EBX = a - q*b
+ sbbl $0, %edi // decrement q if remainder is negative
+ xorl %edx, %edx
+ movl %edi, %eax
+
+ addl %esi, %eax // Restore correct sign to result
+ adcl %esi, %edx
+ xorl %esi, %eax
+ xorl %esi, %edx
+ popl %edi // Restore callee-save registers
+ popl %ebx
+ popl %esi
+ retl // Return
+
+
+9: /* High word of b is zero on this branch */
+
+ movl 16(%esp), %eax // Find qhi and rhi such that
+ movl 20(%esp), %ecx //
+ xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b
+ divl %ecx //
+ movl %eax, %ebx //
+ movl 12(%esp), %eax // Find qlo such that
+ divl %ecx //
+ movl %ebx, %edx // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b
+
+ addl %esi, %eax // Restore correct sign to result
+ adcl %esi, %edx
+ xorl %esi, %eax
+ xorl %esi, %edx
+ popl %ebx // Restore callee-save registers
+ popl %esi
+ retl // Return
+END_COMPILERRT_FUNCTION(__divdi3)
+
+#endif // __i386__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/i386/floatdidf.S b/contrib/compiler-rt/lib/builtins/i386/floatdidf.S
new file mode 100644
index 000000000000..d75dfe62d6a7
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/i386/floatdidf.S
@@ -0,0 +1,42 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// double __floatundidf(du_int a);
+
+#ifdef __i386__
+
+CONST_SECTION
+
+ .balign 16
+twop52:
+ .quad 0x4330000000000000
+
+ .balign 16
+twop32:
+ .quad 0x41f0000000000000
+
+#define REL_ADDR(_a) (_a)-0b(%eax)
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__floatdidf)
+ cvtsi2sd 8(%esp), %xmm1
+ movss 4(%esp), %xmm0 // low 32 bits of a
+ calll 0f
+0: popl %eax
+ mulsd REL_ADDR(twop32), %xmm1 // a_hi as a double (without rounding)
+ movsd REL_ADDR(twop52), %xmm2 // 0x1.0p52
+ subsd %xmm2, %xmm1 // a_hi - 0x1p52 (no rounding occurs)
+ orpd %xmm2, %xmm0 // 0x1p52 + a_lo (no rounding occurs)
+ addsd %xmm1, %xmm0 // a_hi + a_lo (round happens here)
+ movsd %xmm0, 4(%esp)
+ fldl 4(%esp)
+ ret
+END_COMPILERRT_FUNCTION(__floatdidf)
+
+#endif // __i386__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/i386/floatdisf.S b/contrib/compiler-rt/lib/builtins/i386/floatdisf.S
new file mode 100644
index 000000000000..0874eaaa9a98
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/i386/floatdisf.S
@@ -0,0 +1,35 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// float __floatdisf(di_int a);
+
+// This routine has some extra memory traffic, loading the 64-bit input via two
+// 32-bit loads, then immediately storing it back to the stack via a single 64-bit
+// store. This is to avoid a write-small, read-large stall.
+// However, if callers of this routine can be safely assumed to store the argument
+// via a 64-bt store, this is unnecessary memory traffic, and should be avoided.
+// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro.
+
+#ifdef __i386__
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__floatdisf)
+#ifndef TRUST_CALLERS_USE_64_BIT_STORES
+ movd 4(%esp), %xmm0
+ movd 8(%esp), %xmm1
+ punpckldq %xmm1, %xmm0
+ movq %xmm0, 4(%esp)
+#endif
+ fildll 4(%esp)
+ fstps 4(%esp)
+ flds 4(%esp)
+ ret
+END_COMPILERRT_FUNCTION(__floatdisf)
+
+#endif // __i386__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/i386/floatdixf.S b/contrib/compiler-rt/lib/builtins/i386/floatdixf.S
new file mode 100644
index 000000000000..1044ef55a1a8
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/i386/floatdixf.S
@@ -0,0 +1,33 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// float __floatdixf(di_int a);
+
+#ifdef __i386__
+
+// This routine has some extra memory traffic, loading the 64-bit input via two
+// 32-bit loads, then immediately storing it back to the stack via a single 64-bit
+// store. This is to avoid a write-small, read-large stall.
+// However, if callers of this routine can be safely assumed to store the argument
+// via a 64-bt store, this is unnecessary memory traffic, and should be avoided.
+// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro.
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__floatdixf)
+#ifndef TRUST_CALLERS_USE_64_BIT_STORES
+ movd 4(%esp), %xmm0
+ movd 8(%esp), %xmm1
+ punpckldq %xmm1, %xmm0
+ movq %xmm0, 4(%esp)
+#endif
+ fildll 4(%esp)
+ ret
+END_COMPILERRT_FUNCTION(__floatdixf)
+
+#endif // __i386__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/i386/floatundidf.S b/contrib/compiler-rt/lib/builtins/i386/floatundidf.S
new file mode 100644
index 000000000000..fe032348e829
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/i386/floatundidf.S
@@ -0,0 +1,55 @@
+//===-- floatundidf.S - Implement __floatundidf for i386 ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __floatundidf for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// double __floatundidf(du_int a);
+
+#ifdef __i386__
+
+CONST_SECTION
+
+ .balign 16
+twop52:
+ .quad 0x4330000000000000
+
+ .balign 16
+twop84_plus_twop52:
+ .quad 0x4530000000100000
+
+ .balign 16
+twop84:
+ .quad 0x4530000000000000
+
+#define REL_ADDR(_a) (_a)-0b(%eax)
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__floatundidf)
+ movss 8(%esp), %xmm1 // high 32 bits of a
+ movss 4(%esp), %xmm0 // low 32 bits of a
+ calll 0f
+0: popl %eax
+ orpd REL_ADDR(twop84), %xmm1 // 0x1p84 + a_hi (no rounding occurs)
+ subsd REL_ADDR(twop84_plus_twop52), %xmm1 // a_hi - 0x1p52 (no rounding occurs)
+ orpd REL_ADDR(twop52), %xmm0 // 0x1p52 + a_lo (no rounding occurs)
+ addsd %xmm1, %xmm0 // a_hi + a_lo (round happens here)
+ movsd %xmm0, 4(%esp)
+ fldl 4(%esp)
+ ret
+END_COMPILERRT_FUNCTION(__floatundidf)
+
+#endif // __i386__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/i386/floatundisf.S b/contrib/compiler-rt/lib/builtins/i386/floatundisf.S
new file mode 100644
index 000000000000..16000b576026
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/i386/floatundisf.S
@@ -0,0 +1,108 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// float __floatundisf(du_int a);
+
+// Note that there is a hardware instruction, fildll, that does most of what
+// this function needs to do. However, because of our ia32 ABI, it will take
+// a write-small read-large stall, so the software implementation here is
+// actually several cycles faster.
+
+// This is a branch-free implementation. A branchy implementation might be
+// faster for the common case if you know something a priori about the input
+// distribution.
+
+/* branch-free x87 implementation - one cycle slower than without x87.
+
+#ifdef __i386__
+
+CONST_SECTION
+.balign 3
+
+ .quad 0x43f0000000000000
+twop64: .quad 0x0000000000000000
+
+#define TWOp64 twop64-0b(%ecx,%eax,8)
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__floatundisf)
+ movl 8(%esp), %eax
+ movd 8(%esp), %xmm1
+ movd 4(%esp), %xmm0
+ punpckldq %xmm1, %xmm0
+ calll 0f
+0: popl %ecx
+ sarl $31, %eax
+ movq %xmm0, 4(%esp)
+ fildll 4(%esp)
+ faddl TWOp64
+ fstps 4(%esp)
+ flds 4(%esp)
+ ret
+END_COMPILERRT_FUNCTION(__floatundisf)
+
+#endif // __i386__
+
+*/
+
+/* branch-free, x87-free implementation - faster at the expense of code size */
+
+#ifdef __i386__
+
+CONST_SECTION
+
+ .balign 16
+twop52:
+ .quad 0x4330000000000000
+ .quad 0x0000000000000fff
+
+ .balign 16
+sticky:
+ .quad 0x0000000000000000
+ .long 0x00000012
+
+ .balign 16
+twelve:
+ .long 0x00000000
+
+#define TWOp52 twop52-0b(%ecx)
+#define STICKY sticky-0b(%ecx,%eax,8)
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__floatundisf)
+ movl 8(%esp), %eax
+ movd 8(%esp), %xmm1
+ movd 4(%esp), %xmm0
+ punpckldq %xmm1, %xmm0
+
+ calll 0f
+0: popl %ecx
+ shrl %eax // high 31 bits of input as sint32
+ addl $0x7ff80000, %eax
+ sarl $31, %eax // (big input) ? -1 : 0
+ movsd STICKY, %xmm1 // (big input) ? 0xfff : 0
+ movl $12, %edx
+ andl %eax, %edx // (big input) ? 12 : 0
+ movd %edx, %xmm3
+ andpd %xmm0, %xmm1 // (big input) ? input & 0xfff : 0
+ movsd TWOp52, %xmm2 // 0x1.0p52
+ psrlq %xmm3, %xmm0 // (big input) ? input >> 12 : input
+ orpd %xmm2, %xmm1 // 0x1.0p52 + ((big input) ? input & 0xfff : input)
+ orpd %xmm1, %xmm0 // 0x1.0p52 + ((big input) ? (input >> 12 | input & 0xfff) : input)
+ subsd %xmm2, %xmm0 // (double)((big input) ? (input >> 12 | input & 0xfff) : input)
+ cvtsd2ss %xmm0, %xmm0 // (float)((big input) ? (input >> 12 | input & 0xfff) : input)
+ pslld $23, %xmm3
+ paddd %xmm3, %xmm0 // (float)input
+ movd %xmm0, 4(%esp)
+ flds 4(%esp)
+ ret
+END_COMPILERRT_FUNCTION(__floatundisf)
+
+#endif // __i386__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/i386/floatundixf.S b/contrib/compiler-rt/lib/builtins/i386/floatundixf.S
new file mode 100644
index 000000000000..c935670cb52f
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/i386/floatundixf.S
@@ -0,0 +1,46 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// long double __floatundixf(du_int a);16
+
+#ifdef __i386__
+
+CONST_SECTION
+
+ .balign 16
+twop52:
+ .quad 0x4330000000000000
+
+ .balign 16
+twop84_plus_twop52_neg:
+ .quad 0xc530000000100000
+
+ .balign 16
+twop84:
+ .quad 0x4530000000000000
+
+#define REL_ADDR(_a) (_a)-0b(%eax)
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__floatundixf)
+ calll 0f
+0: popl %eax
+ movss 8(%esp), %xmm0 // hi 32 bits of input
+ movss 4(%esp), %xmm1 // lo 32 bits of input
+ orpd REL_ADDR(twop84), %xmm0 // 2^84 + hi (as a double)
+ orpd REL_ADDR(twop52), %xmm1 // 2^52 + lo (as a double)
+ addsd REL_ADDR(twop84_plus_twop52_neg), %xmm0 // hi - 2^52 (no rounding occurs)
+ movsd %xmm1, 4(%esp)
+ fldl 4(%esp)
+ movsd %xmm0, 4(%esp)
+ faddl 4(%esp)
+ ret
+END_COMPILERRT_FUNCTION(__floatundixf)
+
+#endif // __i386__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/i386/lshrdi3.S b/contrib/compiler-rt/lib/builtins/i386/lshrdi3.S
new file mode 100644
index 000000000000..53e95cf76527
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/i386/lshrdi3.S
@@ -0,0 +1,62 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// di_int __lshrdi3(di_int input, int count);
+
+// This routine has some extra memory traffic, loading the 64-bit input via two
+// 32-bit loads, then immediately storing it back to the stack via a single 64-bit
+// store. This is to avoid a write-small, read-large stall.
+// However, if callers of this routine can be safely assumed to store the argument
+// via a 64-bt store, this is unnecessary memory traffic, and should be avoided.
+// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro.
+
+#ifdef __i386__
+#ifdef __SSE2__
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__lshrdi3)
+ movd 12(%esp), %xmm2 // Load count
+#ifndef TRUST_CALLERS_USE_64_BIT_STORES
+ movd 4(%esp), %xmm0
+ movd 8(%esp), %xmm1
+ punpckldq %xmm1, %xmm0 // Load input
+#else
+ movq 4(%esp), %xmm0 // Load input
+#endif
+ psrlq %xmm2, %xmm0 // shift input by count
+ movd %xmm0, %eax
+ psrlq $32, %xmm0
+ movd %xmm0, %edx
+ ret
+END_COMPILERRT_FUNCTION(__lshrdi3)
+
+#else // Use GPRs instead of SSE2 instructions, if they aren't available.
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__lshrdi3)
+ movl 12(%esp), %ecx // Load count
+ movl 8(%esp), %edx // Load high
+ movl 4(%esp), %eax // Load low
+
+ testl $0x20, %ecx // If count >= 32
+ jnz 1f // goto 1
+
+ shrdl %cl, %edx, %eax // right shift low by count
+ shrl %cl, %edx // right shift high by count
+ ret
+
+1: movl %edx, %eax // Move high to low
+ xorl %edx, %edx // clear high
+ shrl %cl, %eax // shift low by count - 32
+ ret
+END_COMPILERRT_FUNCTION(__lshrdi3)
+
+#endif // __SSE2__
+#endif // __i386__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/i386/moddi3.S b/contrib/compiler-rt/lib/builtins/i386/moddi3.S
new file mode 100644
index 000000000000..a5bf9ce8ea0f
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/i386/moddi3.S
@@ -0,0 +1,169 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// di_int __moddi3(di_int a, di_int b);
+
+// result = remainder of a / b.
+// both inputs and the output are 64-bit signed integers.
+// This will do whatever the underlying hardware is set to do on division by zero.
+// No other exceptions are generated, as the divide cannot overflow.
+//
+// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware
+// on x86_64. The performance goal is ~40 cycles per divide, which is faster than
+// currently possible via simulation of integer divides on the x87 unit.
+//
+
+// Stephen Canon, December 2008
+
+#ifdef __i386__
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__moddi3)
+
+/* This is currently implemented by wrapping the unsigned modulus up in an absolute
+ value. This could certainly be improved upon. */
+
+ pushl %esi
+ movl 20(%esp), %edx // high word of b
+ movl 16(%esp), %eax // low word of b
+ movl %edx, %ecx
+ sarl $31, %ecx // (b < 0) ? -1 : 0
+ xorl %ecx, %eax
+ xorl %ecx, %edx // EDX:EAX = (b < 0) ? not(b) : b
+ subl %ecx, %eax
+ sbbl %ecx, %edx // EDX:EAX = abs(b)
+ movl %edx, 20(%esp)
+ movl %eax, 16(%esp) // store abs(b) back to stack
+
+ movl 12(%esp), %edx // high word of b
+ movl 8(%esp), %eax // low word of b
+ movl %edx, %ecx
+ sarl $31, %ecx // (a < 0) ? -1 : 0
+ xorl %ecx, %eax
+ xorl %ecx, %edx // EDX:EAX = (a < 0) ? not(a) : a
+ subl %ecx, %eax
+ sbbl %ecx, %edx // EDX:EAX = abs(a)
+ movl %edx, 12(%esp)
+ movl %eax, 8(%esp) // store abs(a) back to stack
+ movl %ecx, %esi // set aside sign of a
+
+ pushl %ebx
+ movl 24(%esp), %ebx // Find the index i of the leading bit in b.
+ bsrl %ebx, %ecx // If the high word of b is zero, jump to
+ jz 9f // the code to handle that special case [9].
+
+ /* High word of b is known to be non-zero on this branch */
+
+ movl 20(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b
+
+ shrl %cl, %eax // Practically, this means that bhi is given by:
+ shrl %eax //
+ notl %ecx // bhi = (high word of b) << (31 - i) |
+ shll %cl, %ebx // (low word of b) >> (1 + i)
+ orl %eax, %ebx //
+ movl 16(%esp), %edx // Load the high and low words of a, and jump
+ movl 12(%esp), %eax // to [2] if the high word is larger than bhi
+ cmpl %ebx, %edx // to avoid overflowing the upcoming divide.
+ jae 2f
+
+ /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
+
+ divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r
+
+ pushl %edi
+ notl %ecx
+ shrl %eax
+ shrl %cl, %eax // q = qs >> (1 + i)
+ movl %eax, %edi
+ mull 24(%esp) // q*blo
+ movl 16(%esp), %ebx
+ movl 20(%esp), %ecx // ECX:EBX = a
+ subl %eax, %ebx
+ sbbl %edx, %ecx // ECX:EBX = a - q*blo
+ movl 28(%esp), %eax
+ imull %edi, %eax // q*bhi
+ subl %eax, %ecx // ECX:EBX = a - q*b
+
+ jnc 1f // if positive, this is the result.
+ addl 24(%esp), %ebx // otherwise
+ adcl 28(%esp), %ecx // ECX:EBX = a - (q-1)*b = result
+1: movl %ebx, %eax
+ movl %ecx, %edx
+
+ addl %esi, %eax // Restore correct sign to result
+ adcl %esi, %edx
+ xorl %esi, %eax
+ xorl %esi, %edx
+ popl %edi // Restore callee-save registers
+ popl %ebx
+ popl %esi
+ retl // Return
+
+2: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
+
+ subl %ebx, %edx // subtract bhi from ahi so that divide will not
+ divl %ebx // overflow, and find q and r such that
+ //
+ // ahi:alo = (1:q)*bhi + r
+ //
+ // Note that q is a number in (31-i).(1+i)
+ // fix point.
+
+ pushl %edi
+ notl %ecx
+ shrl %eax
+ orl $0x80000000, %eax
+ shrl %cl, %eax // q = (1:qs) >> (1 + i)
+ movl %eax, %edi
+ mull 24(%esp) // q*blo
+ movl 16(%esp), %ebx
+ movl 20(%esp), %ecx // ECX:EBX = a
+ subl %eax, %ebx
+ sbbl %edx, %ecx // ECX:EBX = a - q*blo
+ movl 28(%esp), %eax
+ imull %edi, %eax // q*bhi
+ subl %eax, %ecx // ECX:EBX = a - q*b
+
+ jnc 3f // if positive, this is the result.
+ addl 24(%esp), %ebx // otherwise
+ adcl 28(%esp), %ecx // ECX:EBX = a - (q-1)*b = result
+3: movl %ebx, %eax
+ movl %ecx, %edx
+
+ addl %esi, %eax // Restore correct sign to result
+ adcl %esi, %edx
+ xorl %esi, %eax
+ xorl %esi, %edx
+ popl %edi // Restore callee-save registers
+ popl %ebx
+ popl %esi
+ retl // Return
+
+9: /* High word of b is zero on this branch */
+
+ movl 16(%esp), %eax // Find qhi and rhi such that
+ movl 20(%esp), %ecx //
+ xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b
+ divl %ecx //
+ movl %eax, %ebx //
+ movl 12(%esp), %eax // Find rlo such that
+ divl %ecx //
+ movl %edx, %eax // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b
+ popl %ebx //
+ xorl %edx, %edx // and return 0:rlo
+
+ addl %esi, %eax // Restore correct sign to result
+ adcl %esi, %edx
+ xorl %esi, %eax
+ xorl %esi, %edx
+ popl %esi
+ retl // Return
+END_COMPILERRT_FUNCTION(__moddi3)
+
+#endif // __i386__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/i386/muldi3.S b/contrib/compiler-rt/lib/builtins/i386/muldi3.S
new file mode 100644
index 000000000000..12394606421c
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/i386/muldi3.S
@@ -0,0 +1,33 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// di_int __muldi3(di_int a, di_int b);
+
+#ifdef __i386__
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__muldi3)
+ pushl %ebx
+ movl 16(%esp), %eax // b.lo
+ movl 12(%esp), %ecx // a.hi
+ imull %eax, %ecx // b.lo * a.hi
+
+ movl 8(%esp), %edx // a.lo
+ movl 20(%esp), %ebx // b.hi
+ imull %edx, %ebx // a.lo * b.hi
+
+ mull %edx // EDX:EAX = a.lo * b.lo
+ addl %ecx, %ebx // EBX = (a.lo*b.hi + a.hi*b.lo)
+ addl %ebx, %edx
+
+ popl %ebx
+ retl
+END_COMPILERRT_FUNCTION(__muldi3)
+
+#endif // __i386__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/i386/udivdi3.S b/contrib/compiler-rt/lib/builtins/i386/udivdi3.S
new file mode 100644
index 000000000000..727613639b12
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/i386/udivdi3.S
@@ -0,0 +1,118 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// du_int __udivdi3(du_int a, du_int b);
+
+// result = a / b.
+// both inputs and the output are 64-bit unsigned integers.
+// This will do whatever the underlying hardware is set to do on division by zero.
+// No other exceptions are generated, as the divide cannot overflow.
+//
+// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware
+// on x86_64. The performance goal is ~40 cycles per divide, which is faster than
+// currently possible via simulation of integer divides on the x87 unit.
+//
+// Stephen Canon, December 2008
+
+#ifdef __i386__
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__udivdi3)
+
+ pushl %ebx
+ movl 20(%esp), %ebx // Find the index i of the leading bit in b.
+ bsrl %ebx, %ecx // If the high word of b is zero, jump to
+ jz 9f // the code to handle that special case [9].
+
+ /* High word of b is known to be non-zero on this branch */
+
+ movl 16(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b
+
+ shrl %cl, %eax // Practically, this means that bhi is given by:
+ shrl %eax //
+ notl %ecx // bhi = (high word of b) << (31 - i) |
+ shll %cl, %ebx // (low word of b) >> (1 + i)
+ orl %eax, %ebx //
+ movl 12(%esp), %edx // Load the high and low words of a, and jump
+ movl 8(%esp), %eax // to [1] if the high word is larger than bhi
+ cmpl %ebx, %edx // to avoid overflowing the upcoming divide.
+ jae 1f
+
+ /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
+
+ divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r
+
+ pushl %edi
+ notl %ecx
+ shrl %eax
+ shrl %cl, %eax // q = qs >> (1 + i)
+ movl %eax, %edi
+ mull 20(%esp) // q*blo
+ movl 12(%esp), %ebx
+ movl 16(%esp), %ecx // ECX:EBX = a
+ subl %eax, %ebx
+ sbbl %edx, %ecx // ECX:EBX = a - q*blo
+ movl 24(%esp), %eax
+ imull %edi, %eax // q*bhi
+ subl %eax, %ecx // ECX:EBX = a - q*b
+ sbbl $0, %edi // decrement q if remainder is negative
+ xorl %edx, %edx
+ movl %edi, %eax
+ popl %edi
+ popl %ebx
+ retl
+
+
+1: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
+
+ subl %ebx, %edx // subtract bhi from ahi so that divide will not
+ divl %ebx // overflow, and find q and r such that
+ //
+ // ahi:alo = (1:q)*bhi + r
+ //
+ // Note that q is a number in (31-i).(1+i)
+ // fix point.
+
+ pushl %edi
+ notl %ecx
+ shrl %eax
+ orl $0x80000000, %eax
+ shrl %cl, %eax // q = (1:qs) >> (1 + i)
+ movl %eax, %edi
+ mull 20(%esp) // q*blo
+ movl 12(%esp), %ebx
+ movl 16(%esp), %ecx // ECX:EBX = a
+ subl %eax, %ebx
+ sbbl %edx, %ecx // ECX:EBX = a - q*blo
+ movl 24(%esp), %eax
+ imull %edi, %eax // q*bhi
+ subl %eax, %ecx // ECX:EBX = a - q*b
+ sbbl $0, %edi // decrement q if remainder is negative
+ xorl %edx, %edx
+ movl %edi, %eax
+ popl %edi
+ popl %ebx
+ retl
+
+
+9: /* High word of b is zero on this branch */
+
+ movl 12(%esp), %eax // Find qhi and rhi such that
+ movl 16(%esp), %ecx //
+ xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b
+ divl %ecx //
+ movl %eax, %ebx //
+ movl 8(%esp), %eax // Find qlo such that
+ divl %ecx //
+ movl %ebx, %edx // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b
+ popl %ebx //
+ retl // and return qhi:qlo
+END_COMPILERRT_FUNCTION(__udivdi3)
+
+#endif // __i386__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/i386/umoddi3.S b/contrib/compiler-rt/lib/builtins/i386/umoddi3.S
new file mode 100644
index 000000000000..763e821946c0
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/i386/umoddi3.S
@@ -0,0 +1,129 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// du_int __umoddi3(du_int a, du_int b);
+
+// result = remainder of a / b.
+// both inputs and the output are 64-bit unsigned integers.
+// This will do whatever the underlying hardware is set to do on division by zero.
+// No other exceptions are generated, as the divide cannot overflow.
+//
+// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware
+// on x86_64. The performance goal is ~40 cycles per divide, which is faster than
+// currently possible via simulation of integer divides on the x87 unit.
+//
+
+// Stephen Canon, December 2008
+
+#ifdef __i386__
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__umoddi3)
+
+ pushl %ebx
+ movl 20(%esp), %ebx // Find the index i of the leading bit in b.
+ bsrl %ebx, %ecx // If the high word of b is zero, jump to
+ jz 9f // the code to handle that special case [9].
+
+ /* High word of b is known to be non-zero on this branch */
+
+ movl 16(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b
+
+ shrl %cl, %eax // Practically, this means that bhi is given by:
+ shrl %eax //
+ notl %ecx // bhi = (high word of b) << (31 - i) |
+ shll %cl, %ebx // (low word of b) >> (1 + i)
+ orl %eax, %ebx //
+ movl 12(%esp), %edx // Load the high and low words of a, and jump
+ movl 8(%esp), %eax // to [2] if the high word is larger than bhi
+ cmpl %ebx, %edx // to avoid overflowing the upcoming divide.
+ jae 2f
+
+ /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
+
+ divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r
+
+ pushl %edi
+ notl %ecx
+ shrl %eax
+ shrl %cl, %eax // q = qs >> (1 + i)
+ movl %eax, %edi
+ mull 20(%esp) // q*blo
+ movl 12(%esp), %ebx
+ movl 16(%esp), %ecx // ECX:EBX = a
+ subl %eax, %ebx
+ sbbl %edx, %ecx // ECX:EBX = a - q*blo
+ movl 24(%esp), %eax
+ imull %edi, %eax // q*bhi
+ subl %eax, %ecx // ECX:EBX = a - q*b
+
+ jnc 1f // if positive, this is the result.
+ addl 20(%esp), %ebx // otherwise
+ adcl 24(%esp), %ecx // ECX:EBX = a - (q-1)*b = result
+1: movl %ebx, %eax
+ movl %ecx, %edx
+
+ popl %edi
+ popl %ebx
+ retl
+
+
+2: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
+
+ subl %ebx, %edx // subtract bhi from ahi so that divide will not
+ divl %ebx // overflow, and find q and r such that
+ //
+ // ahi:alo = (1:q)*bhi + r
+ //
+ // Note that q is a number in (31-i).(1+i)
+ // fix point.
+
+ pushl %edi
+ notl %ecx
+ shrl %eax
+ orl $0x80000000, %eax
+ shrl %cl, %eax // q = (1:qs) >> (1 + i)
+ movl %eax, %edi
+ mull 20(%esp) // q*blo
+ movl 12(%esp), %ebx
+ movl 16(%esp), %ecx // ECX:EBX = a
+ subl %eax, %ebx
+ sbbl %edx, %ecx // ECX:EBX = a - q*blo
+ movl 24(%esp), %eax
+ imull %edi, %eax // q*bhi
+ subl %eax, %ecx // ECX:EBX = a - q*b
+
+ jnc 3f // if positive, this is the result.
+ addl 20(%esp), %ebx // otherwise
+ adcl 24(%esp), %ecx // ECX:EBX = a - (q-1)*b = result
+3: movl %ebx, %eax
+ movl %ecx, %edx
+
+ popl %edi
+ popl %ebx
+ retl
+
+
+
+9: /* High word of b is zero on this branch */
+
+ movl 12(%esp), %eax // Find qhi and rhi such that
+ movl 16(%esp), %ecx //
+ xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b
+ divl %ecx //
+ movl %eax, %ebx //
+ movl 8(%esp), %eax // Find rlo such that
+ divl %ecx //
+ movl %edx, %eax // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b
+ popl %ebx //
+ xorl %edx, %edx // and return 0:rlo
+ retl //
+END_COMPILERRT_FUNCTION(__umoddi3)
+
+#endif // __i386__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/int_endianness.h b/contrib/compiler-rt/lib/builtins/int_endianness.h
new file mode 100644
index 000000000000..e2586c56bac8
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/int_endianness.h
@@ -0,0 +1,116 @@
+/* ===-- int_endianness.h - configuration header for compiler-rt ------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file is a configuration header for compiler-rt.
+ * This file is not part of the interface of this library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#ifndef INT_ENDIANNESS_H
+#define INT_ENDIANNESS_H
+
+#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
+ defined(__ORDER_LITTLE_ENDIAN__)
+
+/* Clang and GCC provide built-in endianness definitions. */
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define _YUGA_LITTLE_ENDIAN 0
+#define _YUGA_BIG_ENDIAN 1
+#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define _YUGA_LITTLE_ENDIAN 1
+#define _YUGA_BIG_ENDIAN 0
+#endif /* __BYTE_ORDER__ */
+
+#else /* Compilers other than Clang or GCC. */
+
+#if defined(__SVR4) && defined(__sun)
+#include <sys/byteorder.h>
+
+#if defined(_BIG_ENDIAN)
+#define _YUGA_LITTLE_ENDIAN 0
+#define _YUGA_BIG_ENDIAN 1
+#elif defined(_LITTLE_ENDIAN)
+#define _YUGA_LITTLE_ENDIAN 1
+#define _YUGA_BIG_ENDIAN 0
+#else /* !_LITTLE_ENDIAN */
+#error "unknown endianness"
+#endif /* !_LITTLE_ENDIAN */
+
+#endif /* Solaris and AuroraUX. */
+
+/* .. */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || \
+ defined(__minix)
+#include <sys/endian.h>
+
+#if _BYTE_ORDER == _BIG_ENDIAN
+#define _YUGA_LITTLE_ENDIAN 0
+#define _YUGA_BIG_ENDIAN 1
+#elif _BYTE_ORDER == _LITTLE_ENDIAN
+#define _YUGA_LITTLE_ENDIAN 1
+#define _YUGA_BIG_ENDIAN 0
+#endif /* _BYTE_ORDER */
+
+#endif /* *BSD */
+
+#if defined(__OpenBSD__)
+#include <machine/endian.h>
+
+#if _BYTE_ORDER == _BIG_ENDIAN
+#define _YUGA_LITTLE_ENDIAN 0
+#define _YUGA_BIG_ENDIAN 1
+#elif _BYTE_ORDER == _LITTLE_ENDIAN
+#define _YUGA_LITTLE_ENDIAN 1
+#define _YUGA_BIG_ENDIAN 0
+#endif /* _BYTE_ORDER */
+
+#endif /* OpenBSD */
+
+/* .. */
+
+/* Mac OSX has __BIG_ENDIAN__ or __LITTLE_ENDIAN__ automatically set by the
+ * compiler (at least with GCC) */
+#if defined(__APPLE__) || defined(__ellcc__ )
+
+#ifdef __BIG_ENDIAN__
+#if __BIG_ENDIAN__
+#define _YUGA_LITTLE_ENDIAN 0
+#define _YUGA_BIG_ENDIAN 1
+#endif
+#endif /* __BIG_ENDIAN__ */
+
+#ifdef __LITTLE_ENDIAN__
+#if __LITTLE_ENDIAN__
+#define _YUGA_LITTLE_ENDIAN 1
+#define _YUGA_BIG_ENDIAN 0
+#endif
+#endif /* __LITTLE_ENDIAN__ */
+
+#endif /* Mac OSX */
+
+/* .. */
+
+#if defined(_WIN32)
+
+#define _YUGA_LITTLE_ENDIAN 1
+#define _YUGA_BIG_ENDIAN 0
+
+#endif /* Windows */
+
+#endif /* Clang or GCC. */
+
+/* . */
+
+#if !defined(_YUGA_LITTLE_ENDIAN) || !defined(_YUGA_BIG_ENDIAN)
+#error Unable to determine endian
+#endif /* Check we found an endianness correctly. */
+
+#endif /* INT_ENDIANNESS_H */
diff --git a/contrib/compiler-rt/lib/builtins/int_lib.h b/contrib/compiler-rt/lib/builtins/int_lib.h
new file mode 100644
index 000000000000..e07867d5580f
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/int_lib.h
@@ -0,0 +1,163 @@
+/* ===-- int_lib.h - configuration header for compiler-rt -----------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file is a configuration header for compiler-rt.
+ * This file is not part of the interface of this library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#ifndef INT_LIB_H
+#define INT_LIB_H
+
+/* Assumption: Signed integral is 2's complement. */
+/* Assumption: Right shift of signed negative is arithmetic shift. */
+/* Assumption: Endianness is little or big (not mixed). */
+
+#if defined(__ELF__)
+#define FNALIAS(alias_name, original_name) \
+ void alias_name() __attribute__((__alias__(#original_name)))
+#define COMPILER_RT_ALIAS(aliasee) __attribute__((__alias__(#aliasee)))
+#else
+#define FNALIAS(alias, name) _Pragma("GCC error(\"alias unsupported on this file format\")")
+#define COMPILER_RT_ALIAS(aliasee) _Pragma("GCC error(\"alias unsupported on this file format\")")
+#endif
+
+/* ABI macro definitions */
+
+#if __ARM_EABI__
+# if defined(COMPILER_RT_ARMHF_TARGET) || (!defined(__clang__) && \
+ defined(__GNUC__) && (__GNUC__ < 4 || __GNUC__ == 4 && __GNUC_MINOR__ < 5))
+/* The pcs attribute was introduced in GCC 4.5.0 */
+# define COMPILER_RT_ABI
+# else
+# define COMPILER_RT_ABI __attribute__((__pcs__("aapcs")))
+# endif
+#else
+# define COMPILER_RT_ABI
+#endif
+
+#define AEABI_RTABI __attribute__((__pcs__("aapcs")))
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#define ALWAYS_INLINE __forceinline
+#define NOINLINE __declspec(noinline)
+#define NORETURN __declspec(noreturn)
+#define UNUSED
+#else
+#define ALWAYS_INLINE __attribute__((always_inline))
+#define NOINLINE __attribute__((noinline))
+#define NORETURN __attribute__((noreturn))
+#define UNUSED __attribute__((unused))
+#endif
+
+#if (defined(__FreeBSD__) || defined(__NetBSD__)) && (defined(_KERNEL) || defined(_STANDALONE))
+/*
+ * Kernel and boot environment can't use normal headers,
+ * so use the equivalent system headers.
+ */
+#ifdef __FreeBSD__
+# include <sys/limits.h>
+#else
+# include <machine/limits.h>
+#endif
+# include <sys/stdint.h>
+# include <sys/types.h>
+#else
+/* Include the standard compiler builtin headers we use functionality from. */
+# include <limits.h>
+# include <stdint.h>
+# include <stdbool.h>
+# include <float.h>
+#endif
+
+/* Include the commonly used internal type definitions. */
+#include "int_types.h"
+
+/* Include internal utility function declarations. */
+#include "int_util.h"
+
+/*
+ * Workaround for LLVM bug 11663. Prevent endless recursion in
+ * __c?zdi2(), where calls to __builtin_c?z() are expanded to
+ * __c?zdi2() instead of __c?zsi2().
+ *
+ * Instead of placing this workaround in c?zdi2.c, put it in this
+ * global header to prevent other C files from making the detour
+ * through __c?zdi2() as well.
+ *
+ * This problem has been observed on FreeBSD for sparc64 and
+ * mips64 with GCC 4.2.1, and for riscv with GCC 5.2.0.
+ * Presumably it's any version of GCC, and targeting an arch that
+ * does not have dedicated bit counting instructions.
+ */
+#if defined(__FreeBSD__) && (defined(__sparc64__) || \
+ defined(__mips_n32) || defined(__mips_n64) || defined(__mips_o64) || \
+ defined(__riscv))
+si_int __clzsi2(si_int);
+si_int __ctzsi2(si_int);
+#define __builtin_clz __clzsi2
+#define __builtin_ctz __ctzsi2
+#endif /* FreeBSD && (sparc64 || mips_n32 || mips_n64 || mips_o64 || riscv) */
+
+COMPILER_RT_ABI si_int __paritysi2(si_int a);
+COMPILER_RT_ABI si_int __paritydi2(di_int a);
+
+COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b);
+COMPILER_RT_ABI si_int __divsi3(si_int a, si_int b);
+COMPILER_RT_ABI su_int __udivsi3(su_int n, su_int d);
+
+COMPILER_RT_ABI su_int __udivmodsi4(su_int a, su_int b, su_int* rem);
+COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int* rem);
+#ifdef CRT_HAS_128BIT
+COMPILER_RT_ABI si_int __clzti2(ti_int a);
+COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem);
+#endif
+
+/* Definitions for builtins unavailable on MSVC */
+#if defined(_MSC_VER) && !defined(__clang__)
+#include <intrin.h>
+
+uint32_t __inline __builtin_ctz(uint32_t value) {
+ unsigned long trailing_zero = 0;
+ if (_BitScanForward(&trailing_zero, value))
+ return trailing_zero;
+ return 32;
+}
+
+uint32_t __inline __builtin_clz(uint32_t value) {
+ unsigned long leading_zero = 0;
+ if (_BitScanReverse(&leading_zero, value))
+ return 31 - leading_zero;
+ return 32;
+}
+
+#if defined(_M_ARM) || defined(_M_X64)
+uint32_t __inline __builtin_clzll(uint64_t value) {
+ unsigned long leading_zero = 0;
+ if (_BitScanReverse64(&leading_zero, value))
+ return 63 - leading_zero;
+ return 64;
+}
+#else
+uint32_t __inline __builtin_clzll(uint64_t value) {
+ if (value == 0)
+ return 64;
+ uint32_t msh = (uint32_t)(value >> 32);
+ uint32_t lsh = (uint32_t)(value & 0xFFFFFFFF);
+ if (msh != 0)
+ return __builtin_clz(msh);
+ return 32 + __builtin_clz(lsh);
+}
+#endif
+
+#define __builtin_clzl __builtin_clzll
+#endif /* defined(_MSC_VER) && !defined(__clang__) */
+
+#endif /* INT_LIB_H */
diff --git a/contrib/compiler-rt/lib/builtins/int_math.h b/contrib/compiler-rt/lib/builtins/int_math.h
new file mode 100644
index 000000000000..aa3d0721a8ab
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/int_math.h
@@ -0,0 +1,110 @@
+/* ===-- int_math.h - internal math inlines ---------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===-----------------------------------------------------------------------===
+ *
+ * This file is not part of the interface of this library.
+ *
+ * This file defines substitutes for the libm functions used in some of the
+ * compiler-rt implementations, defined in such a way that there is not a direct
+ * dependency on libm or math.h. Instead, we use the compiler builtin versions
+ * where available. This reduces our dependencies on the system SDK by foisting
+ * the responsibility onto the compiler.
+ *
+ * ===-----------------------------------------------------------------------===
+ */
+
+#ifndef INT_MATH_H
+#define INT_MATH_H
+
+#ifndef __has_builtin
+# define __has_builtin(x) 0
+#endif
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#include <math.h>
+#include <stdlib.h>
+#include <ymath.h>
+#endif
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#define CRT_INFINITY INFINITY
+#else
+#define CRT_INFINITY __builtin_huge_valf()
+#endif
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#define crt_isfinite(x) _finite((x))
+#define crt_isinf(x) !_finite((x))
+#define crt_isnan(x) _isnan((x))
+#else
+/* Define crt_isfinite in terms of the builtin if available, otherwise provide
+ * an alternate version in terms of our other functions. This supports some
+ * versions of GCC which didn't have __builtin_isfinite.
+ */
+#if __has_builtin(__builtin_isfinite)
+# define crt_isfinite(x) __builtin_isfinite((x))
+#elif defined(__GNUC__)
+# define crt_isfinite(x) \
+ __extension__(({ \
+ __typeof((x)) x_ = (x); \
+ !crt_isinf(x_) && !crt_isnan(x_); \
+ }))
+#else
+# error "Do not know how to check for infinity"
+#endif /* __has_builtin(__builtin_isfinite) */
+#define crt_isinf(x) __builtin_isinf((x))
+#define crt_isnan(x) __builtin_isnan((x))
+#endif /* _MSC_VER */
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#define crt_copysign(x, y) copysign((x), (y))
+#define crt_copysignf(x, y) copysignf((x), (y))
+#define crt_copysignl(x, y) copysignl((x), (y))
+#else
+#define crt_copysign(x, y) __builtin_copysign((x), (y))
+#define crt_copysignf(x, y) __builtin_copysignf((x), (y))
+#define crt_copysignl(x, y) __builtin_copysignl((x), (y))
+#endif
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#define crt_fabs(x) fabs((x))
+#define crt_fabsf(x) fabsf((x))
+#define crt_fabsl(x) fabs((x))
+#else
+#define crt_fabs(x) __builtin_fabs((x))
+#define crt_fabsf(x) __builtin_fabsf((x))
+#define crt_fabsl(x) __builtin_fabsl((x))
+#endif
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#define crt_fmax(x, y) __max((x), (y))
+#define crt_fmaxf(x, y) __max((x), (y))
+#define crt_fmaxl(x, y) __max((x), (y))
+#else
+#define crt_fmax(x, y) __builtin_fmax((x), (y))
+#define crt_fmaxf(x, y) __builtin_fmaxf((x), (y))
+#define crt_fmaxl(x, y) __builtin_fmaxl((x), (y))
+#endif
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#define crt_logbl(x) logbl((x))
+#else
+#define crt_logbl(x) __builtin_logbl((x))
+#endif
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#define crt_scalbn(x, y) scalbn((x), (y))
+#define crt_scalbnf(x, y) scalbnf((x), (y))
+#define crt_scalbnl(x, y) scalbnl((x), (y))
+#else
+#define crt_scalbn(x, y) __builtin_scalbn((x), (y))
+#define crt_scalbnf(x, y) __builtin_scalbnf((x), (y))
+#define crt_scalbnl(x, y) __builtin_scalbnl((x), (y))
+#endif
+
+#endif /* INT_MATH_H */
diff --git a/contrib/compiler-rt/lib/builtins/int_types.h b/contrib/compiler-rt/lib/builtins/int_types.h
new file mode 100644
index 000000000000..4c91b75bac6c
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/int_types.h
@@ -0,0 +1,189 @@
+/* ===-- int_lib.h - configuration header for compiler-rt -----------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file is not part of the interface of this library.
+ *
+ * This file defines various standard types, most importantly a number of unions
+ * used to access parts of larger types.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#ifndef INT_TYPES_H
+#define INT_TYPES_H
+
+#include "int_endianness.h"
+
+/* si_int is defined in Linux sysroot's asm-generic/siginfo.h */
+#ifdef si_int
+#undef si_int
+#endif
+typedef int si_int;
+typedef unsigned su_int;
+
+typedef long long di_int;
+typedef unsigned long long du_int;
+
+typedef union
+{
+ di_int all;
+ struct
+ {
+#if _YUGA_LITTLE_ENDIAN
+ su_int low;
+ si_int high;
+#else
+ si_int high;
+ su_int low;
+#endif /* _YUGA_LITTLE_ENDIAN */
+ }s;
+} dwords;
+
+typedef union
+{
+ du_int all;
+ struct
+ {
+#if _YUGA_LITTLE_ENDIAN
+ su_int low;
+ su_int high;
+#else
+ su_int high;
+ su_int low;
+#endif /* _YUGA_LITTLE_ENDIAN */
+ }s;
+} udwords;
+
+#if defined(__LP64__) || defined(__wasm__) || defined(__mips64) || \
+ defined(__riscv) || defined(_WIN64)
+#define CRT_HAS_128BIT
+#endif
+
+/* MSVC doesn't have a working 128bit integer type. Users should really compile
+ * compiler-rt with clang, but if they happen to be doing a standalone build for
+ * asan or something else, disable the 128 bit parts so things sort of work.
+ */
+#if defined(_MSC_VER) && !defined(__clang__)
+#undef CRT_HAS_128BIT
+#endif
+
+#ifdef CRT_HAS_128BIT
+typedef int ti_int __attribute__ ((mode (TI)));
+typedef unsigned tu_int __attribute__ ((mode (TI)));
+
+typedef union
+{
+ ti_int all;
+ struct
+ {
+#if _YUGA_LITTLE_ENDIAN
+ du_int low;
+ di_int high;
+#else
+ di_int high;
+ du_int low;
+#endif /* _YUGA_LITTLE_ENDIAN */
+ }s;
+} twords;
+
+typedef union
+{
+ tu_int all;
+ struct
+ {
+#if _YUGA_LITTLE_ENDIAN
+ du_int low;
+ du_int high;
+#else
+ du_int high;
+ du_int low;
+#endif /* _YUGA_LITTLE_ENDIAN */
+ }s;
+} utwords;
+
+static __inline ti_int make_ti(di_int h, di_int l) {
+ twords r;
+ r.s.high = h;
+ r.s.low = l;
+ return r.all;
+}
+
+static __inline tu_int make_tu(du_int h, du_int l) {
+ utwords r;
+ r.s.high = h;
+ r.s.low = l;
+ return r.all;
+}
+
+#endif /* CRT_HAS_128BIT */
+
+#ifndef _STANDALONE
+typedef union
+{
+ su_int u;
+ float f;
+} float_bits;
+
+typedef union
+{
+ udwords u;
+ double f;
+} double_bits;
+#endif
+
+typedef struct
+{
+#if _YUGA_LITTLE_ENDIAN
+ udwords low;
+ udwords high;
+#else
+ udwords high;
+ udwords low;
+#endif /* _YUGA_LITTLE_ENDIAN */
+} uqwords;
+
+/* Check if the target supports 80 bit extended precision long doubles.
+ * Notably, on x86 Windows, MSVC only provides a 64-bit long double, but GCC
+ * still makes it 80 bits. Clang will match whatever compiler it is trying to
+ * be compatible with.
+ */
+#if ((defined(__i386__) || defined(__x86_64__)) && !defined(_MSC_VER)) || \
+ defined(__m68k__) || defined(__ia64__)
+#define HAS_80_BIT_LONG_DOUBLE 1
+#else
+#define HAS_80_BIT_LONG_DOUBLE 0
+#endif
+
+#ifndef _STANDALONE
+typedef union
+{
+ uqwords u;
+ long double f;
+} long_double_bits;
+
+#if __STDC_VERSION__ >= 199901L
+typedef float _Complex Fcomplex;
+typedef double _Complex Dcomplex;
+typedef long double _Complex Lcomplex;
+
+#define COMPLEX_REAL(x) __real__(x)
+#define COMPLEX_IMAGINARY(x) __imag__(x)
+#else
+typedef struct { float real, imaginary; } Fcomplex;
+
+typedef struct { double real, imaginary; } Dcomplex;
+
+typedef struct { long double real, imaginary; } Lcomplex;
+
+#define COMPLEX_REAL(x) (x).real
+#define COMPLEX_IMAGINARY(x) (x).imaginary
+#endif
+#endif
+#endif /* INT_TYPES_H */
+
diff --git a/contrib/compiler-rt/lib/builtins/int_util.c b/contrib/compiler-rt/lib/builtins/int_util.c
new file mode 100644
index 000000000000..752f2015580e
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/int_util.c
@@ -0,0 +1,71 @@
+/* ===-- int_util.c - Implement internal utilities --------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+#include "int_util.h"
+
+/* NOTE: The definitions in this file are declared weak because we clients to be
+ * able to arbitrarily package individual functions into separate .a files. If
+ * we did not declare these weak, some link situations might end up seeing
+ * duplicate strong definitions of the same symbol.
+ *
+ * We can't use this solution for kernel use (which may not support weak), but
+ * currently expect that when built for kernel use all the functionality is
+ * packaged into a single library.
+ */
+
+#ifdef KERNEL_USE
+
+NORETURN extern void panic(const char *, ...);
+#ifndef _WIN32
+__attribute__((visibility("hidden")))
+#endif
+void __compilerrt_abort_impl(const char *file, int line, const char *function) {
+ panic("%s:%d: abort in %s", file, line, function);
+}
+
+#elif __APPLE__
+
+/* from libSystem.dylib */
+NORETURN extern void __assert_rtn(const char *func, const char *file, int line,
+ const char *message);
+
+#ifndef _WIN32
+__attribute__((weak))
+__attribute__((visibility("hidden")))
+#endif
+void __compilerrt_abort_impl(const char *file, int line, const char *function) {
+ __assert_rtn(function, file, line, "libcompiler_rt abort");
+}
+
+#elif __Fuchsia__
+
+#ifndef _WIN32
+__attribute__((weak))
+__attribute__((visibility("hidden")))
+#endif
+void __compilerrt_abort_impl(const char *file, int line, const char *function) {
+ __builtin_trap();
+}
+
+#else
+
+/* Get the system definition of abort() */
+#include <stdlib.h>
+
+#ifndef _WIN32
+__attribute__((weak))
+__attribute__((visibility("hidden")))
+#endif
+void __compilerrt_abort_impl(const char *file, int line, const char *function) {
+ abort();
+}
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/int_util.h b/contrib/compiler-rt/lib/builtins/int_util.h
new file mode 100644
index 000000000000..c3c87381ad8a
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/int_util.h
@@ -0,0 +1,33 @@
+/* ===-- int_util.h - internal utility functions ----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===-----------------------------------------------------------------------===
+ *
+ * This file is not part of the interface of this library.
+ *
+ * This file defines non-inline utilities which are available for use in the
+ * library. The function definitions themselves are all contained in int_util.c
+ * which will always be compiled into any compiler-rt library.
+ *
+ * ===-----------------------------------------------------------------------===
+ */
+
+#ifndef INT_UTIL_H
+#define INT_UTIL_H
+
+/** \brief Trigger a program abort (or panic for kernel code). */
+#define compilerrt_abort() __compilerrt_abort_impl(__FILE__, __LINE__, __func__)
+
+NORETURN void __compilerrt_abort_impl(const char *file, int line,
+ const char *function);
+
+#define COMPILE_TIME_ASSERT(expr) COMPILE_TIME_ASSERT1(expr, __COUNTER__)
+#define COMPILE_TIME_ASSERT1(expr, cnt) COMPILE_TIME_ASSERT2(expr, cnt)
+#define COMPILE_TIME_ASSERT2(expr, cnt) \
+ typedef char ct_assert_##cnt[(expr) ? 1 : -1] UNUSED
+
+#endif /* INT_UTIL_H */
diff --git a/contrib/compiler-rt/lib/builtins/lshrdi3.c b/contrib/compiler-rt/lib/builtins/lshrdi3.c
new file mode 100644
index 000000000000..67b2a7668345
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/lshrdi3.c
@@ -0,0 +1,45 @@
+/* ===-- lshrdi3.c - Implement __lshrdi3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __lshrdi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: logical a >> b */
+
+/* Precondition: 0 <= b < bits_in_dword */
+
+COMPILER_RT_ABI di_int
+__lshrdi3(di_int a, si_int b)
+{
+ const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT);
+ udwords input;
+ udwords result;
+ input.all = a;
+ if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */
+ {
+ result.s.high = 0;
+ result.s.low = input.s.high >> (b - bits_in_word);
+ }
+ else /* 0 <= b < bits_in_word */
+ {
+ if (b == 0)
+ return a;
+ result.s.high = input.s.high >> b;
+ result.s.low = (input.s.high << (bits_in_word - b)) | (input.s.low >> b);
+ }
+ return result.all;
+}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI di_int __aeabi_llsr(di_int a, si_int b) COMPILER_RT_ALIAS(__lshrdi3);
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/lshrti3.c b/contrib/compiler-rt/lib/builtins/lshrti3.c
new file mode 100644
index 000000000000..e4170ff84a5a
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/lshrti3.c
@@ -0,0 +1,45 @@
+/* ===-- lshrti3.c - Implement __lshrti3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __lshrti3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: logical a >> b */
+
+/* Precondition: 0 <= b < bits_in_tword */
+
+COMPILER_RT_ABI ti_int
+__lshrti3(ti_int a, si_int b)
+{
+ const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT);
+ utwords input;
+ utwords result;
+ input.all = a;
+ if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */
+ {
+ result.s.high = 0;
+ result.s.low = input.s.high >> (b - bits_in_dword);
+ }
+ else /* 0 <= b < bits_in_dword */
+ {
+ if (b == 0)
+ return a;
+ result.s.high = input.s.high >> b;
+ result.s.low = (input.s.high << (bits_in_dword - b)) | (input.s.low >> b);
+ }
+ return result.all;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/mingw_fixfloat.c b/contrib/compiler-rt/lib/builtins/mingw_fixfloat.c
new file mode 100644
index 000000000000..c462e0dbf654
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/mingw_fixfloat.c
@@ -0,0 +1,36 @@
+/* ===-- mingw_fixfloat.c - Wrap int/float conversions for arm/windows -----===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI di_int __fixdfdi(double a);
+COMPILER_RT_ABI di_int __fixsfdi(float a);
+COMPILER_RT_ABI du_int __fixunsdfdi(double a);
+COMPILER_RT_ABI du_int __fixunssfdi(float a);
+COMPILER_RT_ABI double __floatdidf(di_int a);
+COMPILER_RT_ABI float __floatdisf(di_int a);
+COMPILER_RT_ABI double __floatundidf(du_int a);
+COMPILER_RT_ABI float __floatundisf(du_int a);
+
+COMPILER_RT_ABI di_int __dtoi64(double a) { return __fixdfdi(a); }
+
+COMPILER_RT_ABI di_int __stoi64(float a) { return __fixsfdi(a); }
+
+COMPILER_RT_ABI du_int __dtou64(double a) { return __fixunsdfdi(a); }
+
+COMPILER_RT_ABI du_int __stou64(float a) { return __fixunssfdi(a); }
+
+COMPILER_RT_ABI double __i64tod(di_int a) { return __floatdidf(a); }
+
+COMPILER_RT_ABI float __i64tos(di_int a) { return __floatdisf(a); }
+
+COMPILER_RT_ABI double __u64tod(du_int a) { return __floatundidf(a); }
+
+COMPILER_RT_ABI float __u64tos(du_int a) { return __floatundisf(a); }
diff --git a/contrib/compiler-rt/lib/builtins/moddi3.c b/contrib/compiler-rt/lib/builtins/moddi3.c
new file mode 100644
index 000000000000..a04279e3875f
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/moddi3.c
@@ -0,0 +1,30 @@
+/*===-- moddi3.c - Implement __moddi3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __moddi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a % b */
+
+COMPILER_RT_ABI di_int
+__moddi3(di_int a, di_int b)
+{
+ const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1;
+ di_int s = b >> bits_in_dword_m1; /* s = b < 0 ? -1 : 0 */
+ b = (b ^ s) - s; /* negate if s == -1 */
+ s = a >> bits_in_dword_m1; /* s = a < 0 ? -1 : 0 */
+ a = (a ^ s) - s; /* negate if s == -1 */
+ du_int r;
+ __udivmoddi4(a, b, &r);
+ return ((di_int)r ^ s) - s; /* negate if s == -1 */
+}
diff --git a/contrib/compiler-rt/lib/builtins/modsi3.c b/contrib/compiler-rt/lib/builtins/modsi3.c
new file mode 100644
index 000000000000..86c73ce13777
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/modsi3.c
@@ -0,0 +1,23 @@
+/* ===-- modsi3.c - Implement __modsi3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __modsi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a % b */
+
+COMPILER_RT_ABI si_int
+__modsi3(si_int a, si_int b)
+{
+ return a - __divsi3(a, b) * b;
+}
diff --git a/contrib/compiler-rt/lib/builtins/modti3.c b/contrib/compiler-rt/lib/builtins/modti3.c
new file mode 100644
index 000000000000..d505c07ac162
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/modti3.c
@@ -0,0 +1,34 @@
+/* ===-- modti3.c - Implement __modti3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __modti3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/*Returns: a % b */
+
+COMPILER_RT_ABI ti_int
+__modti3(ti_int a, ti_int b)
+{
+ const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1;
+ ti_int s = b >> bits_in_tword_m1; /* s = b < 0 ? -1 : 0 */
+ b = (b ^ s) - s; /* negate if s == -1 */
+ s = a >> bits_in_tword_m1; /* s = a < 0 ? -1 : 0 */
+ a = (a ^ s) - s; /* negate if s == -1 */
+ tu_int r;
+ __udivmodti4(a, b, &r);
+ return ((ti_int)r ^ s) - s; /* negate if s == -1 */
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/muldc3.c b/contrib/compiler-rt/lib/builtins/muldc3.c
new file mode 100644
index 000000000000..16d8e98390a3
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/muldc3.c
@@ -0,0 +1,73 @@
+/* ===-- muldc3.c - Implement __muldc3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __muldc3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+#include "int_math.h"
+
+/* Returns: the product of a + ib and c + id */
+
+COMPILER_RT_ABI Dcomplex
+__muldc3(double __a, double __b, double __c, double __d)
+{
+ double __ac = __a * __c;
+ double __bd = __b * __d;
+ double __ad = __a * __d;
+ double __bc = __b * __c;
+ Dcomplex z;
+ COMPLEX_REAL(z) = __ac - __bd;
+ COMPLEX_IMAGINARY(z) = __ad + __bc;
+ if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
+ {
+ int __recalc = 0;
+ if (crt_isinf(__a) || crt_isinf(__b))
+ {
+ __a = crt_copysign(crt_isinf(__a) ? 1 : 0, __a);
+ __b = crt_copysign(crt_isinf(__b) ? 1 : 0, __b);
+ if (crt_isnan(__c))
+ __c = crt_copysign(0, __c);
+ if (crt_isnan(__d))
+ __d = crt_copysign(0, __d);
+ __recalc = 1;
+ }
+ if (crt_isinf(__c) || crt_isinf(__d))
+ {
+ __c = crt_copysign(crt_isinf(__c) ? 1 : 0, __c);
+ __d = crt_copysign(crt_isinf(__d) ? 1 : 0, __d);
+ if (crt_isnan(__a))
+ __a = crt_copysign(0, __a);
+ if (crt_isnan(__b))
+ __b = crt_copysign(0, __b);
+ __recalc = 1;
+ }
+ if (!__recalc && (crt_isinf(__ac) || crt_isinf(__bd) ||
+ crt_isinf(__ad) || crt_isinf(__bc)))
+ {
+ if (crt_isnan(__a))
+ __a = crt_copysign(0, __a);
+ if (crt_isnan(__b))
+ __b = crt_copysign(0, __b);
+ if (crt_isnan(__c))
+ __c = crt_copysign(0, __c);
+ if (crt_isnan(__d))
+ __d = crt_copysign(0, __d);
+ __recalc = 1;
+ }
+ if (__recalc)
+ {
+ COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d);
+ COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c);
+ }
+ }
+ return z;
+}
diff --git a/contrib/compiler-rt/lib/builtins/muldf3.c b/contrib/compiler-rt/lib/builtins/muldf3.c
new file mode 100644
index 000000000000..1bb103e38c13
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/muldf3.c
@@ -0,0 +1,30 @@
+//===-- lib/muldf3.c - Double-precision multiplication ------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements double-precision soft-float multiplication
+// with the IEEE-754 default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_mul_impl.inc"
+
+COMPILER_RT_ABI fp_t __muldf3(fp_t a, fp_t b) {
+ return __mulXf3__(a, b);
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_dmul(fp_t a, fp_t b) {
+ return __muldf3(a, b);
+}
+#else
+AEABI_RTABI fp_t __aeabi_dmul(fp_t a, fp_t b) COMPILER_RT_ALIAS(__muldf3);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/muldi3.c b/contrib/compiler-rt/lib/builtins/muldi3.c
new file mode 100644
index 000000000000..a187315e9165
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/muldi3.c
@@ -0,0 +1,58 @@
+/* ===-- muldi3.c - Implement __muldi3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __muldi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a * b */
+
+static
+di_int
+__muldsi3(su_int a, su_int b)
+{
+ dwords r;
+ const int bits_in_word_2 = (int)(sizeof(si_int) * CHAR_BIT) / 2;
+ const su_int lower_mask = (su_int)~0 >> bits_in_word_2;
+ r.s.low = (a & lower_mask) * (b & lower_mask);
+ su_int t = r.s.low >> bits_in_word_2;
+ r.s.low &= lower_mask;
+ t += (a >> bits_in_word_2) * (b & lower_mask);
+ r.s.low += (t & lower_mask) << bits_in_word_2;
+ r.s.high = t >> bits_in_word_2;
+ t = r.s.low >> bits_in_word_2;
+ r.s.low &= lower_mask;
+ t += (b >> bits_in_word_2) * (a & lower_mask);
+ r.s.low += (t & lower_mask) << bits_in_word_2;
+ r.s.high += t >> bits_in_word_2;
+ r.s.high += (a >> bits_in_word_2) * (b >> bits_in_word_2);
+ return r.all;
+}
+
+/* Returns: a * b */
+
+COMPILER_RT_ABI di_int
+__muldi3(di_int a, di_int b)
+{
+ dwords x;
+ x.all = a;
+ dwords y;
+ y.all = b;
+ dwords r;
+ r.all = __muldsi3(x.s.low, y.s.low);
+ r.s.high += x.s.high * y.s.low + x.s.low * y.s.high;
+ return r.all;
+}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI di_int __aeabi_lmul(di_int a, di_int b) COMPILER_RT_ALIAS(__muldi3);
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/mulodi4.c b/contrib/compiler-rt/lib/builtins/mulodi4.c
new file mode 100644
index 000000000000..d2fd7db2bcd3
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/mulodi4.c
@@ -0,0 +1,58 @@
+/*===-- mulodi4.c - Implement __mulodi4 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __mulodi4 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a * b */
+
+/* Effects: sets *overflow to 1 if a * b overflows */
+
+COMPILER_RT_ABI di_int
+__mulodi4(di_int a, di_int b, int* overflow)
+{
+ const int N = (int)(sizeof(di_int) * CHAR_BIT);
+ const di_int MIN = (di_int)1 << (N-1);
+ const di_int MAX = ~MIN;
+ *overflow = 0;
+ di_int result = a * b;
+ if (a == MIN)
+ {
+ if (b != 0 && b != 1)
+ *overflow = 1;
+ return result;
+ }
+ if (b == MIN)
+ {
+ if (a != 0 && a != 1)
+ *overflow = 1;
+ return result;
+ }
+ di_int sa = a >> (N - 1);
+ di_int abs_a = (a ^ sa) - sa;
+ di_int sb = b >> (N - 1);
+ di_int abs_b = (b ^ sb) - sb;
+ if (abs_a < 2 || abs_b < 2)
+ return result;
+ if (sa == sb)
+ {
+ if (abs_a > MAX / abs_b)
+ *overflow = 1;
+ }
+ else
+ {
+ if (abs_a > MIN / -abs_b)
+ *overflow = 1;
+ }
+ return result;
+}
diff --git a/contrib/compiler-rt/lib/builtins/mulosi4.c b/contrib/compiler-rt/lib/builtins/mulosi4.c
new file mode 100644
index 000000000000..422528085c44
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/mulosi4.c
@@ -0,0 +1,58 @@
+/*===-- mulosi4.c - Implement __mulosi4 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __mulosi4 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a * b */
+
+/* Effects: sets *overflow to 1 if a * b overflows */
+
+COMPILER_RT_ABI si_int
+__mulosi4(si_int a, si_int b, int* overflow)
+{
+ const int N = (int)(sizeof(si_int) * CHAR_BIT);
+ const si_int MIN = (si_int)1 << (N-1);
+ const si_int MAX = ~MIN;
+ *overflow = 0;
+ si_int result = a * b;
+ if (a == MIN)
+ {
+ if (b != 0 && b != 1)
+ *overflow = 1;
+ return result;
+ }
+ if (b == MIN)
+ {
+ if (a != 0 && a != 1)
+ *overflow = 1;
+ return result;
+ }
+ si_int sa = a >> (N - 1);
+ si_int abs_a = (a ^ sa) - sa;
+ si_int sb = b >> (N - 1);
+ si_int abs_b = (b ^ sb) - sb;
+ if (abs_a < 2 || abs_b < 2)
+ return result;
+ if (sa == sb)
+ {
+ if (abs_a > MAX / abs_b)
+ *overflow = 1;
+ }
+ else
+ {
+ if (abs_a > MIN / -abs_b)
+ *overflow = 1;
+ }
+ return result;
+}
diff --git a/contrib/compiler-rt/lib/builtins/muloti4.c b/contrib/compiler-rt/lib/builtins/muloti4.c
new file mode 100644
index 000000000000..16b218920321
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/muloti4.c
@@ -0,0 +1,62 @@
+/*===-- muloti4.c - Implement __muloti4 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __muloti4 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: a * b */
+
+/* Effects: sets *overflow to 1 if a * b overflows */
+
+COMPILER_RT_ABI ti_int
+__muloti4(ti_int a, ti_int b, int* overflow)
+{
+ const int N = (int)(sizeof(ti_int) * CHAR_BIT);
+ const ti_int MIN = (ti_int)1 << (N-1);
+ const ti_int MAX = ~MIN;
+ *overflow = 0;
+ ti_int result = a * b;
+ if (a == MIN)
+ {
+ if (b != 0 && b != 1)
+ *overflow = 1;
+ return result;
+ }
+ if (b == MIN)
+ {
+ if (a != 0 && a != 1)
+ *overflow = 1;
+ return result;
+ }
+ ti_int sa = a >> (N - 1);
+ ti_int abs_a = (a ^ sa) - sa;
+ ti_int sb = b >> (N - 1);
+ ti_int abs_b = (b ^ sb) - sb;
+ if (abs_a < 2 || abs_b < 2)
+ return result;
+ if (sa == sb)
+ {
+ if (abs_a > MAX / abs_b)
+ *overflow = 1;
+ }
+ else
+ {
+ if (abs_a > MIN / -abs_b)
+ *overflow = 1;
+ }
+ return result;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/mulsc3.c b/contrib/compiler-rt/lib/builtins/mulsc3.c
new file mode 100644
index 000000000000..c89cfd247a15
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/mulsc3.c
@@ -0,0 +1,73 @@
+/* ===-- mulsc3.c - Implement __mulsc3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __mulsc3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+#include "int_math.h"
+
+/* Returns: the product of a + ib and c + id */
+
+COMPILER_RT_ABI Fcomplex
+__mulsc3(float __a, float __b, float __c, float __d)
+{
+ float __ac = __a * __c;
+ float __bd = __b * __d;
+ float __ad = __a * __d;
+ float __bc = __b * __c;
+ Fcomplex z;
+ COMPLEX_REAL(z) = __ac - __bd;
+ COMPLEX_IMAGINARY(z) = __ad + __bc;
+ if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
+ {
+ int __recalc = 0;
+ if (crt_isinf(__a) || crt_isinf(__b))
+ {
+ __a = crt_copysignf(crt_isinf(__a) ? 1 : 0, __a);
+ __b = crt_copysignf(crt_isinf(__b) ? 1 : 0, __b);
+ if (crt_isnan(__c))
+ __c = crt_copysignf(0, __c);
+ if (crt_isnan(__d))
+ __d = crt_copysignf(0, __d);
+ __recalc = 1;
+ }
+ if (crt_isinf(__c) || crt_isinf(__d))
+ {
+ __c = crt_copysignf(crt_isinf(__c) ? 1 : 0, __c);
+ __d = crt_copysignf(crt_isinf(__d) ? 1 : 0, __d);
+ if (crt_isnan(__a))
+ __a = crt_copysignf(0, __a);
+ if (crt_isnan(__b))
+ __b = crt_copysignf(0, __b);
+ __recalc = 1;
+ }
+ if (!__recalc && (crt_isinf(__ac) || crt_isinf(__bd) ||
+ crt_isinf(__ad) || crt_isinf(__bc)))
+ {
+ if (crt_isnan(__a))
+ __a = crt_copysignf(0, __a);
+ if (crt_isnan(__b))
+ __b = crt_copysignf(0, __b);
+ if (crt_isnan(__c))
+ __c = crt_copysignf(0, __c);
+ if (crt_isnan(__d))
+ __d = crt_copysignf(0, __d);
+ __recalc = 1;
+ }
+ if (__recalc)
+ {
+ COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d);
+ COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c);
+ }
+ }
+ return z;
+}
diff --git a/contrib/compiler-rt/lib/builtins/mulsf3.c b/contrib/compiler-rt/lib/builtins/mulsf3.c
new file mode 100644
index 000000000000..1e2cf3e717c9
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/mulsf3.c
@@ -0,0 +1,30 @@
+//===-- lib/mulsf3.c - Single-precision multiplication ------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements single-precision soft-float multiplication
+// with the IEEE-754 default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_mul_impl.inc"
+
+COMPILER_RT_ABI fp_t __mulsf3(fp_t a, fp_t b) {
+ return __mulXf3__(a, b);
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_fmul(fp_t a, fp_t b) {
+ return __mulsf3(a, b);
+}
+#else
+AEABI_RTABI fp_t __aeabi_fmul(fp_t a, fp_t b) COMPILER_RT_ALIAS(__mulsf3);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/multc3.c b/contrib/compiler-rt/lib/builtins/multc3.c
new file mode 100644
index 000000000000..0518bc2569f1
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/multc3.c
@@ -0,0 +1,68 @@
+/* ===-- multc3.c - Implement __multc3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __multc3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+#include "int_math.h"
+
+/* Returns: the product of a + ib and c + id */
+
+COMPILER_RT_ABI long double _Complex
+__multc3(long double a, long double b, long double c, long double d)
+{
+ long double ac = a * c;
+ long double bd = b * d;
+ long double ad = a * d;
+ long double bc = b * c;
+ long double _Complex z;
+ __real__ z = ac - bd;
+ __imag__ z = ad + bc;
+ if (crt_isnan(__real__ z) && crt_isnan(__imag__ z)) {
+ int recalc = 0;
+ if (crt_isinf(a) || crt_isinf(b)) {
+ a = crt_copysignl(crt_isinf(a) ? 1 : 0, a);
+ b = crt_copysignl(crt_isinf(b) ? 1 : 0, b);
+ if (crt_isnan(c))
+ c = crt_copysignl(0, c);
+ if (crt_isnan(d))
+ d = crt_copysignl(0, d);
+ recalc = 1;
+ }
+ if (crt_isinf(c) || crt_isinf(d)) {
+ c = crt_copysignl(crt_isinf(c) ? 1 : 0, c);
+ d = crt_copysignl(crt_isinf(d) ? 1 : 0, d);
+ if (crt_isnan(a))
+ a = crt_copysignl(0, a);
+ if (crt_isnan(b))
+ b = crt_copysignl(0, b);
+ recalc = 1;
+ }
+ if (!recalc && (crt_isinf(ac) || crt_isinf(bd) ||
+ crt_isinf(ad) || crt_isinf(bc))) {
+ if (crt_isnan(a))
+ a = crt_copysignl(0, a);
+ if (crt_isnan(b))
+ b = crt_copysignl(0, b);
+ if (crt_isnan(c))
+ c = crt_copysignl(0, c);
+ if (crt_isnan(d))
+ d = crt_copysignl(0, d);
+ recalc = 1;
+ }
+ if (recalc) {
+ __real__ z = CRT_INFINITY * (a * c - b * d);
+ __imag__ z = CRT_INFINITY * (a * d + b * c);
+ }
+ }
+ return z;
+}
diff --git a/contrib/compiler-rt/lib/builtins/multf3.c b/contrib/compiler-rt/lib/builtins/multf3.c
new file mode 100644
index 000000000000..0b915923ea09
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/multf3.c
@@ -0,0 +1,25 @@
+//===-- lib/multf3.c - Quad-precision multiplication --------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements quad-precision soft-float multiplication
+// with the IEEE-754 default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+#include "fp_mul_impl.inc"
+
+COMPILER_RT_ABI fp_t __multf3(fp_t a, fp_t b) {
+ return __mulXf3__(a, b);
+}
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/multi3.c b/contrib/compiler-rt/lib/builtins/multi3.c
new file mode 100644
index 000000000000..e0d52d430b66
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/multi3.c
@@ -0,0 +1,58 @@
+/* ===-- multi3.c - Implement __multi3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+
+ * This file implements __multi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: a * b */
+
+static
+ti_int
+__mulddi3(du_int a, du_int b)
+{
+ twords r;
+ const int bits_in_dword_2 = (int)(sizeof(di_int) * CHAR_BIT) / 2;
+ const du_int lower_mask = (du_int)~0 >> bits_in_dword_2;
+ r.s.low = (a & lower_mask) * (b & lower_mask);
+ du_int t = r.s.low >> bits_in_dword_2;
+ r.s.low &= lower_mask;
+ t += (a >> bits_in_dword_2) * (b & lower_mask);
+ r.s.low += (t & lower_mask) << bits_in_dword_2;
+ r.s.high = t >> bits_in_dword_2;
+ t = r.s.low >> bits_in_dword_2;
+ r.s.low &= lower_mask;
+ t += (b >> bits_in_dword_2) * (a & lower_mask);
+ r.s.low += (t & lower_mask) << bits_in_dword_2;
+ r.s.high += t >> bits_in_dword_2;
+ r.s.high += (a >> bits_in_dword_2) * (b >> bits_in_dword_2);
+ return r.all;
+}
+
+/* Returns: a * b */
+
+COMPILER_RT_ABI ti_int
+__multi3(ti_int a, ti_int b)
+{
+ twords x;
+ x.all = a;
+ twords y;
+ y.all = b;
+ twords r;
+ r.all = __mulddi3(x.s.low, y.s.low);
+ r.s.high += x.s.high * y.s.low + x.s.low * y.s.high;
+ return r.all;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/mulvdi3.c b/contrib/compiler-rt/lib/builtins/mulvdi3.c
new file mode 100644
index 000000000000..e63249e0a04c
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/mulvdi3.c
@@ -0,0 +1,56 @@
+/*===-- mulvdi3.c - Implement __mulvdi3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __mulvdi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a * b */
+
+/* Effects: aborts if a * b overflows */
+
+COMPILER_RT_ABI di_int
+__mulvdi3(di_int a, di_int b)
+{
+ const int N = (int)(sizeof(di_int) * CHAR_BIT);
+ const di_int MIN = (di_int)1 << (N-1);
+ const di_int MAX = ~MIN;
+ if (a == MIN)
+ {
+ if (b == 0 || b == 1)
+ return a * b;
+ compilerrt_abort();
+ }
+ if (b == MIN)
+ {
+ if (a == 0 || a == 1)
+ return a * b;
+ compilerrt_abort();
+ }
+ di_int sa = a >> (N - 1);
+ di_int abs_a = (a ^ sa) - sa;
+ di_int sb = b >> (N - 1);
+ di_int abs_b = (b ^ sb) - sb;
+ if (abs_a < 2 || abs_b < 2)
+ return a * b;
+ if (sa == sb)
+ {
+ if (abs_a > MAX / abs_b)
+ compilerrt_abort();
+ }
+ else
+ {
+ if (abs_a > MIN / -abs_b)
+ compilerrt_abort();
+ }
+ return a * b;
+}
diff --git a/contrib/compiler-rt/lib/builtins/mulvsi3.c b/contrib/compiler-rt/lib/builtins/mulvsi3.c
new file mode 100644
index 000000000000..74ea4f2da226
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/mulvsi3.c
@@ -0,0 +1,56 @@
+/* ===-- mulvsi3.c - Implement __mulvsi3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __mulvsi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a * b */
+
+/* Effects: aborts if a * b overflows */
+
+COMPILER_RT_ABI si_int
+__mulvsi3(si_int a, si_int b)
+{
+ const int N = (int)(sizeof(si_int) * CHAR_BIT);
+ const si_int MIN = (si_int)1 << (N-1);
+ const si_int MAX = ~MIN;
+ if (a == MIN)
+ {
+ if (b == 0 || b == 1)
+ return a * b;
+ compilerrt_abort();
+ }
+ if (b == MIN)
+ {
+ if (a == 0 || a == 1)
+ return a * b;
+ compilerrt_abort();
+ }
+ si_int sa = a >> (N - 1);
+ si_int abs_a = (a ^ sa) - sa;
+ si_int sb = b >> (N - 1);
+ si_int abs_b = (b ^ sb) - sb;
+ if (abs_a < 2 || abs_b < 2)
+ return a * b;
+ if (sa == sb)
+ {
+ if (abs_a > MAX / abs_b)
+ compilerrt_abort();
+ }
+ else
+ {
+ if (abs_a > MIN / -abs_b)
+ compilerrt_abort();
+ }
+ return a * b;
+}
diff --git a/contrib/compiler-rt/lib/builtins/mulvti3.c b/contrib/compiler-rt/lib/builtins/mulvti3.c
new file mode 100644
index 000000000000..f4c7d1612ba9
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/mulvti3.c
@@ -0,0 +1,60 @@
+/* ===-- mulvti3.c - Implement __mulvti3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __mulvti3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: a * b */
+
+/* Effects: aborts if a * b overflows */
+
+COMPILER_RT_ABI ti_int
+__mulvti3(ti_int a, ti_int b)
+{
+ const int N = (int)(sizeof(ti_int) * CHAR_BIT);
+ const ti_int MIN = (ti_int)1 << (N-1);
+ const ti_int MAX = ~MIN;
+ if (a == MIN)
+ {
+ if (b == 0 || b == 1)
+ return a * b;
+ compilerrt_abort();
+ }
+ if (b == MIN)
+ {
+ if (a == 0 || a == 1)
+ return a * b;
+ compilerrt_abort();
+ }
+ ti_int sa = a >> (N - 1);
+ ti_int abs_a = (a ^ sa) - sa;
+ ti_int sb = b >> (N - 1);
+ ti_int abs_b = (b ^ sb) - sb;
+ if (abs_a < 2 || abs_b < 2)
+ return a * b;
+ if (sa == sb)
+ {
+ if (abs_a > MAX / abs_b)
+ compilerrt_abort();
+ }
+ else
+ {
+ if (abs_a > MIN / -abs_b)
+ compilerrt_abort();
+ }
+ return a * b;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/mulxc3.c b/contrib/compiler-rt/lib/builtins/mulxc3.c
new file mode 100644
index 000000000000..ba3221691821
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/mulxc3.c
@@ -0,0 +1,77 @@
+/* ===-- mulxc3.c - Implement __mulxc3 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __mulxc3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#if !_ARCH_PPC
+
+#include "int_lib.h"
+#include "int_math.h"
+
+/* Returns: the product of a + ib and c + id */
+
+COMPILER_RT_ABI Lcomplex
+__mulxc3(long double __a, long double __b, long double __c, long double __d)
+{
+ long double __ac = __a * __c;
+ long double __bd = __b * __d;
+ long double __ad = __a * __d;
+ long double __bc = __b * __c;
+ Lcomplex z;
+ COMPLEX_REAL(z) = __ac - __bd;
+ COMPLEX_IMAGINARY(z) = __ad + __bc;
+ if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
+ {
+ int __recalc = 0;
+ if (crt_isinf(__a) || crt_isinf(__b))
+ {
+ __a = crt_copysignl(crt_isinf(__a) ? 1 : 0, __a);
+ __b = crt_copysignl(crt_isinf(__b) ? 1 : 0, __b);
+ if (crt_isnan(__c))
+ __c = crt_copysignl(0, __c);
+ if (crt_isnan(__d))
+ __d = crt_copysignl(0, __d);
+ __recalc = 1;
+ }
+ if (crt_isinf(__c) || crt_isinf(__d))
+ {
+ __c = crt_copysignl(crt_isinf(__c) ? 1 : 0, __c);
+ __d = crt_copysignl(crt_isinf(__d) ? 1 : 0, __d);
+ if (crt_isnan(__a))
+ __a = crt_copysignl(0, __a);
+ if (crt_isnan(__b))
+ __b = crt_copysignl(0, __b);
+ __recalc = 1;
+ }
+ if (!__recalc && (crt_isinf(__ac) || crt_isinf(__bd) ||
+ crt_isinf(__ad) || crt_isinf(__bc)))
+ {
+ if (crt_isnan(__a))
+ __a = crt_copysignl(0, __a);
+ if (crt_isnan(__b))
+ __b = crt_copysignl(0, __b);
+ if (crt_isnan(__c))
+ __c = crt_copysignl(0, __c);
+ if (crt_isnan(__d))
+ __d = crt_copysignl(0, __d);
+ __recalc = 1;
+ }
+ if (__recalc)
+ {
+ COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d);
+ COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c);
+ }
+ }
+ return z;
+}
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/negdf2.c b/contrib/compiler-rt/lib/builtins/negdf2.c
new file mode 100644
index 000000000000..f0bfaad24743
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/negdf2.c
@@ -0,0 +1,30 @@
+//===-- lib/negdf2.c - double-precision negation ------------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements double-precision soft-float negation.
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+COMPILER_RT_ABI fp_t
+__negdf2(fp_t a) {
+ return fromRep(toRep(a) ^ signBit);
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_dneg(fp_t a) {
+ return __negdf2(a);
+}
+#else
+AEABI_RTABI fp_t __aeabi_dneg(fp_t a) COMPILER_RT_ALIAS(__negdf2);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/negdi2.c b/contrib/compiler-rt/lib/builtins/negdi2.c
new file mode 100644
index 000000000000..3d49ba2899d2
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/negdi2.c
@@ -0,0 +1,26 @@
+/* ===-- negdi2.c - Implement __negdi2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __negdi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: -a */
+
+COMPILER_RT_ABI di_int
+__negdi2(di_int a)
+{
+ /* Note: this routine is here for API compatibility; any sane compiler
+ * should expand it inline.
+ */
+ return -a;
+}
diff --git a/contrib/compiler-rt/lib/builtins/negsf2.c b/contrib/compiler-rt/lib/builtins/negsf2.c
new file mode 100644
index 000000000000..05c97d4d5a11
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/negsf2.c
@@ -0,0 +1,30 @@
+//===-- lib/negsf2.c - single-precision negation ------------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements single-precision soft-float negation.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+COMPILER_RT_ABI fp_t
+__negsf2(fp_t a) {
+ return fromRep(toRep(a) ^ signBit);
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_fneg(fp_t a) {
+ return __negsf2(a);
+}
+#else
+AEABI_RTABI fp_t __aeabi_fneg(fp_t a) COMPILER_RT_ALIAS(__negsf2);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/negti2.c b/contrib/compiler-rt/lib/builtins/negti2.c
new file mode 100644
index 000000000000..9b00b303f856
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/negti2.c
@@ -0,0 +1,30 @@
+/* ===-- negti2.c - Implement __negti2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __negti2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: -a */
+
+COMPILER_RT_ABI ti_int
+__negti2(ti_int a)
+{
+ /* Note: this routine is here for API compatibility; any sane compiler
+ * should expand it inline.
+ */
+ return -a;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/negvdi2.c b/contrib/compiler-rt/lib/builtins/negvdi2.c
new file mode 100644
index 000000000000..e336ecf28f0d
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/negvdi2.c
@@ -0,0 +1,28 @@
+/* ===-- negvdi2.c - Implement __negvdi2 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __negvdi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: -a */
+
+/* Effects: aborts if -a overflows */
+
+COMPILER_RT_ABI di_int
+__negvdi2(di_int a)
+{
+ const di_int MIN = (di_int)1 << ((int)(sizeof(di_int) * CHAR_BIT)-1);
+ if (a == MIN)
+ compilerrt_abort();
+ return -a;
+}
diff --git a/contrib/compiler-rt/lib/builtins/negvsi2.c b/contrib/compiler-rt/lib/builtins/negvsi2.c
new file mode 100644
index 000000000000..b9e93fef06c5
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/negvsi2.c
@@ -0,0 +1,28 @@
+/* ===-- negvsi2.c - Implement __negvsi2 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __negvsi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: -a */
+
+/* Effects: aborts if -a overflows */
+
+COMPILER_RT_ABI si_int
+__negvsi2(si_int a)
+{
+ const si_int MIN = (si_int)1 << ((int)(sizeof(si_int) * CHAR_BIT)-1);
+ if (a == MIN)
+ compilerrt_abort();
+ return -a;
+}
diff --git a/contrib/compiler-rt/lib/builtins/negvti2.c b/contrib/compiler-rt/lib/builtins/negvti2.c
new file mode 100644
index 000000000000..85f9f7d19d9d
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/negvti2.c
@@ -0,0 +1,32 @@
+/*===-- negvti2.c - Implement __negvti2 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===
+ *
+ *This file implements __negvti2 for the compiler_rt library.
+ *
+ *===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: -a */
+
+/* Effects: aborts if -a overflows */
+
+COMPILER_RT_ABI ti_int
+__negvti2(ti_int a)
+{
+ const ti_int MIN = (ti_int)1 << ((int)(sizeof(ti_int) * CHAR_BIT)-1);
+ if (a == MIN)
+ compilerrt_abort();
+ return -a;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/os_version_check.c b/contrib/compiler-rt/lib/builtins/os_version_check.c
new file mode 100644
index 000000000000..e0d40edc7e38
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/os_version_check.c
@@ -0,0 +1,226 @@
+/* ===-- os_version_check.c - OS version checking -------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements the function __isOSVersionAtLeast, used by
+ * Objective-C's @available
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#ifdef __APPLE__
+
+#include <TargetConditionals.h>
+#include <dispatch/dispatch.h>
+#include <dlfcn.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* These three variables hold the host's OS version. */
+static int32_t GlobalMajor, GlobalMinor, GlobalSubminor;
+static dispatch_once_t DispatchOnceCounter;
+
+/* We can't include <CoreFoundation/CoreFoundation.h> directly from here, so
+ * just forward declare everything that we need from it. */
+
+typedef const void *CFDataRef, *CFAllocatorRef, *CFPropertyListRef,
+ *CFStringRef, *CFDictionaryRef, *CFTypeRef, *CFErrorRef;
+
+#if __LLP64__
+typedef unsigned long long CFTypeID;
+typedef unsigned long long CFOptionFlags;
+typedef signed long long CFIndex;
+#else
+typedef unsigned long CFTypeID;
+typedef unsigned long CFOptionFlags;
+typedef signed long CFIndex;
+#endif
+
+typedef unsigned char UInt8;
+typedef _Bool Boolean;
+typedef CFIndex CFPropertyListFormat;
+typedef uint32_t CFStringEncoding;
+
+/* kCFStringEncodingASCII analog. */
+#define CF_STRING_ENCODING_ASCII 0x0600
+/* kCFStringEncodingUTF8 analog. */
+#define CF_STRING_ENCODING_UTF8 0x08000100
+#define CF_PROPERTY_LIST_IMMUTABLE 0
+
+typedef CFDataRef (*CFDataCreateWithBytesNoCopyFuncTy)(CFAllocatorRef,
+ const UInt8 *, CFIndex,
+ CFAllocatorRef);
+typedef CFPropertyListRef (*CFPropertyListCreateWithDataFuncTy)(
+ CFAllocatorRef, CFDataRef, CFOptionFlags, CFPropertyListFormat *,
+ CFErrorRef *);
+typedef CFPropertyListRef (*CFPropertyListCreateFromXMLDataFuncTy)(
+ CFAllocatorRef, CFDataRef, CFOptionFlags, CFStringRef *);
+typedef CFStringRef (*CFStringCreateWithCStringNoCopyFuncTy)(CFAllocatorRef,
+ const char *,
+ CFStringEncoding,
+ CFAllocatorRef);
+typedef const void *(*CFDictionaryGetValueFuncTy)(CFDictionaryRef,
+ const void *);
+typedef CFTypeID (*CFGetTypeIDFuncTy)(CFTypeRef);
+typedef CFTypeID (*CFStringGetTypeIDFuncTy)(void);
+typedef Boolean (*CFStringGetCStringFuncTy)(CFStringRef, char *, CFIndex,
+ CFStringEncoding);
+typedef void (*CFReleaseFuncTy)(CFTypeRef);
+
+/* Find and parse the SystemVersion.plist file. */
+static void parseSystemVersionPList(void *Unused) {
+ (void)Unused;
+ /* Load CoreFoundation dynamically */
+ const void *NullAllocator = dlsym(RTLD_DEFAULT, "kCFAllocatorNull");
+ if (!NullAllocator)
+ return;
+ const CFAllocatorRef AllocatorNull = *(const CFAllocatorRef *)NullAllocator;
+ CFDataCreateWithBytesNoCopyFuncTy CFDataCreateWithBytesNoCopyFunc =
+ (CFDataCreateWithBytesNoCopyFuncTy)dlsym(RTLD_DEFAULT,
+ "CFDataCreateWithBytesNoCopy");
+ if (!CFDataCreateWithBytesNoCopyFunc)
+ return;
+ CFPropertyListCreateWithDataFuncTy CFPropertyListCreateWithDataFunc =
+ (CFPropertyListCreateWithDataFuncTy)dlsym(
+ RTLD_DEFAULT, "CFPropertyListCreateWithData");
+/* CFPropertyListCreateWithData was introduced only in macOS 10.6+, so it
+ * will be NULL on earlier OS versions. */
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wdeprecated-declarations"
+ CFPropertyListCreateFromXMLDataFuncTy CFPropertyListCreateFromXMLDataFunc =
+ (CFPropertyListCreateFromXMLDataFuncTy)dlsym(
+ RTLD_DEFAULT, "CFPropertyListCreateFromXMLData");
+#pragma clang diagnostic pop
+ /* CFPropertyListCreateFromXMLDataFunc is deprecated in macOS 10.10, so it
+ * might be NULL in future OS versions. */
+ if (!CFPropertyListCreateWithDataFunc && !CFPropertyListCreateFromXMLDataFunc)
+ return;
+ CFStringCreateWithCStringNoCopyFuncTy CFStringCreateWithCStringNoCopyFunc =
+ (CFStringCreateWithCStringNoCopyFuncTy)dlsym(
+ RTLD_DEFAULT, "CFStringCreateWithCStringNoCopy");
+ if (!CFStringCreateWithCStringNoCopyFunc)
+ return;
+ CFDictionaryGetValueFuncTy CFDictionaryGetValueFunc =
+ (CFDictionaryGetValueFuncTy)dlsym(RTLD_DEFAULT, "CFDictionaryGetValue");
+ if (!CFDictionaryGetValueFunc)
+ return;
+ CFGetTypeIDFuncTy CFGetTypeIDFunc =
+ (CFGetTypeIDFuncTy)dlsym(RTLD_DEFAULT, "CFGetTypeID");
+ if (!CFGetTypeIDFunc)
+ return;
+ CFStringGetTypeIDFuncTy CFStringGetTypeIDFunc =
+ (CFStringGetTypeIDFuncTy)dlsym(RTLD_DEFAULT, "CFStringGetTypeID");
+ if (!CFStringGetTypeIDFunc)
+ return;
+ CFStringGetCStringFuncTy CFStringGetCStringFunc =
+ (CFStringGetCStringFuncTy)dlsym(RTLD_DEFAULT, "CFStringGetCString");
+ if (!CFStringGetCStringFunc)
+ return;
+ CFReleaseFuncTy CFReleaseFunc =
+ (CFReleaseFuncTy)dlsym(RTLD_DEFAULT, "CFRelease");
+ if (!CFReleaseFunc)
+ return;
+
+ char *PListPath = "/System/Library/CoreServices/SystemVersion.plist";
+
+#if TARGET_OS_SIMULATOR
+ char *PListPathPrefix = getenv("IPHONE_SIMULATOR_ROOT");
+ if (!PListPathPrefix)
+ return;
+ char FullPath[strlen(PListPathPrefix) + strlen(PListPath) + 1];
+ strcpy(FullPath, PListPathPrefix);
+ strcat(FullPath, PListPath);
+ PListPath = FullPath;
+#endif
+ FILE *PropertyList = fopen(PListPath, "r");
+ if (!PropertyList)
+ return;
+
+ /* Dynamically allocated stuff. */
+ CFDictionaryRef PListRef = NULL;
+ CFDataRef FileContentsRef = NULL;
+ UInt8 *PListBuf = NULL;
+
+ fseek(PropertyList, 0, SEEK_END);
+ long PListFileSize = ftell(PropertyList);
+ if (PListFileSize < 0)
+ goto Fail;
+ rewind(PropertyList);
+
+ PListBuf = malloc((size_t)PListFileSize);
+ if (!PListBuf)
+ goto Fail;
+
+ size_t NumRead = fread(PListBuf, 1, (size_t)PListFileSize, PropertyList);
+ if (NumRead != (size_t)PListFileSize)
+ goto Fail;
+
+ /* Get the file buffer into CF's format. We pass in a null allocator here *
+ * because we free PListBuf ourselves */
+ FileContentsRef = (*CFDataCreateWithBytesNoCopyFunc)(
+ NULL, PListBuf, (CFIndex)NumRead, AllocatorNull);
+ if (!FileContentsRef)
+ goto Fail;
+
+ if (CFPropertyListCreateWithDataFunc)
+ PListRef = (*CFPropertyListCreateWithDataFunc)(
+ NULL, FileContentsRef, CF_PROPERTY_LIST_IMMUTABLE, NULL, NULL);
+ else
+ PListRef = (*CFPropertyListCreateFromXMLDataFunc)(
+ NULL, FileContentsRef, CF_PROPERTY_LIST_IMMUTABLE, NULL);
+ if (!PListRef)
+ goto Fail;
+
+ CFStringRef ProductVersion = (*CFStringCreateWithCStringNoCopyFunc)(
+ NULL, "ProductVersion", CF_STRING_ENCODING_ASCII, AllocatorNull);
+ if (!ProductVersion)
+ goto Fail;
+ CFTypeRef OpaqueValue = (*CFDictionaryGetValueFunc)(PListRef, ProductVersion);
+ (*CFReleaseFunc)(ProductVersion);
+ if (!OpaqueValue ||
+ (*CFGetTypeIDFunc)(OpaqueValue) != (*CFStringGetTypeIDFunc)())
+ goto Fail;
+
+ char VersionStr[32];
+ if (!(*CFStringGetCStringFunc)((CFStringRef)OpaqueValue, VersionStr,
+ sizeof(VersionStr), CF_STRING_ENCODING_UTF8))
+ goto Fail;
+ sscanf(VersionStr, "%d.%d.%d", &GlobalMajor, &GlobalMinor, &GlobalSubminor);
+
+Fail:
+ if (PListRef)
+ (*CFReleaseFunc)(PListRef);
+ if (FileContentsRef)
+ (*CFReleaseFunc)(FileContentsRef);
+ free(PListBuf);
+ fclose(PropertyList);
+}
+
+int32_t __isOSVersionAtLeast(int32_t Major, int32_t Minor, int32_t Subminor) {
+ /* Populate the global version variables, if they haven't already. */
+ dispatch_once_f(&DispatchOnceCounter, NULL, parseSystemVersionPList);
+
+ if (Major < GlobalMajor)
+ return 1;
+ if (Major > GlobalMajor)
+ return 0;
+ if (Minor < GlobalMinor)
+ return 1;
+ if (Minor > GlobalMinor)
+ return 0;
+ return Subminor <= GlobalSubminor;
+}
+
+#else
+
+/* Silence an empty translation unit warning. */
+typedef int unused;
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/paritydi2.c b/contrib/compiler-rt/lib/builtins/paritydi2.c
new file mode 100644
index 000000000000..8ea5ab4214e9
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/paritydi2.c
@@ -0,0 +1,25 @@
+/* ===-- paritydi2.c - Implement __paritydi2 -------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __paritydi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: 1 if number of bits is odd else returns 0 */
+
+COMPILER_RT_ABI si_int
+__paritydi2(di_int a)
+{
+ dwords x;
+ x.all = a;
+ return __paritysi2(x.s.high ^ x.s.low);
+}
diff --git a/contrib/compiler-rt/lib/builtins/paritysi2.c b/contrib/compiler-rt/lib/builtins/paritysi2.c
new file mode 100644
index 000000000000..599984663849
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/paritysi2.c
@@ -0,0 +1,27 @@
+/* ===-- paritysi2.c - Implement __paritysi2 -------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __paritysi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: 1 if number of bits is odd else returns 0 */
+
+COMPILER_RT_ABI si_int
+__paritysi2(si_int a)
+{
+ su_int x = (su_int)a;
+ x ^= x >> 16;
+ x ^= x >> 8;
+ x ^= x >> 4;
+ return (0x6996 >> (x & 0xF)) & 1;
+}
diff --git a/contrib/compiler-rt/lib/builtins/parityti2.c b/contrib/compiler-rt/lib/builtins/parityti2.c
new file mode 100644
index 000000000000..5a4fe492486f
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/parityti2.c
@@ -0,0 +1,29 @@
+/* ===-- parityti2.c - Implement __parityti2 -------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __parityti2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: 1 if number of bits is odd else returns 0 */
+
+COMPILER_RT_ABI si_int
+__parityti2(ti_int a)
+{
+ twords x;
+ x.all = a;
+ return __paritydi2(x.s.high ^ x.s.low);
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/popcountdi2.c b/contrib/compiler-rt/lib/builtins/popcountdi2.c
new file mode 100644
index 000000000000..5e8a62f075eb
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/popcountdi2.c
@@ -0,0 +1,36 @@
+/* ===-- popcountdi2.c - Implement __popcountdi2 ----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __popcountdi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: count of 1 bits */
+
+COMPILER_RT_ABI si_int
+__popcountdi2(di_int a)
+{
+ du_int x2 = (du_int)a;
+ x2 = x2 - ((x2 >> 1) & 0x5555555555555555uLL);
+ /* Every 2 bits holds the sum of every pair of bits (32) */
+ x2 = ((x2 >> 2) & 0x3333333333333333uLL) + (x2 & 0x3333333333333333uLL);
+ /* Every 4 bits holds the sum of every 4-set of bits (3 significant bits) (16) */
+ x2 = (x2 + (x2 >> 4)) & 0x0F0F0F0F0F0F0F0FuLL;
+ /* Every 8 bits holds the sum of every 8-set of bits (4 significant bits) (8) */
+ su_int x = (su_int)(x2 + (x2 >> 32));
+ /* The lower 32 bits hold four 16 bit sums (5 significant bits). */
+ /* Upper 32 bits are garbage */
+ x = x + (x >> 16);
+ /* The lower 16 bits hold two 32 bit sums (6 significant bits). */
+ /* Upper 16 bits are garbage */
+ return (x + (x >> 8)) & 0x0000007F; /* (7 significant bits) */
+}
diff --git a/contrib/compiler-rt/lib/builtins/popcountsi2.c b/contrib/compiler-rt/lib/builtins/popcountsi2.c
new file mode 100644
index 000000000000..44544ff49890
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/popcountsi2.c
@@ -0,0 +1,33 @@
+/* ===-- popcountsi2.c - Implement __popcountsi2 ---------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __popcountsi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: count of 1 bits */
+
+COMPILER_RT_ABI si_int
+__popcountsi2(si_int a)
+{
+ su_int x = (su_int)a;
+ x = x - ((x >> 1) & 0x55555555);
+ /* Every 2 bits holds the sum of every pair of bits */
+ x = ((x >> 2) & 0x33333333) + (x & 0x33333333);
+ /* Every 4 bits holds the sum of every 4-set of bits (3 significant bits) */
+ x = (x + (x >> 4)) & 0x0F0F0F0F;
+ /* Every 8 bits holds the sum of every 8-set of bits (4 significant bits) */
+ x = (x + (x >> 16));
+ /* The lower 16 bits hold two 8 bit sums (5 significant bits).*/
+ /* Upper 16 bits are garbage */
+ return (x + (x >> 8)) & 0x0000003F; /* (6 significant bits) */
+}
diff --git a/contrib/compiler-rt/lib/builtins/popcountti2.c b/contrib/compiler-rt/lib/builtins/popcountti2.c
new file mode 100644
index 000000000000..7451bbb286b7
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/popcountti2.c
@@ -0,0 +1,44 @@
+/* ===-- popcountti2.c - Implement __popcountti2 ----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __popcountti2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: count of 1 bits */
+
+COMPILER_RT_ABI si_int
+__popcountti2(ti_int a)
+{
+ tu_int x3 = (tu_int)a;
+ x3 = x3 - ((x3 >> 1) & (((tu_int)0x5555555555555555uLL << 64) |
+ 0x5555555555555555uLL));
+ /* Every 2 bits holds the sum of every pair of bits (64) */
+ x3 = ((x3 >> 2) & (((tu_int)0x3333333333333333uLL << 64) | 0x3333333333333333uLL))
+ + (x3 & (((tu_int)0x3333333333333333uLL << 64) | 0x3333333333333333uLL));
+ /* Every 4 bits holds the sum of every 4-set of bits (3 significant bits) (32) */
+ x3 = (x3 + (x3 >> 4))
+ & (((tu_int)0x0F0F0F0F0F0F0F0FuLL << 64) | 0x0F0F0F0F0F0F0F0FuLL);
+ /* Every 8 bits holds the sum of every 8-set of bits (4 significant bits) (16) */
+ du_int x2 = (du_int)(x3 + (x3 >> 64));
+ /* Every 8 bits holds the sum of every 8-set of bits (5 significant bits) (8) */
+ su_int x = (su_int)(x2 + (x2 >> 32));
+ /* Every 8 bits holds the sum of every 8-set of bits (6 significant bits) (4) */
+ x = x + (x >> 16);
+ /* Every 8 bits holds the sum of every 8-set of bits (7 significant bits) (2) */
+ /* Upper 16 bits are garbage */
+ return (x + (x >> 8)) & 0xFF; /* (8 significant bits) */
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/powidf2.c b/contrib/compiler-rt/lib/builtins/powidf2.c
new file mode 100644
index 000000000000..ac13b172b043
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/powidf2.c
@@ -0,0 +1,34 @@
+/* ===-- powidf2.cpp - Implement __powidf2 ---------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __powidf2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a ^ b */
+
+COMPILER_RT_ABI double
+__powidf2(double a, si_int b)
+{
+ const int recip = b < 0;
+ double r = 1;
+ while (1)
+ {
+ if (b & 1)
+ r *= a;
+ b /= 2;
+ if (b == 0)
+ break;
+ a *= a;
+ }
+ return recip ? 1/r : r;
+}
diff --git a/contrib/compiler-rt/lib/builtins/powisf2.c b/contrib/compiler-rt/lib/builtins/powisf2.c
new file mode 100644
index 000000000000..0c400ec6dd6a
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/powisf2.c
@@ -0,0 +1,34 @@
+/*===-- powisf2.cpp - Implement __powisf2 ---------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __powisf2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a ^ b */
+
+COMPILER_RT_ABI float
+__powisf2(float a, si_int b)
+{
+ const int recip = b < 0;
+ float r = 1;
+ while (1)
+ {
+ if (b & 1)
+ r *= a;
+ b /= 2;
+ if (b == 0)
+ break;
+ a *= a;
+ }
+ return recip ? 1/r : r;
+}
diff --git a/contrib/compiler-rt/lib/builtins/powitf2.c b/contrib/compiler-rt/lib/builtins/powitf2.c
new file mode 100644
index 000000000000..172f29f58f25
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/powitf2.c
@@ -0,0 +1,38 @@
+/* ===-- powitf2.cpp - Implement __powitf2 ---------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __powitf2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#if _ARCH_PPC
+
+/* Returns: a ^ b */
+
+COMPILER_RT_ABI long double
+__powitf2(long double a, si_int b)
+{
+ const int recip = b < 0;
+ long double r = 1;
+ while (1)
+ {
+ if (b & 1)
+ r *= a;
+ b /= 2;
+ if (b == 0)
+ break;
+ a *= a;
+ }
+ return recip ? 1/r : r;
+}
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/powixf2.c b/contrib/compiler-rt/lib/builtins/powixf2.c
new file mode 100644
index 000000000000..0fd96e503e7b
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/powixf2.c
@@ -0,0 +1,38 @@
+/* ===-- powixf2.cpp - Implement __powixf2 ---------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __powixf2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#if !_ARCH_PPC
+
+#include "int_lib.h"
+
+/* Returns: a ^ b */
+
+COMPILER_RT_ABI long double
+__powixf2(long double a, si_int b)
+{
+ const int recip = b < 0;
+ long double r = 1;
+ while (1)
+ {
+ if (b & 1)
+ r *= a;
+ b /= 2;
+ if (b == 0)
+ break;
+ a *= a;
+ }
+ return recip ? 1/r : r;
+}
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/ppc/DD.h b/contrib/compiler-rt/lib/builtins/ppc/DD.h
new file mode 100644
index 000000000000..3e5f9e58c138
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/ppc/DD.h
@@ -0,0 +1,45 @@
+#ifndef COMPILERRT_DD_HEADER
+#define COMPILERRT_DD_HEADER
+
+#include "../int_lib.h"
+
+typedef union {
+ long double ld;
+ struct {
+ double hi;
+ double lo;
+ }s;
+} DD;
+
+typedef union {
+ double d;
+ uint64_t x;
+} doublebits;
+
+#define LOWORDER(xy,xHi,xLo,yHi,yLo) \
+ (((((xHi)*(yHi) - (xy)) + (xHi)*(yLo)) + (xLo)*(yHi)) + (xLo)*(yLo))
+
+static __inline ALWAYS_INLINE double local_fabs(double x) {
+ doublebits result = {.d = x};
+ result.x &= UINT64_C(0x7fffffffffffffff);
+ return result.d;
+}
+
+static __inline ALWAYS_INLINE double high26bits(double x) {
+ doublebits result = {.d = x};
+ result.x &= UINT64_C(0xfffffffff8000000);
+ return result.d;
+}
+
+static __inline ALWAYS_INLINE int different_sign(double x, double y) {
+ doublebits xsignbit = {.d = x}, ysignbit = {.d = y};
+ int result = (int)(xsignbit.x >> 63) ^ (int)(ysignbit.x >> 63);
+ return result;
+}
+
+long double __gcc_qadd(long double, long double);
+long double __gcc_qsub(long double, long double);
+long double __gcc_qmul(long double, long double);
+long double __gcc_qdiv(long double, long double);
+
+#endif /* COMPILERRT_DD_HEADER */
diff --git a/contrib/compiler-rt/lib/builtins/ppc/divtc3.c b/contrib/compiler-rt/lib/builtins/ppc/divtc3.c
new file mode 100644
index 000000000000..ef532b841145
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/ppc/divtc3.c
@@ -0,0 +1,97 @@
+/* This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+#include "DD.h"
+#include "../int_math.h"
+// Use DOUBLE_PRECISION because the soft-fp method we use is logb (on the upper
+// half of the long doubles), even though this file defines complex division for
+// 128-bit floats.
+#define DOUBLE_PRECISION
+#include "../fp_lib.h"
+
+#if !defined(CRT_INFINITY) && defined(HUGE_VAL)
+#define CRT_INFINITY HUGE_VAL
+#endif /* CRT_INFINITY */
+
+#define makeFinite(x) { \
+ (x).s.hi = crt_copysign(crt_isinf((x).s.hi) ? 1.0 : 0.0, (x).s.hi); \
+ (x).s.lo = 0.0; \
+ }
+
+long double _Complex
+__divtc3(long double a, long double b, long double c, long double d)
+{
+ DD cDD = { .ld = c };
+ DD dDD = { .ld = d };
+
+ int ilogbw = 0;
+ const double logbw = __compiler_rt_logb(
+ crt_fmax(crt_fabs(cDD.s.hi), crt_fabs(dDD.s.hi)));
+
+ if (crt_isfinite(logbw))
+ {
+ ilogbw = (int)logbw;
+
+ cDD.s.hi = crt_scalbn(cDD.s.hi, -ilogbw);
+ cDD.s.lo = crt_scalbn(cDD.s.lo, -ilogbw);
+ dDD.s.hi = crt_scalbn(dDD.s.hi, -ilogbw);
+ dDD.s.lo = crt_scalbn(dDD.s.lo, -ilogbw);
+ }
+
+ const long double denom = __gcc_qadd(__gcc_qmul(cDD.ld, cDD.ld), __gcc_qmul(dDD.ld, dDD.ld));
+ const long double realNumerator = __gcc_qadd(__gcc_qmul(a,cDD.ld), __gcc_qmul(b,dDD.ld));
+ const long double imagNumerator = __gcc_qsub(__gcc_qmul(b,cDD.ld), __gcc_qmul(a,dDD.ld));
+
+ DD real = { .ld = __gcc_qdiv(realNumerator, denom) };
+ DD imag = { .ld = __gcc_qdiv(imagNumerator, denom) };
+
+ real.s.hi = crt_scalbn(real.s.hi, -ilogbw);
+ real.s.lo = crt_scalbn(real.s.lo, -ilogbw);
+ imag.s.hi = crt_scalbn(imag.s.hi, -ilogbw);
+ imag.s.lo = crt_scalbn(imag.s.lo, -ilogbw);
+
+ if (crt_isnan(real.s.hi) && crt_isnan(imag.s.hi))
+ {
+ DD aDD = { .ld = a };
+ DD bDD = { .ld = b };
+ DD rDD = { .ld = denom };
+
+ if ((rDD.s.hi == 0.0) && (!crt_isnan(aDD.s.hi) ||
+ !crt_isnan(bDD.s.hi)))
+ {
+ real.s.hi = crt_copysign(CRT_INFINITY,cDD.s.hi) * aDD.s.hi;
+ real.s.lo = 0.0;
+ imag.s.hi = crt_copysign(CRT_INFINITY,cDD.s.hi) * bDD.s.hi;
+ imag.s.lo = 0.0;
+ }
+
+ else if ((crt_isinf(aDD.s.hi) || crt_isinf(bDD.s.hi)) &&
+ crt_isfinite(cDD.s.hi) && crt_isfinite(dDD.s.hi))
+ {
+ makeFinite(aDD);
+ makeFinite(bDD);
+ real.s.hi = CRT_INFINITY * (aDD.s.hi*cDD.s.hi + bDD.s.hi*dDD.s.hi);
+ real.s.lo = 0.0;
+ imag.s.hi = CRT_INFINITY * (bDD.s.hi*cDD.s.hi - aDD.s.hi*dDD.s.hi);
+ imag.s.lo = 0.0;
+ }
+
+ else if ((crt_isinf(cDD.s.hi) || crt_isinf(dDD.s.hi)) &&
+ crt_isfinite(aDD.s.hi) && crt_isfinite(bDD.s.hi))
+ {
+ makeFinite(cDD);
+ makeFinite(dDD);
+ real.s.hi = crt_copysign(0.0,(aDD.s.hi*cDD.s.hi + bDD.s.hi*dDD.s.hi));
+ real.s.lo = 0.0;
+ imag.s.hi = crt_copysign(0.0,(bDD.s.hi*cDD.s.hi - aDD.s.hi*dDD.s.hi));
+ imag.s.lo = 0.0;
+ }
+ }
+
+ long double _Complex z;
+ __real__ z = real.ld;
+ __imag__ z = imag.ld;
+
+ return z;
+}
diff --git a/contrib/compiler-rt/lib/builtins/ppc/fixtfdi.c b/contrib/compiler-rt/lib/builtins/ppc/fixtfdi.c
new file mode 100644
index 000000000000..2c7c0f8e279f
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/ppc/fixtfdi.c
@@ -0,0 +1,104 @@
+/* This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+/* int64_t __fixunstfdi(long double x);
+ * This file implements the PowerPC 128-bit double-double -> int64_t conversion
+ */
+
+#include "DD.h"
+#include "../int_math.h"
+
+uint64_t __fixtfdi(long double input)
+{
+ const DD x = { .ld = input };
+ const doublebits hibits = { .d = x.s.hi };
+
+ const uint32_t absHighWord = (uint32_t)(hibits.x >> 32) & UINT32_C(0x7fffffff);
+ const uint32_t absHighWordMinusOne = absHighWord - UINT32_C(0x3ff00000);
+
+ /* If (1.0 - tiny) <= input < 0x1.0p63: */
+ if (UINT32_C(0x03f00000) > absHighWordMinusOne)
+ {
+ /* Do an unsigned conversion of the absolute value, then restore the sign. */
+ const int unbiasedHeadExponent = absHighWordMinusOne >> 20;
+
+ int64_t result = hibits.x & INT64_C(0x000fffffffffffff); /* mantissa(hi) */
+ result |= INT64_C(0x0010000000000000); /* matissa(hi) with implicit bit */
+ result <<= 10; /* mantissa(hi) with one zero preceding bit. */
+
+ const int64_t hiNegationMask = ((int64_t)(hibits.x)) >> 63;
+
+ /* If the tail is non-zero, we need to patch in the tail bits. */
+ if (0.0 != x.s.lo)
+ {
+ const doublebits lobits = { .d = x.s.lo };
+ int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff);
+ tailMantissa |= INT64_C(0x0010000000000000);
+
+ /* At this point we have the mantissa of |tail| */
+ /* We need to negate it if head and tail have different signs. */
+ const int64_t loNegationMask = ((int64_t)(lobits.x)) >> 63;
+ const int64_t negationMask = loNegationMask ^ hiNegationMask;
+ tailMantissa = (tailMantissa ^ negationMask) - negationMask;
+
+ /* Now we have the mantissa of tail as a signed 2s-complement integer */
+
+ const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff;
+
+ /* Shift the tail mantissa into the right position, accounting for the
+ * bias of 10 that we shifted the head mantissa by.
+ */
+ tailMantissa >>= (unbiasedHeadExponent - (biasedTailExponent - (1023 - 10)));
+
+ result += tailMantissa;
+ }
+
+ result >>= (62 - unbiasedHeadExponent);
+
+ /* Restore the sign of the result and return */
+ result = (result ^ hiNegationMask) - hiNegationMask;
+ return result;
+
+ }
+
+ /* Edge cases handled here: */
+
+ /* |x| < 1, result is zero. */
+ if (1.0 > crt_fabs(x.s.hi))
+ return INT64_C(0);
+
+ /* x very close to INT64_MIN, care must be taken to see which side we are on. */
+ if (x.s.hi == -0x1.0p63) {
+
+ int64_t result = INT64_MIN;
+
+ if (0.0 < x.s.lo)
+ {
+ /* If the tail is positive, the correct result is something other than INT64_MIN.
+ * we'll need to figure out what it is.
+ */
+
+ const doublebits lobits = { .d = x.s.lo };
+ int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff);
+ tailMantissa |= INT64_C(0x0010000000000000);
+
+ /* Now we negate the tailMantissa */
+ tailMantissa = (tailMantissa ^ INT64_C(-1)) + INT64_C(1);
+
+ /* And shift it by the appropriate amount */
+ const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff;
+ tailMantissa >>= 1075 - biasedTailExponent;
+
+ result -= tailMantissa;
+ }
+
+ return result;
+ }
+
+ /* Signed overflows, infinities, and NaNs */
+ if (x.s.hi > 0.0)
+ return INT64_MAX;
+ else
+ return INT64_MIN;
+}
diff --git a/contrib/compiler-rt/lib/builtins/ppc/fixunstfdi.c b/contrib/compiler-rt/lib/builtins/ppc/fixunstfdi.c
new file mode 100644
index 000000000000..5e6e2cedf6ac
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/ppc/fixunstfdi.c
@@ -0,0 +1,59 @@
+/* This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+/* uint64_t __fixunstfdi(long double x); */
+/* This file implements the PowerPC 128-bit double-double -> uint64_t conversion */
+
+#include "DD.h"
+
+uint64_t __fixunstfdi(long double input)
+{
+ const DD x = { .ld = input };
+ const doublebits hibits = { .d = x.s.hi };
+
+ const uint32_t highWordMinusOne = (uint32_t)(hibits.x >> 32) - UINT32_C(0x3ff00000);
+
+ /* If (1.0 - tiny) <= input < 0x1.0p64: */
+ if (UINT32_C(0x04000000) > highWordMinusOne)
+ {
+ const int unbiasedHeadExponent = highWordMinusOne >> 20;
+
+ uint64_t result = hibits.x & UINT64_C(0x000fffffffffffff); /* mantissa(hi) */
+ result |= UINT64_C(0x0010000000000000); /* matissa(hi) with implicit bit */
+ result <<= 11; /* mantissa(hi) left aligned in the int64 field. */
+
+ /* If the tail is non-zero, we need to patch in the tail bits. */
+ if (0.0 != x.s.lo)
+ {
+ const doublebits lobits = { .d = x.s.lo };
+ int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff);
+ tailMantissa |= INT64_C(0x0010000000000000);
+
+ /* At this point we have the mantissa of |tail| */
+
+ const int64_t negationMask = ((int64_t)(lobits.x)) >> 63;
+ tailMantissa = (tailMantissa ^ negationMask) - negationMask;
+
+ /* Now we have the mantissa of tail as a signed 2s-complement integer */
+
+ const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff;
+
+ /* Shift the tail mantissa into the right position, accounting for the
+ * bias of 11 that we shifted the head mantissa by.
+ */
+ tailMantissa >>= (unbiasedHeadExponent - (biasedTailExponent - (1023 - 11)));
+
+ result += tailMantissa;
+ }
+
+ result >>= (63 - unbiasedHeadExponent);
+ return result;
+ }
+
+ /* Edge cases are handled here, with saturation. */
+ if (1.0 > x.s.hi)
+ return UINT64_C(0);
+ else
+ return UINT64_MAX;
+}
diff --git a/contrib/compiler-rt/lib/builtins/ppc/fixunstfti.c b/contrib/compiler-rt/lib/builtins/ppc/fixunstfti.c
new file mode 100644
index 000000000000..fa21084cb576
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/ppc/fixunstfti.c
@@ -0,0 +1,106 @@
+//===-- lib/builtins/ppc/fixunstfti.c - Convert long double->int128 *-C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements converting the 128bit IBM/PowerPC long double (double-
+// double) data type to an unsigned 128 bit integer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../int_math.h"
+#define BIAS 1023
+
+/* Convert long double into an unsigned 128-bit integer. */
+__uint128_t __fixunstfti(long double input) {
+
+ /* If we are trying to convert a NaN, return the NaN bit pattern. */
+ if (crt_isnan(input)) {
+ return ((__uint128_t)0x7FF8000000000000ll) << 64 |
+ (__uint128_t)0x0000000000000000ll;
+ }
+
+ __uint128_t result, hiResult, loResult;
+ int hiExponent, loExponent, shift;
+ /* The long double representation, with the high and low portions of
+ * the long double, and the corresponding bit patterns of each double. */
+ union {
+ long double ld;
+ double d[2]; /* [0] is the high double, [1] is the low double. */
+ unsigned long long ull[2]; /* High and low doubles as 64-bit integers. */
+ } ldUnion;
+
+ /* If the long double is less than 1.0 or negative,
+ * return 0.0. */
+ if (input < 1.0)
+ return 0.0;
+
+ /* Retrieve the 64-bit patterns of high and low doubles.
+ * Compute the unbiased exponent of both high and low doubles by
+ * removing the signs, isolating the exponent, and subtracting
+ * the bias from it. */
+ ldUnion.ld = input;
+ hiExponent = ((ldUnion.ull[0] & 0x7FFFFFFFFFFFFFFFll) >> 52) - BIAS;
+ loExponent = ((ldUnion.ull[1] & 0x7FFFFFFFFFFFFFFFll) >> 52) - BIAS;
+
+ /* Convert each double into int64; they will be added to the int128 result.
+ * CASE 1: High or low double fits in int64
+ * - Convert the each double normally into int64.
+ *
+ * CASE 2: High or low double does not fit in int64
+ * - Scale the double to fit within a 64-bit integer
+ * - Calculate the shift (amount to scale the double by in the int128)
+ * - Clear all the bits of the exponent (with 0x800FFFFFFFFFFFFF)
+ * - Add BIAS+53 (0x4350000000000000) to exponent to correct the value
+ * - Scale (move) the double to the correct place in the int128
+ * (Move it by 2^53 places)
+ *
+ * Note: If the high double is assumed to be positive, an unsigned conversion
+ * from long double to 64-bit integer is needed. The low double can be either
+ * positive or negative, so a signed conversion is needed to retain the result
+ * of the low double and to ensure it does not simply get converted to 0. */
+
+ /* CASE 1 - High double fits in int64. */
+ if (hiExponent < 63) {
+ hiResult = (unsigned long long)ldUnion.d[0];
+ } else if (hiExponent < 128) {
+ /* CASE 2 - High double does not fit in int64, scale and convert it. */
+ shift = hiExponent - 54;
+ ldUnion.ull[0] &= 0x800FFFFFFFFFFFFFll;
+ ldUnion.ull[0] |= 0x4350000000000000ll;
+ hiResult = (unsigned long long)ldUnion.d[0];
+ hiResult <<= shift;
+ } else {
+ /* Detect cases for overflow. When the exponent of the high
+ * double is greater than 128 bits and when the long double
+ * input is positive, return the max 128-bit integer.
+ * For negative inputs with exponents > 128, return 1, like gcc. */
+ if (ldUnion.d[0] > 0) {
+ return ((__uint128_t)0xFFFFFFFFFFFFFFFFll) << 64 |
+ (__uint128_t)0xFFFFFFFFFFFFFFFFll;
+ } else {
+ return ((__uint128_t)0x0000000000000000ll) << 64 |
+ (__uint128_t)0x0000000000000001ll;
+ }
+ }
+
+ /* CASE 1 - Low double fits in int64. */
+ if (loExponent < 63) {
+ loResult = (long long)ldUnion.d[1];
+ } else {
+ /* CASE 2 - Low double does not fit in int64, scale and convert it. */
+ shift = loExponent - 54;
+ ldUnion.ull[1] &= 0x800FFFFFFFFFFFFFll;
+ ldUnion.ull[1] |= 0x4350000000000000ll;
+ loResult = (long long)ldUnion.d[1];
+ loResult <<= shift;
+ }
+
+ /* Add the high and low doublewords together to form a 128 bit integer. */
+ result = loResult + hiResult;
+ return result;
+}
diff --git a/contrib/compiler-rt/lib/builtins/ppc/floatditf.c b/contrib/compiler-rt/lib/builtins/ppc/floatditf.c
new file mode 100644
index 000000000000..beabdd017422
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/ppc/floatditf.c
@@ -0,0 +1,36 @@
+/* This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+/* long double __floatditf(long long x); */
+/* This file implements the PowerPC long long -> long double conversion */
+
+#include "DD.h"
+
+long double __floatditf(int64_t a) {
+
+ static const double twop32 = 0x1.0p32;
+ static const double twop52 = 0x1.0p52;
+
+ doublebits low = { .d = twop52 };
+ low.x |= a & UINT64_C(0x00000000ffffffff); /* 0x1.0p52 + low 32 bits of a. */
+
+ const double high_addend = (double)((int32_t)(a >> 32))*twop32 - twop52;
+
+ /* At this point, we have two double precision numbers
+ * high_addend and low.d, and we wish to return their sum
+ * as a canonicalized long double:
+ */
+
+ /* This implementation sets the inexact flag spuriously.
+ * This could be avoided, but at some substantial cost.
+ */
+
+ DD result;
+
+ result.s.hi = high_addend + low.d;
+ result.s.lo = (high_addend - result.s.hi) + low.d;
+
+ return result.ld;
+
+}
diff --git a/contrib/compiler-rt/lib/builtins/ppc/floattitf.c b/contrib/compiler-rt/lib/builtins/ppc/floattitf.c
new file mode 100644
index 000000000000..b8e297b6b8d9
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/ppc/floattitf.c
@@ -0,0 +1,48 @@
+//===-- lib/builtins/ppc/floattitf.c - Convert int128->long double -*-C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements converting a signed 128 bit integer to a 128bit IBM /
+// PowerPC long double (double-double) value.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdint.h>
+
+/* Conversions from signed and unsigned 64-bit int to long double. */
+long double __floatditf(int64_t);
+long double __floatunditf(uint64_t);
+
+/* Convert a signed 128-bit integer to long double.
+ * This uses the following property: Let hi and lo be 64-bits each,
+ * and let signed_val_k() and unsigned_val_k() be the value of the
+ * argument interpreted as a signed or unsigned k-bit integer. Then,
+ *
+ * signed_val_128(hi,lo) = signed_val_64(hi) * 2^64 + unsigned_val_64(lo)
+ * = (long double)hi * 2^64 + (long double)lo,
+ *
+ * where (long double)hi and (long double)lo are signed and
+ * unsigned 64-bit integer to long double conversions, respectively.
+ */
+long double __floattitf(__int128_t arg) {
+ /* Split the int128 argument into 64-bit high and low int64 parts. */
+ int64_t ArgHiPart = (int64_t)(arg >> 64);
+ uint64_t ArgLoPart = (uint64_t)arg;
+
+ /* Convert each 64-bit part into long double. The high part
+ * must be a signed conversion and the low part an unsigned conversion
+ * to ensure the correct result. */
+ long double ConvertedHiPart = __floatditf(ArgHiPart);
+ long double ConvertedLoPart = __floatunditf(ArgLoPart);
+
+ /* The low bit of ArgHiPart corresponds to the 2^64 bit in arg.
+ * Multiply the high part by 2^64 to undo the right shift by 64-bits
+ * done in the splitting. Then, add to the low part to obtain the
+ * final result. */
+ return ((ConvertedHiPart * 0x1.0p64) + ConvertedLoPart);
+}
diff --git a/contrib/compiler-rt/lib/builtins/ppc/floatunditf.c b/contrib/compiler-rt/lib/builtins/ppc/floatunditf.c
new file mode 100644
index 000000000000..b12e1e738fd0
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/ppc/floatunditf.c
@@ -0,0 +1,41 @@
+/* This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+/* long double __floatunditf(unsigned long long x); */
+/* This file implements the PowerPC unsigned long long -> long double conversion */
+
+#include "DD.h"
+
+long double __floatunditf(uint64_t a) {
+
+ /* Begins with an exact copy of the code from __floatundidf */
+
+ static const double twop52 = 0x1.0p52;
+ static const double twop84 = 0x1.0p84;
+ static const double twop84_plus_twop52 = 0x1.00000001p84;
+
+ doublebits high = { .d = twop84 };
+ doublebits low = { .d = twop52 };
+
+ high.x |= a >> 32; /* 0x1.0p84 + high 32 bits of a */
+ low.x |= a & UINT64_C(0x00000000ffffffff); /* 0x1.0p52 + low 32 bits of a */
+
+ const double high_addend = high.d - twop84_plus_twop52;
+
+ /* At this point, we have two double precision numbers
+ * high_addend and low.d, and we wish to return their sum
+ * as a canonicalized long double:
+ */
+
+ /* This implementation sets the inexact flag spuriously. */
+ /* This could be avoided, but at some substantial cost. */
+
+ DD result;
+
+ result.s.hi = high_addend + low.d;
+ result.s.lo = (high_addend - result.s.hi) + low.d;
+
+ return result.ld;
+
+}
diff --git a/contrib/compiler-rt/lib/builtins/ppc/gcc_qadd.c b/contrib/compiler-rt/lib/builtins/ppc/gcc_qadd.c
new file mode 100644
index 000000000000..32e16e9d1d11
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/ppc/gcc_qadd.c
@@ -0,0 +1,76 @@
+/* This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+/* long double __gcc_qadd(long double x, long double y);
+ * This file implements the PowerPC 128-bit double-double add operation.
+ * This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!)
+ */
+
+#include "DD.h"
+
+long double __gcc_qadd(long double x, long double y)
+{
+ static const uint32_t infinityHi = UINT32_C(0x7ff00000);
+
+ DD dst = { .ld = x }, src = { .ld = y };
+
+ register double A = dst.s.hi, a = dst.s.lo,
+ B = src.s.hi, b = src.s.lo;
+
+ /* If both operands are zero: */
+ if ((A == 0.0) && (B == 0.0)) {
+ dst.s.hi = A + B;
+ dst.s.lo = 0.0;
+ return dst.ld;
+ }
+
+ /* If either operand is NaN or infinity: */
+ const doublebits abits = { .d = A };
+ const doublebits bbits = { .d = B };
+ if ((((uint32_t)(abits.x >> 32) & infinityHi) == infinityHi) ||
+ (((uint32_t)(bbits.x >> 32) & infinityHi) == infinityHi)) {
+ dst.s.hi = A + B;
+ dst.s.lo = 0.0;
+ return dst.ld;
+ }
+
+ /* If the computation overflows: */
+ /* This may be playing things a little bit fast and loose, but it will do for a start. */
+ const double testForOverflow = A + (B + (a + b));
+ const doublebits testbits = { .d = testForOverflow };
+ if (((uint32_t)(testbits.x >> 32) & infinityHi) == infinityHi) {
+ dst.s.hi = testForOverflow;
+ dst.s.lo = 0.0;
+ return dst.ld;
+ }
+
+ double H, h;
+ double T, t;
+ double W, w;
+ double Y;
+
+ H = B + (A - (A + B));
+ T = b + (a - (a + b));
+ h = A + (B - (A + B));
+ t = a + (b - (a + b));
+
+ if (local_fabs(A) <= local_fabs(B))
+ w = (a + b) + h;
+ else
+ w = (a + b) + H;
+
+ W = (A + B) + w;
+ Y = (A + B) - W;
+ Y += w;
+
+ if (local_fabs(a) <= local_fabs(b))
+ w = t + Y;
+ else
+ w = T + Y;
+
+ dst.s.hi = Y = W + w;
+ dst.s.lo = (W - Y) + w;
+
+ return dst.ld;
+}
diff --git a/contrib/compiler-rt/lib/builtins/ppc/gcc_qdiv.c b/contrib/compiler-rt/lib/builtins/ppc/gcc_qdiv.c
new file mode 100644
index 000000000000..70aa00b64400
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/ppc/gcc_qdiv.c
@@ -0,0 +1,55 @@
+/* This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+/* long double __gcc_qdiv(long double x, long double y);
+ * This file implements the PowerPC 128-bit double-double division operation.
+ * This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!)
+ */
+
+#include "DD.h"
+
+long double __gcc_qdiv(long double a, long double b)
+{
+ static const uint32_t infinityHi = UINT32_C(0x7ff00000);
+ DD dst = { .ld = a }, src = { .ld = b };
+
+ register double x = dst.s.hi, x1 = dst.s.lo,
+ y = src.s.hi, y1 = src.s.lo;
+
+ double yHi, yLo, qHi, qLo;
+ double yq, tmp, q;
+
+ q = x / y;
+
+ /* Detect special cases */
+ if (q == 0.0) {
+ dst.s.hi = q;
+ dst.s.lo = 0.0;
+ return dst.ld;
+ }
+
+ const doublebits qBits = { .d = q };
+ if (((uint32_t)(qBits.x >> 32) & infinityHi) == infinityHi) {
+ dst.s.hi = q;
+ dst.s.lo = 0.0;
+ return dst.ld;
+ }
+
+ yHi = high26bits(y);
+ qHi = high26bits(q);
+
+ yq = y * q;
+ yLo = y - yHi;
+ qLo = q - qHi;
+
+ tmp = LOWORDER(yq, yHi, yLo, qHi, qLo);
+ tmp = (x - yq) - tmp;
+ tmp = ((tmp + x1) - y1 * q) / y;
+ x = q + tmp;
+
+ dst.s.lo = (q - x) + tmp;
+ dst.s.hi = x;
+
+ return dst.ld;
+}
diff --git a/contrib/compiler-rt/lib/builtins/ppc/gcc_qmul.c b/contrib/compiler-rt/lib/builtins/ppc/gcc_qmul.c
new file mode 100644
index 000000000000..fb4c5164ccb5
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/ppc/gcc_qmul.c
@@ -0,0 +1,53 @@
+/* This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+/* long double __gcc_qmul(long double x, long double y);
+ * This file implements the PowerPC 128-bit double-double multiply operation.
+ * This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!)
+ */
+
+#include "DD.h"
+
+long double __gcc_qmul(long double x, long double y)
+{
+ static const uint32_t infinityHi = UINT32_C(0x7ff00000);
+ DD dst = { .ld = x }, src = { .ld = y };
+
+ register double A = dst.s.hi, a = dst.s.lo,
+ B = src.s.hi, b = src.s.lo;
+
+ double aHi, aLo, bHi, bLo;
+ double ab, tmp, tau;
+
+ ab = A * B;
+
+ /* Detect special cases */
+ if (ab == 0.0) {
+ dst.s.hi = ab;
+ dst.s.lo = 0.0;
+ return dst.ld;
+ }
+
+ const doublebits abBits = { .d = ab };
+ if (((uint32_t)(abBits.x >> 32) & infinityHi) == infinityHi) {
+ dst.s.hi = ab;
+ dst.s.lo = 0.0;
+ return dst.ld;
+ }
+
+ /* Generic cases handled here. */
+ aHi = high26bits(A);
+ bHi = high26bits(B);
+ aLo = A - aHi;
+ bLo = B - bHi;
+
+ tmp = LOWORDER(ab, aHi, aLo, bHi, bLo);
+ tmp += (A * b + a * B);
+ tau = ab + tmp;
+
+ dst.s.lo = (ab - tau) + tmp;
+ dst.s.hi = tau;
+
+ return dst.ld;
+}
diff --git a/contrib/compiler-rt/lib/builtins/ppc/gcc_qsub.c b/contrib/compiler-rt/lib/builtins/ppc/gcc_qsub.c
new file mode 100644
index 000000000000..c092e24dbda1
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/ppc/gcc_qsub.c
@@ -0,0 +1,76 @@
+/* This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+/* long double __gcc_qsub(long double x, long double y);
+ * This file implements the PowerPC 128-bit double-double add operation.
+ * This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!)
+ */
+
+#include "DD.h"
+
+long double __gcc_qsub(long double x, long double y)
+{
+ static const uint32_t infinityHi = UINT32_C(0x7ff00000);
+
+ DD dst = { .ld = x }, src = { .ld = y };
+
+ register double A = dst.s.hi, a = dst.s.lo,
+ B = -src.s.hi, b = -src.s.lo;
+
+ /* If both operands are zero: */
+ if ((A == 0.0) && (B == 0.0)) {
+ dst.s.hi = A + B;
+ dst.s.lo = 0.0;
+ return dst.ld;
+ }
+
+ /* If either operand is NaN or infinity: */
+ const doublebits abits = { .d = A };
+ const doublebits bbits = { .d = B };
+ if ((((uint32_t)(abits.x >> 32) & infinityHi) == infinityHi) ||
+ (((uint32_t)(bbits.x >> 32) & infinityHi) == infinityHi)) {
+ dst.s.hi = A + B;
+ dst.s.lo = 0.0;
+ return dst.ld;
+ }
+
+ /* If the computation overflows: */
+ /* This may be playing things a little bit fast and loose, but it will do for a start. */
+ const double testForOverflow = A + (B + (a + b));
+ const doublebits testbits = { .d = testForOverflow };
+ if (((uint32_t)(testbits.x >> 32) & infinityHi) == infinityHi) {
+ dst.s.hi = testForOverflow;
+ dst.s.lo = 0.0;
+ return dst.ld;
+ }
+
+ double H, h;
+ double T, t;
+ double W, w;
+ double Y;
+
+ H = B + (A - (A + B));
+ T = b + (a - (a + b));
+ h = A + (B - (A + B));
+ t = a + (b - (a + b));
+
+ if (local_fabs(A) <= local_fabs(B))
+ w = (a + b) + h;
+ else
+ w = (a + b) + H;
+
+ W = (A + B) + w;
+ Y = (A + B) - W;
+ Y += w;
+
+ if (local_fabs(a) <= local_fabs(b))
+ w = t + Y;
+ else
+ w = T + Y;
+
+ dst.s.hi = Y = W + w;
+ dst.s.lo = (W - Y) + w;
+
+ return dst.ld;
+}
diff --git a/contrib/compiler-rt/lib/builtins/ppc/multc3.c b/contrib/compiler-rt/lib/builtins/ppc/multc3.c
new file mode 100644
index 000000000000..9dd79c975dde
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/ppc/multc3.c
@@ -0,0 +1,90 @@
+/* This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+#include "DD.h"
+#include "../int_math.h"
+
+#define makeFinite(x) { \
+ (x).s.hi = crt_copysign(crt_isinf((x).s.hi) ? 1.0 : 0.0, (x).s.hi); \
+ (x).s.lo = 0.0; \
+ }
+
+#define zeroNaN(x) { \
+ if (crt_isnan((x).s.hi)) { \
+ (x).s.hi = crt_copysign(0.0, (x).s.hi); \
+ (x).s.lo = 0.0; \
+ } \
+ }
+
+long double _Complex
+__multc3(long double a, long double b, long double c, long double d)
+{
+ long double ac = __gcc_qmul(a,c);
+ long double bd = __gcc_qmul(b,d);
+ long double ad = __gcc_qmul(a,d);
+ long double bc = __gcc_qmul(b,c);
+
+ DD real = { .ld = __gcc_qsub(ac,bd) };
+ DD imag = { .ld = __gcc_qadd(ad,bc) };
+
+ if (crt_isnan(real.s.hi) && crt_isnan(imag.s.hi))
+ {
+ int recalc = 0;
+
+ DD aDD = { .ld = a };
+ DD bDD = { .ld = b };
+ DD cDD = { .ld = c };
+ DD dDD = { .ld = d };
+
+ if (crt_isinf(aDD.s.hi) || crt_isinf(bDD.s.hi))
+ {
+ makeFinite(aDD);
+ makeFinite(bDD);
+ zeroNaN(cDD);
+ zeroNaN(dDD);
+ recalc = 1;
+ }
+
+ if (crt_isinf(cDD.s.hi) || crt_isinf(dDD.s.hi))
+ {
+ makeFinite(cDD);
+ makeFinite(dDD);
+ zeroNaN(aDD);
+ zeroNaN(bDD);
+ recalc = 1;
+ }
+
+ if (!recalc)
+ {
+ DD acDD = { .ld = ac };
+ DD bdDD = { .ld = bd };
+ DD adDD = { .ld = ad };
+ DD bcDD = { .ld = bc };
+
+ if (crt_isinf(acDD.s.hi) || crt_isinf(bdDD.s.hi) ||
+ crt_isinf(adDD.s.hi) || crt_isinf(bcDD.s.hi))
+ {
+ zeroNaN(aDD);
+ zeroNaN(bDD);
+ zeroNaN(cDD);
+ zeroNaN(dDD);
+ recalc = 1;
+ }
+ }
+
+ if (recalc)
+ {
+ real.s.hi = CRT_INFINITY * (aDD.s.hi*cDD.s.hi - bDD.s.hi*dDD.s.hi);
+ real.s.lo = 0.0;
+ imag.s.hi = CRT_INFINITY * (aDD.s.hi*dDD.s.hi + bDD.s.hi*cDD.s.hi);
+ imag.s.lo = 0.0;
+ }
+ }
+
+ long double _Complex z;
+ __real__ z = real.ld;
+ __imag__ z = imag.ld;
+
+ return z;
+}
diff --git a/contrib/compiler-rt/lib/builtins/ppc/restFP.S b/contrib/compiler-rt/lib/builtins/ppc/restFP.S
new file mode 100644
index 000000000000..507e756e18b1
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/ppc/restFP.S
@@ -0,0 +1,46 @@
+//===-- restFP.S - Implement restFP ---------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// Helper function used by compiler to restore ppc floating point registers at
+// the end of the function epilog. This function returns to the address
+// in the LR slot. So a function epilog must branch (b) not branch and link
+// (bl) to this function.
+// If the compiler wants to restore f27..f31, it does a "b restFP+52"
+//
+// This function should never be exported by a shared library. Each linkage
+// unit carries its own copy of this function.
+//
+DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(restFP)
+ lfd f14,-144(r1)
+ lfd f15,-136(r1)
+ lfd f16,-128(r1)
+ lfd f17,-120(r1)
+ lfd f18,-112(r1)
+ lfd f19,-104(r1)
+ lfd f20,-96(r1)
+ lfd f21,-88(r1)
+ lfd f22,-80(r1)
+ lfd f23,-72(r1)
+ lfd f24,-64(r1)
+ lfd f25,-56(r1)
+ lfd f26,-48(r1)
+ lfd f27,-40(r1)
+ lfd f28,-32(r1)
+ lfd f29,-24(r1)
+ lfd f30,-16(r1)
+ lfd f31,-8(r1)
+ lwz r0,8(r1)
+ mtlr r0
+ blr
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/ppc/saveFP.S b/contrib/compiler-rt/lib/builtins/ppc/saveFP.S
new file mode 100644
index 000000000000..20b06fff53d9
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/ppc/saveFP.S
@@ -0,0 +1,43 @@
+//===-- saveFP.S - Implement saveFP ---------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// Helper function used by compiler to save ppc floating point registers in
+// function prologs. This routines also saves r0 in the LR slot.
+// If the compiler wants to save f27..f31, it does a "bl saveFP+52"
+//
+// This function should never be exported by a shared library. Each linkage
+// unit carries its own copy of this function.
+//
+DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(saveFP)
+ stfd f14,-144(r1)
+ stfd f15,-136(r1)
+ stfd f16,-128(r1)
+ stfd f17,-120(r1)
+ stfd f18,-112(r1)
+ stfd f19,-104(r1)
+ stfd f20,-96(r1)
+ stfd f21,-88(r1)
+ stfd f22,-80(r1)
+ stfd f23,-72(r1)
+ stfd f24,-64(r1)
+ stfd f25,-56(r1)
+ stfd f26,-48(r1)
+ stfd f27,-40(r1)
+ stfd f28,-32(r1)
+ stfd f29,-24(r1)
+ stfd f30,-16(r1)
+ stfd f31,-8(r1)
+ stw r0,8(r1)
+ blr
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/riscv/mulsi3.S b/contrib/compiler-rt/lib/builtins/riscv/mulsi3.S
new file mode 100644
index 000000000000..a58d237040b6
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/riscv/mulsi3.S
@@ -0,0 +1,28 @@
+//===--- mulsi3.S - Integer multiplication routines routines ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#if !defined(__riscv_mul) && __riscv_xlen == 32
+ .text
+ .align 2
+
+ .globl __mulsi3
+ .type __mulsi3, @function
+__mulsi3:
+ mv a2, a0
+ mv a0, zero
+.L1:
+ andi a3, a1, 1
+ beqz a3, .L2
+ add a0, a0, a2
+.L2:
+ srli a1, a1, 1
+ slli a2, a2, 1
+ bnez a1, .L1
+ ret
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/sparc64/divmod.m4 b/contrib/compiler-rt/lib/builtins/sparc64/divmod.m4
new file mode 100644
index 000000000000..9150a2ed8263
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/sparc64/divmod.m4
@@ -0,0 +1,248 @@
+/*
+ * This m4 code has been taken from The SPARC Architecture Manual Version 8.
+ */
+
+/*
+ * Division/Remainder
+ *
+ * Input is:
+ * dividend -- the thing being divided
+ * divisor -- how many ways to divide it
+ * Important parameters:
+ * N -- how many bits per iteration we try to get
+ * as our current guess: define(N, 4) define(TWOSUPN, 16)
+ * WORDSIZE -- how many bits altogether we're talking about:
+ * obviously: define(WORDSIZE, 32)
+ * A derived constant:
+ * TOPBITS -- how many bits are in the top "decade" of a number:
+ * define(TOPBITS, eval( WORDSIZE - N*((WORDSIZE-1)/N) ) )
+ * Important variables are:
+ * Q -- the partial quotient under development -- initially 0
+ * R -- the remainder so far -- initially == the dividend
+ * ITER -- number of iterations of the main division loop which will
+ * be required. Equal to CEIL( lg2(quotient)/N )
+ * Note that this is log_base_(2ˆN) of the quotient.
+ * V -- the current comparand -- initially divisor*2ˆ(ITER*N-1)
+ * Cost:
+ * current estimate for non-large dividend is
+ * CEIL( lg2(quotient) / N ) x ( 10 + 7N/2 ) + C
+ * a large dividend is one greater than 2ˆ(31-TOPBITS) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ * This uses the m4 and cpp macro preprocessors.
+ */
+
+define(dividend, `%o0')
+define(divisor,`%o1')
+define(Q, `%o2')
+define(R, `%o3')
+define(ITER, `%o4')
+define(V, `%o5')
+define(SIGN, `%g3')
+define(T, `%g1')
+define(SC,`%g2')
+/*
+ * This is the recursive definition of how we develop quotient digits.
+ * It takes three important parameters:
+ * $1 -- the current depth, 1<=$1<=N
+ * $2 -- the current accumulation of quotient bits
+ * N -- max depth
+ * We add a new bit to $2 and either recurse or insert the bits in the quotient.
+ * Dynamic input:
+ * R -- current remainder
+ * Q -- current quotient
+ * V -- current comparand
+ * cc -- set on current value of R
+ * Dynamic output:
+ * R', Q', V', cc'
+ */
+
+#include "../assembly.h"
+
+define(DEVELOP_QUOTIENT_BITS,
+` !depth $1, accumulated bits $2
+ bl L.$1.eval(TWOSUPN+$2)
+ srl V,1,V
+ ! remainder is nonnegative
+ subcc R,V,R
+ ifelse( $1, N,
+ ` b 9f
+ add Q, ($2*2+1), Q
+ ',` DEVELOP_QUOTIENT_BITS( incr($1), `eval(2*$2+1)')
+ ')
+L.$1.eval(TWOSUPN+$2):
+ ! remainder is negative
+ addcc R,V,R
+ ifelse( $1, N,
+ ` b 9f
+ add Q, ($2*2-1), Q
+ ',` DEVELOP_QUOTIENT_BITS( incr($1), `eval(2*$2-1)')
+ ')
+ ifelse( $1, 1, `9:')
+')
+ifelse( ANSWER, `quotient', `
+.text
+ .align 32
+DEFINE_COMPILERRT_FUNCTION(__udivsi3)
+ b divide
+ mov 0,SIGN ! result always nonnegative
+.text
+ .align 32
+DEFINE_COMPILERRT_FUNCTION(__divsi3)
+ orcc divisor,dividend,%g0 ! are either dividend or divisor negative
+ bge divide ! if not, skip this junk
+ xor divisor,dividend,SIGN ! record sign of result in sign of SIGN
+ tst divisor
+ bge 2f
+ tst dividend
+ ! divisor < 0
+ bge divide
+ neg divisor
+ 2:
+ ! dividend < 0
+ neg dividend
+ ! FALL THROUGH
+',`
+.text
+ .align 32
+DEFINE_COMPILERRT_FUNCTION(__umodsi3)
+ b divide
+ mov 0,SIGN ! result always nonnegative
+.text
+ .align 32
+DEFINE_COMPILERRT_FUNCTION(__modsi3)
+ orcc divisor,dividend,%g0 ! are either dividend or divisor negative
+ bge divide ! if not, skip this junk
+ mov dividend,SIGN ! record sign of result in sign of SIGN
+ tst divisor
+ bge 2f
+ tst dividend
+ ! divisor < 0
+ bge divide
+ neg divisor
+ 2:
+ ! dividend < 0
+ neg dividend
+ ! FALL THROUGH
+')
+
+divide:
+ ! Compute size of quotient, scale comparand.
+ orcc divisor,%g0,V ! movcc divisor,V
+ te 2 ! if divisor = 0
+ mov dividend,R
+ mov 0,Q
+ sethi %hi(1<<(WORDSIZE-TOPBITS-1)),T
+ cmp R,T
+ blu not_really_big
+ mov 0,ITER
+ !
+ ! Here, the dividend is >= 2ˆ(31-N) or so. We must be careful here,
+ ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+ ! The total number of bits in the result here is N*ITER+SC, where
+ ! SC <= N.
+ ! Compute ITER in an unorthodox manner: know we need to Shift V into
+! the top decade: so don't even bother to compare to R.
+1:
+ cmp V,T
+ bgeu 3f
+ mov 1,SC
+ sll V,N,V
+ b 1b
+ inc ITER
+! Now compute SC
+2: addcc V,V,V
+ bcc not_too_big
+ add SC,1,SC
+ ! We're here if the divisor overflowed when Shifting.
+ ! This means that R has the high-order bit set.
+ ! Restore V and subtract from R.
+ sll T,TOPBITS,T ! high order bit
+ srl V,1,V ! rest of V
+ add V,T,V
+ b do_single_div
+ dec SC
+not_too_big:
+3: cmp V,R
+ blu 2b
+ nop
+ be do_single_div
+ nop
+! V > R: went too far: back up 1 step
+! srl V,1,V
+! dec SC
+! do single-bit divide steps
+!
+! We have to be careful here. We know that R >= V, so we can do the
+! first divide step without thinking. BUT, the others are conditional,
+! and are only done if R >= 0. Because both R and V may have the high-
+! order bit set in the first step, just falling into the regular
+! division loop will mess up the first time around.
+! So we unroll slightly...
+do_single_div:
+ deccc SC
+ bl end_regular_divide
+ nop
+ sub R,V,R
+ mov 1,Q
+ b,a end_single_divloop
+ ! EMPTY
+single_divloop:
+ sll Q,1,Q
+ bl 1f
+ srl V,1,V
+ ! R >= 0
+ sub R,V,R
+ b 2f
+ inc Q
+ 1: ! R < 0
+ add R,V,R
+ dec Q
+ 2:
+ end_single_divloop:
+ deccc SC
+ bge single_divloop
+ tst R
+ b,a end_regular_divide
+ ! EMPTY
+
+not_really_big:
+1:
+ sll V,N,V
+ cmp V,R
+ bleu 1b
+ inccc ITER
+ be got_result
+ dec ITER
+do_regular_divide:
+ ! Do the main division iteration
+ tst R
+ ! Fall through into divide loop
+divloop:
+ sll Q,N,Q
+ DEVELOP_QUOTIENT_BITS( 1, 0 )
+end_regular_divide:
+ deccc ITER
+ bge divloop
+ tst R
+ bl,a got_result
+ ! non-restoring fixup if remainder < 0, otherwise annulled
+ifelse( ANSWER, `quotient',
+` dec Q
+',` add R,divisor,R
+')
+
+got_result:
+ tst SIGN
+ bl,a 1f
+ ! negate for answer < 0, otherwise annulled
+ifelse( ANSWER, `quotient',
+` neg %o2,%o2 ! Q <- -Q
+',` neg %o3,%o3 ! R <- -R
+')
+1:
+ retl ! leaf-routine return
+ifelse( ANSWER, `quotient',
+` mov %o2,%o0 ! quotient <- Q
+',` mov %o3,%o0 ! remainder <- R
+')
diff --git a/contrib/compiler-rt/lib/builtins/sparc64/divsi3.S b/contrib/compiler-rt/lib/builtins/sparc64/divsi3.S
new file mode 100644
index 000000000000..70fc1f407f48
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/sparc64/divsi3.S
@@ -0,0 +1,330 @@
+/*
+ * This m4 code has been taken from The SPARC Architecture Manual Version 8.
+ */
+/*
+ * Division/Remainder
+ *
+ * Input is:
+ * dividend -- the thing being divided
+ * divisor -- how many ways to divide it
+ * Important parameters:
+ * N -- how many bits per iteration we try to get
+ * as our current guess:
+ * WORDSIZE -- how many bits altogether we're talking about:
+ * obviously:
+ * A derived constant:
+ * TOPBITS -- how many bits are in the top "decade" of a number:
+ *
+ * Important variables are:
+ * Q -- the partial quotient under development -- initially 0
+ * R -- the remainder so far -- initially == the dividend
+ * ITER -- number of iterations of the main division loop which will
+ * be required. Equal to CEIL( lg2(quotient)/4 )
+ * Note that this is log_base_(2ˆ4) of the quotient.
+ * V -- the current comparand -- initially divisor*2ˆ(ITER*4-1)
+ * Cost:
+ * current estimate for non-large dividend is
+ * CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C
+ * a large dividend is one greater than 2ˆ(31-4 ) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ * This uses the m4 and cpp macro preprocessors.
+ */
+/*
+ * This is the recursive definition of how we develop quotient digits.
+ * It takes three important parameters:
+ * $1 -- the current depth, 1<=$1<=4
+ * $2 -- the current accumulation of quotient bits
+ * 4 -- max depth
+ * We add a new bit to $2 and either recurse or insert the bits in the quotient.
+ * Dynamic input:
+ * %o3 -- current remainder
+ * %o2 -- current quotient
+ * %o5 -- current comparand
+ * cc -- set on current value of %o3
+ * Dynamic output:
+ * %o3', %o2', %o5', cc'
+ */
+#include "../assembly.h"
+.text
+ .align 32
+DEFINE_COMPILERRT_FUNCTION(__udivsi3)
+ b divide
+ mov 0,%g3 ! result always nonnegative
+.text
+ .align 32
+DEFINE_COMPILERRT_FUNCTION(__divsi3)
+ orcc %o1,%o0,%g0 ! are either %o0 or %o1 negative
+ bge divide ! if not, skip this junk
+ xor %o1,%o0,%g3 ! record sign of result in sign of %g3
+ tst %o1
+ bge 2f
+ tst %o0
+ ! %o1 < 0
+ bge divide
+ neg %o1
+ 2:
+ ! %o0 < 0
+ neg %o0
+ ! FALL THROUGH
+divide:
+ ! Compute size of quotient, scale comparand.
+ orcc %o1,%g0,%o5 ! movcc %o1,%o5
+ te 2 ! if %o1 = 0
+ mov %o0,%o3
+ mov 0,%o2
+ sethi %hi(1<<(32-4 -1)),%g1
+ cmp %o3,%g1
+ blu not_really_big
+ mov 0,%o4
+ !
+ ! Here, the %o0 is >= 2ˆ(31-4) or so. We must be careful here,
+ ! as our usual 4-at-a-shot divide step will cause overflow and havoc.
+ ! The total number of bits in the result here is 4*%o4+%g2, where
+ ! %g2 <= 4.
+ ! Compute %o4 in an unorthodox manner: know we need to Shift %o5 into
+! the top decade: so don't even bother to compare to %o3.
+1:
+ cmp %o5,%g1
+ bgeu 3f
+ mov 1,%g2
+ sll %o5,4,%o5
+ b 1b
+ inc %o4
+! Now compute %g2
+2: addcc %o5,%o5,%o5
+ bcc not_too_big
+ add %g2,1,%g2
+ ! We're here if the %o1 overflowed when Shifting.
+ ! This means that %o3 has the high-order bit set.
+ ! Restore %o5 and subtract from %o3.
+ sll %g1,4 ,%g1 ! high order bit
+ srl %o5,1,%o5 ! rest of %o5
+ add %o5,%g1,%o5
+ b do_single_div
+ dec %g2
+not_too_big:
+3: cmp %o5,%o3
+ blu 2b
+ nop
+ be do_single_div
+ nop
+! %o5 > %o3: went too far: back up 1 step
+! srl %o5,1,%o5
+! dec %g2
+! do single-bit divide steps
+!
+! We have to be careful here. We know that %o3 >= %o5, so we can do the
+! first divide step without thinking. BUT, the others are conditional,
+! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
+! order bit set in the first step, just falling into the regular
+! division loop will mess up the first time around.
+! So we unroll slightly...
+do_single_div:
+ deccc %g2
+ bl end_regular_divide
+ nop
+ sub %o3,%o5,%o3
+ mov 1,%o2
+ b,a end_single_divloop
+ ! EMPTY
+single_divloop:
+ sll %o2,1,%o2
+ bl 1f
+ srl %o5,1,%o5
+ ! %o3 >= 0
+ sub %o3,%o5,%o3
+ b 2f
+ inc %o2
+ 1: ! %o3 < 0
+ add %o3,%o5,%o3
+ dec %o2
+ 2:
+ end_single_divloop:
+ deccc %g2
+ bge single_divloop
+ tst %o3
+ b,a end_regular_divide
+ ! EMPTY
+not_really_big:
+1:
+ sll %o5,4,%o5
+ cmp %o5,%o3
+ bleu 1b
+ inccc %o4
+ be got_result
+ dec %o4
+do_regular_divide:
+ ! Do the main division iteration
+ tst %o3
+ ! Fall through into divide loop
+divloop:
+ sll %o2,4,%o2
+ !depth 1, accumulated bits 0
+ bl L.1.16
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 2, accumulated bits 1
+ bl L.2.17
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 3, accumulated bits 3
+ bl L.3.19
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 4, accumulated bits 7
+ bl L.4.23
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2+1), %o2
+L.4.23:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2-1), %o2
+L.3.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 4, accumulated bits 5
+ bl L.4.21
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2+1), %o2
+L.4.21:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2-1), %o2
+L.2.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 3, accumulated bits 1
+ bl L.3.17
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 4, accumulated bits 3
+ bl L.4.19
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2+1), %o2
+L.4.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2-1), %o2
+L.3.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 4, accumulated bits 1
+ bl L.4.17
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2+1), %o2
+L.4.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2-1), %o2
+L.1.16:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 2, accumulated bits -1
+ bl L.2.15
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 3, accumulated bits -1
+ bl L.3.15
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 4, accumulated bits -1
+ bl L.4.15
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2+1), %o2
+L.4.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2-1), %o2
+L.3.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 4, accumulated bits -3
+ bl L.4.13
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2+1), %o2
+L.4.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2-1), %o2
+L.2.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 3, accumulated bits -3
+ bl L.3.13
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 4, accumulated bits -5
+ bl L.4.11
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2+1), %o2
+L.4.11:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2-1), %o2
+L.3.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 4, accumulated bits -7
+ bl L.4.9
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2+1), %o2
+L.4.9:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2-1), %o2
+ 9:
+end_regular_divide:
+ deccc %o4
+ bge divloop
+ tst %o3
+ bl,a got_result
+ ! non-restoring fixup if remainder < 0, otherwise annulled
+ dec %o2
+got_result:
+ tst %g3
+ bl,a 1f
+ ! negate for answer < 0, otherwise annulled
+ neg %o2,%o2 ! %o2 <- -%o2
+1:
+ retl ! leaf-routine return
+ mov %o2,%o0 ! quotient <- %o2
diff --git a/contrib/compiler-rt/lib/builtins/sparc64/generate.sh b/contrib/compiler-rt/lib/builtins/sparc64/generate.sh
new file mode 100644
index 000000000000..17c1106f9900
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/sparc64/generate.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+m4 divmod.m4 | sed -e 's/[[:space:]]*$//' | grep -v '^$' > modsi3.S
+m4 -DANSWER=quotient divmod.m4 | sed -e 's/[[:space:]]*$//' | grep -v '^$' > divsi3.S
+echo '! This file intentionally left blank' > umodsi3.S
+echo '! This file intentionally left blank' > udivsi3.S
diff --git a/contrib/compiler-rt/lib/builtins/sparc64/modsi3.S b/contrib/compiler-rt/lib/builtins/sparc64/modsi3.S
new file mode 100644
index 000000000000..e126e3d3d45e
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/sparc64/modsi3.S
@@ -0,0 +1,330 @@
+/*
+ * This m4 code has been taken from The SPARC Architecture Manual Version 8.
+ */
+/*
+ * Division/Remainder
+ *
+ * Input is:
+ * dividend -- the thing being divided
+ * divisor -- how many ways to divide it
+ * Important parameters:
+ * N -- how many bits per iteration we try to get
+ * as our current guess:
+ * WORDSIZE -- how many bits altogether we're talking about:
+ * obviously:
+ * A derived constant:
+ * TOPBITS -- how many bits are in the top "decade" of a number:
+ *
+ * Important variables are:
+ * Q -- the partial quotient under development -- initially 0
+ * R -- the remainder so far -- initially == the dividend
+ * ITER -- number of iterations of the main division loop which will
+ * be required. Equal to CEIL( lg2(quotient)/4 )
+ * Note that this is log_base_(2ˆ4) of the quotient.
+ * V -- the current comparand -- initially divisor*2ˆ(ITER*4-1)
+ * Cost:
+ * current estimate for non-large dividend is
+ * CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C
+ * a large dividend is one greater than 2ˆ(31-4 ) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ * This uses the m4 and cpp macro preprocessors.
+ */
+/*
+ * This is the recursive definition of how we develop quotient digits.
+ * It takes three important parameters:
+ * $1 -- the current depth, 1<=$1<=4
+ * $2 -- the current accumulation of quotient bits
+ * 4 -- max depth
+ * We add a new bit to $2 and either recurse or insert the bits in the quotient.
+ * Dynamic input:
+ * %o3 -- current remainder
+ * %o2 -- current quotient
+ * %o5 -- current comparand
+ * cc -- set on current value of %o3
+ * Dynamic output:
+ * %o3', %o2', %o5', cc'
+ */
+#include "../assembly.h"
+.text
+ .align 32
+DEFINE_COMPILERRT_FUNCTION(__umodsi3)
+ b divide
+ mov 0,%g3 ! result always nonnegative
+.text
+ .align 32
+DEFINE_COMPILERRT_FUNCTION(__modsi3)
+ orcc %o1,%o0,%g0 ! are either %o0 or %o1 negative
+ bge divide ! if not, skip this junk
+ mov %o0,%g3 ! record sign of result in sign of %g3
+ tst %o1
+ bge 2f
+ tst %o0
+ ! %o1 < 0
+ bge divide
+ neg %o1
+ 2:
+ ! %o0 < 0
+ neg %o0
+ ! FALL THROUGH
+divide:
+ ! Compute size of quotient, scale comparand.
+ orcc %o1,%g0,%o5 ! movcc %o1,%o5
+ te 2 ! if %o1 = 0
+ mov %o0,%o3
+ mov 0,%o2
+ sethi %hi(1<<(32-4 -1)),%g1
+ cmp %o3,%g1
+ blu not_really_big
+ mov 0,%o4
+ !
+ ! Here, the %o0 is >= 2ˆ(31-4) or so. We must be careful here,
+ ! as our usual 4-at-a-shot divide step will cause overflow and havoc.
+ ! The total number of bits in the result here is 4*%o4+%g2, where
+ ! %g2 <= 4.
+ ! Compute %o4 in an unorthodox manner: know we need to Shift %o5 into
+! the top decade: so don't even bother to compare to %o3.
+1:
+ cmp %o5,%g1
+ bgeu 3f
+ mov 1,%g2
+ sll %o5,4,%o5
+ b 1b
+ inc %o4
+! Now compute %g2
+2: addcc %o5,%o5,%o5
+ bcc not_too_big
+ add %g2,1,%g2
+ ! We're here if the %o1 overflowed when Shifting.
+ ! This means that %o3 has the high-order bit set.
+ ! Restore %o5 and subtract from %o3.
+ sll %g1,4 ,%g1 ! high order bit
+ srl %o5,1,%o5 ! rest of %o5
+ add %o5,%g1,%o5
+ b do_single_div
+ dec %g2
+not_too_big:
+3: cmp %o5,%o3
+ blu 2b
+ nop
+ be do_single_div
+ nop
+! %o5 > %o3: went too far: back up 1 step
+! srl %o5,1,%o5
+! dec %g2
+! do single-bit divide steps
+!
+! We have to be careful here. We know that %o3 >= %o5, so we can do the
+! first divide step without thinking. BUT, the others are conditional,
+! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
+! order bit set in the first step, just falling into the regular
+! division loop will mess up the first time around.
+! So we unroll slightly...
+do_single_div:
+ deccc %g2
+ bl end_regular_divide
+ nop
+ sub %o3,%o5,%o3
+ mov 1,%o2
+ b,a end_single_divloop
+ ! EMPTY
+single_divloop:
+ sll %o2,1,%o2
+ bl 1f
+ srl %o5,1,%o5
+ ! %o3 >= 0
+ sub %o3,%o5,%o3
+ b 2f
+ inc %o2
+ 1: ! %o3 < 0
+ add %o3,%o5,%o3
+ dec %o2
+ 2:
+ end_single_divloop:
+ deccc %g2
+ bge single_divloop
+ tst %o3
+ b,a end_regular_divide
+ ! EMPTY
+not_really_big:
+1:
+ sll %o5,4,%o5
+ cmp %o5,%o3
+ bleu 1b
+ inccc %o4
+ be got_result
+ dec %o4
+do_regular_divide:
+ ! Do the main division iteration
+ tst %o3
+ ! Fall through into divide loop
+divloop:
+ sll %o2,4,%o2
+ !depth 1, accumulated bits 0
+ bl L.1.16
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 2, accumulated bits 1
+ bl L.2.17
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 3, accumulated bits 3
+ bl L.3.19
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 4, accumulated bits 7
+ bl L.4.23
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2+1), %o2
+L.4.23:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2-1), %o2
+L.3.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 4, accumulated bits 5
+ bl L.4.21
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2+1), %o2
+L.4.21:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2-1), %o2
+L.2.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 3, accumulated bits 1
+ bl L.3.17
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 4, accumulated bits 3
+ bl L.4.19
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2+1), %o2
+L.4.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2-1), %o2
+L.3.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 4, accumulated bits 1
+ bl L.4.17
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2+1), %o2
+L.4.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2-1), %o2
+L.1.16:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 2, accumulated bits -1
+ bl L.2.15
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 3, accumulated bits -1
+ bl L.3.15
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 4, accumulated bits -1
+ bl L.4.15
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2+1), %o2
+L.4.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2-1), %o2
+L.3.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 4, accumulated bits -3
+ bl L.4.13
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2+1), %o2
+L.4.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2-1), %o2
+L.2.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 3, accumulated bits -3
+ bl L.3.13
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 4, accumulated bits -5
+ bl L.4.11
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2+1), %o2
+L.4.11:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2-1), %o2
+L.3.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 4, accumulated bits -7
+ bl L.4.9
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2+1), %o2
+L.4.9:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2-1), %o2
+ 9:
+end_regular_divide:
+ deccc %o4
+ bge divloop
+ tst %o3
+ bl,a got_result
+ ! non-restoring fixup if remainder < 0, otherwise annulled
+ add %o3,%o1,%o3
+got_result:
+ tst %g3
+ bl,a 1f
+ ! negate for answer < 0, otherwise annulled
+ neg %o3,%o3 ! %o3 <- -%o3
+1:
+ retl ! leaf-routine return
+ mov %o3,%o0 ! remainder <- %o3
diff --git a/contrib/compiler-rt/lib/builtins/sparc64/udivsi3.S b/contrib/compiler-rt/lib/builtins/sparc64/udivsi3.S
new file mode 100644
index 000000000000..a41885276dfd
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/sparc64/udivsi3.S
@@ -0,0 +1 @@
+! This file intentionally left blank
diff --git a/contrib/compiler-rt/lib/builtins/sparc64/umodsi3.S b/contrib/compiler-rt/lib/builtins/sparc64/umodsi3.S
new file mode 100644
index 000000000000..a41885276dfd
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/sparc64/umodsi3.S
@@ -0,0 +1 @@
+! This file intentionally left blank
diff --git a/contrib/compiler-rt/lib/builtins/subdf3.c b/contrib/compiler-rt/lib/builtins/subdf3.c
new file mode 100644
index 000000000000..a892fa603cf2
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/subdf3.c
@@ -0,0 +1,32 @@
+//===-- lib/adddf3.c - Double-precision subtraction ---------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements double-precision soft-float subtraction with the
+// IEEE-754 default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+// Subtraction; flip the sign bit of b and add.
+COMPILER_RT_ABI fp_t
+__subdf3(fp_t a, fp_t b) {
+ return __adddf3(a, fromRep(toRep(b) ^ signBit));
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_dsub(fp_t a, fp_t b) {
+ return __subdf3(a, b);
+}
+#else
+AEABI_RTABI fp_t __aeabi_dsub(fp_t a, fp_t b) COMPILER_RT_ALIAS(__subdf3);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/subsf3.c b/contrib/compiler-rt/lib/builtins/subsf3.c
new file mode 100644
index 000000000000..4b2786177dcc
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/subsf3.c
@@ -0,0 +1,32 @@
+//===-- lib/subsf3.c - Single-precision subtraction ---------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements single-precision soft-float subtraction with the
+// IEEE-754 default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+// Subtraction; flip the sign bit of b and add.
+COMPILER_RT_ABI fp_t
+__subsf3(fp_t a, fp_t b) {
+ return __addsf3(a, fromRep(toRep(b) ^ signBit));
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_fsub(fp_t a, fp_t b) {
+ return __subsf3(a, b);
+}
+#else
+AEABI_RTABI fp_t __aeabi_fsub(fp_t a, fp_t b) COMPILER_RT_ALIAS(__subsf3);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/subtf3.c b/contrib/compiler-rt/lib/builtins/subtf3.c
new file mode 100644
index 000000000000..609b816f41e3
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/subtf3.c
@@ -0,0 +1,27 @@
+//===-- lib/subtf3.c - Quad-precision subtraction -----------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements quad-precision soft-float subtraction with the
+// IEEE-754 default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+COMPILER_RT_ABI fp_t __addtf3(fp_t a, fp_t b);
+
+// Subtraction; flip the sign bit of b and add.
+COMPILER_RT_ABI fp_t
+__subtf3(fp_t a, fp_t b) {
+ return __addtf3(a, fromRep(toRep(b) ^ signBit));
+}
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/subvdi3.c b/contrib/compiler-rt/lib/builtins/subvdi3.c
new file mode 100644
index 000000000000..71fc70ffa92d
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/subvdi3.c
@@ -0,0 +1,36 @@
+/* ===-- subvdi3.c - Implement __subvdi3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __subvdi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a - b */
+
+/* Effects: aborts if a - b overflows */
+
+COMPILER_RT_ABI di_int
+__subvdi3(di_int a, di_int b)
+{
+ di_int s = (du_int) a - (du_int) b;
+ if (b >= 0)
+ {
+ if (s > a)
+ compilerrt_abort();
+ }
+ else
+ {
+ if (s <= a)
+ compilerrt_abort();
+ }
+ return s;
+}
diff --git a/contrib/compiler-rt/lib/builtins/subvsi3.c b/contrib/compiler-rt/lib/builtins/subvsi3.c
new file mode 100644
index 000000000000..e6c0fb688c97
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/subvsi3.c
@@ -0,0 +1,36 @@
+/* ===-- subvsi3.c - Implement __subvsi3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __subvsi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a - b */
+
+/* Effects: aborts if a - b overflows */
+
+COMPILER_RT_ABI si_int
+__subvsi3(si_int a, si_int b)
+{
+ si_int s = (su_int) a - (su_int) b;
+ if (b >= 0)
+ {
+ if (s > a)
+ compilerrt_abort();
+ }
+ else
+ {
+ if (s <= a)
+ compilerrt_abort();
+ }
+ return s;
+}
diff --git a/contrib/compiler-rt/lib/builtins/subvti3.c b/contrib/compiler-rt/lib/builtins/subvti3.c
new file mode 100644
index 000000000000..a6804d2d7b95
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/subvti3.c
@@ -0,0 +1,40 @@
+/* ===-- subvti3.c - Implement __subvti3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __subvti3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: a - b */
+
+/* Effects: aborts if a - b overflows */
+
+COMPILER_RT_ABI ti_int
+__subvti3(ti_int a, ti_int b)
+{
+ ti_int s = (tu_int) a - (tu_int) b;
+ if (b >= 0)
+ {
+ if (s > a)
+ compilerrt_abort();
+ }
+ else
+ {
+ if (s <= a)
+ compilerrt_abort();
+ }
+ return s;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/trampoline_setup.c b/contrib/compiler-rt/lib/builtins/trampoline_setup.c
new file mode 100644
index 000000000000..25b627ab7658
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/trampoline_setup.c
@@ -0,0 +1,48 @@
+/* ===----- trampoline_setup.c - Implement __trampoline_setup -------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+extern void __clear_cache(void* start, void* end);
+
+/*
+ * The ppc compiler generates calls to __trampoline_setup() when creating
+ * trampoline functions on the stack for use with nested functions.
+ * This function creates a custom 40-byte trampoline function on the stack
+ * which loads r11 with a pointer to the outer function's locals
+ * and then jumps to the target nested function.
+ */
+
+#if __ppc__ && !defined(__powerpc64__)
+COMPILER_RT_ABI void
+__trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated,
+ const void* realFunc, void* localsPtr)
+{
+ /* should never happen, but if compiler did not allocate */
+ /* enough space on stack for the trampoline, abort */
+ if ( trampSizeAllocated < 40 )
+ compilerrt_abort();
+
+ /* create trampoline */
+ trampOnStack[0] = 0x7c0802a6; /* mflr r0 */
+ trampOnStack[1] = 0x4800000d; /* bl Lbase */
+ trampOnStack[2] = (uint32_t)realFunc;
+ trampOnStack[3] = (uint32_t)localsPtr;
+ trampOnStack[4] = 0x7d6802a6; /* Lbase: mflr r11 */
+ trampOnStack[5] = 0x818b0000; /* lwz r12,0(r11) */
+ trampOnStack[6] = 0x7c0803a6; /* mtlr r0 */
+ trampOnStack[7] = 0x7d8903a6; /* mtctr r12 */
+ trampOnStack[8] = 0x816b0004; /* lwz r11,4(r11) */
+ trampOnStack[9] = 0x4e800420; /* bctr */
+
+ /* clear instruction cache */
+ __clear_cache(trampOnStack, &trampOnStack[10]);
+}
+#endif /* __ppc__ && !defined(__powerpc64__) */
diff --git a/contrib/compiler-rt/lib/builtins/truncdfhf2.c b/contrib/compiler-rt/lib/builtins/truncdfhf2.c
new file mode 100644
index 000000000000..8354a41b8b6f
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/truncdfhf2.c
@@ -0,0 +1,26 @@
+//===-- lib/truncdfhf2.c - double -> half conversion --------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define SRC_DOUBLE
+#define DST_HALF
+#include "fp_trunc_impl.inc"
+
+COMPILER_RT_ABI uint16_t __truncdfhf2(double a) {
+ return __truncXfYf2__(a);
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI uint16_t __aeabi_d2h(double a) {
+ return __truncdfhf2(a);
+}
+#else
+AEABI_RTABI uint16_t __aeabi_d2h(double a) COMPILER_RT_ALIAS(__truncdfhf2);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/truncdfsf2.c b/contrib/compiler-rt/lib/builtins/truncdfsf2.c
new file mode 100644
index 000000000000..195d3e0656e7
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/truncdfsf2.c
@@ -0,0 +1,26 @@
+//===-- lib/truncdfsf2.c - double -> single conversion ------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define SRC_DOUBLE
+#define DST_SINGLE
+#include "fp_trunc_impl.inc"
+
+COMPILER_RT_ABI float __truncdfsf2(double a) {
+ return __truncXfYf2__(a);
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI float __aeabi_d2f(double a) {
+ return __truncdfsf2(a);
+}
+#else
+AEABI_RTABI float __aeabi_d2f(double a) COMPILER_RT_ALIAS(__truncdfsf2);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/truncsfhf2.c b/contrib/compiler-rt/lib/builtins/truncsfhf2.c
new file mode 100644
index 000000000000..9c84ab4f938a
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/truncsfhf2.c
@@ -0,0 +1,32 @@
+//===-- lib/truncsfhf2.c - single -> half conversion --------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define SRC_SINGLE
+#define DST_HALF
+#include "fp_trunc_impl.inc"
+
+// Use a forwarding definition and noinline to implement a poor man's alias,
+// as there isn't a good cross-platform way of defining one.
+COMPILER_RT_ABI NOINLINE uint16_t __truncsfhf2(float a) {
+ return __truncXfYf2__(a);
+}
+
+COMPILER_RT_ABI uint16_t __gnu_f2h_ieee(float a) {
+ return __truncsfhf2(a);
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI uint16_t __aeabi_f2h(float a) {
+ return __truncsfhf2(a);
+}
+#else
+AEABI_RTABI uint16_t __aeabi_f2h(float a) COMPILER_RT_ALIAS(__truncsfhf2);
+#endif
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/trunctfdf2.c b/contrib/compiler-rt/lib/builtins/trunctfdf2.c
new file mode 100644
index 000000000000..741a71b33c5a
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/trunctfdf2.c
@@ -0,0 +1,22 @@
+//===-- lib/truncdfsf2.c - quad -> double conversion --------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+#define SRC_QUAD
+#define DST_DOUBLE
+#include "fp_trunc_impl.inc"
+
+COMPILER_RT_ABI double __trunctfdf2(long double a) {
+ return __truncXfYf2__(a);
+}
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/trunctfsf2.c b/contrib/compiler-rt/lib/builtins/trunctfsf2.c
new file mode 100644
index 000000000000..de96c1decf63
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/trunctfsf2.c
@@ -0,0 +1,22 @@
+//===-- lib/trunctfsf2.c - quad -> single conversion --------------*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+#define SRC_QUAD
+#define DST_SINGLE
+#include "fp_trunc_impl.inc"
+
+COMPILER_RT_ABI float __trunctfsf2(long double a) {
+ return __truncXfYf2__(a);
+}
+
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/ucmpdi2.c b/contrib/compiler-rt/lib/builtins/ucmpdi2.c
new file mode 100644
index 000000000000..40af23613b1f
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/ucmpdi2.c
@@ -0,0 +1,51 @@
+/* ===-- ucmpdi2.c - Implement __ucmpdi2 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __ucmpdi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: if (a < b) returns 0
+ * if (a == b) returns 1
+ * if (a > b) returns 2
+ */
+
+COMPILER_RT_ABI si_int
+__ucmpdi2(du_int a, du_int b)
+{
+ udwords x;
+ x.all = a;
+ udwords y;
+ y.all = b;
+ if (x.s.high < y.s.high)
+ return 0;
+ if (x.s.high > y.s.high)
+ return 2;
+ if (x.s.low < y.s.low)
+ return 0;
+ if (x.s.low > y.s.low)
+ return 2;
+ return 1;
+}
+
+#ifdef __ARM_EABI__
+/* Returns: if (a < b) returns -1
+* if (a == b) returns 0
+* if (a > b) returns 1
+*/
+COMPILER_RT_ABI si_int
+__aeabi_ulcmp(di_int a, di_int b)
+{
+ return __ucmpdi2(a, b) - 1;
+}
+#endif
+
diff --git a/contrib/compiler-rt/lib/builtins/ucmpti2.c b/contrib/compiler-rt/lib/builtins/ucmpti2.c
new file mode 100644
index 000000000000..bda8083bb2a1
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/ucmpti2.c
@@ -0,0 +1,42 @@
+/* ===-- ucmpti2.c - Implement __ucmpti2 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __ucmpti2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: if (a < b) returns 0
+ * if (a == b) returns 1
+ * if (a > b) returns 2
+ */
+
+COMPILER_RT_ABI si_int
+__ucmpti2(tu_int a, tu_int b)
+{
+ utwords x;
+ x.all = a;
+ utwords y;
+ y.all = b;
+ if (x.s.high < y.s.high)
+ return 0;
+ if (x.s.high > y.s.high)
+ return 2;
+ if (x.s.low < y.s.low)
+ return 0;
+ if (x.s.low > y.s.low)
+ return 2;
+ return 1;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/udivdi3.c b/contrib/compiler-rt/lib/builtins/udivdi3.c
new file mode 100644
index 000000000000..dc68e154b109
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/udivdi3.c
@@ -0,0 +1,23 @@
+/* ===-- udivdi3.c - Implement __udivdi3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __udivdi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a / b */
+
+COMPILER_RT_ABI du_int
+__udivdi3(du_int a, du_int b)
+{
+ return __udivmoddi4(a, b, 0);
+}
diff --git a/contrib/compiler-rt/lib/builtins/udivmoddi4.c b/contrib/compiler-rt/lib/builtins/udivmoddi4.c
new file mode 100644
index 000000000000..0c8b4ff46474
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/udivmoddi4.c
@@ -0,0 +1,231 @@
+/* ===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __udivmoddi4 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Effects: if rem != 0, *rem = a % b
+ * Returns: a / b
+ */
+
+/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
+
+COMPILER_RT_ABI du_int
+__udivmoddi4(du_int a, du_int b, du_int* rem)
+{
+ const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
+ const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
+ udwords n;
+ n.all = a;
+ udwords d;
+ d.all = b;
+ udwords q;
+ udwords r;
+ unsigned sr;
+ /* special cases, X is unknown, K != 0 */
+ if (n.s.high == 0)
+ {
+ if (d.s.high == 0)
+ {
+ /* 0 X
+ * ---
+ * 0 X
+ */
+ if (rem)
+ *rem = n.s.low % d.s.low;
+ return n.s.low / d.s.low;
+ }
+ /* 0 X
+ * ---
+ * K X
+ */
+ if (rem)
+ *rem = n.s.low;
+ return 0;
+ }
+ /* n.s.high != 0 */
+ if (d.s.low == 0)
+ {
+ if (d.s.high == 0)
+ {
+ /* K X
+ * ---
+ * 0 0
+ */
+ if (rem)
+ *rem = n.s.high % d.s.low;
+ return n.s.high / d.s.low;
+ }
+ /* d.s.high != 0 */
+ if (n.s.low == 0)
+ {
+ /* K 0
+ * ---
+ * K 0
+ */
+ if (rem)
+ {
+ r.s.high = n.s.high % d.s.high;
+ r.s.low = 0;
+ *rem = r.all;
+ }
+ return n.s.high / d.s.high;
+ }
+ /* K K
+ * ---
+ * K 0
+ */
+ if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */
+ {
+ if (rem)
+ {
+ r.s.low = n.s.low;
+ r.s.high = n.s.high & (d.s.high - 1);
+ *rem = r.all;
+ }
+ return n.s.high >> __builtin_ctz(d.s.high);
+ }
+ /* K K
+ * ---
+ * K 0
+ */
+ sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
+ /* 0 <= sr <= n_uword_bits - 2 or sr large */
+ if (sr > n_uword_bits - 2)
+ {
+ if (rem)
+ *rem = n.all;
+ return 0;
+ }
+ ++sr;
+ /* 1 <= sr <= n_uword_bits - 1 */
+ /* q.all = n.all << (n_udword_bits - sr); */
+ q.s.low = 0;
+ q.s.high = n.s.low << (n_uword_bits - sr);
+ /* r.all = n.all >> sr; */
+ r.s.high = n.s.high >> sr;
+ r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
+ }
+ else /* d.s.low != 0 */
+ {
+ if (d.s.high == 0)
+ {
+ /* K X
+ * ---
+ * 0 K
+ */
+ if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */
+ {
+ if (rem)
+ *rem = n.s.low & (d.s.low - 1);
+ if (d.s.low == 1)
+ return n.all;
+ sr = __builtin_ctz(d.s.low);
+ q.s.high = n.s.high >> sr;
+ q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
+ return q.all;
+ }
+ /* K X
+ * ---
+ * 0 K
+ */
+ sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high);
+ /* 2 <= sr <= n_udword_bits - 1
+ * q.all = n.all << (n_udword_bits - sr);
+ * r.all = n.all >> sr;
+ */
+ if (sr == n_uword_bits)
+ {
+ q.s.low = 0;
+ q.s.high = n.s.low;
+ r.s.high = 0;
+ r.s.low = n.s.high;
+ }
+ else if (sr < n_uword_bits) // 2 <= sr <= n_uword_bits - 1
+ {
+ q.s.low = 0;
+ q.s.high = n.s.low << (n_uword_bits - sr);
+ r.s.high = n.s.high >> sr;
+ r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
+ }
+ else // n_uword_bits + 1 <= sr <= n_udword_bits - 1
+ {
+ q.s.low = n.s.low << (n_udword_bits - sr);
+ q.s.high = (n.s.high << (n_udword_bits - sr)) |
+ (n.s.low >> (sr - n_uword_bits));
+ r.s.high = 0;
+ r.s.low = n.s.high >> (sr - n_uword_bits);
+ }
+ }
+ else
+ {
+ /* K X
+ * ---
+ * K K
+ */
+ sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
+ /* 0 <= sr <= n_uword_bits - 1 or sr large */
+ if (sr > n_uword_bits - 1)
+ {
+ if (rem)
+ *rem = n.all;
+ return 0;
+ }
+ ++sr;
+ /* 1 <= sr <= n_uword_bits */
+ /* q.all = n.all << (n_udword_bits - sr); */
+ q.s.low = 0;
+ if (sr == n_uword_bits)
+ {
+ q.s.high = n.s.low;
+ r.s.high = 0;
+ r.s.low = n.s.high;
+ }
+ else
+ {
+ q.s.high = n.s.low << (n_uword_bits - sr);
+ r.s.high = n.s.high >> sr;
+ r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
+ }
+ }
+ }
+ /* Not a special case
+ * q and r are initialized with:
+ * q.all = n.all << (n_udword_bits - sr);
+ * r.all = n.all >> sr;
+ * 1 <= sr <= n_udword_bits - 1
+ */
+ su_int carry = 0;
+ for (; sr > 0; --sr)
+ {
+ /* r:q = ((r:q) << 1) | carry */
+ r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1));
+ r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1));
+ q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1));
+ q.s.low = (q.s.low << 1) | carry;
+ /* carry = 0;
+ * if (r.all >= d.all)
+ * {
+ * r.all -= d.all;
+ * carry = 1;
+ * }
+ */
+ const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1);
+ carry = s & 1;
+ r.all -= d.all & s;
+ }
+ q.all = (q.all << 1) | carry;
+ if (rem)
+ *rem = r.all;
+ return q.all;
+}
diff --git a/contrib/compiler-rt/lib/builtins/udivmodsi4.c b/contrib/compiler-rt/lib/builtins/udivmodsi4.c
new file mode 100644
index 000000000000..789c4b5061e4
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/udivmodsi4.c
@@ -0,0 +1,27 @@
+/*===-- udivmodsi4.c - Implement __udivmodsi4 ------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __udivmodsi4 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a / b, *rem = a % b */
+
+COMPILER_RT_ABI su_int
+__udivmodsi4(su_int a, su_int b, su_int* rem)
+{
+ si_int d = __udivsi3(a,b);
+ *rem = a - (d*b);
+ return d;
+}
+
+
diff --git a/contrib/compiler-rt/lib/builtins/udivmodti4.c b/contrib/compiler-rt/lib/builtins/udivmodti4.c
new file mode 100644
index 000000000000..803168849c6c
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/udivmodti4.c
@@ -0,0 +1,238 @@
+/* ===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __udivmodti4 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Effects: if rem != 0, *rem = a % b
+ * Returns: a / b
+ */
+
+/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
+
+COMPILER_RT_ABI tu_int
+__udivmodti4(tu_int a, tu_int b, tu_int* rem)
+{
+ const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
+ const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
+ utwords n;
+ n.all = a;
+ utwords d;
+ d.all = b;
+ utwords q;
+ utwords r;
+ unsigned sr;
+ /* special cases, X is unknown, K != 0 */
+ if (n.s.high == 0)
+ {
+ if (d.s.high == 0)
+ {
+ /* 0 X
+ * ---
+ * 0 X
+ */
+ if (rem)
+ *rem = n.s.low % d.s.low;
+ return n.s.low / d.s.low;
+ }
+ /* 0 X
+ * ---
+ * K X
+ */
+ if (rem)
+ *rem = n.s.low;
+ return 0;
+ }
+ /* n.s.high != 0 */
+ if (d.s.low == 0)
+ {
+ if (d.s.high == 0)
+ {
+ /* K X
+ * ---
+ * 0 0
+ */
+ if (rem)
+ *rem = n.s.high % d.s.low;
+ return n.s.high / d.s.low;
+ }
+ /* d.s.high != 0 */
+ if (n.s.low == 0)
+ {
+ /* K 0
+ * ---
+ * K 0
+ */
+ if (rem)
+ {
+ r.s.high = n.s.high % d.s.high;
+ r.s.low = 0;
+ *rem = r.all;
+ }
+ return n.s.high / d.s.high;
+ }
+ /* K K
+ * ---
+ * K 0
+ */
+ if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */
+ {
+ if (rem)
+ {
+ r.s.low = n.s.low;
+ r.s.high = n.s.high & (d.s.high - 1);
+ *rem = r.all;
+ }
+ return n.s.high >> __builtin_ctzll(d.s.high);
+ }
+ /* K K
+ * ---
+ * K 0
+ */
+ sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high);
+ /* 0 <= sr <= n_udword_bits - 2 or sr large */
+ if (sr > n_udword_bits - 2)
+ {
+ if (rem)
+ *rem = n.all;
+ return 0;
+ }
+ ++sr;
+ /* 1 <= sr <= n_udword_bits - 1 */
+ /* q.all = n.all << (n_utword_bits - sr); */
+ q.s.low = 0;
+ q.s.high = n.s.low << (n_udword_bits - sr);
+ /* r.all = n.all >> sr; */
+ r.s.high = n.s.high >> sr;
+ r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
+ }
+ else /* d.s.low != 0 */
+ {
+ if (d.s.high == 0)
+ {
+ /* K X
+ * ---
+ * 0 K
+ */
+ if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */
+ {
+ if (rem)
+ *rem = n.s.low & (d.s.low - 1);
+ if (d.s.low == 1)
+ return n.all;
+ sr = __builtin_ctzll(d.s.low);
+ q.s.high = n.s.high >> sr;
+ q.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
+ return q.all;
+ }
+ /* K X
+ * ---
+ * 0 K
+ */
+ sr = 1 + n_udword_bits + __builtin_clzll(d.s.low)
+ - __builtin_clzll(n.s.high);
+ /* 2 <= sr <= n_utword_bits - 1
+ * q.all = n.all << (n_utword_bits - sr);
+ * r.all = n.all >> sr;
+ */
+ if (sr == n_udword_bits)
+ {
+ q.s.low = 0;
+ q.s.high = n.s.low;
+ r.s.high = 0;
+ r.s.low = n.s.high;
+ }
+ else if (sr < n_udword_bits) // 2 <= sr <= n_udword_bits - 1
+ {
+ q.s.low = 0;
+ q.s.high = n.s.low << (n_udword_bits - sr);
+ r.s.high = n.s.high >> sr;
+ r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
+ }
+ else // n_udword_bits + 1 <= sr <= n_utword_bits - 1
+ {
+ q.s.low = n.s.low << (n_utword_bits - sr);
+ q.s.high = (n.s.high << (n_utword_bits - sr)) |
+ (n.s.low >> (sr - n_udword_bits));
+ r.s.high = 0;
+ r.s.low = n.s.high >> (sr - n_udword_bits);
+ }
+ }
+ else
+ {
+ /* K X
+ * ---
+ * K K
+ */
+ sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high);
+ /*0 <= sr <= n_udword_bits - 1 or sr large */
+ if (sr > n_udword_bits - 1)
+ {
+ if (rem)
+ *rem = n.all;
+ return 0;
+ }
+ ++sr;
+ /* 1 <= sr <= n_udword_bits
+ * q.all = n.all << (n_utword_bits - sr);
+ * r.all = n.all >> sr;
+ */
+ q.s.low = 0;
+ if (sr == n_udword_bits)
+ {
+ q.s.high = n.s.low;
+ r.s.high = 0;
+ r.s.low = n.s.high;
+ }
+ else
+ {
+ r.s.high = n.s.high >> sr;
+ r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
+ q.s.high = n.s.low << (n_udword_bits - sr);
+ }
+ }
+ }
+ /* Not a special case
+ * q and r are initialized with:
+ * q.all = n.all << (n_utword_bits - sr);
+ * r.all = n.all >> sr;
+ * 1 <= sr <= n_utword_bits - 1
+ */
+ su_int carry = 0;
+ for (; sr > 0; --sr)
+ {
+ /* r:q = ((r:q) << 1) | carry */
+ r.s.high = (r.s.high << 1) | (r.s.low >> (n_udword_bits - 1));
+ r.s.low = (r.s.low << 1) | (q.s.high >> (n_udword_bits - 1));
+ q.s.high = (q.s.high << 1) | (q.s.low >> (n_udword_bits - 1));
+ q.s.low = (q.s.low << 1) | carry;
+ /* carry = 0;
+ * if (r.all >= d.all)
+ * {
+ * r.all -= d.all;
+ * carry = 1;
+ * }
+ */
+ const ti_int s = (ti_int)(d.all - r.all - 1) >> (n_utword_bits - 1);
+ carry = s & 1;
+ r.all -= d.all & s;
+ }
+ q.all = (q.all << 1) | carry;
+ if (rem)
+ *rem = r.all;
+ return q.all;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/udivsi3.c b/contrib/compiler-rt/lib/builtins/udivsi3.c
new file mode 100644
index 000000000000..bb720f8c382b
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/udivsi3.c
@@ -0,0 +1,68 @@
+/* ===-- udivsi3.c - Implement __udivsi3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __udivsi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a / b */
+
+/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
+
+/* This function should not call __divsi3! */
+COMPILER_RT_ABI su_int
+__udivsi3(su_int n, su_int d)
+{
+ const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
+ su_int q;
+ su_int r;
+ unsigned sr;
+ /* special cases */
+ if (d == 0)
+ return 0; /* ?! */
+ if (n == 0)
+ return 0;
+ sr = __builtin_clz(d) - __builtin_clz(n);
+ /* 0 <= sr <= n_uword_bits - 1 or sr large */
+ if (sr > n_uword_bits - 1) /* d > r */
+ return 0;
+ if (sr == n_uword_bits - 1) /* d == 1 */
+ return n;
+ ++sr;
+ /* 1 <= sr <= n_uword_bits - 1 */
+ /* Not a special case */
+ q = n << (n_uword_bits - sr);
+ r = n >> sr;
+ su_int carry = 0;
+ for (; sr > 0; --sr)
+ {
+ /* r:q = ((r:q) << 1) | carry */
+ r = (r << 1) | (q >> (n_uword_bits - 1));
+ q = (q << 1) | carry;
+ /* carry = 0;
+ * if (r.all >= d.all)
+ * {
+ * r.all -= d.all;
+ * carry = 1;
+ * }
+ */
+ const si_int s = (si_int)(d - r - 1) >> (n_uword_bits - 1);
+ carry = s & 1;
+ r -= d & s;
+ }
+ q = (q << 1) | carry;
+ return q;
+}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI su_int __aeabi_uidiv(su_int n, su_int d) COMPILER_RT_ALIAS(__udivsi3);
+#endif
diff --git a/contrib/compiler-rt/lib/builtins/udivti3.c b/contrib/compiler-rt/lib/builtins/udivti3.c
new file mode 100644
index 000000000000..ec94673e25b0
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/udivti3.c
@@ -0,0 +1,27 @@
+/* ===-- udivti3.c - Implement __udivti3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __udivti3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: a / b */
+
+COMPILER_RT_ABI tu_int
+__udivti3(tu_int a, tu_int b)
+{
+ return __udivmodti4(a, b, 0);
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/umoddi3.c b/contrib/compiler-rt/lib/builtins/umoddi3.c
new file mode 100644
index 000000000000..d513f080a1e9
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/umoddi3.c
@@ -0,0 +1,25 @@
+/* ===-- umoddi3.c - Implement __umoddi3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __umoddi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a % b */
+
+COMPILER_RT_ABI du_int
+__umoddi3(du_int a, du_int b)
+{
+ du_int r;
+ __udivmoddi4(a, b, &r);
+ return r;
+}
diff --git a/contrib/compiler-rt/lib/builtins/umodsi3.c b/contrib/compiler-rt/lib/builtins/umodsi3.c
new file mode 100644
index 000000000000..d5fda4a6af1f
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/umodsi3.c
@@ -0,0 +1,23 @@
+/* ===-- umodsi3.c - Implement __umodsi3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __umodsi3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: a % b */
+
+COMPILER_RT_ABI su_int
+__umodsi3(su_int a, su_int b)
+{
+ return a - __udivsi3(a, b) * b;
+}
diff --git a/contrib/compiler-rt/lib/builtins/umodti3.c b/contrib/compiler-rt/lib/builtins/umodti3.c
new file mode 100644
index 000000000000..6d1ca7a8cf6f
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/umodti3.c
@@ -0,0 +1,29 @@
+/* ===-- umodti3.c - Implement __umodti3 -----------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __umodti3 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+/* Returns: a % b */
+
+COMPILER_RT_ABI tu_int
+__umodti3(tu_int a, tu_int b)
+{
+ tu_int r;
+ __udivmodti4(a, b, &r);
+ return r;
+}
+
+#endif /* CRT_HAS_128BIT */
diff --git a/contrib/compiler-rt/lib/builtins/unwind-ehabi-helpers.h b/contrib/compiler-rt/lib/builtins/unwind-ehabi-helpers.h
new file mode 100644
index 000000000000..ccb0765975a9
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/unwind-ehabi-helpers.h
@@ -0,0 +1,55 @@
+/* ===-- arm-ehabi-helpers.h - Supplementary ARM EHABI declarations --------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===--------------------------------------------------------------------=== */
+
+#ifndef UNWIND_EHABI_HELPERS_H
+#define UNWIND_EHABI_HELPERS_H
+
+#include <stdint.h>
+/* NOTE: see reasoning for this inclusion below */
+#include <unwind.h>
+
+#if !defined(__ARM_EABI_UNWINDER__)
+
+/*
+ * NOTE: _URC_OK, _URC_FAILURE must be present as preprocessor tokens. This
+ * allows for a substitution of a constant which can be cast into the
+ * appropriate enumerated type. This header is expected to always be included
+ * AFTER unwind.h (which is why it is forcefully included above). This ensures
+ * that we do not overwrite the token for the enumeration. Subsequent uses of
+ * the token would be clean to rewrite with constant values.
+ *
+ * The typedef redeclaration should be safe. Due to the protection granted to
+ * us by the `__ARM_EABI_UNWINDER__` above, we are guaranteed that we are in a
+ * header not vended by gcc. The HP unwinder (being an itanium unwinder) does
+ * not support EHABI, and the GNU unwinder, derived from the HP unwinder, also
+ * does not support EHABI as of the introduction of this header. As such, we
+ * are fairly certain that we are in the LLVM case. Here, _Unwind_State is a
+ * typedef, and so we can get away with a redeclaration.
+ *
+ * Guarded redefinitions of the needed unwind state prevent the redefinition of
+ * those states.
+ */
+
+#define _URC_OK 0
+#define _URC_FAILURE 9
+
+typedef uint32_t _Unwind_State;
+
+#if !defined(_US_UNWIND_FRAME_STARTING)
+#define _US_UNWIND_FRAME_STARTING ((_Unwind_State)1)
+#endif
+
+#if !defined(_US_ACTION_MASK)
+#define _US_ACTION_MASK ((_Unwind_State)3)
+#endif
+
+#endif
+
+#endif
+
diff --git a/contrib/compiler-rt/lib/builtins/x86_64/chkstk.S b/contrib/compiler-rt/lib/builtins/x86_64/chkstk.S
new file mode 100644
index 000000000000..4149ac63d9d0
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/x86_64/chkstk.S
@@ -0,0 +1,39 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// _chkstk routine
+// This routine is windows specific
+// http://msdn.microsoft.com/en-us/library/ms648426.aspx
+
+// Notes from r227519
+// MSVC x64s __chkstk and cygmings ___chkstk_ms do not adjust %rsp
+// themselves. It also does not clobber %rax so we can reuse it when
+// adjusting %rsp.
+
+#ifdef __x86_64__
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(___chkstk_ms)
+ push %rcx
+ push %rax
+ cmp $0x1000,%rax
+ lea 24(%rsp),%rcx
+ jb 1f
+2:
+ sub $0x1000,%rcx
+ test %rcx,(%rcx)
+ sub $0x1000,%rax
+ cmp $0x1000,%rax
+ ja 2b
+1:
+ sub %rax,%rcx
+ test %rcx,(%rcx)
+ pop %rax
+ pop %rcx
+ ret
+END_COMPILERRT_FUNCTION(___chkstk_ms)
+
+#endif // __x86_64__
diff --git a/contrib/compiler-rt/lib/builtins/x86_64/chkstk2.S b/contrib/compiler-rt/lib/builtins/x86_64/chkstk2.S
new file mode 100644
index 000000000000..ac1eb920e0e8
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/x86_64/chkstk2.S
@@ -0,0 +1,42 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+#ifdef __x86_64__
+
+// _chkstk (_alloca) routine - probe stack between %rsp and (%rsp-%rax) in 4k increments,
+// then decrement %rsp by %rax. Preserves all registers except %rsp and flags.
+// This routine is windows specific
+// http://msdn.microsoft.com/en-us/library/ms648426.aspx
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__alloca)
+ mov %rcx,%rax // x64 _alloca is a normal function with parameter in rcx
+ // fallthrough
+DEFINE_COMPILERRT_FUNCTION(___chkstk)
+ push %rcx
+ cmp $0x1000,%rax
+ lea 16(%rsp),%rcx // rsp before calling this routine -> rcx
+ jb 1f
+2:
+ sub $0x1000,%rcx
+ test %rcx,(%rcx)
+ sub $0x1000,%rax
+ cmp $0x1000,%rax
+ ja 2b
+1:
+ sub %rax,%rcx
+ test %rcx,(%rcx)
+
+ lea 8(%rsp),%rax // load pointer to the return address into rax
+ mov %rcx,%rsp // install the new top of stack pointer into rsp
+ mov -8(%rax),%rcx // restore rcx
+ push (%rax) // push return address onto the stack
+ sub %rsp,%rax // restore the original value in rax
+ ret
+END_COMPILERRT_FUNCTION(___chkstk)
+END_COMPILERRT_FUNCTION(__alloca)
+
+#endif // __x86_64__
diff --git a/contrib/compiler-rt/lib/builtins/x86_64/floatdidf.c b/contrib/compiler-rt/lib/builtins/x86_64/floatdidf.c
new file mode 100644
index 000000000000..dead0ed42c65
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/x86_64/floatdidf.c
@@ -0,0 +1,16 @@
+/* This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+/* double __floatdidf(di_int a); */
+
+#if defined(__x86_64__) || defined(_M_X64)
+
+#include "../int_lib.h"
+
+double __floatdidf(int64_t a)
+{
+ return (double)a;
+}
+
+#endif /* __x86_64__ */
diff --git a/contrib/compiler-rt/lib/builtins/x86_64/floatdisf.c b/contrib/compiler-rt/lib/builtins/x86_64/floatdisf.c
new file mode 100644
index 000000000000..99d5621c6327
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/x86_64/floatdisf.c
@@ -0,0 +1,14 @@
+/* This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+#if defined(__x86_64__) || defined(_M_X64)
+
+#include "../int_lib.h"
+
+float __floatdisf(int64_t a)
+{
+ return (float)a;
+}
+
+#endif /* __x86_64__ */
diff --git a/contrib/compiler-rt/lib/builtins/x86_64/floatdixf.c b/contrib/compiler-rt/lib/builtins/x86_64/floatdixf.c
new file mode 100644
index 000000000000..c01193a82b5e
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/x86_64/floatdixf.c
@@ -0,0 +1,16 @@
+/* This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+/* long double __floatdixf(di_int a); */
+
+#ifdef __x86_64__
+
+#include "../int_lib.h"
+
+long double __floatdixf(int64_t a)
+{
+ return (long double)a;
+}
+
+#endif /* __i386__ */
diff --git a/contrib/compiler-rt/lib/builtins/x86_64/floatundidf.S b/contrib/compiler-rt/lib/builtins/x86_64/floatundidf.S
new file mode 100644
index 000000000000..094a68dc3cd4
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/x86_64/floatundidf.S
@@ -0,0 +1,52 @@
+//===-- floatundidf.S - Implement __floatundidf for x86_64 ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __floatundidf for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// double __floatundidf(du_int a);
+
+#ifdef __x86_64__
+
+CONST_SECTION
+
+ .balign 16
+twop52:
+ .quad 0x4330000000000000
+
+ .balign 16
+twop84_plus_twop52:
+ .quad 0x4530000000100000
+
+ .balign 16
+twop84:
+ .quad 0x4530000000000000
+
+#define REL_ADDR(_a) (_a)(%rip)
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__floatundidf)
+ movd %edi, %xmm0 // low 32 bits of a
+ shrq $32, %rdi // high 32 bits of a
+ orq REL_ADDR(twop84), %rdi // 0x1p84 + a_hi (no rounding occurs)
+ orpd REL_ADDR(twop52), %xmm0 // 0x1p52 + a_lo (no rounding occurs)
+ movd %rdi, %xmm1
+ subsd REL_ADDR(twop84_plus_twop52), %xmm1 // a_hi - 0x1p52 (no rounding occurs)
+ addsd %xmm1, %xmm0 // a_hi + a_lo (round happens here)
+ ret
+END_COMPILERRT_FUNCTION(__floatundidf)
+
+#endif // __x86_64__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/x86_64/floatundisf.S b/contrib/compiler-rt/lib/builtins/x86_64/floatundisf.S
new file mode 100644
index 000000000000..7c9f75e188eb
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/x86_64/floatundisf.S
@@ -0,0 +1,38 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// float __floatundisf(du_int a);
+
+#ifdef __x86_64__
+
+CONST_SECTION
+
+ .balign 16
+two:
+ .single 2.0
+
+#define REL_ADDR(_a) (_a)(%rip)
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__floatundisf)
+ movq $1, %rsi
+ testq %rdi, %rdi
+ js 1f
+ cvtsi2ssq %rdi, %xmm0
+ ret
+
+1: andq %rdi, %rsi
+ shrq %rdi
+ orq %rsi, %rdi
+ cvtsi2ssq %rdi, %xmm0
+ mulss REL_ADDR(two), %xmm0
+ ret
+END_COMPILERRT_FUNCTION(__floatundisf)
+
+#endif // __x86_64__
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/x86_64/floatundixf.S b/contrib/compiler-rt/lib/builtins/x86_64/floatundixf.S
new file mode 100644
index 000000000000..28a096b71373
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/x86_64/floatundixf.S
@@ -0,0 +1,71 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// long double __floatundixf(du_int a);
+
+#ifdef __x86_64__
+
+CONST_SECTION
+
+ .balign 16
+twop64:
+ .quad 0x43f0000000000000
+
+#define REL_ADDR(_a) (_a)(%rip)
+
+ .text
+
+ .balign 4
+DEFINE_COMPILERRT_FUNCTION(__floatundixf)
+ movq %rdi, -8(%rsp)
+ fildq -8(%rsp)
+ test %rdi, %rdi
+ js 1f
+ ret
+1: faddl REL_ADDR(twop64)
+ ret
+END_COMPILERRT_FUNCTION(__floatundixf)
+
+#endif // __x86_64__
+
+
+/* Branch-free implementation is ever so slightly slower, but more beautiful.
+ It is likely superior for inlining, so I kept it around for future reference.
+
+#ifdef __x86_64__
+
+CONST_SECTION
+
+ .balign 4
+twop52:
+ .quad 0x4330000000000000
+twop84_plus_twop52_neg:
+ .quad 0xc530000000100000
+twop84:
+ .quad 0x4530000000000000
+
+#define REL_ADDR(_a) (_a)(%rip)
+
+.text
+.balign 4
+DEFINE_COMPILERRT_FUNCTION(__floatundixf)
+ movl %edi, %esi // low 32 bits of input
+ shrq $32, %rdi // hi 32 bits of input
+ orq REL_ADDR(twop84), %rdi // 2^84 + hi (as a double)
+ orq REL_ADDR(twop52), %rsi // 2^52 + lo (as a double)
+ movq %rdi, -8(%rsp)
+ movq %rsi, -16(%rsp)
+ fldl REL_ADDR(twop84_plus_twop52_neg)
+ faddl -8(%rsp) // hi - 2^52 (as double extended, no rounding occurs)
+ faddl -16(%rsp) // hi + lo (as double extended)
+ ret
+END_COMPILERRT_FUNCTION(__floatundixf)
+
+#endif // __x86_64__
+
+*/
+
+NO_EXEC_STACK_DIRECTIVE
+