93 files changed, 4542 insertions, 0 deletions
diff --git a/contrib/compiler-rt/lib/builtins/arm/adddf3vfp.S b/contrib/compiler-rt/lib/builtins/arm/adddf3vfp.S
new file mode 100644
index 000000000000..8e476cad1624
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/adddf3vfp.S
@@ -0,0 +1,33 @@
+//===-- adddf3vfp.S - Implement adddf3vfp ---------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// double __adddf3vfp(double a, double b) { return a + b; }
+//
+// Adds two double precision floating point numbers using the Darwin
+// calling convention where double arguments are passed in GPR pairs
+// (r0/r1, r2/r3); COMPILER_RT_ARMHF_TARGET builds add d0 and d1 in place.
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__adddf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vadd.f64 d0, d0, d1		// operands are used directly from d0/d1; sum left in d0
+#else
+	vmov	d6, r0, r1		// move first param from r0/r1 pair into d6
+	vmov	d7, r2, r3		// move second param from r2/r3 pair into d7
+	vadd.f64 d6, d6, d7		// d6 = a + b
+	vmov	r0, r1, d6		// move result back to r0/r1 pair
+#endif
+	bx	lr
+END_COMPILERRT_FUNCTION(__adddf3vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/addsf3.S b/contrib/compiler-rt/lib/builtins/arm/addsf3.S
new file mode 100644
index 000000000000..74723cbeff74
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/addsf3.S
@@ -0,0 +1,277 @@
+/*===-- addsf3.S - Adds two single precision floating point numbers ------===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __addsf3 function (single precision floating point
+ * number addition with the IEEE-754 default rounding: to nearest, ties to
+ * even) for the ARM Thumb1 ISA.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+#define significandBits 23 /* stored fraction bits of an IEEE-754 single */
+#define typeWidth 32       /* total bits in the representation */
+
+	.syntax unified
+	.text
+  .thumb
+  .p2align 2
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fadd, __addsf3)
+// float __addsf3(float a, float b): r0 = aRep, r1 = bRep, sum returned in r0.
+DEFINE_COMPILERRT_THUMB_FUNCTION(__addsf3)
+  push {r4, r5, r6, r7, lr}
+  // Get the absolute value of a and b.
+  lsls r2, r0, #1
+  lsls r3, r1, #1
+  lsrs r2, r2, #1  /* aAbs */
+  beq  LOCAL_LABEL(a_zero_nan_inf)  // a is +/-0; r3 still holds bRep << 1
+  lsrs r3, r3, #1  /* bAbs */
+  beq  LOCAL_LABEL(zero_nan_inf)    // b is +/-0
+
+  // Detect if a or b is infinity or Nan.
+  lsrs r6, r2, #(significandBits)
+  lsrs r7, r3, #(significandBits)
+  cmp  r6, #0xFF
+  beq  LOCAL_LABEL(zero_nan_inf)
+  cmp  r7, #0xFF
+  beq  LOCAL_LABEL(zero_nan_inf)
+
+  // Swap Rep and Abs so that a and aAbs have the larger absolute value.
+  cmp r2, r3
+  bhs LOCAL_LABEL(no_swap)
+  movs r4, r0
+  movs r5, r2
+  movs r0, r1
+  movs r2, r3
+  movs r1, r4
+  movs r3, r5
+LOCAL_LABEL(no_swap):
+
+  // Get the significands and shift them to give us round, guard and sticky.
+  lsls r4, r0, #(typeWidth - significandBits)
+  lsrs r4, r4, #(typeWidth - significandBits - 3) /* aSignificand << 3 */
+  lsls r5, r1, #(typeWidth - significandBits)
+  lsrs r5, r5, #(typeWidth - significandBits - 3) /* bSignificand << 3 */
+
+  // Get the implicitBit.
+  movs r6, #1
+  lsls r6, r6, #(significandBits + 3)
+
+  // Get aExponent and set implicit bit if necessary.
+  lsrs r2, r2, #(significandBits)
+  beq LOCAL_LABEL(a_done_implicit_bit)
+  orrs r4, r6
+LOCAL_LABEL(a_done_implicit_bit):
+
+  // Get bExponent and set implicit bit if necessary.
+  lsrs r3, r3, #(significandBits)
+  beq LOCAL_LABEL(b_done_implicit_bit)
+  orrs r5, r6
+LOCAL_LABEL(b_done_implicit_bit):
+
+  // Get the difference in exponents.
+  subs r6, r2, r3
+  beq LOCAL_LABEL(done_align)
+
+  // If b is denormal, then a must be normal as align > 0, and we only need to
+  // right shift bSignificand by (align - 1) bits.
+  cmp  r3, #0
+  bne  1f
+  subs r6, r6, #1
+1:
+
+  // No longer needs bExponent. r3 is dead here.
+  // Set sticky bits of b: sticky = bSignificand << (typeWidth - align).
+  movs r3, #(typeWidth)
+  subs r3, r3, r6
+  movs r7, r5
+  lsls r7, r3
+  beq 1f
+  movs r7, #1
+1:
+
+  // bSignificand = bSignificand >> align | sticky;
+  lsrs r5, r6
+  orrs r5, r7
+  bne LOCAL_LABEL(done_align)
+  movs r5, #1 //  sticky; b is known to be non-zero.
+
+LOCAL_LABEL(done_align):
+  // isSubtraction = (aRep ^ bRep) >> 31;
+  movs r7, r0
+  eors r7, r1
+  lsrs r7, #31
+  bne LOCAL_LABEL(do_substraction)
+
+  // Same sign, do Addition.
+
+  // aSignificand += bSignificand;
+  adds r4, r4, r5
+
+  // Check carry bit.
+  movs r6, #1
+  lsls r6, r6, #(significandBits + 3 + 1)
+  movs r7, r4
+  ands r7, r6
+  beq LOCAL_LABEL(form_result)
+  // If the addition carried up, we need to right-shift the result and
+  // adjust the exponent.
+  movs r7, r4
+  movs r6, #1
+  ands r7, r6 // sticky = aSignificand & 1;
+  lsrs r4, #1
+  orrs r4, r7  // result Significand
+  adds r2, #1  // result Exponent
+  // If we have overflowed the type, return +/- infinity.
+  cmp  r2, #0xFF
+  beq  LOCAL_LABEL(ret_inf)
+
+LOCAL_LABEL(form_result):
+  // Shift the sign, exponent and significand into place.
+  lsrs r0, #(typeWidth - 1)
+  lsls r0, #(typeWidth - 1) // Get Sign.
+  lsls r2, #(significandBits)
+  orrs r0, r2
+  movs r1, r4               // keep a copy for the round/guard/sticky bits
+  lsls r4, #(typeWidth - significandBits - 3)
+  lsrs r4, #(typeWidth - significandBits)
+  orrs r0, r4
+
+  // Final rounding.  The result may overflow to infinity, but that is the
+  // correct result in that case.
+  // roundGuardSticky = aSignificand & 0x7;
+  movs r2, #0x7
+  ands r1, r2
+  // if (roundGuardSticky > 0x4) result++;
+
+  cmp r1, #0x4
+  blt LOCAL_LABEL(done_round)  // below half: truncate
+  beq 1f                       // exactly half: tie handled below
+  adds r0, #1
+  pop {r4, r5, r6, r7, pc}
+1:
+
+  // if (roundGuardSticky == 0x4) result += result & 1;
+  movs r1, r0
+  lsrs r1, #1                  // carry = lowest bit of the result
+  bcc  LOCAL_LABEL(done_round)
+  adds r0, r0, #1
+LOCAL_LABEL(done_round):
+  pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(do_substraction):
+  subs r4, r4, r5 // aSignificand -= bSignificand;
+  beq  LOCAL_LABEL(ret_zero)
+  movs r6, r4
+  cmp  r2, #0
+  beq  LOCAL_LABEL(form_result) // if a's exp is 0, no need to normalize.
+  // If partial cancellation occurred, we need to left-shift the result
+  // and adjust the exponent:
+  lsrs r6, r6, #(significandBits + 3)
+  bne LOCAL_LABEL(form_result)
+
+  push {r0, r1, r2, r3}        // r0-r3 are still live; keep them across the call
+  movs r0, r4
+  bl   SYMBOL_NAME(__clzsi2)   // NOTE(review): 9 words are pushed at this point, so sp is not 8-byte aligned if it was on entry - confirm the callee tolerates that
+  movs r5, r0
+  pop {r0, r1, r2, r3}
+  // shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3);
+  subs r5, r5, #(typeWidth - significandBits - 3 - 1)
+  // aSignificand <<= shift; aExponent -= shift;
+  lsls r4, r5
+  subs  r2, r2, r5
+  bgt LOCAL_LABEL(form_result)
+
+  // Do normalization if aExponent <= 0.
+  movs r6, #1
+  subs r6, r6, r2 // 1 - aExponent;
+  movs r2, #0 // aExponent = 0;
+  movs r3, #(typeWidth) // bExponent is dead.
+  subs r3, r3, r6
+  movs r7, r4
+  lsls r7, r3  // stickyBit = (bool)(aSignificand << (typeWidth - align))
+  beq 1f
+  movs r7, #1
+1:
+  lsrs r4, r6 /* aSignificand >> shift */
+  orrs r4, r7
+  b LOCAL_LABEL(form_result)
+
+LOCAL_LABEL(ret_zero):
+  movs r0, #0                  // exact cancellation: return +0
+  pop {r4, r5, r6, r7, pc}
+
+
+LOCAL_LABEL(a_zero_nan_inf):
+  lsrs r3, r3, #1              // finish computing bAbs (entry skipped this shift)
+
+LOCAL_LABEL(zero_nan_inf):
+  // Here  r2 has aAbs, r3 has bAbs
+  movs r4, #0xFF
+  lsls r4, r4, #(significandBits) // Make +inf.
+
+  cmp r2, r4
+  bhi LOCAL_LABEL(a_is_nan)
+  cmp r3, r4
+  bhi LOCAL_LABEL(b_is_nan)
+
+  cmp r2, r4
+  bne LOCAL_LABEL(a_is_rational)
+  // aAbs is INF.
+  eors r1, r0 // aRep ^ bRep.
+  movs r6, #1
+  lsls r6, r6, #(typeWidth - 1) // get sign mask.
+  cmp r1, r6 // if they only differ on sign bit, it's -INF + INF
+  beq LOCAL_LABEL(a_is_nan)    // opposite infinities: quiet NaN built from aRep
+  pop {r4, r5, r6, r7, pc}     // otherwise return a (the infinity)
+
+LOCAL_LABEL(a_is_rational):
+  cmp r3, r4
+  bne LOCAL_LABEL(b_is_rational)
+  movs r0, r1                  // b is INF and a is not: return b
+  pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(b_is_rational):
+  // either a or b or both are zero.
+  adds r4, r2, r3
+  beq  LOCAL_LABEL(both_zero)
+  cmp r2, #0 // is absA 0 ?
+  beq LOCAL_LABEL(ret_b)
+  pop {r4, r5, r6, r7, pc}     // only b is zero: return a unchanged
+
+LOCAL_LABEL(both_zero):
+  ands r0, r1 // +0 + -0 = +0
+  pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(ret_b):
+  movs r0, r1
+
+LOCAL_LABEL(ret):              // reached by fall-through from ret_b
+  pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(b_is_nan):
+  movs r0, r1
+LOCAL_LABEL(a_is_nan):
+  movs r1, #1
+  lsls r1, r1, #(significandBits -1) // r1 is quiet bit.
+  orrs r0, r1
+  pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(ret_inf):
+  movs r4, #0xFF
+  lsls r4, r4, #(significandBits)
+  orrs r0, r4                       // force the exponent field to all ones
+  lsrs r0, r0, #(significandBits)
+  lsls r0, r0, #(significandBits)   // clear the fraction, keeping sign and exponent
+  pop {r4, r5, r6, r7, pc}
+
+
+END_COMPILERRT_FUNCTION(__addsf3)
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/contrib/compiler-rt/lib/builtins/arm/addsf3vfp.S b/contrib/compiler-rt/lib/builtins/arm/addsf3vfp.S
new file mode 100644
index 000000000000..8871efdcc5d1
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/addsf3vfp.S
@@ -0,0 +1,33 @@
+//===-- addsf3vfp.S - Implement addsf3vfp ---------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern float __addsf3vfp(float a, float b);
+//
+// Adds two single precision floating point numbers using the Darwin
+// calling convention where single arguments are passed in GPRs
+// (r0 and r1); COMPILER_RT_ARMHF_TARGET builds add s0 and s1 in place.
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__addsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vadd.f32 s0, s0, s1		// operands are used directly from s0/s1; sum left in s0
+#else
+	vmov	s14, r0		// move first param from r0 into float register
+	vmov	s15, r1		// move second param from r1 into float register
+	vadd.f32 s14, s14, s15	// s14 = a + b
+	vmov	r0, s14		// move result back to r0
+#endif
+	bx	lr
+END_COMPILERRT_FUNCTION(__addsf3vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmp.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmp.S
new file mode 100644
index 000000000000..adc2d55d90f5
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmp.S
@@ -0,0 +1,145 @@
+//===-- aeabi_cdcmp.S - EABI cdcmp* implementation ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+#error big endian support not implemented
+#endif
+
+#define APSR_Z (1 << 30)
+#define APSR_C (1 << 29)
+
+// void __aeabi_cdcmpeq(double a, double b) {
+//   if (isnan(a) || isnan(b)) {
+//     Z = 0; C = 1;
+//   } else {
+//     __aeabi_cdcmple(a, b);
+//   }
+// }
+// The result is reported only in the APSR flags; r0-r3 are restored on exit.
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmpeq)
+        push {r0-r3, lr}
+        bl SYMBOL_NAME(__aeabi_cdcmpeq_check_nan)
+        cmp r0, #1                  // EQ when the helper reported a NaN operand
+#if defined(USE_THUMB_1)
+        beq 1f
+        // NaN has been ruled out, so __aeabi_cdcmple can't trap
+        mov r0, sp
+        ldm r0, {r0-r3}             // reload the saved operands from the stack
+        bl SYMBOL_NAME(__aeabi_cdcmple)
+        pop {r0-r3, pc}             // the flags set by the callee are returned as-is
+1:
+        // Z = 0, C = 1
+        movs r0, #0xF
+        lsls r0, r0, #31            // result is non-zero (Z = 0); last bit shifted out is 1 (C = 1)
+        pop {r0-r3, pc}
+#else
+        pop {r0-r3, lr}
+
+        // NaN has been ruled out, so __aeabi_cdcmple can't trap
+        // Use "it ne" + unconditional branch to guarantee a supported relocation if
+        // __aeabi_cdcmple is in a different section for some builds.
+        IT(ne)
+        bne SYMBOL_NAME(__aeabi_cdcmple)
+
+#if defined(USE_THUMB_2)
+        mov ip, #APSR_C
+        msr APSR_nzcvq, ip          // NaN case: C = 1, all other flags cleared
+#else
+        msr APSR_nzcvq, #APSR_C     // NaN case: C = 1, all other flags cleared
+#endif
+        JMP(lr)
+#endif
+END_COMPILERRT_FUNCTION(__aeabi_cdcmpeq)
+
+
+// void __aeabi_cdcmple(double a, double b) {
+//   if (__aeabi_dcmplt(a, b)) {
+//     Z = 0; C = 0;
+//   } else if (__aeabi_dcmpeq(a, b)) {
+//     Z = 1; C = 1;
+//   } else {
+//     Z = 0; C = 1;
+//   }
+// }
+// The result is reported only in the APSR flags; r0-r3 are restored on exit.
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmple)
+        // Per the RTABI, this function must preserve r0-r11.
+        // Save lr in the same instruction for compactness
+        push {r0-r3, lr}
+
+        bl SYMBOL_NAME(__aeabi_dcmplt)
+        cmp r0, #1                  // EQ when a < b
+#if defined(USE_THUMB_1)
+        bne 1f
+        // Z = 0, C = 0
+        movs r0, #1
+        lsls r0, r0, #1             // result 2 (Z = 0); bit shifted out is 0 (C = 0)
+        pop {r0-r3, pc}
+1:
+        mov r0, sp
+        ldm r0, {r0-r3}             // reload the saved operands from the stack
+        bl SYMBOL_NAME(__aeabi_dcmpeq)
+        cmp r0, #1                  // EQ when a == b
+        bne 2f
+        // Z = 1, C = 1
+        movs r0, #2
+        lsls r0, r0, #31            // result 0 (Z = 1); last bit shifted out is 1 (C = 1)
+        pop {r0-r3, pc}
+2:
+        // Z = 0, C = 1
+        movs r0, #0xF
+        lsls r0, r0, #31            // result is non-zero (Z = 0); last bit shifted out is 1 (C = 1)
+        pop {r0-r3, pc}
+#else
+        ITT(eq)
+        moveq ip, #0                // a < b: Z = 0, C = 0
+        beq 1f
+
+        ldm sp, {r0-r3}             // reload the saved operands from the stack
+        bl SYMBOL_NAME(__aeabi_dcmpeq)
+        cmp r0, #1                  // EQ when a == b
+        ITE(eq)
+        moveq ip, #(APSR_C | APSR_Z)
+        movne ip, #(APSR_C)
+
+1:
+        msr APSR_nzcvq, ip          // publish the chosen flag pattern
+        pop {r0-r3}
+        POP_PC()
+#endif
+END_COMPILERRT_FUNCTION(__aeabi_cdcmple)
+
+// void __aeabi_cdrcmple(double a, double b) {
+//   __aeabi_cdcmple(b, a);
+// }
+// The operands are exchanged in registers, then control branches to cdcmple.
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cdrcmple)
+        // Swap r0 and r2
+        mov ip, r0
+        mov r0, r2
+        mov r2, ip
+
+        // Swap r1 and r3
+        mov ip, r1
+        mov r1, r3
+        mov r3, ip
+
+        b SYMBOL_NAME(__aeabi_cdcmple)
+END_COMPILERRT_FUNCTION(__aeabi_cdrcmple)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmpeq_check_nan.c b/contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmpeq_check_nan.c
new file mode 100644
index 000000000000..7578433a1df7
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmpeq_check_nan.c
@@ -0,0 +1,16 @@
+//===-- lib/arm/aeabi_cdcmpeq_check_nan.c - Helper for cdcmpeq ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdint.h>
+#include "../int_lib.h"
+// Returns 1 when a or b (or both) is a NaN, and 0 otherwise.
+AEABI_RTABI __attribute__((visibility("hidden"))) int
+__aeabi_cdcmpeq_check_nan(double a, double b) {
+    return (__builtin_isnan(a) | __builtin_isnan(b)) != 0;
+}
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmp.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmp.S
new file mode 100644
index 000000000000..4b1de997687f
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmp.S
@@ -0,0 +1,140 @@
+//===-- aeabi_cfcmp.S - EABI cfcmp* implementation ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+#error big endian support not implemented
+#endif
+
+#define APSR_Z (1 << 30)
+#define APSR_C (1 << 29)
+
+// void __aeabi_cfcmpeq(float a, float b) {
+//   if (isnan(a) || isnan(b)) {
+//     Z = 0; C = 1;
+//   } else {
+//     __aeabi_cfcmple(a, b);
+//   }
+// }
+// The result is reported only in the APSR flags; r0-r3 are restored on exit.
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmpeq)
+        push {r0-r3, lr}
+        bl SYMBOL_NAME(__aeabi_cfcmpeq_check_nan)
+        cmp r0, #1                  // EQ when the helper reported a NaN operand
+#if defined(USE_THUMB_1)
+        beq 1f
+        // NaN has been ruled out, so __aeabi_cfcmple can't trap
+        mov r0, sp
+        ldm r0, {r0-r3}             // reload the saved operands from the stack
+        bl SYMBOL_NAME(__aeabi_cfcmple)
+        pop {r0-r3, pc}             // the flags set by the callee are returned as-is
+1:
+        // Z = 0, C = 1
+        movs r0, #0xF
+        lsls r0, r0, #31            // result is non-zero (Z = 0); last bit shifted out is 1 (C = 1)
+        pop {r0-r3, pc}
+#else
+        pop {r0-r3, lr}
+
+        // NaN has been ruled out, so __aeabi_cfcmple can't trap
+        // Use "it ne" + unconditional branch to guarantee a supported relocation if
+        // __aeabi_cfcmple is in a different section for some builds.
+        IT(ne)
+        bne SYMBOL_NAME(__aeabi_cfcmple)
+
+#if defined(USE_THUMB_2)
+        mov ip, #APSR_C
+        msr APSR_nzcvq, ip          // NaN case: C = 1, all other flags cleared
+#else
+        msr APSR_nzcvq, #APSR_C     // NaN case: C = 1, all other flags cleared
+#endif
+        JMP(lr)
+#endif
+END_COMPILERRT_FUNCTION(__aeabi_cfcmpeq)
+
+
+// void __aeabi_cfcmple(float a, float b) {
+//   if (__aeabi_fcmplt(a, b)) {
+//     Z = 0; C = 0;
+//   } else if (__aeabi_fcmpeq(a, b)) {
+//     Z = 1; C = 1;
+//   } else {
+//     Z = 0; C = 1;
+//   }
+// }
+// The result is reported only in the APSR flags; r0-r3 are restored on exit.
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmple)
+        // Per the RTABI, this function must preserve r0-r11.
+        // Save lr in the same instruction for compactness
+        push {r0-r3, lr}
+
+        bl SYMBOL_NAME(__aeabi_fcmplt)
+        cmp r0, #1                  // EQ when a < b
+#if defined(USE_THUMB_1)
+        bne 1f
+        // Z = 0, C = 0
+        movs r0, #1
+        lsls r0, r0, #1             // result 2 (Z = 0); bit shifted out is 0 (C = 0)
+        pop {r0-r3, pc}
+1:
+        mov r0, sp
+        ldm r0, {r0-r3}             // reload the saved operands from the stack
+        bl SYMBOL_NAME(__aeabi_fcmpeq)
+        cmp r0, #1                  // EQ when a == b
+        bne 2f
+        // Z = 1, C = 1
+        movs r0, #2
+        lsls r0, r0, #31            // result 0 (Z = 1); last bit shifted out is 1 (C = 1)
+        pop {r0-r3, pc}
+2:
+        // Z = 0, C = 1
+        movs r0, #0xF
+        lsls r0, r0, #31            // result is non-zero (Z = 0); last bit shifted out is 1 (C = 1)
+        pop {r0-r3, pc}
+#else
+        ITT(eq)
+        moveq ip, #0                // a < b: Z = 0, C = 0
+        beq 1f
+
+        ldm sp, {r0-r3}             // reload the saved operands from the stack
+        bl SYMBOL_NAME(__aeabi_fcmpeq)
+        cmp r0, #1                  // EQ when a == b
+        ITE(eq)
+        moveq ip, #(APSR_C | APSR_Z)
+        movne ip, #(APSR_C)
+
+1:
+        msr APSR_nzcvq, ip          // publish the chosen flag pattern
+        pop {r0-r3}
+        POP_PC()
+#endif
+END_COMPILERRT_FUNCTION(__aeabi_cfcmple)
+
+// void __aeabi_cfrcmple(float a, float b) {
+//   __aeabi_cfcmple(b, a);
+// }
+// The operands are exchanged in registers, then control branches to cfcmple.
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_cfrcmple)
+        // Swap r0 and r1
+        mov ip, r0
+        mov r0, r1
+        mov r1, ip
+
+        b SYMBOL_NAME(__aeabi_cfcmple)
+END_COMPILERRT_FUNCTION(__aeabi_cfrcmple)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmpeq_check_nan.c b/contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmpeq_check_nan.c
new file mode 100644
index 000000000000..43dde9a49597
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmpeq_check_nan.c
@@ -0,0 +1,16 @@
+//===-- lib/arm/aeabi_cfcmpeq_check_nan.c - Helper for cfcmpeq ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdint.h>
+#include "../int_lib.h"
+// Returns 1 when a or b (or both) is a NaN, and 0 otherwise.
+AEABI_RTABI __attribute__((visibility("hidden"))) int
+__aeabi_cfcmpeq_check_nan(float a, float b) {
+    return (__builtin_isnan(a) | __builtin_isnan(b)) != 0;
+}
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_dcmp.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_dcmp.S
new file mode 100644
index 000000000000..9fa78b461248
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_dcmp.S
@@ -0,0 +1,52 @@
+//===-- aeabi_dcmp.S - EABI dcmp* implementation ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// int __aeabi_dcmp{eq,lt,le,ge,gt}(double a, double b) {
+//   int result = __{eq,lt,le,ge,gt}df2(a, b);
+//   if (result {==,<,<=,>=,>} 0) {
+//     return 1;
+//   } else {
+//     return 0;
+//   }
+// }
+// When COMPILER_RT_ARMHF_TARGET is defined, r0-r3 are first copied into d0/d1.
+#if defined(COMPILER_RT_ARMHF_TARGET)
+#  define CONVERT_DCMP_ARGS_TO_DF2_ARGS                    \
+        vmov      d0, r0, r1                     SEPARATOR \
+        vmov      d1, r2, r3
+#else
+#  define CONVERT_DCMP_ARGS_TO_DF2_ARGS
+#endif
+// Emits __aeabi_dcmp<cond>: calls __<cond>df2, then returns 1 if b<cond> is taken on (result cmp 0), else 0.
+#define DEFINE_AEABI_DCMP(cond)                            \
+        .syntax unified                          SEPARATOR \
+        .p2align 2                               SEPARATOR \
+DEFINE_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond)           \
+        push      { r4, lr }                     SEPARATOR \
+        CONVERT_DCMP_ARGS_TO_DF2_ARGS            SEPARATOR \
+        bl        SYMBOL_NAME(__ ## cond ## df2) SEPARATOR \
+        cmp       r0, #0                         SEPARATOR \
+        b ## cond 1f                             SEPARATOR \
+        movs      r0, #0                         SEPARATOR \
+        pop       { r4, pc }                     SEPARATOR \
+1:                                               SEPARATOR \
+        movs      r0, #1                         SEPARATOR \
+        pop       { r4, pc }                     SEPARATOR \
+END_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond)
+// Instantiate the five comparison entry points.
+DEFINE_AEABI_DCMP(eq)
+DEFINE_AEABI_DCMP(lt)
+DEFINE_AEABI_DCMP(le)
+DEFINE_AEABI_DCMP(ge)
+DEFINE_AEABI_DCMP(gt)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_div0.c b/contrib/compiler-rt/lib/builtins/arm/aeabi_div0.c
new file mode 100644
index 000000000000..dc3031326e37
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_div0.c
@@ -0,0 +1,45 @@
+/* ===-- aeabi_div0.c - ARM Runtime ABI support routines for compiler-rt ---===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements the division by zero helper routines as specified by the
+ * Run-time ABI for the ARM Architecture.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+/*
+ * RTABI 4.3.2 - Division by zero
+ *
+ * The *div0 functions:
+ * - Return the value passed to them as a parameter
+ * - Or, return a fixed value defined by the execution environment (such as 0)
+ * - Or, raise a signal (often SIGFPE) or throw an exception, and do not return
+ *
+ * An application may provide its own implementations of the *div0 functions to
+ * force a particular behaviour from the *div and *divmod functions called out
+ * of line.
+ */
+
+#include "../int_lib.h"
+
+/* provide an unused declaration to pacify pedantic compilation */
+extern unsigned char declaration;
+/* Weak, hidden default handlers: each returns the value it was passed. */
+#if defined(__ARM_EABI__)
+AEABI_RTABI int __attribute__((weak)) __attribute__((visibility("hidden")))
+__aeabi_idiv0(int return_value) {
+  return return_value;
+}
+
+AEABI_RTABI long long __attribute__((weak)) __attribute__((visibility("hidden")))
+__aeabi_ldiv0(long long return_value) {
+  return return_value;
+}
+#endif
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_drsub.c b/contrib/compiler-rt/lib/builtins/arm/aeabi_drsub.c
new file mode 100644
index 000000000000..1254886086fb
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_drsub.c
@@ -0,0 +1,19 @@
+//===-- lib/arm/aeabi_drsub.c - Double-precision subtraction --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "../fp_lib.h"
+
+// Prototype of the AEABI subtraction routine this wrapper forwards to.
+AEABI_RTABI fp_t __aeabi_dsub(fp_t minuend, fp_t subtrahend);
+
+// Reverse subtraction: yields b - a by calling __aeabi_dsub with swapped operands.
+AEABI_RTABI fp_t __aeabi_drsub(fp_t a, fp_t b) {
+    return __aeabi_dsub(b, a);
+}
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_fcmp.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_fcmp.S
new file mode 100644
index 000000000000..ea5b96c21d57
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_fcmp.S
@@ -0,0 +1,52 @@
+//===-- aeabi_fcmp.S - EABI fcmp* implementation ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// int __aeabi_fcmp{eq,lt,le,ge,gt}(float a, float b) {
+//   int result = __{eq,lt,le,ge,gt}sf2(a, b);
+//   if (result {==,<,<=,>=,>} 0) {
+//     return 1;
+//   } else {
+//     return 0;
+//   }
+// }
+// When COMPILER_RT_ARMHF_TARGET is defined, r0 and r1 are first copied into s0/s1.
+#if defined(COMPILER_RT_ARMHF_TARGET)
+#  define CONVERT_FCMP_ARGS_TO_SF2_ARGS                    \
+        vmov      s0, r0                         SEPARATOR \
+        vmov      s1, r1
+#else
+#  define CONVERT_FCMP_ARGS_TO_SF2_ARGS
+#endif
+// Emits __aeabi_fcmp<cond>: calls __<cond>sf2, then returns 1 if b<cond> is taken on (result cmp 0), else 0.
+#define DEFINE_AEABI_FCMP(cond)                            \
+        .syntax unified                          SEPARATOR \
+        .p2align 2                               SEPARATOR \
+DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond)           \
+        push      { r4, lr }                     SEPARATOR \
+        CONVERT_FCMP_ARGS_TO_SF2_ARGS            SEPARATOR \
+        bl        SYMBOL_NAME(__ ## cond ## sf2) SEPARATOR \
+        cmp       r0, #0                         SEPARATOR \
+        b ## cond 1f                             SEPARATOR \
+        movs      r0, #0                         SEPARATOR \
+        pop       { r4, pc }                     SEPARATOR \
+1:                                               SEPARATOR \
+        movs      r0, #1                         SEPARATOR \
+        pop       { r4, pc }                     SEPARATOR \
+END_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond)
+// Instantiate the five comparison entry points.
+DEFINE_AEABI_FCMP(eq)
+DEFINE_AEABI_FCMP(lt)
+DEFINE_AEABI_FCMP(le)
+DEFINE_AEABI_FCMP(ge)
+DEFINE_AEABI_FCMP(gt)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_frsub.c b/contrib/compiler-rt/lib/builtins/arm/aeabi_frsub.c
new file mode 100644
index 000000000000..34f2303745bc
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_frsub.c
@@ -0,0 +1,19 @@
+//===-- lib/arm/aeabi_frsub.c - Single-precision subtraction --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "../fp_lib.h"
+
+// Prototype of the AEABI subtraction routine this wrapper forwards to.
+AEABI_RTABI fp_t __aeabi_fsub(fp_t minuend, fp_t subtrahend);
+
+// Reverse subtraction: yields b - a by calling __aeabi_fsub with swapped operands.
+AEABI_RTABI fp_t __aeabi_frsub(fp_t a, fp_t b) {
+    return __aeabi_fsub(b, a);
+}
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_idivmod.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_idivmod.S
new file mode 100644
index 000000000000..9c9c80ab5a7b
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_idivmod.S
@@ -0,0 +1,51 @@
+//===-- aeabi_idivmod.S - EABI idivmod implementation ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// struct { int quot, int rem} __aeabi_idivmod(int numerator, int denominator) {
+//   int rem, quot;
+//   quot = __divmodsi4(numerator, denominator, &rem);
+//   return {quot, rem};
+// }
+// The quotient is returned in r0 and the remainder in r1.
+#if defined(__MINGW32__)
+#define __aeabi_idivmod __rt_sdiv
+#endif
+// With __MINGW32__ the routine is emitted under the name __rt_sdiv.
+        .syntax unified
+        .text
+        DEFINE_CODE_STATE
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod)
+#if defined(USE_THUMB_1)
+        push    {r0, r1, lr} // keep numerator, denominator and return address
+        bl      SYMBOL_NAME(__divsi3)
+        pop     {r1, r2, r3} // now r0 = quot, r1 = num, r2 = denom, r3 = return address
+        muls    r2, r0, r2   // r2 = quot * denom
+        subs    r1, r1, r2   // r1 = num - quot * denom, i.e. the remainder
+        JMP     (r3)
+#else  // defined(USE_THUMB_1)
+        push    { lr }
+        sub     sp, sp, #4   // reserve one word on the stack for the remainder
+        mov     r2, sp       // third argument: address of that word
+#if defined(__MINGW32__)
+        mov     r3, r0       // exchange r0 and r1 (via r3) before the call
+        mov     r0, r1
+        mov     r1, r3
+#endif
+        bl      SYMBOL_NAME(__divmodsi4)
+        ldr     r1, [sp]     // r1 = remainder read back from the reserved word
+        add     sp, sp, #4
+        pop     { pc }
+#endif //  defined(USE_THUMB_1)
+END_COMPILERRT_FUNCTION(__aeabi_idivmod)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_ldivmod.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_ldivmod.S
new file mode 100644
index 000000000000..038ae5d723a3
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_ldivmod.S
@@ -0,0 +1,46 @@
+//===-- aeabi_ldivmod.S - EABI ldivmod implementation ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// struct { int64_t quot, int64_t rem}
+//        __aeabi_ldivmod(int64_t numerator, int64_t denominator) {
+//   int64_t rem, quot;
+//   quot = __divmoddi4(numerator, denominator, &rem);
+//   return {quot, rem};
+// }
+// The remainder is loaded into r2/r3; r0/r1 are left as __divmoddi4 returned them.
+#if defined(__MINGW32__)
+#define __aeabi_ldivmod __rt_sdiv64
+#endif
+// With __MINGW32__ the routine is emitted under the name __rt_sdiv64.
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_ldivmod)
+        push    {r6, lr}
+        sub     sp, sp, #16      // scratch: [sp] = outgoing stack argument, [sp, #8] = 64-bit remainder
+        add     r6, sp, #8       // r6 = address of the remainder slot
+        str     r6, [sp]         // pass that address as the stack argument (&rem)
+#if defined(__MINGW32__)
+        movs    r6, r0           // exchange r0/r1 with r2/r3, using r6 as scratch
+        movs    r0, r2
+        movs    r2, r6
+        movs    r6, r1
+        movs    r1, r3
+        movs    r3, r6
+#endif
+        bl      SYMBOL_NAME(__divmoddi4)
+        ldr     r2, [sp, #8]     // r2 = low word of the remainder slot
+        ldr     r3, [sp, #12]    // r3 = high word of the remainder slot
+        add     sp, sp, #16
+        pop     {r6, pc}
+END_COMPILERRT_FUNCTION(__aeabi_ldivmod)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_memcmp.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_memcmp.S
new file mode 100644
index 000000000000..e86d6113760e
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_memcmp.S
@@ -0,0 +1,30 @@
+//===-- aeabi_memcmp.S - EABI memcmp implementation -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//  int __aeabi_memcmp(void *dest, void *src, size_t n) { return memcmp(dest, src, n); }
+// Arguments and memcmp's result pass through r0-r2 / r0 untouched.
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memcmp)
+#ifdef USE_THUMB_1
+        push    {r7, lr}        // save lr; r7 is pushed with it (presumably to keep sp 8-byte aligned - TODO confirm)
+        bl      memcmp
+        pop     {r7, pc}        // return memcmp's result to our caller
+#else
+        b       memcmp          // tail call: memcmp returns directly to our caller
+#endif
+END_COMPILERRT_FUNCTION(__aeabi_memcmp)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp4, __aeabi_memcmp)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp8, __aeabi_memcmp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_memcpy.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_memcpy.S
new file mode 100644
index 000000000000..e83c5fd4dbb3
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_memcpy.S
@@ -0,0 +1,30 @@
+//===-- aeabi_memcpy.S - EABI memcpy implementation -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//  void __aeabi_memcpy(void *dest, void *src, size_t n) { memcpy(dest, src, n); }
+
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memcpy)
+#ifdef USE_THUMB_1
+        push    {r7, lr}                // Thumb-1 build: make a real call instead of a tail branch
+        bl      memcpy                  // argument registers are passed through untouched
+        pop     {r7, pc}                // return to our caller
+#else
+        b       memcpy                  // tail call: memcpy returns directly to our caller
+#endif
+END_COMPILERRT_FUNCTION(__aeabi_memcpy)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy4, __aeabi_memcpy) // the 4 and 8 variants share this code
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy8, __aeabi_memcpy)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_memmove.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_memmove.S
new file mode 100644
index 000000000000..ee28300e46f2
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_memmove.S
@@ -0,0 +1,29 @@
+//===-- aeabi_memmove.S - EABI memmove implementation --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//  void __aeabi_memmove(void *dest, void *src, size_t n) { memmove(dest, src, n); }
+
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memmove)
+#ifdef USE_THUMB_1
+        push    {r7, lr}                // Thumb-1 build: make a real call instead of a tail branch
+        bl      memmove                 // argument registers are passed through untouched
+        pop     {r7, pc}                // return to our caller
+#else
+        b       memmove                 // tail call: memmove returns directly to our caller
+#endif
+END_COMPILERRT_FUNCTION(__aeabi_memmove)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove4, __aeabi_memmove) // the 4 and 8 variants share this code
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove8, __aeabi_memmove)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_memset.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_memset.S
new file mode 100644
index 000000000000..0a678d7627e7
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_memset.S
@@ -0,0 +1,50 @@
+//===-- aeabi_memset.S - EABI memset implementation -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//  void __aeabi_memset(void *dest, size_t n, int c) { memset(dest, c, n); }
+//  void __aeabi_memclr(void *dest, size_t n) { __aeabi_memset(dest, n, 0); }
+
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memset)
+        mov     r3, r1                  // r3 = n (temporary copy)
+        mov     r1, r2                  // r1 = c
+        mov     r2, r3                  // r2 = n, giving memset's (dest, c, n) argument order
+#ifdef USE_THUMB_1
+        push    {r7, lr}                // Thumb-1 build: make a real call instead of a tail branch
+        bl      memset
+        pop     {r7, pc}                // return to our caller
+#else
+        b       memset                  // tail call: memset returns directly to our caller
+#endif
+END_COMPILERRT_FUNCTION(__aeabi_memset)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset4, __aeabi_memset) // the 4 and 8 variants share this code
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset8, __aeabi_memset)
+
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memclr)
+        mov     r2, r1                  // r2 = n
+        movs    r1, #0                  // fill value 0, giving memset(dest, 0, n)
+#ifdef USE_THUMB_1
+        push    {r7, lr}                // Thumb-1 build: make a real call instead of a tail branch
+        bl      memset
+        pop     {r7, pc}                // return to our caller
+#else
+        b       memset                  // tail call: memset returns directly to our caller
+#endif
+END_COMPILERRT_FUNCTION(__aeabi_memclr)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr4, __aeabi_memclr) // the 4 and 8 variants share this code
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr8, __aeabi_memclr)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_uidivmod.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_uidivmod.S
new file mode 100644
index 000000000000..88a4a6d8bc12
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_uidivmod.S
@@ -0,0 +1,58 @@
+//===-- aeabi_uidivmod.S - EABI uidivmod implementation -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// struct { unsigned quot, unsigned rem}
+//        __aeabi_uidivmod(unsigned numerator, unsigned denominator) {
+//   unsigned rem, quot;
+//   quot = __udivmodsi4(numerator, denominator, &rem);
+//   return {quot, rem};
+// }
+
+#if defined(__MINGW32__)
+#define __aeabi_uidivmod __rt_udiv
+#endif
+
+        .syntax unified
+        .text
+        DEFINE_CODE_STATE
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod)
+#if defined(USE_THUMB_1)
+        cmp     r0, r1
+        bcc     LOCAL_LABEL(case_denom_larger) // numerator < denominator (unsigned): skip the divide
+        push    {r0, r1, lr}            // keep numerator, denominator and return address across the call
+        bl      SYMBOL_NAME(__aeabi_uidiv) // r0 = quotient
+        pop     {r1, r2, r3}            // r1 = numerator, r2 = denominator, r3 = return address
+        muls    r2, r0, r2 // r2 = quot * denom
+        subs    r1, r1, r2              // r1 = numerator - quot * denom = remainder
+        JMP     (r3)
+LOCAL_LABEL(case_denom_larger):
+        movs    r1, r0                  // remainder = numerator
+        movs    r0, #0                  // quotient = 0
+        JMP     (lr)
+#else // defined(USE_THUMB_1)
+        push    { lr }
+        sub     sp, sp, #4              // one-word stack slot for the remainder
+        mov     r2, sp                  // third argument = address of that slot
+#if defined(__MINGW32__)
+        mov     r3, r0                  // MinGW build: swap r0 and r1 before the call
+        mov     r0, r1
+        mov     r1, r3
+#endif
+        bl      SYMBOL_NAME(__udivmodsi4) // r0 = quotient, remainder stored through r2
+        ldr     r1, [sp]                // r1 = remainder
+        add     sp, sp, #4
+        pop     { pc }
+#endif
+END_COMPILERRT_FUNCTION(__aeabi_uidivmod)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_uldivmod.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_uldivmod.S
new file mode 100644
index 000000000000..be343b6bc826
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_uldivmod.S
@@ -0,0 +1,46 @@
+//===-- aeabi_uldivmod.S - EABI uldivmod implementation -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// struct { uint64_t quot, uint64_t rem}
+//        __aeabi_uldivmod(uint64_t numerator, uint64_t denominator) {
+//   uint64_t rem, quot;
+//   quot = __udivmoddi4(numerator, denominator, &rem);
+//   return {quot, rem};
+// }
+
+#if defined(__MINGW32__)
+#define __aeabi_uldivmod __rt_udiv64
+#endif
+
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_uldivmod)
+        push	{r6, lr}                // r6 is used as scratch below; lr is needed after the bl
+        sub	sp, sp, #16             // [sp] = stack argument, [sp+8..sp+15] = 64-bit remainder slot
+        add	r6, sp, #8
+        str	r6, [sp]                // pass &rem as the stack argument of __udivmoddi4
+#if defined(__MINGW32__)
+        movs    r6, r0                  // MinGW build: swap the r0:r1 and r2:r3 pairs before the call
+        movs    r0, r2
+        movs    r2, r6
+        movs    r6, r1
+        movs    r1, r3
+        movs    r3, r6
+#endif
+        bl	SYMBOL_NAME(__udivmoddi4) // quotient comes back in r0:r1
+        ldr	r2, [sp, #8]            // r2:r3 = remainder stored through the pointer
+        ldr	r3, [sp, #12]
+        add	sp, sp, #16
+        pop	{r6, pc}                // restore r6 and return
+END_COMPILERRT_FUNCTION(__aeabi_uldivmod)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/bswapdi2.S b/contrib/compiler-rt/lib/builtins/arm/bswapdi2.S
new file mode 100644
index 000000000000..e9db8bac7994
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/bswapdi2.S
@@ -0,0 +1,44 @@
+//===------- bswapdi2 - Implement bswapdi2 --------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+	.syntax unified
+	.text
+	DEFINE_CODE_STATE
+
+//
+// extern uint64_t __bswapdi2(uint64_t);
+//
+// Reverse all the bytes in a 64-bit integer.
+//
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__bswapdi2)
+#if __ARM_ARCH < 6
+    // Architectures before ARMv6 do not have the "rev" instruction.
+    // r2 = rev(r0)
+    eor r2, r0, r0, ror #16
+    bic r2, r2, #0xff0000
+    mov r2, r2, lsr #8
+    eor r2, r2, r0, ror #8
+    // r0 = rev(r1)
+    eor r0, r1, r1, ror #16
+    bic r0, r0, #0xff0000
+    mov r0, r0, lsr #8
+    eor r0, r0, r1, ror #8
+#else
+    rev r2, r0  // r2 = rev(r0)
+    rev r0, r1  // r0 = rev(r1)
+#endif
+    mov r1, r2  // r1 = r2 = rev(r0)
+    JMP(lr)     // result: r0 = rev(old r1), r1 = rev(old r0)
+END_COMPILERRT_FUNCTION(__bswapdi2)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/bswapsi2.S b/contrib/compiler-rt/lib/builtins/arm/bswapsi2.S
new file mode 100644
index 000000000000..1f6eed5c1bbf
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/bswapsi2.S
@@ -0,0 +1,36 @@
+//===------- bswapsi2 - Implement bswapsi2 --------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+	.syntax unified
+	.text
+	DEFINE_CODE_STATE
+
+//
+// extern uint32_t __bswapsi2(uint32_t);
+//
+// Reverse all the bytes in a 32-bit integer.
+//
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__bswapsi2)
+#if __ARM_ARCH < 6
+    // Architectures before ARMv6 do not have the "rev" instruction.
+ 	eor	r1, r0, r0, ror #16
+ 	bic	r1, r1, #0xff0000
+ 	mov	r1, r1, lsr #8
+ 	eor	r0, r1, r0, ror #8	// r0 = byte-reversed input; r1 was scratch
+#else
+    rev r0, r0  // r0 = byte-reversed input
+#endif
+    JMP(lr)
+END_COMPILERRT_FUNCTION(__bswapsi2)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/chkstk.S b/contrib/compiler-rt/lib/builtins/arm/chkstk.S
new file mode 100644
index 000000000000..e3002105897e
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/chkstk.S
@@ -0,0 +1,34 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// __chkstk routine
+// This routine is windows specific.
+// http://msdn.microsoft.com/en-us/library/ms648426.aspx
+
+// This clobbers the register r12, and the condition codes, and uses r5 and r6
+// as temporaries by saving them on the stack and restoring them afterwards.
+// On return r4 holds the size in bytes and the stack pointer is unchanged.
+
+//      movw    r4,  #256 // Stack size to probe, in units of 4 bytes
+//      bl      __chkstk
+//      sub.w   sp, sp, r4
+
+#define PAGE_SIZE 4096
+
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__chkstk)
+        lsl    r4,  r4,  #2             // r4 = size in bytes (input is in 4-byte units)
+        mov    r12, sp                  // r12 = probe address, starting at the caller's sp
+        push   {r5, r6}                 // r5 and r6 are temporaries, restored before returning
+        mov    r5,  r4                  // r5 = bytes still to be probed
+1:
+        sub    r12, r12, #PAGE_SIZE     // move the probe address down one page
+        subs   r5,  r5,  #PAGE_SIZE     // sets the flags tested by the bgt below
+        ldr    r6,  [r12]               // touch the page; the loaded value is discarded
+        bgt    1b                       // repeat while bytes remain (ldr does not change the flags)
+
+        pop    {r5, r6}
+        bx     lr                       // r4 stays in bytes for the caller's "sub.w sp, sp, r4"
+END_COMPILERRT_FUNCTION(__chkstk)
diff --git a/contrib/compiler-rt/lib/builtins/arm/clzdi2.S b/contrib/compiler-rt/lib/builtins/arm/clzdi2.S
new file mode 100644
index 000000000000..fc03b385cdfa
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/clzdi2.S
@@ -0,0 +1,93 @@
+/* ===-- clzdi2.S - Implement __clzdi2 -------------------------------------===
+ *
+ *               The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements count leading zeros for 64bit arguments.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+#include "../assembly.h"
+
+	.syntax unified
+	.text
+	DEFINE_CODE_STATE
+
+	.p2align	2
+DEFINE_COMPILERRT_FUNCTION(__clzdi2)
+#ifdef __ARM_FEATURE_CLZ
+#ifdef __ARMEB__
+	cmp	r0, 0			/* big-endian: r0 is treated as the upper word */
+	itee ne
+	clzne	r0, r0			/* upper word non-zero: count its leading zeros */
+	clzeq	r0, r1			/* upper word zero: count in the other word... */
+	addeq	r0, r0, 32		/* ...plus the 32 zero bits above it */
+#else
+	cmp	r1, 0			/* little-endian: r1 is treated as the upper word */
+	itee ne
+	clzne	r0, r1			/* upper word non-zero: count its leading zeros */
+	clzeq	r0, r0			/* upper word zero: count in the other word... */
+	addeq	r0, r0, 32		/* ...plus the 32 zero bits above it */
+#endif
+	JMP(lr)
+#else
+	/* Assumption: n != 0 */
+
+	/*
+	 * r0: n
+	 * r1: upper half of n, overwritten after check
+	 * r1: count of leading zeros in n + 1
+	 * r2: scratch register for shifted r0
+	 */
+#ifdef __ARMEB__
+	cmp	r0, 0			/* big-endian: r0 is treated as the upper word */
+	moveq	r0, r1			/* upper word zero: continue with the other word */
+#else
+	cmp	r1, 0			/* little-endian: r1 is treated as the upper word */
+	movne	r0, r1			/* upper word non-zero: continue with it */
+#endif
+	movne	r1, 1			/* upper word non-zero: count starts at 1 */
+	moveq	r1, 33			/* upper word zero: count starts at 32 + 1 */
+
+	/*
+	 * Basic block:
+	 * if ((r0 >> SHIFT) == 0)
+	 *   r1 += SHIFT;
+	 * else
+	 *   r0 >>= SHIFT;
+	 * for descending powers of two as SHIFT.
+	 */
+#define BLOCK(shift) \
+	lsrs	r2, r0, shift; \
+	movne	r0, r2; \
+	addeq	r1, shift \
+
+	BLOCK(16)
+	BLOCK(8)
+	BLOCK(4)
+	BLOCK(2)
+
+	/*
+	 * The basic block invariants at this point are (r0 >> 2) == 0 and
+	 * r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1.
+	 *
+	 * r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1)
+	 * ---+----------------+----------------+------------+--------------
+	 * 1  | 1              | 0              | 0          | 1
+	 * 2  | 0              | 1              | -1         | 0
+	 * 3  | 0              | 1              | -1         | 0
+	 *
+	 * The r1's initial value of 1 compensates for the 1 here.
+	 */
+	sub	r0, r1, r0, lsr #1	/* result = r1 - (r0 >> 1) */
+
+	JMP(lr)
+#endif // __ARM_FEATURE_CLZ
+END_COMPILERRT_FUNCTION(__clzdi2)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/clzsi2.S b/contrib/compiler-rt/lib/builtins/arm/clzsi2.S
new file mode 100644
index 000000000000..f2ce59c90119
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/clzsi2.S
@@ -0,0 +1,73 @@
+/* ===-- clzsi2.S - Implement __clzsi2 -------------------------------------===
+ *
+ *               The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements count leading zeros for 32bit arguments.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+#include "../assembly.h"
+
+	.syntax unified
+	.text
+	DEFINE_CODE_STATE
+
+	.p2align	2
+DEFINE_COMPILERRT_FUNCTION(__clzsi2)
+#ifdef __ARM_FEATURE_CLZ
+	clz	r0, r0			/* hardware count-leading-zeros is available */
+	JMP(lr)
+#else
+	/* Assumption: n != 0 */
+
+	/*
+	 * r0: n
+	 * r1: count of leading zeros in n + 1
+	 * r2: scratch register for shifted r0
+	 */
+	mov	r1, 1			/* start at 1; the final subtraction compensates */
+
+	/*
+	 * Basic block:
+	 * if ((r0 >> SHIFT) == 0)
+	 *   r1 += SHIFT;
+	 * else
+	 *   r0 >>= SHIFT;
+	 * for descending powers of two as SHIFT.
+	 */
+
+#define BLOCK(shift) \
+	lsrs	r2, r0, shift; \
+	movne	r0, r2; \
+	addeq	r1, shift \
+
+	BLOCK(16)
+	BLOCK(8)
+	BLOCK(4)
+	BLOCK(2)
+
+	/*
+	 * The basic block invariants at this point are (r0 >> 2) == 0 and
+	 * r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1.
+	 *
+	 * r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1)
+	 * ---+----------------+----------------+------------+--------------
+	 * 1  | 1              | 0              | 0          | 1
+	 * 2  | 0              | 1              | -1         | 0
+	 * 3  | 0              | 1              | -1         | 0
+	 *
+	 * The r1's initial value of 1 compensates for the 1 here.
+	 */
+	sub	r0, r1, r0, lsr #1	/* result = r1 - (r0 >> 1) */
+
+	JMP(lr)
+#endif // __ARM_FEATURE_CLZ
+END_COMPILERRT_FUNCTION(__clzsi2)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/comparesf2.S b/contrib/compiler-rt/lib/builtins/arm/comparesf2.S
new file mode 100644
index 000000000000..c6c4cc067f07
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/comparesf2.S
@@ -0,0 +1,296 @@
+//===-- comparesf2.S - Implement single-precision soft-float comparisons --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the following soft-float comparison routines:
+//
+//   __eqsf2   __gesf2   __unordsf2
+//   __lesf2   __gtsf2
+//   __ltsf2
+//   __nesf2
+//
+// The semantics of the routines grouped in each column are identical, so there
+// is a single implementation for each, with multiple names.
+//
+// The routines behave as follows:
+//
+//   __lesf2(a,b) returns -1 if a < b
+//                         0 if a == b
+//                         1 if a > b
+//                         1 if either a or b is NaN
+//
+//   __gesf2(a,b) returns -1 if a < b
+//                         0 if a == b
+//                         1 if a > b
+//                        -1 if either a or b is NaN
+//
+//   __unordsf2(a,b) returns 0 if both a and b are numbers
+//                           1 if either a or b is NaN
+//
+// Note that __lesf2( ) and __gesf2( ) are identical except in their handling of
+// NaN values.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+    .syntax unified
+    .text
+    DEFINE_CODE_STATE
+
+@ int __eqsf2(float a, float b)
+
+    .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__eqsf2)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+    vmov r0, s0                 // hard-float build: fetch a and b from s0/s1
+    vmov r1, s1
+#endif
+    // Make copies of a and b with the sign bit shifted off the top.  These will
+    // be used to detect zeros and NaNs.
+#if defined(USE_THUMB_1)
+    push    {r6, lr}            // r6 is the Thumb-1 scratch register (r12 is used otherwise)
+    lsls    r2,         r0, #1
+    lsls    r3,         r1, #1
+#else
+    mov     r2,         r0, lsl #1
+    mov     r3,         r1, lsl #1
+#endif
+
+    // We do the comparison in three stages (ignoring NaN values for the time
+    // being).  First, we orr the absolute values of a and b; this sets the Z
+    // flag if both a and b are zero (of either sign).  The shift of r3 doesn't
+    // affect this at all, but it *does* make sure that the C flag is clear for
+    // the subsequent operations.
+#if defined(USE_THUMB_1)
+    lsrs    r6,     r3, #1
+    orrs    r6,     r2
+#else
+    orrs    r12,    r2, r3, lsr #1
+#endif
+    // Next, we check if a and b have the same or different signs.  If they have
+    // opposite signs, this eor will set the N flag.
+#if defined(USE_THUMB_1)
+    beq     1f
+    movs    r6,     r0
+    eors    r6,     r1
+1:
+#else
+    it ne
+    eorsne  r12,    r0, r1
+#endif
+
+    // If a and b are equal (either both zeros or bit identical; again, we're
+    // ignoring NaNs for now), this subtract will zero out r0.  If they have the
+    // same sign, the flags are updated as they would be for a comparison of the
+    // absolute values of a and b.
+#if defined(USE_THUMB_1)
+    bmi     1f
+    subs    r0,     r2, r3
+1:
+#else
+    it pl
+    subspl  r0,     r2, r3
+#endif
+
+    // If a is smaller in magnitude than b and both have the same sign, place
+    // the negation of the sign of b in r0.  Thus, if both are negative and
+    // a > b, this sets r0 to 0; if both are positive and a < b, this sets
+    // r0 to -1.
+    //
+    // This is also done if a and b have opposite signs and are not both zero,
+    // because in that case the subtract was not performed and the C flag is
+    // still clear from the shift argument in orrs; if a is positive and b
+    // negative, this places 0 in r0; if a is negative and b positive, -1 is
+    // placed in r0.
+#if defined(USE_THUMB_1)
+    bhs     1f
+    // Here if a and b have the same sign and absA < absB, the result is thus
+    // b < 0 ? 1 : -1. Same if a and b have the opposite sign (ignoring Nan).
+    movs    r0,         #1
+    lsrs    r1,         #31     // r1 = sign bit of b; Z is set when b is non-negative
+    bne     LOCAL_LABEL(CHECK_NAN)
+    negs    r0,         r0
+    b       LOCAL_LABEL(CHECK_NAN)
+1:
+#else
+    it lo
+    mvnlo   r0,         r1, asr #31
+#endif
+
+    // If a is greater in magnitude than b and both have the same sign, place
+    // the sign of b in r0.  Thus, if both are negative and a < b, -1 is placed
+    // in r0, which is the desired result.  Conversely, if both are positive
+    // and a > b, zero is placed in r0.
+#if defined(USE_THUMB_1)
+    bls     1f
+    // Here both have the same sign and absA > absB.
+    movs    r0,         #1
+    lsrs    r1,         #31     // r1 = sign bit of b; Z is set when b is non-negative
+    beq     LOCAL_LABEL(CHECK_NAN)
+    negs    r0, r0
+1:
+#else
+    it hi
+    movhi   r0,         r1, asr #31
+#endif
+
+    // If you've been keeping track, at this point r0 contains -1 if a < b and
+    // 0 if a >= b.  All that remains to be done is to set it to 1 if a > b.
+    // If a == b, then the Z flag is set, so we can get the correct final value
+    // into r0 by simply or'ing with 1 if Z is clear.
+    // For Thumb-1, the branches above have already left the final -1/0/1 in r0.
+#if !defined(USE_THUMB_1)
+    it ne
+    orrne   r0,     r0, #1
+#endif
+
+    // Finally, we need to deal with NaNs.  If either argument is NaN, replace
+    // the value in r0 with 1.
+#if defined(USE_THUMB_1)
+LOCAL_LABEL(CHECK_NAN):
+    movs    r6,         #0xff
+    lsls    r6,         #24     // r6 = 0xff000000
+    cmp     r2,         r6      // (a << 1) above 0xff000000 means a is NaN
+    bhi     1f
+    cmp     r3,         r6      // otherwise apply the same test to b
+1:
+    bls     2f
+    movs    r0,         #1
+2:
+    pop     {r6, pc}
+#else
+    cmp     r2,         #0xff000000
+    ite ls
+    cmpls   r3,         #0xff000000
+    movhi   r0,         #1
+    JMP(lr)
+#endif
+END_COMPILERRT_FUNCTION(__eqsf2)
+
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __eqsf2)
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __eqsf2)
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2, __eqsf2)
+
+@ int __gtsf2(float a, float b)
+
+    .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__gtsf2)
+    // Identical to the preceding except in that we return -1 for NaN values.
+    // Given that the two paths share so much code, one might be tempted to
+    // unify them; however, the extra code needed to do so makes the code size
+    // to performance tradeoff very hard to justify for such small functions.
+#if defined(COMPILER_RT_ARMHF_TARGET)
+    vmov r0, s0                 // hard-float build: fetch a and b from s0/s1
+    vmov r1, s1
+#endif
+#if defined(USE_THUMB_1)
+    push    {r6, lr}           // r6 is the Thumb-1 scratch register
+    lsls    r2,        r0, #1  // r2 = a with the sign bit shifted out
+    lsls    r3,        r1, #1  // r3 = b with the sign bit shifted out
+    lsrs    r6,        r3, #1
+    orrs    r6,        r2      // Z set when both a and b are zero (of either sign)
+    beq     1f
+    movs    r6,        r0
+    eors    r6,        r1      // N set when a and b have opposite signs
+1:
+    bmi     2f
+    subs    r0,        r2, r3  // same sign: compare magnitudes; r0 = 0 when equal
+2:
+    bhs     3f
+    movs    r0,        #1
+    lsrs    r1,        #31     // r1 = sign bit of b
+    bne     LOCAL_LABEL(CHECK_NAN_2)
+    negs    r0, r0
+    b       LOCAL_LABEL(CHECK_NAN_2)
+3:
+    bls     4f
+    movs    r0,         #1
+    lsrs    r1,         #31    // r1 = sign bit of b
+    beq     LOCAL_LABEL(CHECK_NAN_2)
+    negs    r0, r0
+4:
+LOCAL_LABEL(CHECK_NAN_2):
+    movs    r6,         #0xff
+    lsls    r6,         #24    // r6 = 0xff000000
+    cmp     r2,         r6
+    bhi     5f
+    cmp     r3,         r6
+5:
+    bls     6f
+    movs    r0,         #1
+    negs    r0,         r0     // either argument is NaN: return -1
+6:
+    pop     {r6, pc}
+#else
+    mov     r2,         r0, lsl #1
+    mov     r3,         r1, lsl #1
+    orrs    r12,    r2, r3, lsr #1
+    it ne
+    eorsne  r12,    r0, r1
+    it pl
+    subspl  r0,     r2, r3
+    it lo
+    mvnlo   r0,         r1, asr #31
+    it hi
+    movhi   r0,         r1, asr #31
+    it ne
+    orrne   r0,     r0, #1
+    cmp     r2,         #0xff000000
+    ite ls
+    cmpls   r3,         #0xff000000
+    movhi   r0,         #-1    // either argument is NaN: return -1
+    JMP(lr)
+#endif
+END_COMPILERRT_FUNCTION(__gtsf2)
+
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __gtsf2)
+
+@ int __unordsf2(float a, float b)
+
+    .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__unordsf2)
+
+#if defined(COMPILER_RT_ARMHF_TARGET)
+    vmov    r0,         s0      // hard-float build: fetch a and b from s0/s1
+    vmov    r1,         s1
+#endif
+    // Return 1 for NaN values, 0 otherwise.
+    lsls    r2,         r0, #1  // r2 = a with the sign bit shifted out
+    lsls    r3,         r1, #1  // r3 = b with the sign bit shifted out
+    movs    r0,         #0      // default result: 0
+#if defined(USE_THUMB_1)
+    movs    r1,         #0xff
+    lsls    r1,         #24     // r1 = 0xff000000
+    cmp     r2,         r1
+    bhi     1f
+    cmp     r3,         r1
+1:
+    bls     2f
+    movs    r0,         #1      // r2 or r3 is above 0xff000000: return 1
+2:
+#else
+    cmp     r2,         #0xff000000
+    ite ls
+    cmpls   r3,         #0xff000000
+    movhi   r0,         #1      // r2 or r3 is above 0xff000000: return 1
+#endif
+    JMP(lr)
+END_COMPILERRT_FUNCTION(__unordsf2)
+
+#if defined(COMPILER_RT_ARMHF_TARGET)
+DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmpun)
+	vmov s0, r0	// move the core-register arguments to s0/s1, where the hard-float __unordsf2 reads them
+	vmov s1, r1
+	b SYMBOL_NAME(__unordsf2)	// tail call; __unordsf2 returns directly to our caller
+END_COMPILERRT_FUNCTION(__aeabi_fcmpun)
+#else
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fcmpun, __unordsf2)
+#endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/divdf3vfp.S b/contrib/compiler-rt/lib/builtins/arm/divdf3vfp.S
new file mode 100644
index 000000000000..776ba4f24b47
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/divdf3vfp.S
@@ -0,0 +1,33 @@
+//===-- divdf3vfp.S - Implement divdf3vfp ---------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __divdf3vfp(double a, double b);
+//
+// Divides two double precision floating point numbers using the Darwin
+// calling convention where double arguments are passed in GPR pairs
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__divdf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vdiv.f64 d0, d0, d1		// hard-float build: d0 = d0 / d1
+#else
+	vmov	d6, r0, r1		// move first param from r0/r1 pair into d6
+	vmov	d7, r2, r3		// move second param from r2/r3 pair into d7
+	vdiv.f64 d5, d6, d7		// d5 = a / b
+	vmov	r0, r1, d5		// move result back to r0/r1 pair
+#endif
+	bx	lr
+END_COMPILERRT_FUNCTION(__divdf3vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/divmodsi4.S b/contrib/compiler-rt/lib/builtins/arm/divmodsi4.S
new file mode 100644
index 000000000000..8a027b741efe
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/divmodsi4.S
@@ -0,0 +1,71 @@
+/*===-- divmodsi4.S - 32-bit signed integer divide and modulus ------------===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __divmodsi4 (32-bit signed integer divide and
+ * modulus) function for the ARM architecture.  A naive digit-by-digit
+ * computation is employed for simplicity.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+#define ESTABLISH_FRAME    \
+    push   {r4-r7, lr}   ;\
+    add     r7,     sp, #12
+#define CLEAR_FRAME_AND_RETURN \
+    pop    {r4-r7, pc}
+
+	.syntax unified
+	.text
+  DEFINE_CODE_STATE
+
+@ int __divmodsi4(int dividend, int divisor, int *remainder)
+@   Calculate the quotient and remainder of the (signed) division.  The return
+@   value is the quotient, the remainder is placed in the variable.
+
+	.p2align 3
+DEFINE_COMPILERRT_FUNCTION(__divmodsi4)
+#if __ARM_ARCH_EXT_IDIV__
+	tst     r1, r1
+	beq     LOCAL_LABEL(divzero)	// divisor == 0: skip the hardware divide
+	mov 	r3, r0			// r3 = dividend, kept for the remainder computation
+	sdiv	r0, r3, r1		// r0 = quotient
+	mls 	r1, r0, r1, r3		// r1 = dividend - quotient * divisor
+	str 	r1, [r2]		// *remainder = r1
+	bx  	lr
+LOCAL_LABEL(divzero):
+	mov     r0, #0			// division by zero returns 0; *remainder is not written
+	bx      lr
+#else
+    ESTABLISH_FRAME             // saves r4-r7 and lr, so lr may be used as scratch below
+//  Set aside the sign of the quotient and modulus, and the address for the
+//  modulus.
+    eor     r4,     r0, r1
+    mov     r5,     r0
+    mov     r6,     r2
+//  Take the absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31).
+    eor     ip,     r0, r0, asr #31
+    eor     lr,     r1, r1, asr #31
+    sub     r0,     ip, r0, asr #31
+    sub     r1,     lr, r1, asr #31
+//  Unsigned divmod:
+    bl      SYMBOL_NAME(__udivmodsi4)
+//  Apply the sign of quotient and modulus
+    ldr     r1,    [r6]         // r1 = unsigned remainder stored through the saved pointer
+    eor     r0,     r0, r4, asr #31
+    eor     r1,     r1, r5, asr #31
+    sub     r0,     r0, r4, asr #31
+    sub     r1,     r1, r5, asr #31
+    str     r1,    [r6]         // write back the signed remainder
+    CLEAR_FRAME_AND_RETURN
+#endif
+END_COMPILERRT_FUNCTION(__divmodsi4)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/divsf3vfp.S b/contrib/compiler-rt/lib/builtins/arm/divsf3vfp.S
new file mode 100644
index 000000000000..130318f0c37b
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/divsf3vfp.S
@@ -0,0 +1,33 @@
+//===-- divsf3vfp.S - Implement divsf3vfp ---------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern float __divsf3vfp(float a, float b);
+//
+// Divides two single precision floating point numbers using the Darwin
+// calling convention where single arguments are passed like 32-bit ints.
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__divsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vdiv.f32 s0, s0, s1	// hard-float build: s0 = s0 / s1
+#else
+	vmov	s14, r0		// move first param from r0 into float register
+	vmov	s15, r1		// move second param from r1 into float register
+	vdiv.f32 s13, s14, s15	// s13 = a / b
+	vmov	r0, s13		// move result back to r0
+#endif
+	bx	lr
+END_COMPILERRT_FUNCTION(__divsf3vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/divsi3.S b/contrib/compiler-rt/lib/builtins/arm/divsi3.S
new file mode 100644
index 000000000000..19757af177eb
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/divsi3.S
@@ -0,0 +1,82 @@
+/*===-- divsi3.S - 32-bit signed integer divide ---------------------------===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __divsi3 (32-bit signed integer divide) function
+ * for the ARM architecture as a wrapper around the unsigned routine.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+#define ESTABLISH_FRAME \
+    push   {r4, r7, lr}    ;\
+    add     r7,     sp, #4
+#define CLEAR_FRAME_AND_RETURN \
+    pop    {r4, r7, pc}
+
+   .syntax unified
+   .text
+   DEFINE_CODE_STATE
+
+	.p2align 3
+// Ok, APCS and AAPCS agree on 32 bit args, so it's safe to use the same routine.
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_idiv, __divsi3)
+
+@ int __divsi3(int dividend, int divisor)
+@   Calculate and return the quotient of the (signed) division.
+
+DEFINE_COMPILERRT_FUNCTION(__divsi3)
+#if __ARM_ARCH_EXT_IDIV__
+   tst     r1,r1
+   beq     LOCAL_LABEL(divzero) // divisor == 0: skip the hardware divide
+   sdiv    r0, r0, r1           // r0 = quotient
+   bx      lr
+LOCAL_LABEL(divzero):
+   mov     r0,#0                // division by zero returns 0
+   bx      lr
+#else
+ESTABLISH_FRAME
+//  Set aside the sign of the quotient.
+#  if defined(USE_THUMB_1)
+    movs    r4,     r0
+    eors    r4,     r1
+#  else
+    eor     r4,     r0, r1
+#  endif
+//  Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31).
+#  if defined(USE_THUMB_1)
+    asrs    r2,     r0, #31
+    asrs    r3,     r1, #31
+    eors    r0,     r2
+    eors    r1,     r3
+    subs    r0,     r0, r2
+    subs    r1,     r1, r3
+#  else
+    eor     r2,     r0, r0, asr #31
+    eor     r3,     r1, r1, asr #31
+    sub     r0,     r2, r0, asr #31
+    sub     r1,     r3, r1, asr #31
+#  endif
+//  abs(a) / abs(b)
+    bl      SYMBOL_NAME(__udivsi3)
+//  Apply sign of quotient to result and return.
+#  if defined(USE_THUMB_1)
+    asrs    r4,     #31         // r4 = 0 or -1 according to the saved sign bit
+    eors    r0,     r4
+    subs    r0,     r0, r4      // negate r0 when r4 is -1
+#  else
+    eor     r0,     r0, r4, asr #31
+    sub     r0,     r0, r4, asr #31
+#  endif
+    CLEAR_FRAME_AND_RETURN
+#endif
+END_COMPILERRT_FUNCTION(__divsi3)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/eqdf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/eqdf2vfp.S
new file mode 100644
index 000000000000..d50706570916
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/eqdf2vfp.S
@@ -0,0 +1,37 @@
+//===-- eqdf2vfp.S - Implement eqdf2vfp -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __eqdf2vfp(double a, double b);
+//
+// Returns one iff a == b and neither is NaN.
+// Uses Darwin calling convention where double precision arguments are passed
+// in GPR pairs; with COMPILER_RT_ARMHF_TARGET they are read from d0/d1.
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__eqdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f64 d0, d1		// compare a (d0) with b (d1)
+#else
+	vmov	d6, r0, r1	// a: load r0/r1 pair into d6
+	vmov	d7, r2, r3	// b: load r2/r3 pair into d7
+	vcmp.f64 d6, d7		// compare a with b
+#endif
+	vmrs	apsr_nzcv, fpscr	// copy the VFP compare flags into the APSR
+	ITE(eq)
+	moveq	r0, #1		// result = 1 if the operands compared equal
+	movne	r0, #0		// result = 0 if not equal or unordered
+	bx	lr
+END_COMPILERRT_FUNCTION(__eqdf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/eqsf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/eqsf2vfp.S
new file mode 100644
index 000000000000..fd72b2fdbdee
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/eqsf2vfp.S
@@ -0,0 +1,37 @@
+//===-- eqsf2vfp.S - Implement eqsf2vfp -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __eqsf2vfp(float a, float b);
+//
+// Returns one iff a == b and neither is NaN.
+// Uses Darwin calling convention where single precision arguments are passed
+// like 32-bit ints; with COMPILER_RT_ARMHF_TARGET they are read from s0/s1.
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__eqsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f32 s0, s1	    // compare a (s0) with b (s1)
+#else
+	vmov	s14, r0     // a: move from GPR 0 to float register
+	vmov	s15, r1	    // b: move from GPR 1 to float register
+	vcmp.f32 s14, s15   // compare a with b
+#endif
+	vmrs	apsr_nzcv, fpscr    // copy the VFP compare flags into the APSR
+	ITE(eq)
+	moveq	r0, #1      // result = 1 if the operands compared equal
+	movne	r0, #0      // result = 0 if not equal or unordered
+	bx	lr
+END_COMPILERRT_FUNCTION(__eqsf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/extendsfdf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/extendsfdf2vfp.S
new file mode 100644
index 000000000000..1079f977bae6
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/extendsfdf2vfp.S
@@ -0,0 +1,33 @@
+//===-- extendsfdf2vfp.S - Implement extendsfdf2vfp -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __extendsfdf2vfp(float a);
+//
+// Converts single precision float to double precision result.
+// Uses Darwin calling convention where a single precision parameter is passed
+// in a GPR and the double result is returned in the R0/R1 pair (hard-float: s0 -> d0).
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__extendsfdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcvt.f64.f32 d0, s0  // widen the float in s0 to a double in d0
+#else
+	vmov	s15, r0      // load float register from R0
+	vcvt.f64.f32 d7, s15 // convert single to double
+	vmov	r0, r1, d7   // return result in r0/r1 pair
+#endif
+	bx	lr
+END_COMPILERRT_FUNCTION(__extendsfdf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/fixdfsivfp.S b/contrib/compiler-rt/lib/builtins/arm/fixdfsivfp.S
new file mode 100644
index 000000000000..5d7b0f856549
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/fixdfsivfp.S
@@ -0,0 +1,34 @@
+//===-- fixdfsivfp.S - Implement fixdfsivfp -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __fixdfsivfp(double a);
+//
+// Converts double precision float to a 32-bit int rounding towards zero.
+// Uses Darwin calling convention where a double precision parameter is
+// passed in a GPR register pair (hard-float: in d0). The result is left in r0.
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__fixdfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcvt.s32.f64 s0, d0   // convert the double in d0 to a 32-bit int in s0
+	vmov r0, s0           // move s0 to result register
+#else
+	vmov	d7, r0, r1    // load double register from R0/R1
+	vcvt.s32.f64 s15, d7  // convert double to 32-bit int into s15
+	vmov	r0, s15	      // move s15 to result register
+#endif
+	bx	lr
+END_COMPILERRT_FUNCTION(__fixdfsivfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/fixsfsivfp.S b/contrib/compiler-rt/lib/builtins/arm/fixsfsivfp.S
new file mode 100644
index 000000000000..805a277afa34
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/fixsfsivfp.S
@@ -0,0 +1,34 @@
+//===-- fixsfsivfp.S - Implement fixsfsivfp -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __fixsfsivfp(float a);
+//
+// Converts single precision float to a 32-bit int rounding towards zero.
+// Uses Darwin calling convention where a single precision parameter is
+// passed in a GPR (hard-float: in s0). The result is left in r0.
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__fixsfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcvt.s32.f32 s0, s0    // convert the float in s0 to a 32-bit int, in place
+	vmov r0, s0            // move s0 to result register
+#else
+	vmov	s15, r0        // load float register from R0
+	vcvt.s32.f32 s15, s15  // convert single to 32-bit int into s15
+	vmov	r0, s15	       // move s15 to result register
+#endif
+	bx	lr
+END_COMPILERRT_FUNCTION(__fixsfsivfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/fixunsdfsivfp.S b/contrib/compiler-rt/lib/builtins/arm/fixunsdfsivfp.S
new file mode 100644
index 000000000000..4f1b2c8cefdc
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/fixunsdfsivfp.S
@@ -0,0 +1,35 @@
+//===-- fixunsdfsivfp.S - Implement fixunsdfsivfp -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern unsigned int __fixunsdfsivfp(double a);
+//
+// Converts double precision float to a 32-bit unsigned int rounding towards
+// zero. All negative values become zero.
+// Uses Darwin calling convention where a double precision parameter is
+// passed in a GPR register pair (hard-float: in d0). The result is left in r0.
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__fixunsdfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcvt.u32.f64 s0, d0   // convert the double in d0 to a 32-bit unsigned in s0
+	vmov r0, s0           // move s0 to result register
+#else
+	vmov	d7, r0, r1    // load double register from R0/R1
+	vcvt.u32.f64 s15, d7  // convert double to 32-bit unsigned int into s15
+	vmov	r0, s15	      // move s15 to result register
+#endif
+	bx	lr
+END_COMPILERRT_FUNCTION(__fixunsdfsivfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/fixunssfsivfp.S b/contrib/compiler-rt/lib/builtins/arm/fixunssfsivfp.S
new file mode 100644
index 000000000000..e5d778236879
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/fixunssfsivfp.S
@@ -0,0 +1,35 @@
+//===-- fixunssfsivfp.S - Implement fixunssfsivfp -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern unsigned int __fixunssfsivfp(float a);
+//
+// Converts single precision float to a 32-bit unsigned int rounding towards
+// zero. All negative values become zero.
+// Uses Darwin calling convention where a single precision parameter is
+// passed in a GPR (hard-float: in s0). The result is left in r0.
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__fixunssfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcvt.u32.f32 s0, s0    // convert the float in s0 to a 32-bit unsigned, in place
+	vmov r0, s0            // move s0 to result register
+#else
+	vmov	s15, r0        // load float register from R0
+	vcvt.u32.f32 s15, s15  // convert single to 32-bit unsigned into s15
+	vmov	r0, s15	       // move s15 to result register
+#endif
+	bx	lr
+END_COMPILERRT_FUNCTION(__fixunssfsivfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/floatsidfvfp.S b/contrib/compiler-rt/lib/builtins/arm/floatsidfvfp.S
new file mode 100644
index 000000000000..3297ad44d8cd
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/floatsidfvfp.S
@@ -0,0 +1,34 @@
+//===-- floatsidfvfp.S - Implement floatsidfvfp ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __floatsidfvfp(int a);
+//
+// Converts a 32-bit signed int (passed in r0) to a double precision float.
+// Uses Darwin calling convention where a double precision result is
+// returned in a GPR register pair (hard-float: the result is left in d0).
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__floatsidfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vmov s0, r0            // move int to float register s0
+	vcvt.f64.s32 d0, s0    // convert 32-bit int in s0 to double in d0
+#else
+	vmov	s15, r0        // move int to float register s15
+	vcvt.f64.s32 d7, s15   // convert 32-bit int in s15 to double in d7
+	vmov	r0, r1, d7     // move d7 to result register pair r0/r1
+#endif
+	bx	lr
+END_COMPILERRT_FUNCTION(__floatsidfvfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/floatsisfvfp.S b/contrib/compiler-rt/lib/builtins/arm/floatsisfvfp.S
new file mode 100644
index 000000000000..65408b54b8d4
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/floatsisfvfp.S
@@ -0,0 +1,34 @@
+//===-- floatsisfvfp.S - Implement floatsisfvfp ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern float __floatsisfvfp(int a);
+//
+// Converts a 32-bit signed int (passed in r0) to a single precision float.
+// Uses Darwin calling convention where a single precision result is
+// returned in a GPR (hard-float: the result is left in s0).
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__floatsisfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vmov s0, r0            // move int to float register s0
+	vcvt.f32.s32 s0, s0    // convert 32-bit int in s0 to float in s0
+#else
+	vmov	s15, r0	       // move int to float register s15
+	vcvt.f32.s32 s15, s15  // convert 32-bit int in s15 to float in s15
+	vmov	r0, s15        // move s15 to result register
+#endif
+	bx	lr
+END_COMPILERRT_FUNCTION(__floatsisfvfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/floatunssidfvfp.S b/contrib/compiler-rt/lib/builtins/arm/floatunssidfvfp.S
new file mode 100644
index 000000000000..d7a7024a25b8
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/floatunssidfvfp.S
@@ -0,0 +1,34 @@
+//===-- floatunssidfvfp.S - Implement floatunssidfvfp ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __floatunssidfvfp(unsigned int a);
+//
+// Converts a 32-bit unsigned int (passed in r0) to a double precision float.
+// Uses Darwin calling convention where a double precision result is
+// returned in a GPR register pair (hard-float: the result is left in d0).
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__floatunssidfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vmov s0, r0            // move unsigned int to float register s0
+	vcvt.f64.u32 d0, s0    // convert 32-bit unsigned in s0 to double in d0
+#else
+	vmov	s15, r0        // move unsigned int to float register s15
+	vcvt.f64.u32 d7, s15   // convert 32-bit unsigned in s15 to double in d7
+	vmov	r0, r1, d7     // move d7 to result register pair r0/r1
+#endif
+	bx	lr
+END_COMPILERRT_FUNCTION(__floatunssidfvfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/floatunssisfvfp.S b/contrib/compiler-rt/lib/builtins/arm/floatunssisfvfp.S
new file mode 100644
index 000000000000..1ca856519a92
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/floatunssisfvfp.S
@@ -0,0 +1,34 @@
+//===-- floatunssisfvfp.S - Implement floatunssisfvfp ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern float __floatunssisfvfp(unsigned int a);
+//
+// Converts a 32-bit unsigned int (passed in r0) to a single precision float.
+// Uses Darwin calling convention where a single precision result is
+// returned in a GPR (hard-float: the result is left in s0).
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__floatunssisfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vmov s0, r0            // move unsigned int to float register s0
+	vcvt.f32.u32 s0, s0    // convert 32-bit unsigned in s0 to float in s0
+#else
+	vmov	s15, r0	       // move unsigned int to float register s15
+	vcvt.f32.u32 s15, s15  // convert 32-bit unsigned in s15 to float in s15
+	vmov	r0, s15        // move s15 to result register
+#endif
+	bx	lr
+END_COMPILERRT_FUNCTION(__floatunssisfvfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/gedf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/gedf2vfp.S
new file mode 100644
index 000000000000..364fc5b24cd1
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/gedf2vfp.S
@@ -0,0 +1,37 @@
+//===-- gedf2vfp.S - Implement gedf2vfp -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __gedf2vfp(double a, double b);
+//
+// Returns one iff a >= b and neither is NaN.
+// Uses Darwin calling convention where double precision arguments are passed
+// in GPR pairs; with COMPILER_RT_ARMHF_TARGET they are read from d0/d1.
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__gedf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f64 d0, d1		// compare a (d0) with b (d1)
+#else
+	vmov 	d6, r0, r1	// a: load r0/r1 pair into d6
+	vmov 	d7, r2, r3	// b: load r2/r3 pair into d7
+	vcmp.f64 d6, d7		// compare a with b
+#endif
+	vmrs	apsr_nzcv, fpscr	// copy the VFP compare flags into the APSR
+	ITE(ge)
+	movge	r0, #1      // set result register to 1 if greater than or equal
+	movlt	r0, #0      // result = 0 if less than or unordered
+	bx	lr
+END_COMPILERRT_FUNCTION(__gedf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/gesf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/gesf2vfp.S
new file mode 100644
index 000000000000..346c3473ae4c
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/gesf2vfp.S
@@ -0,0 +1,37 @@
+//===-- gesf2vfp.S - Implement gesf2vfp -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __gesf2vfp(float a, float b);
+//
+// Returns one iff a >= b and neither is NaN.
+// Uses Darwin calling convention where single precision arguments are passed
+// like 32-bit ints; with COMPILER_RT_ARMHF_TARGET they are read from s0/s1.
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__gesf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f32 s0, s1	    // compare a (s0) with b (s1)
+#else
+	vmov	s14, r0	    // a: move from GPR 0 to float register
+	vmov	s15, r1	    // b: move from GPR 1 to float register
+	vcmp.f32 s14, s15   // compare a with b
+#endif
+	vmrs	apsr_nzcv, fpscr    // copy the VFP compare flags into the APSR
+	ITE(ge)
+	movge	r0, #1      // set result register to 1 if greater than or equal
+	movlt	r0, #0      // result = 0 if less than or unordered
+	bx	lr
+END_COMPILERRT_FUNCTION(__gesf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/gtdf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/gtdf2vfp.S
new file mode 100644
index 000000000000..3389c3ad9737
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/gtdf2vfp.S
@@ -0,0 +1,37 @@
+//===-- gtdf2vfp.S - Implement gtdf2vfp -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __gtdf2vfp(double a, double b);
+//
+// Returns one iff a > b and neither is NaN.
+// Uses Darwin calling convention where double precision arguments are passed
+// in GPR pairs; with COMPILER_RT_ARMHF_TARGET they are read from d0/d1.
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__gtdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f64 d0, d1		// compare a (d0) with b (d1)
+#else
+	vmov 	d6, r0, r1	// a: load r0/r1 pair into d6
+	vmov 	d7, r2, r3	// b: load r2/r3 pair into d7
+	vcmp.f64 d6, d7		// compare a with b
+#endif
+	vmrs	apsr_nzcv, fpscr	// copy the VFP compare flags into the APSR
+	ITE(gt)
+	movgt	r0, #1		// set result register to 1 if greater than
+	movle	r0, #0		// result = 0 if less, equal or unordered
+	bx	lr
+END_COMPILERRT_FUNCTION(__gtdf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/gtsf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/gtsf2vfp.S
new file mode 100644
index 000000000000..afdba8b018e2
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/gtsf2vfp.S
@@ -0,0 +1,37 @@
+//===-- gtsf2vfp.S - Implement gtsf2vfp -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __gtsf2vfp(float a, float b);
+//
+// Returns one iff a > b and neither is NaN.
+// Uses Darwin calling convention where single precision arguments are passed
+// like 32-bit ints; with COMPILER_RT_ARMHF_TARGET they are read from s0/s1.
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__gtsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f32 s0, s1		// compare a (s0) with b (s1)
+#else
+	vmov	s14, r0		// a: move from GPR 0 to float register
+	vmov	s15, r1		// b: move from GPR 1 to float register
+	vcmp.f32 s14, s15	// compare a with b
+#endif
+	vmrs	apsr_nzcv, fpscr	// copy the VFP compare flags into the APSR
+	ITE(gt)
+	movgt	r0, #1		// set result register to 1 if greater than
+	movle	r0, #0		// result = 0 if less, equal or unordered
+	bx	lr
+END_COMPILERRT_FUNCTION(__gtsf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/ledf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/ledf2vfp.S
new file mode 100644
index 000000000000..4bbe4c86837c
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/ledf2vfp.S
@@ -0,0 +1,37 @@
+//===-- ledf2vfp.S - Implement ledf2vfp -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __ledf2vfp(double a, double b);
+//
+// Returns one iff a <= b and neither is NaN.
+// Uses Darwin calling convention where double precision arguments are passed
+// in GPR pairs; with COMPILER_RT_ARMHF_TARGET they are read from d0/d1.
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__ledf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f64 d0, d1		// compare a (d0) with b (d1)
+#else
+	vmov 	d6, r0, r1	// a: load r0/r1 pair into d6
+	vmov 	d7, r2, r3	// b: load r2/r3 pair into d7
+	vcmp.f64 d6, d7		// compare a with b
+#endif
+	vmrs	apsr_nzcv, fpscr	// copy the VFP compare flags into the APSR
+	ITE(ls)
+	movls	r0, #1		// set result register to 1 if less than or equal
+	movhi	r0, #0		// result = 0 if greater than or unordered
+	bx	lr
+END_COMPILERRT_FUNCTION(__ledf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/lesf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/lesf2vfp.S
new file mode 100644
index 000000000000..51232bd8cedc
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/lesf2vfp.S
@@ -0,0 +1,37 @@
+//===-- lesf2vfp.S - Implement lesf2vfp -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __lesf2vfp(float a, float b);
+//
+// Returns one iff a <= b and neither is NaN.
+// Uses Darwin calling convention where single precision arguments are passed
+// like 32-bit ints; with COMPILER_RT_ARMHF_TARGET they are read from s0/s1.
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__lesf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f32 s0, s1	    // compare a (s0) with b (s1)
+#else
+	vmov	s14, r0     // a: move from GPR 0 to float register
+	vmov	s15, r1     // b: move from GPR 1 to float register
+	vcmp.f32 s14, s15   // compare a with b
+#endif
+	vmrs	apsr_nzcv, fpscr    // copy the VFP compare flags into the APSR
+	ITE(ls)
+	movls	r0, #1      // set result register to 1 if less than or equal
+	movhi	r0, #0      // result = 0 if greater than or unordered
+	bx	lr
+END_COMPILERRT_FUNCTION(__lesf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/ltdf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/ltdf2vfp.S
new file mode 100644
index 000000000000..8e2928c813d2
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/ltdf2vfp.S
@@ -0,0 +1,37 @@
+//===-- ltdf2vfp.S - Implement ltdf2vfp -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __ltdf2vfp(double a, double b);
+//
+// Returns one iff a < b and neither is NaN.
+// Uses Darwin calling convention where double precision arguments are passed
+// in GPR pairs; with COMPILER_RT_ARMHF_TARGET they are read from d0/d1.
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__ltdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f64 d0, d1		// compare a (d0) with b (d1)
+#else
+	vmov 	d6, r0, r1	// a: load r0/r1 pair into d6
+	vmov 	d7, r2, r3	// b: load r2/r3 pair into d7
+	vcmp.f64 d6, d7		// compare a with b
+#endif
+	vmrs	apsr_nzcv, fpscr	// copy the VFP compare flags into the APSR
+	ITE(mi)
+	movmi	r0, #1		// set result register to 1 if less than
+	movpl	r0, #0		// result = 0 if greater, equal or unordered
+	bx	lr
+END_COMPILERRT_FUNCTION(__ltdf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/ltsf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/ltsf2vfp.S
new file mode 100644
index 000000000000..59c00c6bab67
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/ltsf2vfp.S
@@ -0,0 +1,37 @@
+//===-- ltsf2vfp.S - Implement ltsf2vfp -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __ltsf2vfp(float a, float b);
+//
+// Returns one iff a < b and neither is NaN.
+// Uses Darwin calling convention where single precision arguments are passed
+// like 32-bit ints; with COMPILER_RT_ARMHF_TARGET they are read from s0/s1.
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__ltsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f32 s0, s1	    // compare a (s0) with b (s1)
+#else
+	vmov	s14, r0     // a: move from GPR 0 to float register
+	vmov	s15, r1     // b: move from GPR 1 to float register
+	vcmp.f32 s14, s15   // compare a with b
+#endif
+	vmrs	apsr_nzcv, fpscr    // copy the VFP compare flags into the APSR
+	ITE(mi)
+	movmi	r0, #1      // set result register to 1 if less than
+	movpl	r0, #0      // result = 0 if greater, equal or unordered
+	bx	lr
+END_COMPILERRT_FUNCTION(__ltsf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/modsi3.S b/contrib/compiler-rt/lib/builtins/arm/modsi3.S
new file mode 100644
index 000000000000..be263834d7f1
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/modsi3.S
@@ -0,0 +1,60 @@
+/*===-- modsi3.S - 32-bit signed integer modulus --------------------------===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __modsi3 (32-bit signed integer modulus) function
+ * for the ARM architecture as a wrapper around the unsigned routine.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+#define ESTABLISH_FRAME \
+    push   {r4, r7, lr}    ;\
+    add     r7,     sp, #4
+#define CLEAR_FRAME_AND_RETURN \
+    pop    {r4, r7, pc}
+
+	.syntax unified
+	.text
+	DEFINE_CODE_STATE
+
+@ int __modsi3(int dividend, int divisor)
+@   Calculate and return the remainder of the (signed) division.
+
+	.p2align 3
+DEFINE_COMPILERRT_FUNCTION(__modsi3)
+#if __ARM_ARCH_EXT_IDIV__
+	tst     r1, r1                  // is the divisor zero?
+	beq     LOCAL_LABEL(divzero)
+	sdiv	r2, r0, r1              // r2 = quotient
+	mls 	r0, r2, r1, r0          // r0 = dividend - quotient * divisor
+	bx      lr
+LOCAL_LABEL(divzero):
+	mov     r0, #0                  // a zero divisor yields a result of 0
+	bx      lr
+#else
+    ESTABLISH_FRAME
+    //  Set aside the sign of the dividend (r4 survives the call below).
+    mov     r4,     r0
+    //  Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31).
+    eor     r2,     r0, r0, asr #31
+    eor     r3,     r1, r1, asr #31
+    sub     r0,     r2, r0, asr #31
+    sub     r1,     r3, r1, asr #31
+    //  abs(a) % abs(b), computed by the unsigned modulus routine.
+    bl     SYMBOL_NAME(__umodsi3)
+    //  Apply sign of dividend to result and return: (r ^ s) - s, s = r4 >> 31.
+    eor     r0,     r0, r4, asr #31
+    sub     r0,     r0, r4, asr #31
+    CLEAR_FRAME_AND_RETURN
+#endif
+END_COMPILERRT_FUNCTION(__modsi3)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/muldf3vfp.S b/contrib/compiler-rt/lib/builtins/arm/muldf3vfp.S
new file mode 100644
index 000000000000..aa7b23495034
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/muldf3vfp.S
@@ -0,0 +1,33 @@
+//===-- muldf3vfp.S - Implement muldf3vfp ---------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __muldf3vfp(double a, double b);
+//
+// Multiplies two double precision floating point numbers using the Darwin
+// calling convention where double arguments are passed in GPR pairs
+// (with COMPILER_RT_ARMHF_TARGET: operands in d0/d1, product left in d0).
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__muldf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vmul.f64 d0, d0, d1        // d0 = a * b
+#else
+	vmov 	d6, r0, r1         // move first param from r0/r1 pair into d6
+	vmov 	d7, r2, r3         // move second param from r2/r3 pair into d7
+	vmul.f64 d6, d6, d7        // d6 = a * b
+	vmov 	r0, r1, d6         // move result back to r0/r1 pair
+#endif
+	bx	lr
+END_COMPILERRT_FUNCTION(__muldf3vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/mulsf3vfp.S b/contrib/compiler-rt/lib/builtins/arm/mulsf3vfp.S
new file mode 100644
index 000000000000..a1da789dcade
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/mulsf3vfp.S
@@ -0,0 +1,33 @@
+//===-- mulsf3vfp.S - Implement mulsf3vfp ---------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern float __mulsf3vfp(float a, float b);
+//
+// Multiplies two single precision floating point numbers. Darwin (soft-float)
+// callers pass a/b like 32-bit ints in r0/r1 and get the product in r0; with
+// COMPILER_RT_ARMHF_TARGET the operands are in s0/s1 and the product in s0.
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__mulsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vmul.f32 s0, s0, s1	// s0 = a * b; nothing is copied to r0 on this path
+#else
+	vmov	s14, r0		// move first param from r0 into float register
+	vmov	s15, r1		// move second param from r1 into float register
+	vmul.f32 s13, s14, s15	// s13 = a * b
+	vmov	r0, s13		// move result back to r0 (soft-float path only)
+#endif
+	bx	lr
+END_COMPILERRT_FUNCTION(__mulsf3vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/nedf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/nedf2vfp.S
new file mode 100644
index 000000000000..aef72eb00974
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/nedf2vfp.S
@@ -0,0 +1,37 @@
+//===-- nedf2vfp.S - Implement nedf2vfp -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __nedf2vfp(double a, double b);
+//
+// Returns one if a and b compare unequal (NE is also true for an unordered
+// compare, i.e. when either is NaN) and zero if they compare equal. Darwin
+// convention: doubles are passed in GPR pairs (hard-float: in d0/d1).
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__nedf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f64 d0, d1		// compare a (d0) with b (d1)
+#else
+	vmov 	d6, r0, r1	// a: load r0/r1 pair into d6
+	vmov 	d7, r2, r3	// b: load r2/r3 pair into d7
+	vcmp.f64 d6, d7		// compare a with b
+#endif
+	vmrs	apsr_nzcv, fpscr	// copy the VFP compare flags into the APSR
+	ITE(ne)
+	movne	r0, #1		// set result register to 1 if unequal
+	moveq	r0, #0		// result = 0 if the operands compared equal
+	bx	lr
+END_COMPILERRT_FUNCTION(__nedf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/negdf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/negdf2vfp.S
new file mode 100644
index 000000000000..81f0ab8eec1d
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/negdf2vfp.S
@@ -0,0 +1,30 @@
+//===-- negdf2vfp.S - Implement negdf2vfp ---------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __negdf2vfp(double a);
+//
+// Returns the negation of a double precision floating point number using the
+// Darwin calling convention where double arguments are passed in GPR pairs.
+// With COMPILER_RT_ARMHF_TARGET the value is negated in place in d0.
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__negdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vneg.f64 d0, d0			// d0 = -a
+#else
+	eor	r1, r1, #-2147483648	// flip bit 31 of r1 (0x80000000), taken to be the sign bit of the double in r0/r1
+#endif
+	bx	lr
+END_COMPILERRT_FUNCTION(__negdf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/negsf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/negsf2vfp.S
new file mode 100644
index 000000000000..46ab4a9cf164
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/negsf2vfp.S
@@ -0,0 +1,30 @@
+//===-- negsf2vfp.S - Implement negsf2vfp ---------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern float __negsf2vfp(float a);
+//
+// Returns the negation of a single precision floating point number using the
+// Darwin calling convention where single arguments are passed like 32-bit ints
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__negsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vneg.f32 s0, s0		// negate the operand in place in s0
+#else
+	eor	r0, r0, #-2147483648	// flip sign bit (bit 31) on float in r0
+#endif
+	bx	lr
+END_COMPILERRT_FUNCTION(__negsf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/nesf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/nesf2vfp.S
new file mode 100644
index 000000000000..50d60f493005
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/nesf2vfp.S
@@ -0,0 +1,37 @@
+//===-- nesf2vfp.S - Implement nesf2vfp -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __nesf2vfp(float a, float b);
+//
+// Returns one if a != b or if either operand is NaN; returns zero otherwise.
+// Uses Darwin calling convention where single precision arguments are passed
+// like 32-bit ints
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__nesf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f32 s0, s1		// operands are taken directly from s0/s1
+#else
+	vmov	s14, r0	    // move from GPR 0 to float register
+	vmov	s15, r1	    // move from GPR 1 to float register
+	vcmp.f32 s14, s15	// compare a with b; result lands in the FPSCR flags
+#endif
+	vmrs	apsr_nzcv, fpscr	// copy the FP comparison flags into the APSR
+	ITE(ne)
+	movne	r0, #1      // set result register to 1 if unequal
+	moveq	r0, #0      // set result register to 0 if equal
+	bx	lr
+END_COMPILERRT_FUNCTION(__nesf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/restore_vfp_d8_d15_regs.S b/contrib/compiler-rt/lib/builtins/arm/restore_vfp_d8_d15_regs.S
new file mode 100644
index 000000000000..0692cf3e1b77
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/restore_vfp_d8_d15_regs.S
@@ -0,0 +1,35 @@
+//===-- save_restore_regs.S - Implement save/restore* ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// When compiling C++ functions that need to handle thrown exceptions the
+// compiler is required to save all registers and call __Unwind_SjLj_Register
+// in the function prolog.  But when compiling for thumb1, there are
+// no instructions to access the floating point registers, so the
+// compiler needs to add a call to the helper function __save_vfp_d8_d15_regs
+// written in ARM to save the float registers.  In the epilog, the compiler
+// must also add a call to __restore_vfp_d8_d15_regs to restore those registers.
+//
+
+	.text
+	.syntax unified
+
+//
+// Restore registers d8-d15 from stack
+//
+	.p2align 2
+DEFINE_COMPILERRT_PRIVATE_FUNCTION(__restore_vfp_d8_d15_regs)
+	vldmia	sp!, {d8-d15}           // pop registers d8-d15 off stack (sp is written back)
+	bx      lr                      // return to caller (the epilog)
+END_COMPILERRT_FUNCTION(__restore_vfp_d8_d15_regs)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/save_vfp_d8_d15_regs.S b/contrib/compiler-rt/lib/builtins/arm/save_vfp_d8_d15_regs.S
new file mode 100644
index 000000000000..544dd5467a4d
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/save_vfp_d8_d15_regs.S
@@ -0,0 +1,35 @@
+//===-- save_restore_regs.S - Implement save/restore* ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// When compiling C++ functions that need to handle thrown exceptions the
+// compiler is required to save all registers and call __Unwind_SjLj_Register
+// in the function prolog.  But when compiling for thumb1, there are
+// no instructions to access the floating point registers, so the
+// compiler needs to add a call to the helper function __save_vfp_d8_d15_regs
+// written in ARM to save the float registers.  In the epilog, the compiler
+// must also add a call to __restore_vfp_d8_d15_regs to restore those registers.
+//
+
+	.text
+	.syntax unified
+
+//
+// Save registers d8-d15 onto stack
+//
+	.p2align 2
+DEFINE_COMPILERRT_PRIVATE_FUNCTION(__save_vfp_d8_d15_regs)
+	vstmdb	sp!, {d8-d15}           // push registers d8-d15 onto stack (sp is written back)
+	bx      lr                      // return to caller (the prolog)
+END_COMPILERRT_FUNCTION(__save_vfp_d8_d15_regs)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/softfloat-alias.list b/contrib/compiler-rt/lib/builtins/arm/softfloat-alias.list
new file mode 100644
index 000000000000..cc6a4b3cdd2e
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/softfloat-alias.list
@@ -0,0 +1,21 @@
+#
+# These are soft float functions which can be 
+# aliased to the *vfp functions on arm processors
+# that support floating point instructions.
+#
+___adddf3vfp		___adddf3
+___addsf3vfp		___addsf3
+___divdf3vfp		___divdf3
+___divsf3vfp		___divsf3
+___extendsfdf2vfp	___extendsfdf2
+___fixdfsivfp		___fixdfsi
+___fixsfsivfp		___fixsfsi
+___floatsidfvfp		___floatsidf
+___floatsisfvfp		___floatsisf
+___muldf3vfp		___muldf3
+___mulsf3vfp		___mulsf3
+___subdf3vfp		___subdf3
+___subsf3vfp		___subsf3
+___truncdfsf2vfp	___truncdfsf2
+___floatunssidfvfp	___floatunsidf
+___floatunssisfvfp	___floatunsisf
diff --git a/contrib/compiler-rt/lib/builtins/arm/subdf3vfp.S b/contrib/compiler-rt/lib/builtins/arm/subdf3vfp.S
new file mode 100644
index 000000000000..2b6f2bdbfdd5
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/subdf3vfp.S
@@ -0,0 +1,33 @@
+//===-- subdf3vfp.S - Implement subdf3vfp ---------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __subdf3vfp(double a, double b);
+//
+// Returns difference between two double precision floating point numbers using
+// the Darwin calling convention where double arguments are passed in GPR pairs
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__subdf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vsub.f64 d0, d0, d1		// d0 = d0 - d1, operands and result stay in VFP registers
+#else
+	vmov 	d6, r0, r1         // move first param from r0/r1 pair into d6
+	vmov 	d7, r2, r3         // move second param from r2/r3 pair into d7
+	vsub.f64 d6, d6, d7		// d6 = a - b
+	vmov 	r0, r1, d6         // move result back to r0/r1 pair
+#endif
+	bx	lr
+END_COMPILERRT_FUNCTION(__subdf3vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/subsf3vfp.S b/contrib/compiler-rt/lib/builtins/arm/subsf3vfp.S
new file mode 100644
index 000000000000..3e83ea26507d
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/subsf3vfp.S
@@ -0,0 +1,34 @@
+//===-- subsf3vfp.S - Implement subsf3vfp ---------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern float __subsf3vfp(float a, float b);
+//
+// Returns the difference between two single precision floating point numbers
+// using the Darwin calling convention where single arguments are passed
+// like 32-bit ints.
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__subsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vsub.f32 s0, s0, s1	// s0 = s0 - s1, operands and result stay in VFP registers
+#else
+	vmov	s14, r0		// move first param from r0 into float register
+	vmov	s15, r1		// move second param from r1 into float register
+	vsub.f32 s14, s14, s15	// s14 = a - b
+	vmov	r0, s14		// move result back to r0
+#endif
+	bx	lr
+END_COMPILERRT_FUNCTION(__subsf3vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/switch16.S b/contrib/compiler-rt/lib/builtins/arm/switch16.S
new file mode 100644
index 000000000000..df9e38e176ce
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/switch16.S
@@ -0,0 +1,46 @@
+//===-- switch.S - Implement switch* --------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// When compiling switch statements in thumb mode, the compiler
+// can use these __switch* helper functions.  The compiler emits a blx to
+// the __switch* function followed by a table of displacements for each
+// case statement.  On entry, R0 is the index into the table. The __switch*
+// function uses the return address in lr to find the start of the table.
+// The first entry in the table is the count of the entries in the table.
+// It then uses R0 to index into the table and get the displacement of the
+// address to jump to.  If R0 is greater than or equal to the entry count, it jumps
+// to the last entry in the table. Each displacement in the table is actually
+// the distance from lr to the label, thus making the tables PIC.
+
+
+	.text
+	.syntax unified
+
+//
+// The table contains signed 2-byte sized elements which are 1/2 the distance
+// from lr to the target label.
+//
+	.p2align 2
+DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch16)
+	ldrh    ip, [lr, #-1]           // get first 16-bit word in table (entry count); -1 presumably strips the Thumb bit in lr -- TODO confirm
+	cmp     r0, ip                  // unsigned compare of index (r0) with count (ip)
+	add     r0, lr, r0, lsl #1      // compute address of element in table
+	add     ip, lr, ip, lsl #1      // compute address of last element in table
+	ite lo                          // lo: index < count, hs: index >= count
+	ldrshlo r0, [r0, #1]            // load 16-bit element if r0 is in range
+	ldrshhs r0, [ip, #1]            // load 16-bit element if r0 out of range
+	add     ip, lr, r0, lsl #1      // compute label = lr + element*2
+	bx      ip                      // jump to computed label
+END_COMPILERRT_FUNCTION(__switch16)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/switch32.S b/contrib/compiler-rt/lib/builtins/arm/switch32.S
new file mode 100644
index 000000000000..d97b5361436d
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/switch32.S
@@ -0,0 +1,46 @@
+//===-- switch.S - Implement switch* --------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// When compiling switch statements in thumb mode, the compiler
+// can use these __switch* helper functions.  The compiler emits a blx to
+// the __switch* function followed by a table of displacements for each
+// case statement.  On entry, R0 is the index into the table. The __switch*
+// function uses the return address in lr to find the start of the table.
+// The first entry in the table is the count of the entries in the table.
+// It then uses R0 to index into the table and get the displacement of the
+// address to jump to.  If R0 is greater than or equal to the entry count, it jumps
+// to the last entry in the table. Each displacement in the table is actually
+// the distance from lr to the label, thus making the tables PIC.
+
+
+	.text
+	.syntax unified
+
+//
+// The table contains signed 4-byte sized elements which are the distance
+// from lr to the target label.
+//
+	.p2align 2
+DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch32)
+	ldr     ip, [lr, #-1]            // get first 32-bit word in table (entry count); -1 presumably strips the Thumb bit in lr -- TODO confirm
+	cmp     r0, ip                   // unsigned compare of index (r0) with count (ip)
+	add     r0, lr, r0, lsl #2       // compute address of element in table
+	add     ip, lr, ip, lsl #2       // compute address of last element in table
+	ite lo                           // lo: index < count, hs: index >= count
+	ldrlo   r0, [r0, #3]             // load 32-bit element if r0 is in range
+	ldrhs   r0, [ip, #3]             // load 32-bit element if r0 out of range
+	add     ip, lr, r0               // compute label = lr + element
+	bx      ip                       // jump to computed label
+END_COMPILERRT_FUNCTION(__switch32)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/switch8.S b/contrib/compiler-rt/lib/builtins/arm/switch8.S
new file mode 100644
index 000000000000..4d9e0eaff845
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/switch8.S
@@ -0,0 +1,44 @@
+//===-- switch.S - Implement switch* --------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// When compiling switch statements in thumb mode, the compiler
+// can use these __switch* helper functions.  The compiler emits a blx to
+// the __switch* function followed by a table of displacements for each
+// case statement.  On entry, R0 is the index into the table. The __switch*
+// function uses the return address in lr to find the start of the table.
+// The first entry in the table is the count of the entries in the table.
+// It then uses R0 to index into the table and get the displacement of the
+// address to jump to.  If R0 is greater than or equal to the entry count, it jumps
+// to the last entry in the table. Each displacement in the table is actually
+// the distance from lr to the label, thus making the tables PIC.
+
+
+	.text
+	.syntax unified
+
+//
+// The table contains signed byte sized elements which are 1/2 the distance
+// from lr to the target label.
+//
+	.p2align 2
+DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch8)
+	ldrb    ip, [lr, #-1]           // get first byte in table (entry count)
+	cmp     r0, ip                  // unsigned compare of index with count (lo/hs used below)
+	ite lo                          // lo: index < count, hs: index >= count
+	ldrsblo r0, [lr, r0]            // get indexed byte out of table (sign-extended)
+	ldrsbhs r0, [lr, ip]            // if out of range, use last entry in table
+	add     ip, lr, r0, lsl #1      // compute label = lr + element*2
+	bx      ip                      // jump to computed label
+END_COMPILERRT_FUNCTION(__switch8)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/switchu8.S b/contrib/compiler-rt/lib/builtins/arm/switchu8.S
new file mode 100644
index 000000000000..4ffe35f0549b
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/switchu8.S
@@ -0,0 +1,44 @@
+//===-- switch.S - Implement switch* --------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// When compiling switch statements in thumb mode, the compiler
+// can use these __switch* helper functions.  The compiler emits a blx to
+// the __switch* function followed by a table of displacements for each
+// case statement.  On entry, R0 is the index into the table. The __switch*
+// function uses the return address in lr to find the start of the table.
+// The first entry in the table is the count of the entries in the table.
+// It then uses R0 to index into the table and get the displacement of the
+// address to jump to.  If R0 is greater than or equal to the entry count, it jumps
+// to the last entry in the table. Each displacement in the table is actually
+// the distance from lr to the label, thus making the tables PIC.
+
+
+	.text
+	.syntax unified
+
+//
+// The table contains unsigned byte sized elements which are 1/2 the distance
+// from lr to the target label.
+//
+	.p2align 2
+DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switchu8)
+	ldrb    ip, [lr, #-1]           // get first byte in table (entry count)
+	cmp     r0, ip                  // unsigned compare of index with count
+	ite lo                          // lo: index < count, hs: index >= count
+	ldrblo  r0, [lr, r0]            // get indexed byte out of table (zero-extended)
+	ldrbhs  r0, [lr, ip]            // if out of range, use last entry in table
+	add     ip, lr, r0, lsl #1      // compute label = lr + element*2
+	bx      ip                      // jump to computed label
+END_COMPILERRT_FUNCTION(__switchu8)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync-ops.h b/contrib/compiler-rt/lib/builtins/arm/sync-ops.h
new file mode 100644
index 000000000000..ee02c30c6eaa
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync-ops.h
@@ -0,0 +1,64 @@
+/*===-- sync-ops.h - --===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements outline macros for the __sync_fetch_and_*
+ * operations. Different instantiations will generate appropriate assembly for
+ * ARM and Thumb-2 versions of the functions.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+#define SYNC_OP_4(op) \
+        .p2align 2 ; \
+        .thumb ; \
+        .syntax unified ; \
+        DEFINE_COMPILERRT_THUMB_FUNCTION(__sync_fetch_and_ ## op) \
+        dmb ; /* barrier before the update sequence */ \
+        mov r12, r0 ; /* r12 = pointer; r0 is reused for the loaded value */ \
+        LOCAL_LABEL(tryatomic_ ## op): \
+        ldrex r0, [r12] ; /* r0 = current value, left in r0 as the result */ \
+        op(r2, r0, r1) ; /* r2 = op(current value, operand in r1) */ \
+        strex r3, r2, [r12] ; /* try to store r2; r3 receives the status */ \
+        cmp r3, #0 ; \
+        bne LOCAL_LABEL(tryatomic_ ## op) ; /* nonzero status: retry */ \
+        dmb ; /* barrier after the update sequence */ \
+        bx lr
+
+#define SYNC_OP_8(op) \
+        .p2align 2 ; \
+        .thumb ; \
+        .syntax unified ; \
+        DEFINE_COMPILERRT_THUMB_FUNCTION(__sync_fetch_and_ ## op) \
+        push {r4, r5, r6, lr} ; /* r4-r6 are used as scratch below */ \
+        dmb ; /* barrier before the update sequence */ \
+        mov r12, r0 ; /* r12 = pointer; r0/r1 are reused for the loaded value */ \
+        LOCAL_LABEL(tryatomic_ ## op): \
+        ldrexd r0, r1, [r12] ; /* r0/r1 = current 64-bit value, left there as the result */ \
+        op(r4, r5, r0, r1, r2, r3) ; /* r4/r5 = op(current value, operand in r2/r3) */ \
+        strexd r6, r4, r5, [r12] ; /* try to store r4/r5; r6 receives the status */ \
+        cmp r6, #0 ; \
+        bne LOCAL_LABEL(tryatomic_ ## op) ; /* nonzero status: retry */ \
+        dmb ; /* barrier after the update sequence */ \
+        pop {r4, r5, r6, pc}
+
+#define MINMAX_4(rD, rN, rM, cmp_kind) \
+        cmp rN, rM ; /* flags for rN - rM */ \
+        mov rD, rM ; /* default result is rM */ \
+        it cmp_kind ; \
+        mov##cmp_kind rD, rN /* result is rN instead when "rN cmp_kind rM" holds */
+
+#define MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, cmp_kind) /* rD = (N cmp_kind M) ? N : M; rD_* must not alias rN_* or rM_*; only ge/lt/hs/lo are meaningful after cmp+sbcs */ \
+        cmp rN_LO, rM_LO ; /* low-word subtract produces the borrow */ \
+        sbcs rD_HI, rN_HI, rM_HI ; /* high-word subtract sets flags; difference is discarded into rD_HI so rN_HI (old value) stays intact */ \
+        mov rD_LO, rM_LO ; /* default result is M (mov leaves the flags alone) */ \
+        mov rD_HI, rM_HI ; \
+        itt cmp_kind ; \
+        mov##cmp_kind rD_LO, rN_LO ; /* result is N instead when the condition holds */ \
+        mov##cmp_kind rD_HI, rN_HI
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_add_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_add_4.S
new file mode 100644
index 000000000000..7877d6c46c11
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_add_4.S
@@ -0,0 +1,23 @@
+/*===-- sync_fetch_and_add_4.S - ------------------------------------------===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_add_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+/* Note: "adds" would be 2 bytes shorter than "add"; plain "add" is what is emitted here. */
+#define add_4(dst, lhs, rhs)  add dst, lhs, rhs
+
+SYNC_OP_4(add_4)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_add_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_add_8.S
new file mode 100644
index 000000000000..1df07a342a1b
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_add_8.S
@@ -0,0 +1,26 @@
+/*===-- sync_fetch_and_add_8.S - ------------------------------------------===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_add_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'
+#define add_8(dst_lo, dst_hi, lhs_lo, lhs_hi, rhs_lo, rhs_hi) \
+    adds dst_lo, lhs_lo, rhs_lo ; /* low words; the carry feeds the high-word add */ \
+    adc dst_hi, lhs_hi, rhs_hi
+
+SYNC_OP_8(add_8)
+#endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_and_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_and_4.S
new file mode 100644
index 000000000000..720ff02279cd
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_and_4.S
@@ -0,0 +1,22 @@
+/*===-- sync_fetch_and_and_4.S - ------------------------------------------===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_and_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#define and_4(dst, lhs, rhs)  and dst, lhs, rhs /* dst = lhs & rhs */
+
+SYNC_OP_4(and_4)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_and_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_and_8.S
new file mode 100644
index 000000000000..4f7b5ca7ab29
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_and_8.S
@@ -0,0 +1,26 @@
+/*===-- sync_fetch_and_and_8.S - ------------------------------------------===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_and_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'
+#define and_8(dst_lo, dst_hi, lhs_lo, lhs_hi, rhs_lo, rhs_hi) \
+    and dst_lo, lhs_lo, rhs_lo ; /* low words */ \
+    and dst_hi, lhs_hi, rhs_hi /* high words */
+
+SYNC_OP_8(and_8)
+#endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_max_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_max_4.S
new file mode 100644
index 000000000000..43da9c7d4067
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_max_4.S
@@ -0,0 +1,22 @@
+/*===-- sync_fetch_and_max_4.S - ------------------------------------------===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_max_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#define max_4(dst, lhs, rhs)  MINMAX_4(dst, lhs, rhs, gt) /* signed: pick lhs when lhs > rhs */
+
+SYNC_OP_4(max_4)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_max_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_max_8.S
new file mode 100644
index 000000000000..898fc6202ac8
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_max_8.S
@@ -0,0 +1,24 @@
+/*===-- sync_fetch_and_max_8.S - ------------------------------------------===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_max_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'
+#define max_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI)         MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, ge) /* ge, not gt: after cmp+sbcs the Z flag covers only the high word */
+
+SYNC_OP_8(max_8)
+#endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_min_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_min_4.S
new file mode 100644
index 000000000000..bba31a03aace
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_min_4.S
@@ -0,0 +1,22 @@
+/*===-- sync_fetch_and_min_4.S - ------------------------------------------===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_min_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#define min_4(dst, lhs, rhs) MINMAX_4(dst, lhs, rhs, lt) /* signed: pick lhs when lhs < rhs */
+
+SYNC_OP_4(min_4)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_min_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_min_8.S
new file mode 100644
index 000000000000..e7ccf9fb60ef
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_min_8.S
@@ -0,0 +1,24 @@
+/*===-- sync_fetch_and_min_8.S - ------------------------------------------===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_min_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'
+#define min_8(dst_lo, dst_hi, lhs_lo, lhs_hi, rhs_lo, rhs_hi)         MINMAX_8(dst_lo, dst_hi, lhs_lo, lhs_hi, rhs_lo, rhs_hi, lt) /* signed: pick lhs when lhs < rhs */
+
+SYNC_OP_8(min_8)
+#endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_nand_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_nand_4.S
new file mode 100644
index 000000000000..c13dd394588f
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_nand_4.S
@@ -0,0 +1,22 @@
+/*===-- sync_fetch_and_nand_4.S - -----------------------------------------===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_nand_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#define nand_4(rD, rN, rM)  and rD, rN, rM ; mvn rD, rD /* rD = ~(rN & rM); bic would give rN & ~rM, which is not a NAND */
+
+SYNC_OP_4(nand_4)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_nand_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_nand_8.S
new file mode 100644
index 000000000000..e8107ab3a33c
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_nand_8.S
@@ -0,0 +1,26 @@
+/*===-- sync_fetch_and_nand_8.S - ------------------------------------------===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_nand_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'
+#define nand_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) /* rD = ~(rN & rM) per word; bic would give rN & ~rM, which is not a NAND */ \
+    and rD_LO, rN_LO, rM_LO ; mvn rD_LO, rD_LO ; \
+    and rD_HI, rN_HI, rM_HI ; mvn rD_HI, rD_HI
+
+SYNC_OP_8(nand_8)
+#endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_or_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_or_4.S
new file mode 100644
index 000000000000..6726571a944f
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_or_4.S
@@ -0,0 +1,22 @@
+/*===-- sync_fetch_and_or_4.S - -------------------------------------------===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_or_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#define or_4(dst, lhs, rhs)  orr dst, lhs, rhs /* dst = lhs | rhs */
+
+SYNC_OP_4(or_4)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_or_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_or_8.S
new file mode 100644
index 000000000000..f7f162c7c3b3
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_or_8.S
@@ -0,0 +1,26 @@
+/*===-- sync_fetch_and_or_8.S - -------------------------------------------===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_or_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'
+#define or_8(dst_lo, dst_hi, lhs_lo, lhs_hi, rhs_lo, rhs_hi) \
+    orr dst_lo, lhs_lo, rhs_lo ; /* low words */ \
+    orr dst_hi, lhs_hi, rhs_hi /* high words */
+
+SYNC_OP_8(or_8)
+#endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_sub_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_sub_4.S
new file mode 100644
index 000000000000..b9326b14cdd5
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_sub_4.S
@@ -0,0 +1,23 @@
+/*===-- sync_fetch_and_sub_4.S - ------------------------------------------===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_sub_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+/* "subs" is 2 bytes shorter than "sub", yet "sub" is what is emitted here. NOTE(review): confirm intent. */
+#define sub_4(dst, lhs, rhs)  sub dst, lhs, rhs  /* dst = lhs - rhs, flags untouched */
+/* Instantiate __sync_fetch_and_sub_4 from the SYNC_OP_4 template (sync-ops.h). */
+SYNC_OP_4(sub_4)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_sub_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_sub_8.S
new file mode 100644
index 000000000000..6ce743e5ee9f
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_sub_8.S
@@ -0,0 +1,26 @@
+/*===-- sync_fetch_and_sub_8.S - ------------------------------------------===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_sub_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'  /* nothing is emitted for M-profile targets */
+#define sub_8(dLo, dHi, aLo, aHi, bLo, bHi) \
+    subs dLo, aLo, bLo ; /* low word, updates the carry flag */ \
+    sbc dHi, aHi, bHi    /* high word, subtract with that carry */
+/* Instantiate __sync_fetch_and_sub_8 from the SYNC_OP_8 template (sync-ops.h). */
+SYNC_OP_8(sub_8)
+#endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umax_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umax_4.S
new file mode 100644
index 000000000000..b8d19ff35057
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umax_4.S
@@ -0,0 +1,22 @@
+/*===-- sync_fetch_and_umax_4.S - ------------------------------------------===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_umax_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#define umax_4(dst, lhs, rhs)  MINMAX_4(dst, lhs, rhs, hi)  /* hi = ARM "unsigned higher" condition */
+/* Instantiate __sync_fetch_and_umax_4 from the SYNC_OP_4 template (sync-ops.h). */
+SYNC_OP_4(umax_4)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umax_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umax_8.S
new file mode 100644
index 000000000000..34442fd77454
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umax_8.S
@@ -0,0 +1,24 @@
+/*===-- sync_fetch_and_umax_8.S - ------------------------------------------===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_umax_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'  /* nothing is emitted for M-profile targets */
+#define umax_8(dLo, dHi, aLo, aHi, bLo, bHi)  MINMAX_8(dLo, dHi, aLo, aHi, bLo, bHi, hi)  /* hi = "unsigned higher" */
+/* Instantiate __sync_fetch_and_umax_8 from the SYNC_OP_8 template (sync-ops.h). */
+SYNC_OP_8(umax_8)
+#endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umin_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umin_4.S
new file mode 100644
index 000000000000..0998e3e10f58
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umin_4.S
@@ -0,0 +1,22 @@
+/*===-- sync_fetch_and_umin_4.S - ------------------------------------------===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_umin_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#define umin_4(dst, lhs, rhs)  MINMAX_4(dst, lhs, rhs, lo)  /* lo = ARM "unsigned lower" condition */
+/* Instantiate __sync_fetch_and_umin_4 from the SYNC_OP_4 template (sync-ops.h). */
+SYNC_OP_4(umin_4)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umin_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umin_8.S
new file mode 100644
index 000000000000..558f91390512
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_umin_8.S
@@ -0,0 +1,24 @@
+/*===-- sync_fetch_and_umin_8.S - ------------------------------------------===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_umin_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'  /* nothing is emitted for M-profile targets */
+#define umin_8(dLo, dHi, aLo, aHi, bLo, bHi)  MINMAX_8(dLo, dHi, aLo, aHi, bLo, bHi, lo)  /* lo = "unsigned lower" */
+/* Instantiate __sync_fetch_and_umin_8 from the SYNC_OP_8 template (sync-ops.h). */
+SYNC_OP_8(umin_8)
+#endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_xor_4.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_xor_4.S
new file mode 100644
index 000000000000..824f49146880
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_xor_4.S
@@ -0,0 +1,22 @@
+/*===-- sync_fetch_and_xor_4.S - ------------------------------------------===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_xor_4 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#define xor_4(dst, lhs, rhs)  eor dst, lhs, rhs  /* dst = lhs ^ rhs */
+/* Instantiate __sync_fetch_and_xor_4 from the SYNC_OP_4 template (sync-ops.h). */
+SYNC_OP_4(xor_4)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_xor_8.S b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_xor_8.S
new file mode 100644
index 000000000000..073fb9c20f25
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_fetch_and_xor_8.S
@@ -0,0 +1,26 @@
+/*===-- sync_fetch_and_xor_8.S - ------------------------------------------===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __sync_fetch_and_xor_8 function for the ARM
+ * architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "sync-ops.h"
+
+#if __ARM_ARCH_PROFILE != 'M'  /* nothing is emitted for M-profile targets */
+#define xor_8(dLo, dHi, aLo, aHi, bLo, bHi) \
+    eor dLo, aLo, bLo ; /* low word:  d = a ^ b */ \
+    eor dHi, aHi, bHi   /* high word: d = a ^ b */
+/* Instantiate __sync_fetch_and_xor_8 from the SYNC_OP_8 template (sync-ops.h). */
+SYNC_OP_8(xor_8)
+#endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/sync_synchronize.S b/contrib/compiler-rt/lib/builtins/arm/sync_synchronize.S
new file mode 100644
index 000000000000..61d1db910f0d
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/sync_synchronize.S
@@ -0,0 +1,38 @@
+//===-- sync_synchronize - Implement memory barrier * ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// When compiling a use of the gcc built-in __sync_synchronize() in thumb1 mode
+// the compiler may emit a call to __sync_synchronize.
+// On Darwin the implementation calls an OS supplied function named
+// OSMemoryBarrier and then returns; without __APPLE__ no code is emitted here.
+//
+
+	.text
+	.syntax unified
+
+#if __APPLE__
+
+	.p2align 2
+DEFINE_COMPILERRT_PRIVATE_FUNCTION(__sync_synchronize)
+	stmfd	sp!, {r7, lr}		// push r7 and the return address
+	add		r7, sp, #0		// r7 = sp
+	bl		_OSMemoryBarrier	// call the OS supplied barrier routine
+	ldmfd	sp!, {r7, pc}		// restore r7 and return (saved lr is loaded into pc)
+END_COMPILERRT_FUNCTION(__sync_synchronize)
+
+	// tell linker it can break up file at label boundaries
+	.subsections_via_symbols
+		
+#endif
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/truncdfsf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/truncdfsf2vfp.S
new file mode 100644
index 000000000000..682e54d3d294
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/truncdfsf2vfp.S
@@ -0,0 +1,33 @@
+//===-- truncdfsf2vfp.S - Implement truncdfsf2vfp -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern float __truncdfsf2vfp(double a);
+//
+// Converts double precision float to single precision result.
+// Uses Darwin calling convention where a double precision parameter is
+// passed in a R0/R1 pair and a single precision result is returned in R0.
+// With COMPILER_RT_ARMHF_TARGET the value is converted from d0 into s0 instead.
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__truncdfsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcvt.f32.f64 s0, d0  // hard-float: convert d0 directly into s0
+#else
+	vmov 	d7, r0, r1   // load double from r0/r1 pair
+	vcvt.f32.f64 s15, d7 // convert double to single (reduces precision)
+	vmov 	r0, s15      // return result in r0
+#endif
+	bx	lr
+END_COMPILERRT_FUNCTION(__truncdfsf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/udivmodsi4.S b/contrib/compiler-rt/lib/builtins/arm/udivmodsi4.S
new file mode 100644
index 000000000000..ee3950c9b0eb
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/udivmodsi4.S
@@ -0,0 +1,180 @@
+/*===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __udivmodsi4 (32-bit unsigned integer divide and
+ * modulus) function for the ARM 32-bit architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+	.syntax unified
+	.text
+	DEFINE_CODE_STATE
+
+@ unsigned int __udivmodsi4(unsigned int dividend, unsigned int divisor,
+@                           unsigned int *remainder)
+@   Calculate the quotient and remainder of the (unsigned) division.  The return
+@   value is the quotient, the remainder is placed in the variable.
+@   Registers: r0 = dividend (quotient on return), r1 = divisor, r2 = remainder pointer.
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
+#if __ARM_ARCH_EXT_IDIV__
+	tst     r1, r1
+	beq     LOCAL_LABEL(divby0)	/* divisor == 0 */
+	mov 	r3, r0			/* keep the dividend for the mls below */
+	udiv	r0, r3, r1		/* r0 = quotient */
+	mls 	r1, r0, r1, r3		/* r1 = dividend - quotient * divisor */
+	str 	r1, [r2]		/* *remainder = r1 */
+	bx  	lr
+#else
+	cmp	r1, #1
+	bcc	LOCAL_LABEL(divby0)	/* r1 < 1 (unsigned), i.e. divisor == 0 */
+	beq	LOCAL_LABEL(divby1)	/* divisor == 1 */
+	cmp	r0, r1
+	bcc	LOCAL_LABEL(quotient0)	/* dividend < divisor */
+	/*
+	 * Implement division using binary long division algorithm.
+	 *
+	 * r0 is the numerator, r1 the denominator.
+	 *
+	 * The code before JMP computes the correct shift I, so that
+	 * r0 and (r1 << I) have the highest bit set in the same position.
+	 * At the time of JMP, ip := .Ldiv0block - 12 * I.
+	 * This depends on the fixed instruction size of block.
+	 * For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes.
+	 *
+	 * block(shift) implements the test-and-update-quotient core.
+	 * It assumes (r1 << shift) can be computed without overflow and
+	 * that r0 < 2 * (r1 << shift). The quotient is stored in r3.
+	 */
+
+#  ifdef __ARM_FEATURE_CLZ
+	clz	ip, r0
+	clz	r3, r1
+	/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
+	sub	r3, r3, ip		/* r3 = I = clz(r1) - clz(r0) */
+#    if defined(USE_THUMB_2)
+	adr	ip, LOCAL_LABEL(div0block) + 1	/* + 1 sets bit 0, the Thumb bit for bx */
+	sub	ip, ip, r3, lsl #1	/* extra 2 * I, Thumb blocks are 14 bytes */
+#    else
+	adr	ip, LOCAL_LABEL(div0block)
+#    endif
+	sub	ip, ip, r3, lsl #2
+	sub	ip, ip, r3, lsl #3	/* with the lsl #2 above: ip -= 12 * I */
+	mov	r3, #0			/* quotient accumulator starts at 0 */
+	bx	ip			/* enter the unrolled chain at block(I) */
+#  else
+#    if defined(USE_THUMB_2)
+#    error THUMB mode requires CLZ or UDIV
+#    endif
+	str	r4, [sp, #-8]!		/* save r4, sp -= 8 */
+
+	mov	r4, r0			/* r4 = working copy of the dividend */
+	adr	ip, LOCAL_LABEL(div0block)
+
+	lsr	r3, r4, #16
+	cmp	r3, r1
+	movhs	r4, r3			/* if (r4 >> 16) >= r1: r4 >>= 16 ... */
+	subhs	ip, ip, #(16 * 12)	/* ... and enter 16 blocks earlier */
+
+	lsr	r3, r4, #8
+	cmp	r3, r1
+	movhs	r4, r3			/* same test for a shift of 8 */
+	subhs	ip, ip, #(8 * 12)
+
+	lsr	r3, r4, #4
+	cmp	r3, r1
+	movhs	r4, r3			/* same test for a shift of 4 */
+	subhs	ip, #(4 * 12)
+
+	lsr	r3, r4, #2
+	cmp	r3, r1
+	movhs	r4, r3			/* same test for a shift of 2 */
+	subhs	ip, ip, #(2 * 12)
+
+	/* Last block, no need to update r3 or r4. */
+	cmp	r1, r4, lsr #1
+	subls	ip, ip, #(1 * 12)	/* if r1 <= (r4 >> 1): one more block */
+
+	ldr	r4, [sp], #8	/* restore r4, we are done with it. */
+	mov	r3, #0			/* quotient accumulator starts at 0 */
+
+	JMP(ip)
+#  endif
+
+#define	IMM	#
+
+#define block(shift)                                                           \
+	cmp	r0, r1, lsl IMM shift;                                         \
+	ITT(hs);                                                               \
+	WIDE(addhs)	r3, r3, IMM (1 << shift);                              \
+	WIDE(subhs)	r0, r0, r1, lsl IMM shift
+
+	block(31)
+	block(30)
+	block(29)
+	block(28)
+	block(27)
+	block(26)
+	block(25)
+	block(24)
+	block(23)
+	block(22)
+	block(21)
+	block(20)
+	block(19)
+	block(18)
+	block(17)
+	block(16)
+	block(15)
+	block(14)
+	block(13)
+	block(12)
+	block(11)
+	block(10)
+	block(9)
+	block(8)
+	block(7)
+	block(6)
+	block(5)
+	block(4)
+	block(3)
+	block(2)
+	block(1)
+LOCAL_LABEL(div0block):
+	block(0)
+
+	str	r0, [r2]		/* *remainder = what is left in r0 */
+	mov	r0, r3			/* return the accumulated quotient */
+	JMP(lr)
+
+LOCAL_LABEL(quotient0):
+	str	r0, [r2]		/* dividend < divisor: remainder = dividend */
+	mov	r0, #0			/* quotient = 0 */
+	JMP(lr)
+
+LOCAL_LABEL(divby1):
+	mov	r3, #0
+	str	r3, [r2]		/* divisor == 1: remainder = 0 */
+	JMP(lr)				/* r0 is untouched, the dividend is the quotient */
+#endif /* __ARM_ARCH_EXT_IDIV__ */
+
+LOCAL_LABEL(divby0):
+	mov	r0, #0			/* division by zero: r0 = 0, *remainder is not written */
+#ifdef __ARM_EABI__
+	b	__aeabi_idiv0		/* branch (no link) to the EABI div-by-zero routine */
+#else
+	JMP(lr)
+#endif
+
+END_COMPILERRT_FUNCTION(__udivmodsi4)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/udivsi3.S b/contrib/compiler-rt/lib/builtins/arm/udivsi3.S
new file mode 100644
index 000000000000..6dea27d404ff
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/udivsi3.S
@@ -0,0 +1,264 @@
+/*===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __udivsi3 (32-bit unsigned integer divide)
+ * function for the ARM 32-bit architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+	.syntax unified
+	.text
+
+DEFINE_CODE_STATE
+
+	.p2align 2
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3)
+
+@ unsigned int __udivsi3(unsigned int dividend, unsigned int divisor)
+@   Calculate and return the quotient of the (unsigned) division.
+
+DEFINE_COMPILERRT_FUNCTION(__udivsi3)
+#if __ARM_ARCH_EXT_IDIV__
+	tst     r1, r1
+	beq     LOCAL_LABEL(divby0)	/* divisor == 0 */
+	udiv	r0, r0, r1		/* hardware divide: r0 = r0 / r1 */
+	bx  	lr
+
+LOCAL_LABEL(divby0):
+	mov     r0, #0
+#  ifdef __ARM_EABI__
+	b       __aeabi_idiv0
+#  else
+	JMP(lr)
+#  endif
+
+#else /* ! __ARM_ARCH_EXT_IDIV__ */
+	cmp	r1, #1
+	bcc	LOCAL_LABEL(divby0)	/* r1 < 1 (unsigned), i.e. divisor == 0 */
+#if defined(USE_THUMB_1)
+	bne LOCAL_LABEL(num_neq_denom)	/* taken when divisor != 1 */
+	JMP(lr)				/* divisor == 1: r0 is already the quotient */
+LOCAL_LABEL(num_neq_denom):
+#else
+	IT(eq)
+	JMPc(lr, eq)			/* divisor == 1: r0 is already the quotient */
+#endif
+	cmp	r0, r1
+#if defined(USE_THUMB_1)
+	bhs LOCAL_LABEL(num_ge_denom)
+	movs r0, #0			/* dividend < divisor: quotient = 0 */
+	JMP(lr)
+LOCAL_LABEL(num_ge_denom):
+#else
+	ITT(cc)
+	movcc	r0, #0			/* dividend < divisor: quotient = 0 */
+	JMPc(lr, cc)
+#endif
+
+	/*
+	 * Implement division using binary long division algorithm.
+	 *
+	 * r0 is the numerator, r1 the denominator.
+	 *
+	 * The code before JMP computes the correct shift I, so that
+	 * r0 and (r1 << I) have the highest bit set in the same position.
+	 * At the time of JMP, ip := .Ldiv0block - 12 * I.
+	 * This depends on the fixed instruction size of block.
+	 * For ARM mode, this is 12 Bytes, for Thumb-2 14, for Thumb-1 10 Bytes.
+	 *
+	 * block(shift) implements the test-and-update-quotient core.
+	 * It assumes (r1 << shift) can be computed without overflow and
+	 * that r0 < 2 * (r1 << shift). The quotient is stored in r3.
+	 */
+
+#  if defined(__ARM_FEATURE_CLZ)
+	clz	ip, r0
+	clz	r3, r1
+	/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
+	sub	r3, r3, ip		/* r3 = I = clz(r1) - clz(r0) */
+#    if defined(USE_THUMB_2)
+	adr	ip, LOCAL_LABEL(div0block) + 1	/* + 1 sets bit 0, the Thumb bit for bx */
+	sub	ip, ip, r3, lsl #1	/* extra 2 * I, Thumb-2 blocks are 14 bytes */
+#    else
+	adr	ip, LOCAL_LABEL(div0block)
+#    endif
+	sub	ip, ip, r3, lsl #2
+	sub	ip, ip, r3, lsl #3	/* with the lsl #2 above: ip -= 12 * I */
+	mov	r3, #0			/* quotient accumulator starts at 0 */
+	bx	ip			/* enter the unrolled chain at block(I) */
+#  else /* No CLZ Feature */
+#    if defined(USE_THUMB_2)
+#    error THUMB mode requires CLZ or UDIV
+#    endif
+#    if defined(USE_THUMB_1)
+#      define BLOCK_SIZE 10
+#    else
+#      define BLOCK_SIZE 12
+#    endif
+
+	mov	r2, r0			/* r2 = working copy of the dividend */
+#    if defined(USE_THUMB_1)
+	mov ip, r0			/* Thumb-1: park the dividend in ip ... */
+	adr r0, LOCAL_LABEL(div0block)	/* ... and build the jump target in r0 */
+	adds r0, #1			/* set bit 0 of the target address */
+#    else
+	adr	ip, LOCAL_LABEL(div0block)
+#    endif
+	lsrs	r3, r2, #16
+	cmp	r3, r1
+#    if defined(USE_THUMB_1)
+	blo LOCAL_LABEL(skip_16)
+	movs r2, r3			/* (r2 >> 16) >= r1: r2 >>= 16 ... */
+	subs r0, r0, #(16 * BLOCK_SIZE)	/* ... and enter 16 blocks earlier */
+LOCAL_LABEL(skip_16):
+#    else
+	movhs	r2, r3			/* if (r2 >> 16) >= r1: r2 >>= 16 ... */
+	subhs	ip, ip, #(16 * BLOCK_SIZE)	/* ... and enter 16 blocks earlier */
+#    endif
+
+	lsrs	r3, r2, #8
+	cmp	r3, r1
+#    if defined(USE_THUMB_1)
+	blo LOCAL_LABEL(skip_8)
+	movs r2, r3			/* same test for a shift of 8 */
+	subs r0, r0, #(8 * BLOCK_SIZE)
+LOCAL_LABEL(skip_8):
+#    else
+	movhs	r2, r3			/* same test for a shift of 8 */
+	subhs	ip, ip, #(8 * BLOCK_SIZE)
+#    endif
+
+	lsrs	r3, r2, #4
+	cmp	r3, r1
+#    if defined(USE_THUMB_1)
+	blo LOCAL_LABEL(skip_4)
+	movs r2, r3			/* same test for a shift of 4 */
+	subs r0, r0, #(4 * BLOCK_SIZE)
+LOCAL_LABEL(skip_4):
+#    else
+	movhs	r2, r3			/* same test for a shift of 4 */
+	subhs	ip, #(4 * BLOCK_SIZE)
+#    endif
+
+	lsrs	r3, r2, #2
+	cmp	r3, r1
+#    if defined(USE_THUMB_1)
+	blo LOCAL_LABEL(skip_2)
+	movs r2, r3			/* same test for a shift of 2 */
+	subs r0, r0, #(2 * BLOCK_SIZE)
+LOCAL_LABEL(skip_2):
+#    else
+	movhs	r2, r3			/* same test for a shift of 2 */
+	subhs	ip, ip, #(2 * BLOCK_SIZE)
+#    endif
+
+	/* Last block, no need to update r2 or r3. */
+#    if defined(USE_THUMB_1)
+	lsrs r3, r2, #1
+	cmp r3, r1
+	blo LOCAL_LABEL(skip_1)
+	subs r0, r0, #(1 * BLOCK_SIZE)	/* (r2 >> 1) >= r1: one more block */
+LOCAL_LABEL(skip_1):
+	movs r2, r0			/* r2 = computed entry address */
+	mov r0, ip			/* r0 = dividend again */
+	movs r3, #0			/* quotient accumulator starts at 0 */
+	JMP (r2)
+
+#    else
+	cmp	r1, r2, lsr #1
+	subls	ip, ip, #(1 * BLOCK_SIZE)	/* if r1 <= (r2 >> 1): one more block */
+
+	movs	r3, #0			/* quotient accumulator starts at 0 */
+
+	JMP(ip)
+#    endif
+#  endif /* __ARM_FEATURE_CLZ */
+
+
+#define	IMM	#
+	/* due to the range limit of branch in Thumb1, we have to place the
+		 block closer */
+LOCAL_LABEL(divby0):
+	movs	r0, #0			/* division by zero: r0 = 0 */
+#      if defined(__ARM_EABI__)
+	push {r7, lr}			/* save r7 and the return address around the call */
+	bl	__aeabi_idiv0 // due to relocation limit, can't use b.
+	pop  {r7, pc}			/* restore r7 and return */
+#      else
+	JMP(lr)
+#      endif
+
+
+#if defined(USE_THUMB_1)
+#define block(shift)                                                           \
+	lsls r2, r1, IMM shift;                                                      \
+	cmp r0, r2;                                                                  \
+	blo LOCAL_LABEL(block_skip_##shift);                                         \
+	subs r0, r0, r2;                                                             \
+	LOCAL_LABEL(block_skip_##shift) :;                                           \
+	adcs r3, r3 /* same as ((r3 << 1) | Carry). Carry is set if r0 >= r2. */
+
+	/* TODO: if current location counter is not word aligned, we don't
+		 need the .p2align and nop */
+	/* Label div0block must be word-aligned. First align block 31 */
+	.p2align 2
+	nop /* Padding to align div0block as 31 blocks = 310 bytes */
+
+#else
+#define block(shift)                                                           \
+	cmp	r0, r1, lsl IMM shift;                                         \
+	ITT(hs);                                                               \
+	WIDE(addhs)	r3, r3, IMM (1 << shift);                              \
+	WIDE(subhs)	r0, r0, r1, lsl IMM shift
+#endif
+
+	block(31)
+	block(30)
+	block(29)
+	block(28)
+	block(27)
+	block(26)
+	block(25)
+	block(24)
+	block(23)
+	block(22)
+	block(21)
+	block(20)
+	block(19)
+	block(18)
+	block(17)
+	block(16)
+	block(15)
+	block(14)
+	block(13)
+	block(12)
+	block(11)
+	block(10)
+	block(9)
+	block(8)
+	block(7)
+	block(6)
+	block(5)
+	block(4)
+	block(3)
+	block(2)
+	block(1)
+LOCAL_LABEL(div0block):
+	block(0)
+
+	mov	r0, r3			/* return the accumulated quotient */
+	JMP(lr)
+#endif /* __ARM_ARCH_EXT_IDIV__ */
+
+END_COMPILERRT_FUNCTION(__udivsi3)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/umodsi3.S b/contrib/compiler-rt/lib/builtins/arm/umodsi3.S
new file mode 100644
index 000000000000..069fad34cb9c
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/umodsi3.S
@@ -0,0 +1,158 @@
+/*===-- umodsi3.S - 32-bit unsigned integer modulus -----------------------===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __umodsi3 (32-bit unsigned integer modulus)
+ * function for the ARM 32-bit architecture.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+	.syntax unified
+	.text
+	DEFINE_CODE_STATE
+
+@ unsigned int __umodsi3(unsigned int dividend, unsigned int divisor)
+@   Calculate and return the remainder of the (unsigned) division.
+
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__umodsi3)
+#if __ARM_ARCH_EXT_IDIV__
+	tst     r1, r1
+	beq     LOCAL_LABEL(divby0)	/* divisor == 0 */
+	udiv	r2, r0, r1		/* r2 = quotient */
+	mls 	r0, r2, r1, r0		/* r0 = dividend - quotient * divisor */
+	bx  	lr
+#else
+	cmp	r1, #1
+	bcc	LOCAL_LABEL(divby0)	/* r1 < 1 (unsigned), i.e. divisor == 0 */
+	ITT(eq)
+	moveq	r0, #0			/* divisor == 1: remainder = 0 */
+	JMPc(lr, eq)
+	cmp	r0, r1
+	IT(cc)
+	JMPc(lr, cc)			/* dividend < divisor: r0 is already the remainder */
+	/*
+	 * Implement division using binary long division algorithm.
+	 *
+	 * r0 is the numerator, r1 the denominator.
+	 *
+	 * The code before JMP computes the correct shift I, so that
+	 * r0 and (r1 << I) have the highest bit set in the same position.
+	 * At the time of JMP, ip := .Ldiv0block - 8 * I.
+	 * This depends on the fixed instruction size of block.
+	 * For ARM mode, this is 8 Bytes, for THUMB mode 10 Bytes.
+	 *
+	 * block(shift) implements the test-and-subtract core.
+	 * It assumes (r1 << shift) can be computed without overflow and
+	 * that r0 < 2 * (r1 << shift). No quotient is kept, r0 becomes the remainder.
+	 */
+
+#  ifdef __ARM_FEATURE_CLZ
+	clz	ip, r0
+	clz	r3, r1
+	/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
+	sub	r3, r3, ip		/* r3 = I = clz(r1) - clz(r0) */
+#    if defined(USE_THUMB_2)
+	adr	ip, LOCAL_LABEL(div0block) + 1	/* + 1 sets bit 0, the Thumb bit for bx */
+	sub	ip, ip, r3, lsl #1	/* extra 2 * I, Thumb blocks are 10 bytes */
+#    else
+	adr	ip, LOCAL_LABEL(div0block)
+#    endif
+	sub	ip, ip, r3, lsl #3	/* ip -= 8 * I */
+	bx	ip			/* enter the unrolled chain at block(I) */
+#  else
+#    if defined(USE_THUMB_2)
+#    error THUMB mode requires CLZ or UDIV
+#    endif
+	mov	r2, r0			/* r2 = working copy of the dividend */
+	adr	ip, LOCAL_LABEL(div0block)
+
+	lsr	r3, r2, #16
+	cmp	r3, r1
+	movhs	r2, r3			/* if (r2 >> 16) >= r1: r2 >>= 16 ... */
+	subhs	ip, ip, #(16 * 8)	/* ... and enter 16 blocks earlier */
+
+	lsr	r3, r2, #8
+	cmp	r3, r1
+	movhs	r2, r3			/* same test for a shift of 8 */
+	subhs	ip, ip, #(8 * 8)
+
+	lsr	r3, r2, #4
+	cmp	r3, r1
+	movhs	r2, r3			/* same test for a shift of 4 */
+	subhs	ip, #(4 * 8)
+
+	lsr	r3, r2, #2
+	cmp	r3, r1
+	movhs	r2, r3			/* same test for a shift of 2 */
+	subhs	ip, ip, #(2 * 8)
+
+	/* Last block, no need to update r2 or r3. */
+	cmp	r1, r2, lsr #1
+	subls	ip, ip, #(1 * 8)	/* if r1 <= (r2 >> 1): one more block */
+
+	JMP(ip)
+#  endif
+
+#define	IMM	#
+
+#define block(shift)                                                           \
+	cmp	r0, r1, lsl IMM shift;                                         \
+	IT(hs);                                                                \
+	WIDE(subhs)	r0, r0, r1, lsl IMM shift
+
+	block(31)
+	block(30)
+	block(29)
+	block(28)
+	block(27)
+	block(26)
+	block(25)
+	block(24)
+	block(23)
+	block(22)
+	block(21)
+	block(20)
+	block(19)
+	block(18)
+	block(17)
+	block(16)
+	block(15)
+	block(14)
+	block(13)
+	block(12)
+	block(11)
+	block(10)
+	block(9)
+	block(8)
+	block(7)
+	block(6)
+	block(5)
+	block(4)
+	block(3)
+	block(2)
+	block(1)
+LOCAL_LABEL(div0block):
+	block(0)
+	JMP(lr)				/* r0 now holds the remainder */
+#endif /* __ARM_ARCH_EXT_IDIV__ */
+
+LOCAL_LABEL(divby0):
+	mov	r0, #0			/* division by zero: r0 = 0 */
+#ifdef __ARM_EABI__
+	b	__aeabi_idiv0		/* branch (no link) to the EABI div-by-zero routine */
+#else
+	JMP(lr)
+#endif
+
+END_COMPILERRT_FUNCTION(__umodsi3)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/unorddf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/unorddf2vfp.S
new file mode 100644
index 000000000000..6625fa8a3119
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/unorddf2vfp.S
@@ -0,0 +1,37 @@
+//===-- unorddf2vfp.S - Implement unorddf2vfp ------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __unorddf2vfp(double a, double b);
+//
+// Returns one iff a or b is NaN, zero otherwise.
+// Uses Darwin calling convention where double precision arguments are passed
+// in GPR pairs; with COMPILER_RT_ARMHF_TARGET they are compared in d0/d1.
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__unorddf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f64 d0, d1		// hard-float: compare the arguments in place
+#else
+	vmov 	d6, r0, r1	// load r0/r1 pair in double register
+	vmov 	d7, r2, r3	// load r2/r3 pair in double register
+	vcmp.f64 d6, d7
+#endif
+	vmrs	apsr_nzcv, fpscr	// copy the FP compare flags into the APSR
+	ITE(vs)
+	movvs	r0, #1      // set result register to 1 if "overflow" (any NaNs)
+	movvc	r0, #0      // otherwise the result is 0
+	bx	lr
+END_COMPILERRT_FUNCTION(__unorddf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/contrib/compiler-rt/lib/builtins/arm/unordsf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/unordsf2vfp.S
new file mode 100644
index 000000000000..0b5da2ba3e17
--- /dev/null
+++ b/contrib/compiler-rt/lib/builtins/arm/unordsf2vfp.S
@@ -0,0 +1,37 @@
+//===-- unordsf2vfp.S - Implement unordsf2vfp -----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __unordsf2vfp(float a, float b);
+//
+// Returns one iff a or b is NaN, zero otherwise.
+// Uses Darwin calling convention where single precision arguments are passed
+// like 32-bit ints; with COMPILER_RT_ARMHF_TARGET they are compared in s0/s1.
+//
+	.syntax unified
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__unordsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f32 s0, s1	    // hard-float: compare the arguments in place
+#else
+	vmov	s14, r0     // move from GPR 0 to float register
+	vmov	s15, r1	    // move from GPR 1 to float register
+	vcmp.f32 s14, s15
+#endif
+	vmrs	apsr_nzcv, fpscr	// copy the FP compare flags into the APSR
+	ITE(vs)
+	movvs	r0, #1      // set result register to 1 if "overflow" (any NaNs)
+	movvc	r0, #0      // otherwise the result is 0
+	bx	lr
+END_COMPILERRT_FUNCTION(__unordsf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+