aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorBruce Evans <bde@FreeBSD.org>2008-01-17 16:39:07 +0000
committerBruce Evans <bde@FreeBSD.org>2008-01-17 16:39:07 +0000
commitd2012f333362e46e807f43b56f29b4a2c98441c3 (patch)
tree41bf3b2fe1dad4b2008269aafa19f504b3e2bc77 /lib
parenta4b679d859ef2525eebadcfa325953ebcff24f8a (diff)
downloadsrc-d2012f333362e46e807f43b56f29b4a2c98441c3.tar.gz
src-d2012f333362e46e807f43b56f29b4a2c98441c3.zip
Add an alternative view of the bits in an 80-bit long double (64+16
instead of 32+32+15+1) on all arches that have such long doubles (amd64, ia64 and i386). Large objects should be accessed in large units, and the 32+32+15+1[+padding] decomposition asks for almost the opposite of that, sometimes resulting in very slow accesses depending on how well the compiler ignores what we ask for and converts to the best units for the given machine. E.g., on Athlons, there is a 10-20 cycle penalty for accessing the middle 32-bit word immediately after an 80-bit store. Whether actually using the alternative view is better is very machine-dependent. A 32+32+16 view is probably best with old 32-bit systems and gcc through 4.2.1. The compiler should mostly avoid the view and generate best accesses, but gcc-4.2.1 is far from doing that. I think 64+16 is best for now. Similarly for doubles -- they should be using 64+0 especially on 64-bit machines, but fdlibm uses 32+32 extensively for them. Fortunately, in 64-bit mode for doubles, gcc already ignores the 32+32-bit view and generates best accesses in many cases.
Notes
Notes: svn path=/head/; revision=175402
Diffstat (limited to 'lib')
-rw-r--r--lib/libc/amd64/_fpmath.h7
-rw-r--r--lib/libc/i386/_fpmath.h7
-rw-r--r--lib/libc/ia64/_fpmath.h13
3 files changed, 24 insertions, 3 deletions
diff --git a/lib/libc/amd64/_fpmath.h b/lib/libc/amd64/_fpmath.h
index 8f8cf6fb8075..c2a73848d6a1 100644
--- a/lib/libc/amd64/_fpmath.h
+++ b/lib/libc/amd64/_fpmath.h
@@ -36,6 +36,11 @@ union IEEEl2bits {
unsigned int junkl :16;
unsigned int junkh :32;
} bits;
+ struct {
+ unsigned long man :64;
+ unsigned int expsign :16;
+ unsigned long junk :48;
+ } xbits;
};
#define LDBL_NBIT 0x80000000
@@ -47,4 +52,4 @@ union IEEEl2bits {
#define LDBL_TO_ARRAY32(u, a) do { \
(a)[0] = (uint32_t)(u).bits.manl; \
(a)[1] = (uint32_t)(u).bits.manh; \
-} while(0)
+} while (0)
diff --git a/lib/libc/i386/_fpmath.h b/lib/libc/i386/_fpmath.h
index 80e764f8eacf..4f1f5f4a5a16 100644
--- a/lib/libc/i386/_fpmath.h
+++ b/lib/libc/i386/_fpmath.h
@@ -35,6 +35,11 @@ union IEEEl2bits {
unsigned int sign :1;
unsigned int junk :16;
} bits;
+ struct {
+ unsigned long long man :64;
+ unsigned int expsign :16;
+ unsigned int junk :16;
+ } xbits;
};
#define LDBL_NBIT 0x80000000
@@ -46,4 +51,4 @@ union IEEEl2bits {
#define LDBL_TO_ARRAY32(u, a) do { \
(a)[0] = (uint32_t)(u).bits.manl; \
(a)[1] = (uint32_t)(u).bits.manh; \
-} while(0)
+} while (0)
diff --git a/lib/libc/ia64/_fpmath.h b/lib/libc/ia64/_fpmath.h
index 7f24e768ff2d..936ce23b6ef3 100644
--- a/lib/libc/ia64/_fpmath.h
+++ b/lib/libc/ia64/_fpmath.h
@@ -46,6 +46,17 @@ union IEEEl2bits {
unsigned int manl :32;
#endif
} bits;
+ struct {
+#if _BYTE_ORDER == _LITTLE_ENDIAN
+ unsigned long man :64;
+ unsigned int expsign :16;
+ unsigned long junk :48;
+#else /* _BIG_ENDIAN */
+ unsigned long junk :48;
+ unsigned int expsign :16;
+ unsigned long man :64;
+#endif
+ } xbits;
};
#if _BYTE_ORDER == _LITTLE_ENDIAN
@@ -68,4 +79,4 @@ union IEEEl2bits {
#define LDBL_TO_ARRAY32(u, a) do { \
(a)[0] = (uint32_t)(u).bits.manl; \
(a)[1] = (uint32_t)(u).bits.manh; \
-} while(0)
+} while (0)