author	Mateusz Guzik <mjg@FreeBSD.org>	2018-11-15 20:28:35 +0000
committer	Mateusz Guzik <mjg@FreeBSD.org>	2018-11-15 20:28:35 +0000
commit	ad2ff705a458c48d540a2fea4917cebad47deb82 (patch)
tree	009942221e4fab98730e6351a03ac014c6b31b11 /lib/libc/amd64
parent	6fff6344554e70f00588f2f26dcb223904096044 (diff)
amd64: sync up libc memset with the kernel version
- tidy up memset to have rax set earlier for small sizes
- finish the tail in memset with an overlapping store
- align memset buffers to 16 bytes before using rep stos

Sponsored by:	The FreeBSD Foundation
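The overlapping-store tail named above avoids a per-byte fixup loop after the wide stores: once the 8-byte stores have covered len & ~7 bytes, the last 1-7 bytes are finished with one unaligned 8-byte store that ends exactly at the end of the buffer, deliberately re-writing a few bytes that were already set. A minimal C model of the idea (names are illustrative, not from the patch; valid only for len >= 8, which the large-size path guarantees):

	#include <stddef.h>
	#include <stdint.h>
	#include <string.h>

	/*
	 * C model of the overlapping-store tail. The loop stands in for
	 * "rep stosq"; the final memcpy is the single store that finishes
	 * the last 1-7 bytes by ending exactly at d + len, overlapping
	 * bytes the loop already wrote.
	 */
	static void
	set_tail_model(unsigned char *d, uint64_t pattern, size_t len)
	{
		size_t i;

		for (i = 0; i + 8 <= len; i += 8)	/* rep stosq */
			memcpy(d + i, &pattern, 8);
		if (len & 7)				/* 1-7 leftover bytes */
			memcpy(d + len - 8, &pattern, 8); /* overlapping store */
	}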
Notes:
	svn path=/head/; revision=340464
Diffstat (limited to 'lib/libc/amd64')
-rw-r--r--	lib/libc/amd64/string/memset.S	58
1 file changed, 39 insertions(+), 19 deletions(-)
diff --git a/lib/libc/amd64/string/memset.S b/lib/libc/amd64/string/memset.S
index 7b65645345e4..a68d478b7dcf 100644
--- a/lib/libc/amd64/string/memset.S
+++ b/lib/libc/amd64/string/memset.S
@@ -31,12 +31,14 @@
#include <machine/asm.h>
__FBSDID("$FreeBSD$");
+#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */
+
.macro MEMSET erms
- movq %rdi,%r9
+ movq %rdi,%rax
movq %rdx,%rcx
movzbq %sil,%r8
- movabs $0x0101010101010101,%rax
- imulq %r8,%rax
+ movabs $0x0101010101010101,%r10
+ imulq %r8,%r10
cmpq $32,%rcx
jb 1016f
@@ -45,10 +47,10 @@ __FBSDID("$FreeBSD$");
ja 1256f
1032:
- movq %rax,(%rdi)
- movq %rax,8(%rdi)
- movq %rax,16(%rdi)
- movq %rax,24(%rdi)
+ movq %r10,(%rdi)
+ movq %r10,8(%rdi)
+ movq %r10,16(%rdi)
+ movq %r10,24(%rdi)
leaq 32(%rdi),%rdi
subq $32,%rcx
cmpq $32,%rcx
@@ -58,54 +60,72 @@ __FBSDID("$FreeBSD$");
1016:
cmpb $16,%cl
jl 1008f
- movq %rax,(%rdi)
- movq %rax,8(%rdi)
+ movq %r10,(%rdi)
+ movq %r10,8(%rdi)
subb $16,%cl
jz 1000f
leaq 16(%rdi),%rdi
1008:
cmpb $8,%cl
jl 1004f
- movq %rax,(%rdi)
+ movq %r10,(%rdi)
subb $8,%cl
jz 1000f
leaq 8(%rdi),%rdi
1004:
cmpb $4,%cl
jl 1002f
- movl %eax,(%rdi)
+ movl %r10d,(%rdi)
subb $4,%cl
jz 1000f
leaq 4(%rdi),%rdi
1002:
cmpb $2,%cl
jl 1001f
- movw %ax,(%rdi)
+ movw %r10w,(%rdi)
subb $2,%cl
jz 1000f
leaq 2(%rdi),%rdi
1001:
cmpb $1,%cl
jl 1000f
- movb %al,(%rdi)
+ movb %r10b,(%rdi)
1000:
- movq %r9,%rax
ret
-
+ ALIGN_TEXT
1256:
+ movq %rdi,%r9
+ movq %r10,%rax
+ testl $15,%edi
+ jnz 3f
+1:
.if \erms == 1
rep
stosb
+ movq %r9,%rax
.else
+ movq %rcx,%rdx
shrq $3,%rcx
rep
stosq
- movq %rdx,%rcx
- andb $7,%cl
- jne 1004b
-.endif
movq %r9,%rax
+ andl $7,%edx
+ jnz 2f
+ ret
+2:
+ movq %r10,-8(%rdi,%rdx)
+.endif
ret
+ ALIGN_TEXT
+3:
+ movq %r10,(%rdi)
+ movq %r10,8(%rdi)
+ movq %rdi,%r8
+ andq $15,%r8
+ leaq -16(%rcx,%r8),%rcx
+ neg %r8
+ leaq 16(%rdi,%r8),%rdi
+ jmp 1b
.endm
ENTRY(memset)
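For the alignment change, the new block at label 3 in the large-size path writes 16 bytes at the possibly unaligned start, then rounds %rdi up to the next 16-byte boundary and shrinks %rcx by the 16 - (start & 15) bytes that advance covered, so rep stos then runs on an aligned buffer. A rough C model of that arithmetic (hypothetical names; assumes the same large-size precondition as the asm, which only takes this path above 256 bytes):

	#include <stddef.h>
	#include <stdint.h>
	#include <string.h>

	/*
	 * C model of the 16-byte alignment prologue at label 3: store 16
	 * bytes at the unaligned start, advance to the next 16-byte
	 * boundary, and charge the bytes covered against the count.
	 */
	static unsigned char *
	align_prologue_model(unsigned char *d, uint64_t pattern, size_t *lenp)
	{
		size_t mis = (uintptr_t)d & 15;	/* r8 = rdi & 15 */

		memcpy(d, &pattern, 8);		/* movq %r10,(%rdi)  */
		memcpy(d + 8, &pattern, 8);	/* movq %r10,8(%rdi) */
		*lenp -= 16 - mis;		/* leaq -16(%rcx,%r8),%rcx */
		return (d + (16 - mis));	/* leaq 16(%rdi,%r8),%rdi */
	}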