diff options
author | Mateusz Guzik <mjg@FreeBSD.org> | 2018-11-15 20:28:35 +0000 |
---|---|---|
committer | Mateusz Guzik <mjg@FreeBSD.org> | 2018-11-15 20:28:35 +0000 |
commit | ad2ff705a458c48d540a2fea4917cebad47deb82 (patch) | |
tree | 009942221e4fab98730e6351a03ac014c6b31b11 /lib/libc/amd64 | |
parent | 6fff6344554e70f00588f2f26dcb223904096044 (diff) | |
download | src-ad2ff705a458c48d540a2fea4917cebad47deb82.tar.gz src-ad2ff705a458c48d540a2fea4917cebad47deb82.zip |
amd64: sync up libc memset with the kernel version
- tidy up memset to have rax set earlier for small sizes
- finish the tail in memset with an overlapping store
- align memset buffers to 16 bytes before using rep stos
Sponsored by: The FreeBSD Foundation
Notes:
svn path=/head/; revision=340464
Diffstat (limited to 'lib/libc/amd64')
-rw-r--r-- | lib/libc/amd64/string/memset.S | 58 |
1 file changed, 39 insertions, 19 deletions
diff --git a/lib/libc/amd64/string/memset.S b/lib/libc/amd64/string/memset.S
index 7b65645345e4..a68d478b7dcf 100644
--- a/lib/libc/amd64/string/memset.S
+++ b/lib/libc/amd64/string/memset.S
@@ -31,12 +31,14 @@
 #include <machine/asm.h>
 __FBSDID("$FreeBSD$");
 
+#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */
+
 .macro MEMSET erms
-	movq	%rdi,%r9
+	movq	%rdi,%rax
 	movq	%rdx,%rcx
 	movzbq	%sil,%r8
-	movabs	$0x0101010101010101,%rax
-	imulq	%r8,%rax
+	movabs	$0x0101010101010101,%r10
+	imulq	%r8,%r10
 
 	cmpq	$32,%rcx
 	jb	1016f
@@ -45,10 +47,10 @@
 	ja	1256f
 
 1032:
-	movq	%rax,(%rdi)
-	movq	%rax,8(%rdi)
-	movq	%rax,16(%rdi)
-	movq	%rax,24(%rdi)
+	movq	%r10,(%rdi)
+	movq	%r10,8(%rdi)
+	movq	%r10,16(%rdi)
+	movq	%r10,24(%rdi)
 	leaq	32(%rdi),%rdi
 	subq	$32,%rcx
 	cmpq	$32,%rcx
@@ -58,54 +60,72 @@
 1016:
 	cmpb	$16,%cl
 	jl	1008f
-	movq	%rax,(%rdi)
-	movq	%rax,8(%rdi)
+	movq	%r10,(%rdi)
+	movq	%r10,8(%rdi)
 	subb	$16,%cl
 	jz	1000f
 	leaq	16(%rdi),%rdi
 1008:
 	cmpb	$8,%cl
 	jl	1004f
-	movq	%rax,(%rdi)
+	movq	%r10,(%rdi)
 	subb	$8,%cl
 	jz	1000f
 	leaq	8(%rdi),%rdi
 1004:
 	cmpb	$4,%cl
 	jl	1002f
-	movl	%eax,(%rdi)
+	movl	%r10d,(%rdi)
 	subb	$4,%cl
 	jz	1000f
 	leaq	4(%rdi),%rdi
 1002:
 	cmpb	$2,%cl
 	jl	1001f
-	movw	%ax,(%rdi)
+	movw	%r10w,(%rdi)
 	subb	$2,%cl
 	jz	1000f
 	leaq	2(%rdi),%rdi
 1001:
 	cmpb	$1,%cl
 	jl	1000f
-	movb	%al,(%rdi)
+	movb	%r10b,(%rdi)
 1000:
-	movq	%r9,%rax
 	ret
-
+	ALIGN_TEXT
 1256:
+	movq	%rdi,%r9
+	movq	%r10,%rax
+	testl	$15,%edi
+	jnz	3f
+1:
 .if \erms == 1
 	rep
 	stosb
+	movq	%r9,%rax
 .else
+	movq	%rcx,%rdx
 	shrq	$3,%rcx
 	rep
 	stosq
-	movq	%rdx,%rcx
-	andb	$7,%cl
-	jne	1004b
-.endif
 	movq	%r9,%rax
+	andl	$7,%edx
+	jnz	2f
+	ret
+2:
+	movq	%r10,-8(%rdi,%rdx)
+.endif
 	ret
+	ALIGN_TEXT
+3:
+	movq	%r10,(%rdi)
+	movq	%r10,8(%rdi)
+	movq	%rdi,%r8
+	andq	$15,%r8
+	leaq	-16(%rcx,%r8),%rcx
+	neg	%r8
+	leaq	16(%rdi,%r8),%rdi
+	jmp	1b
 .endm
 
 ENTRY(memset)