diff options
author | Mateusz Guzik <mjg@FreeBSD.org> | 2018-11-30 20:58:08 +0000 |
---|---|---|
committer | Mateusz Guzik <mjg@FreeBSD.org> | 2018-11-30 20:58:08 +0000 |
commit | 94243af2da71038d14884433111d8724df14e69f (patch) | |
tree | 0a42bb13eb3a50f35e1741a9cca5530066db45f1 /lib/libc/amd64 | |
parent | 1489776d43e7decd9a0a6ba5faae437b0159f68e (diff) | |
download | src-94243af2da71038d14884433111d8724df14e69f.tar.gz src-94243af2da71038d14884433111d8724df14e69f.zip |
amd64: handle small memmove buffers with overlapping stores
Handling sizes of > 32 backwards will be updated later.
Reviewed by: kib (kernel part)
Sponsored by: The FreeBSD Foundation
Differential Revision: https://reviews.freebsd.org/D18387
Notes
Notes:
svn path=/head/; revision=341351
Diffstat (limited to 'lib/libc/amd64')
-rw-r--r-- | lib/libc/amd64/string/memmove.S | 92 |
1 files changed, 52 insertions, 40 deletions
diff --git a/lib/libc/amd64/string/memmove.S b/lib/libc/amd64/string/memmove.S index 54aa09538039..5a466f7a7d69 100644 --- a/lib/libc/amd64/string/memmove.S +++ b/lib/libc/amd64/string/memmove.S @@ -42,11 +42,19 @@ __FBSDID("$FreeBSD$"); * rsi - source * rdx - count * - * The macro possibly clobbers the above and: rcx, r8. - * It does not clobber rax, r10 nor r11. + * The macro possibly clobbers the above and: rcx, r8, r9, 10 + * It does not clobber rax nor r11. */ .macro MEMMOVE erms overlap begin end \begin + + /* + * For sizes 0..32 all data is read before it is written, so there + * is no correctness issue with direction of copying. + */ + cmpq $32,%rcx + jbe 101632f + .if \overlap == 1 movq %rdi,%r8 subq %rsi,%r8 @@ -54,13 +62,10 @@ __FBSDID("$FreeBSD$"); jb 2f .endif - cmpq $32,%rcx - jb 1016f - cmpq $256,%rcx ja 1256f -1032: +103200: movq (%rsi),%rdx movq %rdx,(%rdi) movq 8(%rsi),%rdx @@ -73,56 +78,62 @@ __FBSDID("$FreeBSD$"); leaq 32(%rdi),%rdi subq $32,%rcx cmpq $32,%rcx - jae 1032b + jae 103200b cmpb $0,%cl - jne 1016f + jne 101632f \end ret ALIGN_TEXT -1016: +101632: cmpb $16,%cl - jl 1008f + jl 100816f movq (%rsi),%rdx + movq 8(%rsi),%r8 + movq -16(%rsi,%rcx),%r9 + movq -8(%rsi,%rcx),%r10 movq %rdx,(%rdi) - movq 8(%rsi),%rdx - movq %rdx,8(%rdi) - subb $16,%cl - jz 1000f - leaq 16(%rsi),%rsi - leaq 16(%rdi),%rdi -1008: + movq %r8,8(%rdi) + movq %r9,-16(%rdi,%rcx) + movq %r10,-8(%rdi,%rcx) + \end + ret + ALIGN_TEXT +100816: cmpb $8,%cl - jl 1004f + jl 100408f movq (%rsi),%rdx + movq -8(%rsi,%rcx),%r8 movq %rdx,(%rdi) - subb $8,%cl - jz 1000f - leaq 8(%rsi),%rsi - leaq 8(%rdi),%rdi -1004: + movq %r8,-8(%rdi,%rcx,) + \end + ret + ALIGN_TEXT +100408: cmpb $4,%cl - jl 1002f + jl 100204f movl (%rsi),%edx + movl -4(%rsi,%rcx),%r8d movl %edx,(%rdi) - subb $4,%cl - jz 1000f - leaq 4(%rsi),%rsi - leaq 4(%rdi),%rdi -1002: + movl %r8d,-4(%rdi,%rcx) + \end + ret + ALIGN_TEXT +100204: cmpb $2,%cl - jl 1001f - movw (%rsi),%dx + jl 100001f + movzwl (%rsi),%edx + movzwl -2(%rsi,%rcx),%r8d movw %dx,(%rdi) - subb $2,%cl - jz 1000f - leaq 2(%rsi),%rsi - leaq 2(%rdi),%rdi -1001: + movw %r8w,-2(%rdi,%rcx) + \end + ret + ALIGN_TEXT +100001: cmpb $1,%cl - jl 1000f + jl 100000f movb (%rsi),%dl movb %dl,(%rdi) -1000: +100000: \end ret @@ -136,8 +147,8 @@ __FBSDID("$FreeBSD$"); rep movsq movq %rdx,%rcx - andb $7,%cl /* any bytes left? */ - jne 1004b + andl $7,%ecx /* any bytes left? */ + jne 100408b .endif \end ret @@ -247,6 +258,7 @@ __FBSDID("$FreeBSD$"); .endif .endm + .macro MEMMOVE_BEGIN movq %rdi,%rax movq %rdx,%rcx |