diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/string/x86_64/memset.s | 30 | 
1 file changed, 16 insertions, 14 deletions
diff --git a/src/string/x86_64/memset.s b/src/string/x86_64/memset.s
index fc06eef8..263336b5 100644
--- a/src/string/x86_64/memset.s
+++ b/src/string/x86_64/memset.s
@@ -1,41 +1,43 @@
 .global memset
 .type memset,@function
 memset:
-	and $0xff,%esi
+	movzbl %sil,%esi
 	mov $0x101010101010101,%rax
-	mov %rdx,%rcx
-	mov %rdi,%r8
+	# 64-bit imul has 3-7 cycles latency, launch early
 	imul %rsi,%rax
-	cmp $16,%rcx
+
+	cmp $16,%rdx
 	jb 1f
 
-	mov %rax,-8(%rdi,%rcx)
+	mov %rdx,%rcx
+	mov %rdi,%r8
 	shr $3,%rcx
+	mov %rax,-8(%rdi,%rdx)
 	rep
 	stosq
 	mov %r8,%rax
 	ret
 
-1:	test %ecx,%ecx
+1:	test %edx,%edx
 	jz 1f
 
 	mov %al,(%rdi)
-	mov %al,-1(%rdi,%rcx)
-	cmp $2,%ecx
+	mov %al,-1(%rdi,%rdx)
+	cmp $2,%edx
 	jbe 1f
 
 	mov %al,1(%rdi)
-	mov %al,-2(%rdi,%rcx)
-	cmp $4,%ecx
+	mov %al,-2(%rdi,%rdx)
+	cmp $4,%edx
 	jbe 1f
 
 	mov %eax,(%rdi)
-	mov %eax,-4(%rdi,%rcx)
-	cmp $8,%ecx
+	mov %eax,-4(%rdi,%rdx)
+	cmp $8,%edx
 	jbe 1f
 
 	mov %eax,4(%rdi)
-	mov %eax,-8(%rdi,%rcx)
+	mov %eax,-8(%rdi,%rdx)
 
-1:	mov %r8,%rax
+1:	mov %rdi,%rax
 	ret
