summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/string/x86_64/memset.s 30
1 files changed, 16 insertions, 14 deletions
diff --git a/src/string/x86_64/memset.s b/src/string/x86_64/memset.s
index fc06eef8..263336b5 100644
--- a/src/string/x86_64/memset.s
+++ b/src/string/x86_64/memset.s
@@ -1,41 +1,43 @@
.global memset
.type memset,@function
memset:
- and $0xff,%esi
+ movzbl %sil,%esi
mov $0x101010101010101,%rax
- mov %rdx,%rcx
- mov %rdi,%r8
+ # 64-bit imul has 3-7 cycles latency, launch early
imul %rsi,%rax
- cmp $16,%rcx
+
+ cmp $16,%rdx
jb 1f
- mov %rax,-8(%rdi,%rcx)
+ mov %rdx,%rcx
+ mov %rdi,%r8
shr $3,%rcx
+ mov %rax,-8(%rdi,%rdx)
rep
stosq
mov %r8,%rax
ret
-1: test %ecx,%ecx
+1: test %edx,%edx
jz 1f
mov %al,(%rdi)
- mov %al,-1(%rdi,%rcx)
- cmp $2,%ecx
+ mov %al,-1(%rdi,%rdx)
+ cmp $2,%edx
jbe 1f
mov %al,1(%rdi)
- mov %al,-2(%rdi,%rcx)
- cmp $4,%ecx
+ mov %al,-2(%rdi,%rdx)
+ cmp $4,%edx
jbe 1f
mov %eax,(%rdi)
- mov %eax,-4(%rdi,%rcx)
- cmp $8,%ecx
+ mov %eax,-4(%rdi,%rdx)
+ cmp $8,%edx
jbe 1f
mov %eax,4(%rdi)
- mov %eax,-8(%rdi,%rcx)
+ mov %eax,-8(%rdi,%rdx)
-1: mov %r8,%rax
+1: mov %rdi,%rax
ret