summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/string/i386/memset.s 93
1 files changed, 61 insertions, 32 deletions
diff --git a/src/string/i386/memset.s b/src/string/i386/memset.s
index 06ac923e..d00422c4 100644
--- a/src/string/i386/memset.s
+++ b/src/string/i386/memset.s
@@ -1,47 +1,76 @@
.global memset
.type memset,@function
memset:
- mov 8(%esp),%al
- push %edi
- mov %al,%ah
- mov %al,%dl
- mov 16(%esp),%ecx
- shl $16,%eax
- mov 8(%esp),%edi
- mov %dl,%al
- mov %dl,%ah
- cmp $16,%ecx
- jb 1f
+ mov 12(%esp),%ecx
+ cmp $62,%ecx
+ ja 2f
- mov %eax,-4(%edi,%ecx)
- shr $2,%ecx
- rep
- stosl
- mov 8(%esp),%eax
- pop %edi
- ret
-
-1: test %ecx,%ecx
+ mov 8(%esp),%dl
+ mov 4(%esp),%eax
+ test %ecx,%ecx
jz 1f
- mov %al,(%edi)
- mov %al,-1(%edi,%ecx)
+ mov %dl,%dh
+
+ mov %dl,(%eax)
+ mov %dl,-1(%eax,%ecx)
cmp $2,%ecx
jbe 1f
- mov %al,1(%edi)
- mov %al,-2(%edi,%ecx)
- cmp $4,%ecx
+ mov %dx,1(%eax)
+ mov %dx,(-1-2)(%eax,%ecx)
+ cmp $6,%ecx
jbe 1f
- mov %eax,(%edi)
- mov %eax,-4(%edi,%ecx)
- cmp $8,%ecx
+ shl $16,%edx
+ mov 8(%esp),%dl
+ mov 8(%esp),%dh
+
+ mov %edx,(1+2)(%eax)
+ mov %edx,(-1-2-4)(%eax,%ecx)
+ cmp $14,%ecx
jbe 1f
- mov %eax,4(%edi)
- mov %eax,-8(%edi,%ecx)
+ mov %edx,(1+2+4)(%eax)
+ mov %edx,(1+2+4+4)(%eax)
+ mov %edx,(-1-2-4-8)(%eax,%ecx)
+ mov %edx,(-1-2-4-4)(%eax,%ecx)
+ cmp $30,%ecx
+ jbe 1f
+
+ mov %edx,(1+2+4+8)(%eax)
+ mov %edx,(1+2+4+8+4)(%eax)
+ mov %edx,(1+2+4+8+8)(%eax)
+ mov %edx,(1+2+4+8+12)(%eax)
+ mov %edx,(-1-2-4-8-16)(%eax,%ecx)
+ mov %edx,(-1-2-4-8-12)(%eax,%ecx)
+ mov %edx,(-1-2-4-8-8)(%eax,%ecx)
+ mov %edx,(-1-2-4-8-4)(%eax,%ecx)
+
+1: ret
+
+2: movzbl 8(%esp),%eax
+ mov %edi,12(%esp)
+ imul $0x1010101,%eax
+ mov 4(%esp),%edi
+ test $15,%edi
+ mov %eax,-4(%edi,%ecx)
+ jnz 2f
-1: mov 8(%esp),%eax
- pop %edi
+1: shr $2, %ecx
+ rep
+ stosl
+ mov 4(%esp),%eax
+ mov 12(%esp),%edi
ret
+
+2: xor %edx,%edx
+ sub %edi,%edx
+ and $15,%edx
+ mov %eax,(%edi)
+ mov %eax,4(%edi)
+ mov %eax,8(%edi)
+ mov %eax,12(%edi)
+ sub %edx,%ecx
+ add %edx,%edi
+ jmp 1b