diff options
-rw-r--r-- | src/string/i386/memset.s | 93 |
1 files changed, 61 insertions, 32 deletions
diff --git a/src/string/i386/memset.s b/src/string/i386/memset.s index 06ac923e..d00422c4 100644 --- a/src/string/i386/memset.s +++ b/src/string/i386/memset.s @@ -1,47 +1,76 @@ .global memset .type memset,@function memset: - mov 8(%esp),%al - push %edi - mov %al,%ah - mov %al,%dl - mov 16(%esp),%ecx - shl $16,%eax - mov 8(%esp),%edi - mov %dl,%al - mov %dl,%ah - cmp $16,%ecx - jb 1f + mov 12(%esp),%ecx + cmp $62,%ecx + ja 2f - mov %eax,-4(%edi,%ecx) - shr $2,%ecx - rep - stosl - mov 8(%esp),%eax - pop %edi - ret - -1: test %ecx,%ecx + mov 8(%esp),%dl + mov 4(%esp),%eax + test %ecx,%ecx jz 1f - mov %al,(%edi) - mov %al,-1(%edi,%ecx) + mov %dl,%dh + + mov %dl,(%eax) + mov %dl,-1(%eax,%ecx) cmp $2,%ecx jbe 1f - mov %al,1(%edi) - mov %al,-2(%edi,%ecx) - cmp $4,%ecx + mov %dx,1(%eax) + mov %dx,(-1-2)(%eax,%ecx) + cmp $6,%ecx jbe 1f - mov %eax,(%edi) - mov %eax,-4(%edi,%ecx) - cmp $8,%ecx + shl $16,%edx + mov 8(%esp),%dl + mov 8(%esp),%dh + + mov %edx,(1+2)(%eax) + mov %edx,(-1-2-4)(%eax,%ecx) + cmp $14,%ecx jbe 1f - mov %eax,4(%edi) - mov %eax,-8(%edi,%ecx) + mov %edx,(1+2+4)(%eax) + mov %edx,(1+2+4+4)(%eax) + mov %edx,(-1-2-4-8)(%eax,%ecx) + mov %edx,(-1-2-4-4)(%eax,%ecx) + cmp $30,%ecx + jbe 1f + + mov %edx,(1+2+4+8)(%eax) + mov %edx,(1+2+4+8+4)(%eax) + mov %edx,(1+2+4+8+8)(%eax) + mov %edx,(1+2+4+8+12)(%eax) + mov %edx,(-1-2-4-8-16)(%eax,%ecx) + mov %edx,(-1-2-4-8-12)(%eax,%ecx) + mov %edx,(-1-2-4-8-8)(%eax,%ecx) + mov %edx,(-1-2-4-8-4)(%eax,%ecx) + +1: ret + +2: movzbl 8(%esp),%eax + mov %edi,12(%esp) + imul $0x1010101,%eax + mov 4(%esp),%edi + test $15,%edi + mov %eax,-4(%edi,%ecx) + jnz 2f -1: mov 8(%esp),%eax - pop %edi +1: shr $2, %ecx + rep + stosl + mov 4(%esp),%eax + mov 12(%esp),%edi ret + +2: xor %edx,%edx + sub %edi,%edx + and $15,%edx + mov %eax,(%edi) + mov %eax,4(%edi) + mov %eax,8(%edi) + mov %eax,12(%edi) + sub %edx,%ecx + add %edx,%edi + jmp 1b |