Optimize strchr for x86.
* libc/machine/i386/strchr.S (strchr): Pre-align data so unaligned searches aren't penalized. Special-case searching for 0.
This commit is contained in:
parent 804c0cc6d0
commit 4962a9453a
|
@ -1,3 +1,9 @@
|
|||
2008-05-21 Eric Blake <ebb9@byu.net>
|
||||
|
||||
Optimize strchr for x86.
|
||||
* libc/machine/i386/strchr.S (strchr): Pre-align data so unaligned
|
||||
searches aren't penalized. Special-case searching for 0.
|
||||
|
||||
2008-05-20 Nick Clifton <nickc@redhat.com>
|
||||
|
||||
* libc/sys/sysnecv850/crt0.S (___dso_handle): Define (weak).
|
||||
|
@ -5,7 +11,7 @@
|
|||
2008-05-20 DJ Delorie <dj@redhat.com>
|
||||
|
||||
* libc/sys/sysnecv850/isatty.c (_isatty): Renamed from isatty.
|
||||
|
||||
|
||||
2008-05-14 Jeff Johnston <jjohnstn@redhat.com>
|
||||
|
||||
* libc/include/sys/reent.h: Change _REENT_INIT... macros to
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* ====================================================
|
||||
* Copyright (C) 1998, 2002 by Red Hat Inc. All rights reserved.
|
||||
* Copyright (C) 1998, 2002, 2008 by Red Hat Inc. All rights reserved.
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software is freely granted, provided that this notice
|
||||
|
@ -9,7 +9,7 @@
|
|||
*/
|
||||
|
||||
#include "i386mach.h"
|
||||
|
||||
|
||||
.global SYM (strchr)
|
||||
SOTYPE_FUNCTION(strchr)
|
||||
|
||||
|
@ -21,14 +21,45 @@ SYM (strchr):
|
|||
pushl ebx
|
||||
xorl ebx,ebx
|
||||
movl 8(ebp),edi
|
||||
movb 12(ebp),bl
|
||||
addb 12(ebp),bl
|
||||
|
||||
#ifndef __OPTIMIZE_SIZE__
|
||||
/* check if string is aligned, if not do check one byte at a time */
|
||||
#ifndef __OPTIMIZE_SIZE__
|
||||
/* Special case strchr(p,0). */
|
||||
je L25
|
||||
|
||||
/* Do byte-wise checks until string is aligned. */
|
||||
test $3,edi
|
||||
jne L9
|
||||
je L5
|
||||
movl edi,eax
|
||||
movb (eax),cl
|
||||
testb cl,cl
|
||||
je L14
|
||||
cmpb bl,cl
|
||||
je L19
|
||||
incl edi
|
||||
|
||||
test $3,edi
|
||||
je L5
|
||||
movl edi,eax
|
||||
movb (eax),cl
|
||||
testb cl,cl
|
||||
je L14
|
||||
cmpb bl,cl
|
||||
je L19
|
||||
incl edi
|
||||
|
||||
test $3,edi
|
||||
je L5
|
||||
movl edi,eax
|
||||
movb (eax),cl
|
||||
testb cl,cl
|
||||
je L14
|
||||
cmpb bl,cl
|
||||
je L19
|
||||
incl edi
|
||||
|
||||
/* create 4 byte mask which is just the desired byte repeated 4 times */
|
||||
L5:
|
||||
movl ebx,ecx
|
||||
sall $8,ebx
|
||||
subl $4,edi
|
||||
|
@ -49,15 +80,14 @@ L10:
|
|||
testl $-2139062144,edx
|
||||
jne L9
|
||||
|
||||
movl ebx,eax
|
||||
xorl ecx,eax
|
||||
leal -16843009(eax),edx
|
||||
notl eax
|
||||
andl eax,edx
|
||||
xorl ebx,ecx
|
||||
leal -16843009(ecx),edx
|
||||
notl ecx
|
||||
andl ecx,edx
|
||||
testl $-2139062144,edx
|
||||
je L10
|
||||
#endif /* not __OPTIMIZE_SIZE__ */
|
||||
|
||||
|
||||
/* loop while (*s && *s++ != c) */
|
||||
L9:
|
||||
leal -1(edi),eax
|
||||
|
@ -69,7 +99,7 @@ L15:
|
|||
je L14
|
||||
cmpb bl,dl
|
||||
jne L15
|
||||
|
||||
|
||||
L14:
|
||||
/* if (*s == c) return address otherwise return NULL */
|
||||
cmpb bl,(eax)
|
||||
|
@ -83,3 +113,60 @@ L19:
|
|||
leave
|
||||
ret
|
||||
|
||||
#ifndef __OPTIMIZE_SIZE__
|
||||
/* Special case strchr(p,0). */
|
||||
#if 0
|
||||
/* Hideous performance on modern machines. */
|
||||
L25:
|
||||
cld
|
||||
movl $-1,ecx
|
||||
xor eax,eax
|
||||
repnz
|
||||
scasb
|
||||
leal -1(edi),eax
|
||||
jmp L19
|
||||
#endif
|
||||
L25:
|
||||
/* Do byte-wise checks until string is aligned. */
|
||||
test $3,edi
|
||||
je L26
|
||||
movl edi,eax
|
||||
movb (eax),cl
|
||||
testb cl,cl
|
||||
je L19
|
||||
incl edi
|
||||
|
||||
test $3,edi
|
||||
je L26
|
||||
movl edi,eax
|
||||
movb (eax),cl
|
||||
testb cl,cl
|
||||
je L19
|
||||
incl edi
|
||||
|
||||
test $3,edi
|
||||
je L26
|
||||
movl edi,eax
|
||||
movb (eax),cl
|
||||
testb cl,cl
|
||||
je L19
|
||||
incl edi
|
||||
|
||||
L26:
|
||||
subl $4,edi
|
||||
|
||||
/* loop performing 4 byte mask checking for desired 0 byte */
|
||||
.p2align 4,,7
|
||||
L27:
|
||||
addl $4,edi
|
||||
movl (edi),ecx
|
||||
leal -16843009(ecx),edx
|
||||
movl ecx,eax
|
||||
notl eax
|
||||
andl eax,edx
|
||||
testl $-2139062144,edx
|
||||
je L27
|
||||
|
||||
jmp L9
|
||||
|
||||
#endif /* !__OPTIMIZE_SIZE__ */
|
||||
|
|
Loading…
Reference in New Issue