mirror of
git://sourceware.org/git/newlib-cygwin.git
synced 2025-02-19 07:22:14 +08:00
2008-05-26 Eric Blake <ebb9@byu.net>
Optimize the generic and x86 memchr.
* libc/string/memchr.c (memchr) [!__OPTIMIZE_SIZE__]: Pre-align pointer so unaligned searches aren't penalized.
* libc/machine/i386/memchr.S (memchr) [!__OPTIMIZE_SIZE__]: Word operations are faster than repnz byte searches.
This commit is contained in:
parent
a6bd72a278
commit
70bff2d503
@ -1,3 +1,11 @@
|
||||
2008-05-26 Eric Blake <ebb9@byu.net>
|
||||
|
||||
Optimize the generic and x86 memchr.
|
||||
* libc/string/memchr.c (memchr) [!__OPTIMIZE_SIZE__]:
|
||||
Pre-align pointer so unaligned searches aren't penalized.
|
||||
* libc/machine/i386/memchr.S (memchr) [!__OPTIMIZE_SIZE__]: Word
|
||||
operations are faster than repnz byte searches.
|
||||
|
||||
2008-05-26 Eric Blake <ebb9@byu.net>
|
||||
|
||||
Optimize the generic and x86 memset.
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* ====================================================
|
||||
* Copyright (C) 1998, 2002 by Red Hat Inc. All rights reserved.
|
||||
* Copyright (C) 1998, 2002, 2008 by Red Hat Inc. All rights reserved.
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software is freely granted, provided that this notice
|
||||
@ -9,21 +9,23 @@
|
||||
*/
|
||||
|
||||
#include "i386mach.h"
|
||||
|
||||
|
||||
.global SYM (memchr)
|
||||
SOTYPE_FUNCTION(memchr)
|
||||
|
||||
SYM (memchr):
|
||||
pushl ebp
|
||||
movl esp,ebp
|
||||
pushl edi
|
||||
movl 12(ebp),eax
|
||||
movl 16(ebp),ecx
|
||||
movl 8(ebp),edi
|
||||
pushl edi
|
||||
movzbl 12(ebp),eax
|
||||
movl 16(ebp),ecx
|
||||
movl 8(ebp),edi
|
||||
|
||||
xorl edx,edx
|
||||
testl ecx,ecx
|
||||
jz L1
|
||||
jz L20
|
||||
|
||||
#ifdef __OPTIMIZE_SIZE__
|
||||
|
||||
cld
|
||||
repnz
|
||||
@ -31,9 +33,79 @@ SYM (memchr):
|
||||
|
||||
setnz dl
|
||||
decl edi
|
||||
|
||||
#else /* !__OPTIMIZE_SIZE__ */
|
||||
/* Do byte-wise checks until string is aligned. */
|
||||
testl $3,edi
|
||||
je L5
|
||||
cmpb (edi),al
|
||||
je L15
|
||||
incl edi
|
||||
decl ecx
|
||||
je L20
|
||||
|
||||
testl $3,edi
|
||||
je L5
|
||||
cmpb (edi),al
|
||||
je L15
|
||||
incl edi
|
||||
decl ecx
|
||||
je L20
|
||||
|
||||
testl $3,edi
|
||||
je L5
|
||||
cmpb (edi),al
|
||||
je L15
|
||||
incl edi
|
||||
decl ecx
|
||||
je L20
|
||||
|
||||
/* Create a mask, then check a word at a time. */
|
||||
L5:
|
||||
movb al,ah
|
||||
movl eax,edx
|
||||
sall $16,edx
|
||||
orl edx,eax
|
||||
pushl ebx
|
||||
|
||||
.p2align 4,,7
|
||||
L8:
|
||||
subl $4,ecx
|
||||
jc L9
|
||||
movl (edi),edx
|
||||
addl $4,edi
|
||||
xorl eax,edx
|
||||
leal -16843009(edx),ebx
|
||||
notl edx
|
||||
andl edx,ebx
|
||||
testl $-2139062144,ebx
|
||||
je L8
|
||||
|
||||
subl $4,edi
|
||||
|
||||
L9:
|
||||
popl ebx
|
||||
xorl edx,edx
|
||||
addl $4,ecx
|
||||
je L20
|
||||
|
||||
/* Final byte-wise checks. */
|
||||
.p2align 4,,7
|
||||
L10:
|
||||
cmpb (edi),al
|
||||
je L15
|
||||
incl edi
|
||||
decl ecx
|
||||
jne L10
|
||||
|
||||
xorl edi,edi
|
||||
|
||||
#endif /* !__OPTIMIZE_SIZE__ */
|
||||
|
||||
L15:
|
||||
decl edx
|
||||
andl edi,edx
|
||||
L1:
|
||||
L20:
|
||||
movl edx,eax
|
||||
|
||||
leal -4(ebp),esp
|
||||
|
@ -20,7 +20,7 @@ DESCRIPTION
|
||||
This function searches memory starting at <<*<[src]>>> for the
|
||||
character <[c]>. The search only ends with the first
|
||||
occurrence of <[c]>, or after <[length]> characters; in
|
||||
particular, <<NULL>> does not terminate the search.
|
||||
particular, <<NUL>> does not terminate the search.
|
||||
|
||||
RETURNS
|
||||
If the character <[c]> is found within <[length]> characters
|
||||
@ -64,6 +64,9 @@ QUICKREF
|
||||
#error long int is not a 32bit or 64bit byte
|
||||
#endif
|
||||
|
||||
/* DETECTCHAR returns nonzero if (long)X contains the byte used
|
||||
to fill (long)MASK. */
|
||||
#define DETECTCHAR(X,MASK) (DETECTNULL(X ^ MASK))
|
||||
|
||||
_PTR
|
||||
_DEFUN (memchr, (src_void, c, length),
|
||||
@ -71,73 +74,61 @@ _DEFUN (memchr, (src_void, c, length),
|
||||
int c _AND
|
||||
size_t length)
|
||||
{
|
||||
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
|
||||
_CONST unsigned char *src = (_CONST unsigned char *) src_void;
|
||||
unsigned char d = c;
|
||||
|
||||
c &= 0xff;
|
||||
|
||||
while (length--)
|
||||
#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
|
||||
unsigned long *asrc;
|
||||
unsigned long mask;
|
||||
int i;
|
||||
|
||||
while (UNALIGNED (src))
|
||||
{
|
||||
if (*src == c)
|
||||
return (char *) src;
|
||||
if (!length--)
|
||||
return NULL;
|
||||
if (*src == d)
|
||||
return (void *) src;
|
||||
src++;
|
||||
}
|
||||
return NULL;
|
||||
#else
|
||||
_CONST unsigned char *src = (_CONST unsigned char *) src_void;
|
||||
unsigned long *asrc;
|
||||
unsigned long buffer;
|
||||
unsigned long mask;
|
||||
int i, j;
|
||||
|
||||
c &= 0xff;
|
||||
|
||||
/* If the size is small, or src is unaligned, then
|
||||
use the bytewise loop. We can hope this is rare. */
|
||||
if (!TOO_SMALL (length) && !UNALIGNED (src))
|
||||
if (!TOO_SMALL (length))
|
||||
{
|
||||
/* The fast code reads the ASCII one word at a time and only
|
||||
/* If we get this far, we know that length is large and src is
|
||||
word-aligned. */
|
||||
/* The fast code reads the source one word at a time and only
|
||||
performs the bytewise search on word-sized segments if they
|
||||
contain the search character, which is detected by XORing
|
||||
contain the search character, which is detected by XORing
|
||||
the word-sized segment with a word-sized block of the search
|
||||
character and then detecting for the presence of NULL in the
|
||||
character and then detecting for the presence of NUL in the
|
||||
result. */
|
||||
asrc = (unsigned long*) src;
|
||||
mask = 0;
|
||||
for (i = 0; i < LBLOCKSIZE; i++)
|
||||
mask = (mask << 8) + c;
|
||||
asrc = (unsigned long *) src;
|
||||
mask = d << 8 | d;
|
||||
mask = mask << 16 | mask;
|
||||
for (i = 32; i < LBLOCKSIZE * 8; i <<= 1)
|
||||
mask = (mask << i) | mask;
|
||||
|
||||
while (length >= LBLOCKSIZE)
|
||||
{
|
||||
buffer = *asrc;
|
||||
buffer ^= mask;
|
||||
if (DETECTNULL (buffer))
|
||||
{
|
||||
src = (unsigned char*) asrc;
|
||||
for ( j = 0; j < LBLOCKSIZE; j++ )
|
||||
{
|
||||
if (*src == c)
|
||||
return (char*) src;
|
||||
src++;
|
||||
}
|
||||
}
|
||||
if (DETECTCHAR (*asrc, mask))
|
||||
break;
|
||||
length -= LBLOCKSIZE;
|
||||
asrc++;
|
||||
}
|
||||
|
||||
|
||||
/* If there are fewer than LBLOCKSIZE characters left,
|
||||
then we resort to the bytewise loop. */
|
||||
|
||||
src = (unsigned char*) asrc;
|
||||
src = (unsigned char *) asrc;
|
||||
}
|
||||
|
||||
#endif /* not PREFER_SIZE_OVER_SPEED */
|
||||
|
||||
while (length--)
|
||||
{
|
||||
if (*src == c)
|
||||
return (char*) src;
|
||||
{
|
||||
if (*src == d)
|
||||
return (void *) src;
|
||||
src++;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
#endif /* not PREFER_SIZE_OVER_SPEED */
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user