mirror of
git://sourceware.org/git/newlib-cygwin.git
synced 2025-02-20 16:01:10 +08:00
* libc/machine/sh/strncpy.S: New file.
* libc/machine/sh/Makefile.am: Add entry & rule for new file. * libc/machine/sh/Makefile.in: Regenerate.
This commit is contained in:
parent
d950ca774f
commit
f4b1060529
@ -1,3 +1,9 @@
|
||||
2003-09-29 J"orn Rennecke <joern.rennecke@superh.com>
|
||||
|
||||
* libc/machine/sh/strncpy.S: New file.
|
||||
* libc/machine/sh/Makefile.am: Add entry & rule for new file.
|
||||
* libc/machine/sh/Makefile.in: Regenerate.
|
||||
|
||||
2003-09-11 James E Wilson <wilson@specifixinc.com>
|
||||
|
||||
* MAINTAINERS: Update my e-mail address.
|
||||
|
@ -6,13 +6,18 @@ INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS)
|
||||
|
||||
noinst_LIBRARIES = lib.a
|
||||
|
||||
if SH64
|
||||
lib_a_SOURCES = memcpy.S memset.S setjmp.S strcpy.S strlen.S strcmp.S strncpy.S
|
||||
else
|
||||
lib_a_SOURCES = memcpy.S memset.S setjmp.S strcpy.S strlen.S strcmp.S
|
||||
endif
|
||||
|
||||
memcpy.o: asm.h
|
||||
memset.o: asm.h
|
||||
setjmp.o: asm.h
|
||||
strcpy.o: asm.h
|
||||
strcmp.o: asm.h
|
||||
strncpy.o: asm.h
|
||||
|
||||
ACLOCAL_AMFLAGS = -I ../../..
|
||||
CONFIG_STATUS_DEPENDENCIES = $(newlib_basedir)/configure.host
|
||||
|
@ -1,6 +1,6 @@
|
||||
# Makefile.in generated automatically by automake 1.4 from Makefile.am
|
||||
# Makefile.in generated automatically by automake 1.4-p5 from Makefile.am
|
||||
|
||||
# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1994, 1995-8, 1999, 2001 Free Software Foundation, Inc.
|
||||
# This Makefile.in is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
@ -88,8 +88,8 @@ AUTOMAKE_OPTIONS = cygnus
|
||||
INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS)
|
||||
|
||||
noinst_LIBRARIES = lib.a
|
||||
|
||||
lib_a_SOURCES = memcpy.S memset.S setjmp.S strcpy.S strlen.S strcmp.S
|
||||
@SH64_TRUE@lib_a_SOURCES = @SH64_TRUE@memcpy.S memset.S setjmp.S strcpy.S strlen.S strcmp.S strncpy.S
|
||||
@SH64_FALSE@lib_a_SOURCES = @SH64_FALSE@memcpy.S memset.S setjmp.S strcpy.S strlen.S strcmp.S
|
||||
|
||||
ACLOCAL_AMFLAGS = -I ../../..
|
||||
CONFIG_STATUS_DEPENDENCIES = $(newlib_basedir)/configure.host
|
||||
@ -103,7 +103,10 @@ DEFS = @DEFS@ -I. -I$(srcdir)
|
||||
CPPFLAGS = @CPPFLAGS@
|
||||
LIBS = @LIBS@
|
||||
lib_a_LIBADD =
|
||||
lib_a_OBJECTS = memcpy.o memset.o setjmp.o strcpy.o strlen.o strcmp.o
|
||||
@SH64_TRUE@lib_a_OBJECTS = memcpy.o memset.o setjmp.o strcpy.o strlen.o \
|
||||
@SH64_TRUE@strcmp.o strncpy.o
|
||||
@SH64_FALSE@lib_a_OBJECTS = memcpy.o memset.o setjmp.o strcpy.o \
|
||||
@SH64_FALSE@strlen.o strcmp.o
|
||||
CFLAGS = @CFLAGS@
|
||||
COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
|
||||
CCLD = $(CC)
|
||||
@ -327,6 +330,7 @@ memset.o: asm.h
|
||||
setjmp.o: asm.h
|
||||
strcpy.o: asm.h
|
||||
strcmp.o: asm.h
|
||||
strncpy.o: asm.h
|
||||
|
||||
# Tell versions [3.59,3.63) of GNU make to not export all variables.
|
||||
# Otherwise a system limit (for SysV at least) may be exceeded.
|
||||
|
209
newlib/libc/machine/sh/strncpy.S
Normal file
209
newlib/libc/machine/sh/strncpy.S
Normal file
@ -0,0 +1,209 @@
|
||||
/* Copyright 2003 SuperH Ltd. */
|
||||
|
||||
#include "asm.h"
|
||||
|
||||
#ifdef __SH5__
|
||||
#if __SHMEDIA__
|
||||
|
||||
/* ZPAD_MASK(src, dst): turn the result of a byte-wise compare against zero
   (the mcmpeq.b ..., r63, ... results used by the callers) into an AND mask.
   The callers AND it with the data to "mask out non-zero bytes after first
   zero byte".  Little endian: subtract 1 from src.  Big endian: the same
   subtraction, carried out on the byte-reversed value.  */
#ifdef __LITTLE_ENDIAN__
|
||||
#define ZPAD_MASK(src, dst) addi src, -1, dst
|
||||
#else
|
||||
#define ZPAD_MASK(src, dst) \
|
||||
byterev src, dst; addi dst, -1, dst; byterev dst, dst
|
||||
#endif
|
||||
|
||||
|
||||
/* We assume that the destination is not in the first 16 bytes of memory.
|
||||
A typical linker script will put the text section first, and as
|
||||
this code is longer than 16 bytes, you have to go out of your way
|
||||
to put data there. */
|
||||
/* strncpy, SHmedia version: entry and handling of the first quadword.
   Register roles, as listed in the comments at L_found0 and L_small:
   r2 = store address, r3 = read address, r4 = size,
   r20 = store end address, r5 = store end address minus 8.
   r2 is never the destination operand of any instruction in this routine.
   r18 is passed to ptabs before every returning blink, so it presumably
   holds the return address.  */
ENTRY(strncpy)
|
||||
pt L_small, tr2
|
||||
ldlo.q r3, 0, r0 /* r0 = first (possibly partial) quadword of the source */
|
||||
shlli r3, 3, r19 /* r19 = read address * 8 */
|
||||
mcmpeq.b r0, r63, r1 /* r1 = byte-wise compare of r0 against r63 (zero test) */
|
||||
SHHI r1, r19, r7 /* SHHI is not defined in this file (presumably asm.h); r7 nonzero = relevant zero found */
|
||||
add r2, r4, r20 /* r20 = store end address */
|
||||
addi r20, -8, r5 /* r5 = store end address - 8 */
|
||||
/* If the size is greater than 8, we know we can read beyond the first
|
||||
(possibly partial) quadword, and write out a full first and last
|
||||
(possibly unaligned and/or overlapping) quadword. */
|
||||
bge/u r2, r5, tr2 // L_small
|
||||
pt L_found0, tr0
|
||||
addi r2, 8, r22 /* r22 = store address + 8, the value L_found0 expects */
|
||||
bnei/u r7, 0, tr0 // L_found0
|
||||
ori r3, -8, r38 /* r38 = (r3 & 7) - 8 */
|
||||
pt L_end_early, tr1
|
||||
sub r2, r38, r22 /* r22 = r2 + 8 - (r3 & 7) */
|
||||
stlo.q r2, 0, r0 /* stlo.q/sthi.q pair: write a full first quadword at r2 */
|
||||
sthi.q r2, 7, r0
|
||||
sub r3, r2, r6 /* r6 = read address - store address */
|
||||
ldx.q r22, r6, r0 /* r0 = quadword at r22 + r6 */
|
||||
/* Before each iteration, check that we can store in full the next quad we
|
||||
are about to fetch. */
|
||||
addi r5, -8, r36 /* r36 = store end address - 16 */
|
||||
bgtu/u r22, r36, tr1 // L_end_early
|
||||
pt L_scan0, tr1
|
||||
/* Main copy loop.  Each pass advances r22 by 8, stores the quad held in r0 at
   r22 - 8 .. r22 - 1, leaves for L_found0 (tr0) if that quad contains a zero
   byte, and otherwise fetches the next quad from r22 + r6.
   The loop repeats while r36 (store end address - 16) >= r22.  */
L_scan0:
|
||||
addi r22, 8, r22 /* r22 = store address + 8 for the quad in r0 */
|
||||
mcmpeq.b r0, r63, r1 /* r1 = zero-byte test result for r0 */
|
||||
stlo.q r22, -8, r0
|
||||
bnei/u r1, 0, tr0 // L_found0
|
||||
sthi.q r22, -1, r0
|
||||
ldx.q r22, r6, r0 /* r6 = read address - store address, set at entry */
|
||||
bgeu/l r36, r22, tr1 // L_scan0
|
||||
/* Tail of the copy: handle the quad still held in r0, then load the last
   8 source bytes (ending at r3 + r4), zero-pad-normalize them and store them
   so that they end at the store end address r20.  Returns through r18.  */
L_end:
|
||||
// At end; we might re-read a few bytes when we fetch the last quad.
|
||||
// branch mispredict, so load is ready now.
|
||||
mcmpeq.b r0, r63, r1
|
||||
addi r22, 8, r22
|
||||
bnei/u r1, 0, tr0 // L_found0
|
||||
add r3, r4, r7 /* r7 = read address + size */
|
||||
ldlo.q r7, -8, r1 /* ldlo.q/ldhi.q pair: the two parts of the last source quad */
|
||||
ldhi.q r7, -1, r7
|
||||
ptabs r18, tr0 /* tr0 is reused here as the return target */
|
||||
stlo.q r22, -8, r0 /* store the quad in r0 at r22 - 8 .. r22 - 1 */
|
||||
or r1, r7, r1 /* r1 = complete last source quad */
|
||||
mcmpeq.b r1, r63, r7
|
||||
sthi.q r22, -1, r0
|
||||
ZPAD_MASK (r7, r7)
|
||||
and r1, r7, r1 // mask out non-zero bytes after first zero byte
|
||||
stlo.q r20, -8, r1 /* store the last quad so that it ends at r20 */
|
||||
sthi.q r20, -1, r1
|
||||
blink tr0, r63
|
||||
|
||||
/* Reached from the entry code when r22 is already beyond r36
   (store end address - 16).  Goes to L_end if r5 (store end address - 8) is
   still above r22; otherwise only the last quad is loaded, normalized and
   stored.  */
L_end_early:
|
||||
/* Check if we can store the current quad in full. */
|
||||
pt L_end, tr1
|
||||
add r3, r4, r7 /* r7 = read address + size */
|
||||
bgtu/u r5, r22, tr1 // L_end // Not really unlikely, but gap is short.
|
||||
/* If not, that means we can just proceed to process the last quad.
|
||||
Two pipeline stalls are unavoidable, as we don't have enough ILP. */
|
||||
ldlo.q r7, -8, r1 /* ldlo.q/ldhi.q pair: the two parts of the last source quad */
|
||||
ldhi.q r7, -1, r7
|
||||
ptabs r18, tr0 /* tr0 = return target */
|
||||
or r1, r7, r1 /* r1 = complete last source quad */
|
||||
mcmpeq.b r1, r63, r7
|
||||
ZPAD_MASK (r7, r7)
|
||||
and r1, r7, r1 // mask out non-zero bytes after first zero byte
|
||||
stlo.q r20, -8, r1 /* store the last quad so that it ends at r20 */
|
||||
sthi.q r20, -1, r1
|
||||
blink tr0, r63
|
||||
|
||||
/* A zero byte was found in the quad held in r0: normalize and store that quad,
   then either hand over to L_write0_multiquad for the remaining zero padding
   or finish with a single store ending at the store end address.  */
L_found0:
|
||||
// r0: string to store, not yet zero-padding normalized.
|
||||
// r1: result of mcmpeq.b r0, r63, r1.
|
||||
// r22: store address plus 8. I.e. address where zero padding beyond the
|
||||
// string in r0 goes.
|
||||
// r20: store end address.
|
||||
// r5: store end address minus 8.
|
||||
pt L_write0_multiquad, tr0
|
||||
ZPAD_MASK (r1, r1)
|
||||
and r0, r1, r0 // mask out non-zero bytes after first zero byte
|
||||
stlo.q r22, -8, r0 /* store the normalized quad at r22 - 8 .. r22 - 1 */
|
||||
sthi.q r22, -1, r0
|
||||
andi r22, -8, r1 // Check if zeros to write fit in one quad word.
|
||||
bgtu/l r5, r1, tr0 // L_write0_multiquad
|
||||
ptabs r18, tr1 /* tr1 = return target */
|
||||
sub r20, r22, r1 /* r1 = store end address - r22 */
|
||||
shlli r1, 2, r1 // Do shift in two steps so that 64 bit case is
|
||||
SHLO r0, r1, r0 // handled correctly.
|
||||
SHLO r0, r1, r0
|
||||
sthi.q r20, -1, r0 /* SHLO is not defined in this file (presumably asm.h) */
|
||||
blink tr1, r63
|
||||
|
||||
/* Zero padding that spans more than one quadword.  On entry r1 = r22 & -8
   (set in L_found0).  r63 is the store source for all padding.  The stlo.q and
   sthi.q below write the partial quads at the start (from r22) and at the end
   (up to r20 - 1) of the padding; L_write0_loop fills the quads in
   between.  */
L_write0_multiquad:
|
||||
pt L_write0_loop, tr0
|
||||
ptabs r18, tr1 /* tr1 = return target */
|
||||
stlo.q r22, 0, r63
|
||||
sthi.q r20, -1, r63
|
||||
addi r1, 8, r1 /* r1 = (r22 & -8) + 8 */
|
||||
bgeu/l r5, r1, tr0 // L_write0_loop
|
||||
blink tr1, r63
|
||||
|
||||
/* Store r63 to the quad at r1 and advance r1 by 8, repeating while
   r5 (store end address - 8) >= r1; then return.  */
L_write0_loop:
|
||||
st.q r1, 0 ,r63
|
||||
addi r1, 8, r1
|
||||
bgeu/l r5, r1, tr0 // L_write0_loop
|
||||
blink tr1, r63
|
||||
|
||||
/* Small sizes (at most 8, per the r4 note below).  The source bytes are
   gathered into r0 and normalized.  Sizes 0..3 are stored byte by byte here;
   sizes from 4 up are handed to L_small_storelong.  */
L_small:
|
||||
// r0: string to store, not yet zero-padding normalized.
|
||||
// r1: result of mcmpeq.b r0, r63, r1.
|
||||
// r7: nonzero indicates relevant zero found in r0.
|
||||
// r2: store address.
|
||||
// r3: read address.
|
||||
// r4: size, max 8
|
||||
// r20: store end address.
|
||||
// r5: store end address minus 8.
|
||||
pt L_nohi, tr0
|
||||
pt L_small_storelong, tr1
|
||||
ptabs r18, tr2 /* tr2 = return target */
|
||||
sub r63, r4, r23 /* r23 = r63 - size; r63 is the zero comparand, so presumably -size */
|
||||
bnei/u r7, 0, tr0 // L_nohi
|
||||
ori r3, -8, r7 /* r7 = (r3 & 7) - 8 */
|
||||
bge/l r23, r7, tr0 // L_nohi
|
||||
ldhi.q r3, 7, r1 /* fetch the high part of the source quad */
|
||||
or r0, r1, r0 /* merge it with the low part loaded at entry */
|
||||
mcmpeq.b r0, r63, r1 /* redo the zero-byte test on the merged quad */
|
||||
L_nohi:
|
||||
ZPAD_MASK (r1, r1)
|
||||
and r0, r1, r0 /* mask out non-zero bytes after first zero byte */
|
||||
movi 4, r19
|
||||
bge/u r4, r19, tr1 // L_small_storelong
|
||||
|
||||
pt L_small_end, tr0
|
||||
#ifndef __LITTLE_ENDIAN__
|
||||
byterev r0, r0
|
||||
#endif
|
||||
beqi/u r4, 0, tr0 // L_small_end
|
||||
st.b r2, 0, r0 /* size is 1..3 here: store one byte at a time */
|
||||
beqi/u r4, 1, tr0 // L_small_end
|
||||
shlri r0, 8, r0 /* move the next byte into the low position */
|
||||
st.b r2, 1, r0
|
||||
beqi/u r4, 2, tr0 // L_small_end
|
||||
shlri r0, 8, r0
|
||||
st.b r2, 2, r0
|
||||
L_small_end:
|
||||
blink tr2, r63
|
||||
|
||||
/* Reached from L_small when the size is at least 4 (and at most 8).  The data
   is written with two longword store pairs: one at the store address r2 and
   one ending at the store end address r20.  For sizes below 8 the two
   stores overlap.  tr2 was set to the return target in L_small.  */
L_small_storelong:
|
||||
shlli r23, 3, r7 /* r7 = r23 * 8; r23 was computed from the size in L_small */
|
||||
SHHI r0, r7, r1 /* SHHI is not defined in this file (presumably asm.h) */
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
shlri r1, 32, r1
|
||||
#else
|
||||
shlri r0, 32, r0
|
||||
#endif
|
||||
stlo.l r2, 0, r0 /* stlo.l/sthi.l pair: longword at r2 .. r2 + 3 */
|
||||
sthi.l r2, 3, r0
|
||||
stlo.l r20, -4, r1 /* stlo.l/sthi.l pair: longword at r20 - 4 .. r20 - 1 */
|
||||
sthi.l r20, -1, r1
|
||||
blink tr2, r63
|
||||
|
||||
#else /* SHcompact */
|
||||
|
||||
/* This code is optimized for size. Instruction selection is SH5 specific.
|
||||
SH4 should use a different version. */
|
||||
/* strncpy, SHcompact version: a byte-at-a-time loop.
   r2, r3 and r4 are read without being set here; by analogy with the SHmedia
   version they are presumably destination, source and size.
   r5 is the running store address, r1 the byte being stored, r6 a constant 0.
   r2 is not modified.  */
ENTRY(strncpy)
|
||||
mov #0, r6 /* r6 = 0, comparand for the zero tests */
|
||||
cmp/eq r4, r6 /* T = (r4 == 0) */
|
||||
bt return /* nothing to do when r4 is zero */
|
||||
mov r2, r5
|
||||
add #-1, r5 /* r5 = r2 - 1 */
|
||||
add r5, r4 /* r4 = r2 - 1 + r4: address of the last byte to write */
|
||||
loop:
|
||||
bt/s found0 /* T set: r1 is already zero, skip the load (T is clear on first entry) */
|
||||
add #1, r5 /* bt/s delay slot: advance the store address */
|
||||
mov.b @r3+, r1 /* r1 = next source byte; r3 is post-incremented */
|
||||
found0:
|
||||
cmp/eq r5,r4 /* T = (r5 is the last byte to write) */
|
||||
mov.b r1, @r5 /* store r1; after a zero byte r1 stays zero, which gives the padding */
|
||||
bf/s loop /* not at the last byte yet: go round again */
|
||||
cmp/eq r1, r6 /* bf/s delay slot: T = (r1 == 0), tested at loop */
|
||||
return:
|
||||
rts
|
||||
nop /* rts delay slot */
|
||||
|
||||
#endif /* SHcompact */
|
||||
#endif /* __SH5__ */
|
Loading…
x
Reference in New Issue
Block a user