mirror of
git://sourceware.org/git/newlib-cygwin.git
synced 2025-01-19 12:59:21 +08:00
5c86f0da5f
This patch adds implementations of memcpy, memmove, memset and strcmp optimized for size. The changes have been tested in riscv/riscv-gnu-toolchain by riscv-dejagnu with riscv-sim.exp/riscv-sim-nano.exp.
165 lines
3.1 KiB
ArmAsm
165 lines
3.1 KiB
ArmAsm
/* Copyright (c) 2017 SiFive Inc. All rights reserved.

   This copyrighted material is made available to anyone wishing to use,
   modify, copy, or redistribute it subject to the terms and conditions
   of the FreeBSD License. This program is distributed in the hope that
   it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
   including the implied warranties of MERCHANTABILITY or FITNESS FOR
   A PARTICULAR PURPOSE. A copy of this license is available at
   http://www.opensource.org/licenses.
*/

#include <sys/asm.h>

/* The word-at-a-time path of strcmp below finds the first differing byte
   by looking at the low-order bits of each word first, which is only
   correct when the lowest-addressed byte is the least significant one.
   Refuse to build for any other byte order.

   __BYTE_ORDER__ / __ORDER_LITTLE_ENDIAN__ are predefined by the compiler
   driver, so they are tested first.  BYTE_ORDER / LITTLE_ENDIAN exist only
   when some header defines them; if neither is defined the preprocessor
   evaluates both as 0 and that comparison can never fail, so it is kept
   only as a fallback for toolchains without the predefined macros.  */
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__)
# if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
#  error "RISC-V strcmp: only little-endian targets are supported"
# endif
#elif BYTE_ORDER != LITTLE_ENDIAN
# error "RISC-V strcmp: only little-endian targets are supported"
#endif

.text
.globl strcmp
.type strcmp, @function

/* int strcmp (const char *s1, const char *s2)

   In:   a0 = s1, a1 = s2
   Out:  a0 = 0 when the strings are equal; otherwise a nonzero value
              whose sign is that of (byte of s1) - (byte of s2) at the
              first position where they differ, bytes taken as unsigned
   Leaf routine: no stack use, no calls.  */
strcmp:
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
  /* Size-optimized variant: one byte per iteration.
     Uses a2 and a3 as scratch; returns exactly a2 - a3.  */
1:
  lbu   a2, 0(a0)           # a2 = current byte of s1, zero-extended
  lbu   a3, 0(a1)           # a3 = current byte of s2, zero-extended
  add   a0, a0, 1
  add   a1, a1, 1
  bne   a2, a3, 2f          # bytes differ: done
  bnez  a2, 1b              # equal and not the terminator: next byte

2:
  sub   a0, a2, a3          # 0 when both bytes were the terminator
  ret

.size strcmp, .-strcmp
#else
  /* Word-at-a-time variant.  Entry: a0 = s1, a1 = s2.
     The word loop requires both pointers to be SZREG-aligned; when
     either one is not, the whole comparison is done by the byte loop
     at .Lmisaligned.

     Registers set up here and kept for the rest of the routine:
       t2 = -1 (all ones), the value check_one_word compares against
       a5 = 0x7f replicated in every byte of a register  */
  or    a4, a0, a1
  li    t2, -1
  and   a4, a4, SZREG-1     # nonzero when either pointer has low bits set
  bnez  a4, .Lmisaligned

#if SZREG == 4
  li a5, 0x7f7f7f7f
#else
  ld a5, mask               /* 64-bit constant, stored at label mask */
#endif

/* check_one_word i n

   Compares word number i (counting from 0) of an n-word unrolled
   iteration.  Loads a2 from offset i*SZREG off a0 and a3 from the same
   offset off a1, then leaves through one of:

     - a2 contains a zero byte: branch to .Lnull<i>, a0/a1 not modified
     - a2 != a3:                .Lmismatch (by branch, or by falling
                                through after the last word)
     - a2 == a3, not last word: fall through to the next expansion
     - a2 == a3, last word:     a0/a1 advanced by n words, back to .Lloop

   Expects a5 = 0x7f in every byte and t2 = -1.  Clobbers t0 and t1.  */
  .macro check_one_word i n
    REG_L a2, \i*SZREG(a0)
    REG_L a3, \i*SZREG(a1)

    /* Zero-byte test on a2.  For each byte b of a2:
         (b & 0x7f) + 0x7f  has bit 7 set iff any of bits 0-6 of b is set
                            (at most 0xfe, so nothing carries into the
                            neighbouring byte)
         b | 0x7f           has bit 7 set iff bit 7 of b is set, and
                            bits 0-6 always set
       The OR of the two is 0xff iff b != 0, so t0 is all ones exactly
       when a2 has no zero byte.  */
    and   t0, a2, a5
    or    t1, a2, a5
    add   t0, t0, a5
    or    t0, t0, t1

    bne   t0, t2, .Lnull\i
    .if \i+1-\n
      bne   a2, a3, .Lmismatch
    .else
      /* Last word of the iteration: advance both pointers before the
         branch; .Lmismatch only needs a2 and a3.  */
      add   a0, a0, \n*SZREG
      add   a1, a1, \n*SZREG
      beq   a2, a3, .Lloop
      # fall through to .Lmismatch
    .endif
  .endm

/* foundnull i n

   Emits the landing code for .Lnull<i>, the target check_one_word
   branches to when word i (held in a2) contains a zero byte.  a3 holds
   the matching word of the second string.

   For i >= 1 the code first moves a0/a1 forward to word i.  Then:
     - a2 == a3: the strings are equal up to and including the
                 terminator, return 0
     - a2 != a3: hand over to the byte loop at .Lmisaligned, which
                 re-reads this word one byte at a time

   Expands to nothing for i == 0.  .Lnull0 is emitted inside the i == 1
   expansion, after the pointer adjustment, because word 0 needs no
   adjustment and can share the compare/return code.  The n argument is
   not used.  */
  .macro foundnull i n
    .ifne \i
      .Lnull\i:
      add   a0, a0, \i*SZREG
      add   a1, a1, \i*SZREG
      .ifeq \i-1
        .Lnull0:
      .endif
      bne   a2, a3, .Lmisaligned
      li    a0, 0
      ret
    .endif
  .endm

/* Main aligned loop.  Entry: a0/a1 are SZREG-aligned, a5 and t2 are set
   up as check_one_word expects.  One iteration is unrolled to 5 words
   when __riscv_xlen is 32 and to 3 words otherwise (20 or 24 bytes,
   assuming SZREG is 4 resp. 8 there).  */
.Lloop:
  # examine full words at a time, favoring strings of a couple dozen chars
#if __riscv_xlen == 32
  check_one_word 0 5
  check_one_word 1 5
  check_one_word 2 5
  check_one_word 3 5
  check_one_word 4 5
#else
  check_one_word 0 3
  check_one_word 1 3
  check_one_word 2 3
#endif
  # backwards branch to .Lloop contained above

/* Entry: a2 != a3 and a2 has no zero byte (a0/a1 are dead from here on).

   Locate the first differing byte, i.e. the least significant one, by
   testing 16-bit chunks from the low end upwards.  Shifting left drops
   every chunk above the one under test; the chunks below it are already
   known to be equal, so a difference after the shift is a difference in
   that chunk, which then sits in the top 16 bits of a4/a5.  */
.Lmismatch:
  /* Words differ, but a2 has no null byte.  */
#if __riscv_xlen == 64
  sll   a4, a2, 48          # keep bytes 0-1
  sll   a5, a3, 48
  bne   a4, a5, .Lmismatch_upper
  sll   a4, a2, 32          # keep bytes 0-3, bytes 0-1 known equal
  sll   a5, a3, 32
  bne   a4, a5, .Lmismatch_upper
#endif
  sll   a4, a2, 16          # keep everything except the top chunk
  sll   a5, a3, 16
  bne   a4, a5, .Lmismatch_upper

  /* All lower chunks are equal, so the difference is in the top one.  */
  srl   a4, a2, 8*SZREG-16
  srl   a5, a3, 8*SZREG-16
  sub   a0, a4, a5
  and   a1, a0, 0xff        # nonzero iff the low bytes of the chunk differ
  bnez  a1, 1f
  ret                       # low bytes equal: a0 = (high byte difference) << 8

.Lmismatch_upper:
  /* The differing chunk is in the top 16 bits of a4/a5; bring it down.  */
  srl   a4, a4, 8*SZREG-16
  srl   a5, a5, 8*SZREG-16
  sub   a0, a4, a5
  and   a1, a0, 0xff        # nonzero iff the low bytes of the chunk differ
  bnez  a1, 1f
  ret                       # low bytes equal: a0 = (high byte difference) << 8

  /* The low byte of the chunk is the first difference: return the
     difference of just those two bytes.  */
1:
  and   a4, a4, 0xff
  and   a5, a5, 0xff
  sub   a0, a4, a5
  ret

/* Byte-at-a-time comparison starting at a0/a1.  Reached from the entry
   check when either pointer is not SZREG-aligned, and from the .Lnull
   landing code when a word that contains the terminator differs from
   its counterpart.  Returns a2 - a3 for the first pair of bytes that
   differ, or 0 when both strings end together.  */
.Lmisaligned:
  # misaligned
  lbu   a2, 0(a0)           # zero-extended byte of the first string
  lbu   a3, 0(a1)           # zero-extended byte of the second string
  add   a0, a0, 1
  add   a1, a1, 1
  bne   a2, a3, 1f
  bnez  a2, .Lmisaligned    # equal and not the terminator: next byte

1:
  sub   a0, a2, a3
  ret

/* Landing code for the .Lnull<i> labels used by check_one_word.  The
   word counts must match the check_one_word expansions in .Lloop so
   that every .Lnull<i> referenced there is defined here.  */
  # cases in which a null byte was detected
#if __riscv_xlen == 32
  foundnull 0 5
  foundnull 1 5
  foundnull 2 5
  foundnull 3 5
  foundnull 4 5
#else
  foundnull 0 3
  foundnull 1 3
  foundnull 2 3
#endif
.size strcmp, .-strcmp

/* 64-bit copy of the 0x7f-per-byte constant, loaded into a5 by the
   word-at-a-time entry code when SZREG is 8 (the SZREG == 4 build uses
   an immediate instead).  Placed in a mergeable read-only section with
   8-byte entries; .align 3 requests 2^3 = 8-byte alignment.  */
#if SZREG == 8
.section .srodata.cst8,"aM",@progbits,8
.align 3
mask:
.dword 0x7f7f7f7f7f7f7f7f
#endif
#endif /* PREFER_SIZE_OVER_SPEED || __OPTIMIZE_SIZE__ */