Cygwin: x86_64: import latest NetBSD bcopy.S

Tweak slightly to allow implementing entire {w}mem{p}{cpy,move}
family:

Add WIDE macro processing for wmem* and POST macro processing for
memp* functions.

Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
Author: Corinna Vinschen
Date:   2022-12-20 10:13:38 +01:00
parent b09617a828
commit 43743ed754
8 changed files with 227 additions and 131 deletions

View File

@ -52,9 +52,15 @@ TEST_LIB_NAME=libcygwin0.a
# These objects are included directly into the import library
if TARGET_X86_64
# Objects linked directly into the import library (x86_64 only).
# Every entry except the last needs a trailing backslash continuation;
# the rendered list had a duplicated memset.S entry missing its "\",
# which would have terminated the variable early.  Kept alphabetized.
TARGET_FILES= \
	x86_64/bcopy.S \
	x86_64/memchr.S \
	x86_64/memcpy.S \
	x86_64/memmove.S \
	x86_64/mempcpy.S \
	x86_64/memset.S \
	x86_64/wmemcpy.S \
	x86_64/wmemmove.S \
	x86_64/wmempcpy.S
endif
LIB_FILES= \

View File

@ -0,0 +1,192 @@
/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
*
* This code is derived from locore.s.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <machine/asm.h>
#if defined(LIBC_SCCS)
RCSID("$NetBSD: bcopy.S,v 1.5 2014/03/22 19:16:34 jakllsch Exp $")
#endif
/*
* (ov)bcopy (src,dst,cnt)
* ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
*
* Hacked about by dsl@netbsd.org
*/
/*
 * Generic copy engine, assembled into one of nine flavors depending on
 * the macros defined by the including file:
 *
 *   MEMCOPY   memcpy family; caller guarantees no overlap (NO_OVERLAP)
 *   MEMMOVE   memmove family; overlapping buffers handled
 *   (none)    bcopy; overlap handled, argument order is (src, dst, cnt)
 *   WIDE      count is in wchar_t units (2 bytes on this target)
 *   POST      memp* semantics: return dst + cnt instead of dst
 *
 * SysV-style registers on entry: %rdi = dst, %rsi = src, %rdx = count
 * (bcopy: %rdi = src, %rsi = dst, swapped below).
 */
#ifdef MEMCOPY
#ifdef WIDE
#ifdef POST
ENTRY3(wmempcpy)
#else
ENTRY3(wmemcpy)
#endif
#else
#ifdef POST
ENTRY3(mempcpy)
#else
ENTRY3(memcpy)
#endif
#endif
#define NO_OVERLAP
#else
#ifdef MEMMOVE
#ifdef WIDE
ENTRY3(wmemmove)
#else
ENTRY3(memmove)
#endif
#else
ENTRY3(bcopy)
#endif
#endif
#ifdef WIDE
	shlq	$1,%rdx			/* cnt * sizeof (wchar_t) */
#endif
	movq	%rdx,%rcx
#if defined(MEMCOPY) || defined(MEMMOVE)
	movq	%rdi,%rax		/* must return destination address */
#ifdef POST
	addq	%rdx,%rax		/* + n */
#endif
	mov	%rdi,%r11		/* for misaligned check */
#else
	mov	%rsi,%r11		/* for misaligned check */
	xchgq	%rdi,%rsi		/* bcopy() has arg order reversed */
#endif

#if !defined(NO_OVERLAP)
	movq	%rdi,%r8		/* %r8 = dst - src; unsigned compare */
	subq	%rsi,%r8		/* vs cnt detects dst in [src,src+cnt) */
#endif

	shrq	$3,%rcx			/* count for copy by words */
	jz	8f			/* j if less than 8 bytes */

	lea	-8(%rdi,%rdx),%r9	/* target address of last 8 */
	mov	-8(%rsi,%rdx),%r10	/* get last word */
#if !defined(NO_OVERLAP)
	cmpq	%rdx,%r8		/* overlapping? */
	jb	10f			/* yes: must copy backwards */
#endif

/*
 * Non-overlaping, copy forwards.
 * Newer Intel cpus (Nehalem) will do 16byte read/write transfers
 * if %ecx is more than 76.
 * AMD might do something similar some day.
 */
	and	$7,%r11			/* destination misaligned ? */
	jnz	2f
	rep
	movsq
	mov	%r10,(%r9)		/* write last word (covers tail < 8) */
	ret

/*
 * Destination misaligned
 * AMD say it is better to align the destination (not the source).
 * This will also re-align copies if the source and dest are both
 * misaligned by the same amount)
 * (I think Nehalem will use its accelerated copy if the source
 * and destination have the same alignment.)
 */
2:
	lea	-9(%r11,%rdx),%rcx	/* post re-alignment count */
	neg	%r11			/* now -1 .. -7 */
	mov	(%rsi),%rdx		/* get first word */
	mov	%rdi,%r8		/* target for first word */
	lea	8(%rsi,%r11),%rsi	/* advance to aligned destination */
	lea	8(%rdi,%r11),%rdi
	shr	$3,%rcx
	rep
	movsq
	mov	%rdx,(%r8)		/* write first word */
	mov	%r10,(%r9)		/* write last word */
	ret

#if !defined(NO_OVERLAP)
/* Must copy backwards.
 * Reverse copy is probably easy to code faster than 'rep movds'
 * since that requires (IIRC) an extra clock every 3 iterations (AMD).
 * However I don't suppose anything cares that much!
 * The big cost is the std/cld pair - reputedly 50+ cycles on Netburst P4.
 * The copy is aligned with the buffer start (more likely to
 * be a multiple of 8 than the end).
 */
10:
	lea	-8(%rsi,%rcx,8),%rsi	/* start at last full word */
	lea	-8(%rdi,%rcx,8),%rdi
	std				/* copy downwards */
	rep
	movsq
	cld
	mov	%r10,(%r9)		/* write last bytes (tail < 8) */
	ret
#endif

/* Less than 8 bytes to copy, copy by bytes */
/* Intel Nehalem optimise 'rep movsb' for <= 7 bytes (9-15 clocks).
 * For longer transfers it is 50+ !
 */
8:	mov	%rdx,%rcx
#if !defined(NO_OVERLAP)
	cmpq	%rdx,%r8		/* overlapping? */
	jb	81f
#endif

	/* nope, copy forwards. */
	rep
	movsb
	ret

#if !defined(NO_OVERLAP)
/* Must copy backwards */
81:
	lea	-1(%rsi,%rcx),%rsi
	lea	-1(%rdi,%rcx),%rdi
	std
	rep
	movsb
	cld
	ret
#endif
/*
 * Close exactly the symbol opened by ENTRY3 above.  The previous END
 * block only distinguished memcpy/memmove/bcopy, so the WIDE and POST
 * flavors closed a mismatched name (e.g. ENTRY3(wmempcpy) paired with
 * END(memcpy)).  Mirror the ENTRY3 dispatch instead.
 */
#ifdef MEMCOPY
#ifdef WIDE
#ifdef POST
END(wmempcpy)
#else
END(wmemcpy)
#endif
#else
#ifdef POST
END(mempcpy)
#else
END(memcpy)
#endif
#endif
#else
#ifdef MEMMOVE
#ifdef WIDE
END(wmemmove)
#else
END(memmove)
#endif
#else
END(bcopy)
#endif
#endif

View File

@ -1,131 +1,4 @@
/* These functions are almost verbatim FreeBSD code (even if the header of
one file mentions NetBSD), just wrapped in the minimum required code to
make them work under the MS AMD64 ABI.
See FreeBSD src/lib/libc/amd64/string/bcopy.S */
/* $NetBSD: memcpy.S,v 1.1 2005/12/20 19:28:51 christos Exp $ */
/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
*
* This code is derived from locore.s.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* 3. Neither the name of the University nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
* PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
/*
 * _memcpy - common copy core for the entry points below.
 * MS x64 ABI: %rcx = dst, %rdx = src, %r8 = byte count.
 * Sets no return value itself; each wrapper preloads %rax before
 * jumping here and this core leaves %rax untouched.
 * Direction is chosen by the unsigned test (dst - src) < count, which
 * is true exactly when dst lies inside [src, src + count).
 */
.seh_proc _memcpy
_memcpy:
movq %rsi,8(%rsp) /* %rsi/%rdi are callee-saved in the MS ABI; */
movq %rdi,16(%rsp) /* spill them into the caller's home (shadow) slots */
.seh_endprologue
movq %rcx,%rdi /* move args into SysV-style registers: dst */
movq %rdx,%rsi /* src */
movq %r8,%rdx /* count (kept in %rdx for the remainder math) */
movq %rdx,%rcx
movq %rdi,%r8
subq %rsi,%r8 /* %r8 = dst - src */
cmpq %rcx,%r8 /* overlapping? */
jb 1f
cld /* nope, copy forwards. */
shrq $3,%rcx /* copy by words */
rep movsq
movq %rdx,%rcx
andq $7,%rcx /* any bytes left? */
rep movsb
jmp 2f
1:
addq %rcx,%rdi /* copy backwards. */
addq %rcx,%rsi
std /* string ops now run downwards */
andq $7,%rcx /* any fractional bytes? */
decq %rdi /* point at the last byte of each buffer */
decq %rsi
rep movsb /* tail bytes first (from the high end) */
movq %rdx,%rcx /* copy remainder by words */
shrq $3,%rcx
subq $7,%rsi /* step back to the last full word */
subq $7,%rdi
rep movsq
cld /* ABI expects direction flag cleared */
2:
movq 8(%rsp),%rsi /* restore callee-saved registers */
movq 16(%rsp),%rdi
ret
.seh_endproc
/* memmove(dst, src, n): overlap-safe copy; returns dst.
   Tail-calls the shared core, which preserves the preloaded %rax. */
.globl memmove
.seh_proc memmove
memmove:
.seh_endprologue
movq %rcx,%rax /* return dst */
jmp _memcpy
.seh_endproc
/* memcpy(dst, src, n): returns dst.  Shares the same core as memmove,
   so overlap is in fact tolerated even though callers must not rely
   on it. */
.globl memcpy
.seh_proc memcpy
memcpy:
.seh_endprologue
movq %rcx,%rax /* return dst */
jmp _memcpy
.seh_endproc
/* mempcpy(dst, src, n): like memcpy but returns dst + n (one past the
   last byte written). */
.globl mempcpy
.seh_proc mempcpy
mempcpy:
.seh_endprologue
movq %rcx,%rax /* return dst */
addq %r8,%rax /* + n */
jmp _memcpy
.seh_endproc
/* wmemmove(dst, src, n): overlap-safe; n counts wchar_t elements
   (2 bytes each on this target, hence the shift).  Returns dst. */
.globl wmemmove
.seh_proc wmemmove
wmemmove:
.seh_endprologue
shlq $1,%r8 /* cnt * sizeof (wchar_t) */
movq %rcx,%rax /* return dst */
jmp _memcpy
.seh_endproc
/* wmemcpy(dst, src, n): n counts wchar_t elements (2 bytes each on
   this target, hence the shift).  Returns dst. */
.globl wmemcpy
.seh_proc wmemcpy
wmemcpy:
.seh_endprologue
shlq $1,%r8 /* cnt * sizeof (wchar_t) */
movq %rcx,%rax /* return dst */
jmp _memcpy
.seh_endproc
/* wmempcpy(dst, src, n): returns dst + n in wchar_t units.  %r8 holds
   the byte count (already doubled) when it is added to %rax, which is
   exactly n * sizeof (wchar_t) past dst. */
.globl wmempcpy
.seh_proc wmempcpy
wmempcpy:
.seh_endprologue
shlq $1,%r8 /* cnt * sizeof (wchar_t) */
movq %rcx,%rax /* return dst */
addq %r8,%rax /* + n */
jmp _memcpy
.seh_endproc
/* Build the shared copy core in bcopy.S as memcpy()
   (no-overlap variant, returns dst). */
#define MEMCOPY
#include "bcopy.S"

View File

@ -0,0 +1,4 @@
/* $NetBSD: memmove.S,v 1.1 2005/12/20 19:28:51 christos Exp $ */
/* Build the shared copy core in bcopy.S as memmove()
   (overlap-safe variant, returns dst). */
#define MEMMOVE
#include "bcopy.S"

View File

@ -0,0 +1,5 @@
/* $NetBSD: memcpy.S,v 1.1 2005/12/20 19:28:51 christos Exp $ */
/* Build the shared copy core in bcopy.S as mempcpy();
   POST makes it return dst + n instead of dst. */
#define MEMCOPY
#define POST
#include "bcopy.S"

View File

@ -0,0 +1,5 @@
/* $NetBSD: memcpy.S,v 1.1 2005/12/20 19:28:51 christos Exp $ */
/* Build the shared copy core in bcopy.S as wmemcpy();
   WIDE scales the count by sizeof (wchar_t). */
#define MEMCOPY
#define WIDE
#include "bcopy.S"

View File

@ -0,0 +1,5 @@
/* $NetBSD: memmove.S,v 1.1 2005/12/20 19:28:51 christos Exp $ */
/* Build the shared copy core in bcopy.S as wmemmove();
   WIDE scales the count by sizeof (wchar_t), MEMMOVE keeps it
   overlap-safe. */
#define MEMMOVE
#define WIDE
#include "bcopy.S"

View File

@ -0,0 +1,6 @@
/* $NetBSD: memcpy.S,v 1.1 2005/12/20 19:28:51 christos Exp $ */
/* Build the shared copy core in bcopy.S as wmempcpy();
   WIDE scales the count by sizeof (wchar_t), POST makes it return
   dst + n (wchar_t units) instead of dst. */
#define MEMCOPY
#define WIDE
#define POST
#include "bcopy.S"