newlib-cygwin/newlib/libc/machine/aarch64/strchrnul.S


/*
 * strchrnul - find a character or nul in a string
 *
 * Copyright (c) 2014-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED))
/* See strchrnul-stub.c */
#else
/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */

#include "asmdefs.h"

/* Arguments and results. */
#define srcin		x0
#define chrin		w1

#define result		x0

/* Locals and temporaries. */
#define src		x2
#define tmp1		x3
#define wtmp2		w4
#define tmp3		x5

#define vrepchr		v0
#define vdata1		v1
#define vdata2		v2
#define vhas_nul1	v3
#define vhas_nul2	v4
#define vhas_chr1	v5
#define vhas_chr2	v6
#define vrepmask	v7
#define vend1		v16

/* Core algorithm.

   For each 32-byte hunk we calculate a 64-bit syndrome value, with
   two bits per byte (the tuple for the lowest-addressed byte is
   always in bits 0 and 1, for both big- and little-endian systems).
   For each tuple, bit 0 is set iff the relevant byte matched the
   requested character or nul. Since the bits in the syndrome reflect
   exactly the order in which things occur in the original string, a
   count_trailing_zeros() operation will identify exactly which byte
   is causing the termination. */
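/* For example, if the first matching byte sits at offset 3 within the
   hunk, the lowest set syndrome bit is bit 6 (three clear bit pairs
   below it), so count_trailing_zeros() returns 6 and 6 >> 1 recovers
   the byte offset 3. */
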
ENTRY (strchrnul)
	PTR_ARG (0)

	/* Magic constant 0x40100401 to allow us to identify which lane
	   matches the termination condition. */
	mov	wtmp2, #0x0401
	movk	wtmp2, #0x4010, lsl #16
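	/* wtmp2 == 0x40100401: within each 32-bit lane the bytes are
	   0x01, 0x04, 0x10, 0x40, a distinct bit pair per byte position,
	   which is what lets the two ADDPs below compress 32 bytes of
	   match data into a 64-bit syndrome. */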
	dup	vrepchr.16b, chrin
	bic	src, srcin, #31		/* Work with aligned 32-byte hunks. */
	dup	vrepmask.4s, wtmp2
	ands	tmp1, srcin, #31
	b.eq	L(loop)

	/* Input string is not 32-byte aligned. Rather than forcing
	   the padding bytes to a safe value, we calculate the syndrome
	   for all the bytes, but then mask off those bits of the
	   syndrome that are related to the padding. */
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	neg	tmp1, tmp1
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	cmhs	vhas_nul1.16b, vhas_chr1.16b, vdata1.16b
	cmhs	vhas_nul2.16b, vhas_chr2.16b, vdata2.16b
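	/* The CMHS trick merges the nul test into the character test:
	   vhas_chr is 0xff where the byte matched chr, so 0xff >= data
	   always holds; where vhas_chr is 0x00, 0x00 >= data holds only
	   for a nul byte. vhas_nul is therefore 0xff iff the byte is
	   chr or nul. */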
	and	vhas_chr1.16b, vhas_nul1.16b, vrepmask.16b
	and	vhas_chr2.16b, vhas_nul2.16b, vrepmask.16b
	lsl	tmp1, tmp1, #1
	addp	vend1.16b, vhas_chr1.16b, vhas_chr2.16b	// 256->128
	mov	tmp3, #~0
	addp	vend1.16b, vend1.16b, vend1.16b		// 128->64
	lsr	tmp1, tmp3, tmp1
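	/* tmp1 was negated and doubled, so the shift amount is
	   64 - 2 * misalignment; tmp1 now has one set bit pair per
	   padding byte below srcin. */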
	mov	tmp3, vend1.d[0]
	bic	tmp1, tmp3, tmp1	// Mask padding bits.
	cbnz	tmp1, L(tail)

	.p2align 4
L(loop):
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	cmhs	vhas_nul1.16b, vhas_chr1.16b, vdata1.16b
	cmhs	vhas_nul2.16b, vhas_chr2.16b, vdata2.16b
	orr	vend1.16b, vhas_nul1.16b, vhas_nul2.16b
	umaxp	vend1.16b, vend1.16b, vend1.16b
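	/* ORR and UMAXP fold the 32 match bytes down to 16 and then to
	   8 in the low half of vend1; a nonzero 64-bit value is enough
	   to know the hunk contains chr or nul, so no mask is needed on
	   the fast path. */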
	mov	tmp1, vend1.d[0]
	cbz	tmp1, L(loop)

	/* Termination condition found. Now we need to establish exactly
	   where in the hunk it occurred; for strchrnul the character and
	   nul cases are handled identically. */
	and	vhas_chr1.16b, vhas_nul1.16b, vrepmask.16b
	and	vhas_chr2.16b, vhas_nul2.16b, vrepmask.16b
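	/* As in the prologue: turn the 0xff/0x00 byte masks into
	   positional bit pairs, then compress them into the 64-bit
	   syndrome. */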
	addp	vend1.16b, vhas_chr1.16b, vhas_chr2.16b	// 256->128
	addp	vend1.16b, vend1.16b, vend1.16b		// 128->64
	mov	tmp1, vend1.d[0]

L(tail):
	/* Count the trailing zeros, by bit reversing... */
	rbit	tmp1, tmp1
	/* Re-bias source. */
	sub	src, src, #32
	clz	tmp1, tmp1	/* ... and counting the leading zeros. */
	/* tmp1 is twice the offset into the fragment. */
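	/* The syndrome holds two bits per byte, so halving the bit index
	   gives the byte offset from the hunk base; src was stepped back
	   by 32 above to point at that base. */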
	add	result, src, tmp1, lsr #1
	ret

END (strchrnul)
#endif