libm/machine/arm: Add optimized fmaf and fma when available
When HAVE_FAST_FMAF is set, use the vfma.f32 instruction; when HAVE_FAST_FMA is set, use the vfma.f64 instruction. Usually the compiler built-ins will already have inlined these instructions, but provide these symbols for cases where that doesn't work instead of falling back to the (inaccurate) common code versions. Signed-off-by: Keith Packard <keithp@keithp.com>
This commit is contained in:
parent
0c1989070e
commit
a44bc679a4
|
@ -38,6 +38,8 @@ ANSI C, POSIX.
|
|||
|
||||
#include "fdlibm.h"
|
||||
|
||||
#if !HAVE_FAST_FMA
|
||||
|
||||
#ifndef _DOUBLE_IS_32BITS
|
||||
|
||||
#ifdef __STDC__
|
||||
|
@ -54,3 +56,5 @@ ANSI C, POSIX.
|
|||
}
|
||||
|
||||
#endif /* _DOUBLE_IS_32BITS */
|
||||
|
||||
#endif /* !HAVE_FAST_FMA */
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
|
||||
#include "fdlibm.h"
|
||||
|
||||
#if !HAVE_FAST_FMAF
|
||||
|
||||
#ifdef __STDC__
|
||||
float fmaf(float x, float y, float z)
|
||||
#else
|
||||
|
@ -25,6 +27,8 @@
|
|||
return (float) (((double) x * (double) y) + (double) z);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef _DOUBLE_IS_32BITS
|
||||
|
||||
#ifdef __STDC__
|
||||
|
|
|
@ -10,12 +10,14 @@ LIB_SOURCES = \
|
|||
ef_sqrt.c \
|
||||
s_ceil.c \
|
||||
s_floor.c \
|
||||
s_fma.c \
|
||||
s_nearbyint.c \
|
||||
s_rint.c \
|
||||
s_round.c \
|
||||
s_trunc.c \
|
||||
sf_ceil.c \
|
||||
sf_floor.c \
|
||||
sf_fma.c \
|
||||
sf_nearbyint.c \
|
||||
sf_rint.c \
|
||||
sf_round.c \
|
||||
|
|
|
@ -54,8 +54,7 @@ build_triplet = @build@
|
|||
host_triplet = @host@
|
||||
DIST_COMMON = $(srcdir)/../../../Makefile.shared $(srcdir)/Makefile.in \
|
||||
$(srcdir)/Makefile.am $(top_srcdir)/configure \
|
||||
$(am__configure_deps) $(srcdir)/../../../../mkinstalldirs \
|
||||
$(srcdir)/../../../../mkinstalldirs
|
||||
$(am__configure_deps) $(srcdir)/../../../../mkinstalldirs
|
||||
subdir = .
|
||||
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
|
||||
am__aclocal_m4_deps = $(top_srcdir)/../../../acinclude.m4 \
|
||||
|
@ -73,9 +72,10 @@ lib_a_AR = $(AR) $(ARFLAGS)
|
|||
lib_a_LIBADD =
|
||||
am__objects_1 = lib_a-e_sqrt.$(OBJEXT) lib_a-ef_sqrt.$(OBJEXT) \
|
||||
lib_a-s_ceil.$(OBJEXT) lib_a-s_floor.$(OBJEXT) \
|
||||
lib_a-s_nearbyint.$(OBJEXT) lib_a-s_rint.$(OBJEXT) \
|
||||
lib_a-s_round.$(OBJEXT) lib_a-s_trunc.$(OBJEXT) \
|
||||
lib_a-sf_ceil.$(OBJEXT) lib_a-sf_floor.$(OBJEXT) \
|
||||
lib_a-s_fma.$(OBJEXT) lib_a-s_nearbyint.$(OBJEXT) \
|
||||
lib_a-s_rint.$(OBJEXT) lib_a-s_round.$(OBJEXT) \
|
||||
lib_a-s_trunc.$(OBJEXT) lib_a-sf_ceil.$(OBJEXT) \
|
||||
lib_a-sf_floor.$(OBJEXT) lib_a-sf_fma.$(OBJEXT) \
|
||||
lib_a-sf_nearbyint.$(OBJEXT) lib_a-sf_rint.$(OBJEXT) \
|
||||
lib_a-sf_round.$(OBJEXT) lib_a-sf_trunc.$(OBJEXT) \
|
||||
lib_a-feclearexcept.$(OBJEXT) lib_a-fe_dfl_env.$(OBJEXT) \
|
||||
|
@ -216,12 +216,14 @@ LIB_SOURCES = \
|
|||
ef_sqrt.c \
|
||||
s_ceil.c \
|
||||
s_floor.c \
|
||||
s_fma.c \
|
||||
s_nearbyint.c \
|
||||
s_rint.c \
|
||||
s_round.c \
|
||||
s_trunc.c \
|
||||
sf_ceil.c \
|
||||
sf_floor.c \
|
||||
sf_fma.c \
|
||||
sf_nearbyint.c \
|
||||
sf_rint.c \
|
||||
sf_round.c \
|
||||
|
@ -342,6 +344,12 @@ lib_a-s_floor.o: s_floor.c
|
|||
lib_a-s_floor.obj: s_floor.c
|
||||
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_floor.obj `if test -f 's_floor.c'; then $(CYGPATH_W) 's_floor.c'; else $(CYGPATH_W) '$(srcdir)/s_floor.c'; fi`
|
||||
|
||||
lib_a-s_fma.o: s_fma.c
|
||||
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_fma.o `test -f 's_fma.c' || echo '$(srcdir)/'`s_fma.c
|
||||
|
||||
lib_a-s_fma.obj: s_fma.c
|
||||
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_fma.obj `if test -f 's_fma.c'; then $(CYGPATH_W) 's_fma.c'; else $(CYGPATH_W) '$(srcdir)/s_fma.c'; fi`
|
||||
|
||||
lib_a-s_nearbyint.o: s_nearbyint.c
|
||||
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_nearbyint.o `test -f 's_nearbyint.c' || echo '$(srcdir)/'`s_nearbyint.c
|
||||
|
||||
|
@ -378,6 +386,12 @@ lib_a-sf_floor.o: sf_floor.c
|
|||
lib_a-sf_floor.obj: sf_floor.c
|
||||
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_floor.obj `if test -f 'sf_floor.c'; then $(CYGPATH_W) 'sf_floor.c'; else $(CYGPATH_W) '$(srcdir)/sf_floor.c'; fi`
|
||||
|
||||
lib_a-sf_fma.o: sf_fma.c
|
||||
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_fma.o `test -f 'sf_fma.c' || echo '$(srcdir)/'`sf_fma.c
|
||||
|
||||
lib_a-sf_fma.obj: sf_fma.c
|
||||
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_fma.obj `if test -f 'sf_fma.c'; then $(CYGPATH_W) 'sf_fma.c'; else $(CYGPATH_W) '$(srcdir)/sf_fma.c'; fi`
|
||||
|
||||
lib_a-sf_nearbyint.o: sf_nearbyint.c
|
||||
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_nearbyint.o `test -f 'sf_nearbyint.c' || echo '$(srcdir)/'`sf_nearbyint.c
|
||||
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*
|
||||
* Copyright © 2020 Keith Packard
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following
|
||||
* disclaimer in the documentation and/or other materials provided
|
||||
* with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include "math_config.h"
|
||||
|
||||
#if HAVE_FAST_FMA
|
||||
|
||||
double
|
||||
fma (double x, double y, double z)
|
||||
{
|
||||
asm ("vfma.f64 %P0, %P1, %P2" : "=w" (z) : "w" (x), "w" (y));
|
||||
return z;
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*
|
||||
* Copyright © 2020 Keith Packard
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following
|
||||
* disclaimer in the documentation and/or other materials provided
|
||||
* with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include "math_config.h"
|
||||
|
||||
#if HAVE_FAST_FMAF
|
||||
|
||||
float
|
||||
fmaf (float x, float y, float z)
|
||||
{
|
||||
asm ("vfma.f32 %0, %1, %2" : "=t" (z) : "t" (x), "t" (y));
|
||||
return z;
|
||||
}
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue