Revert "amdgcn: Add vectorized math routines"
This reverts commit 125e39bfea.
This commit is contained in:
parent
abf672604b
commit
c04c01524d
1865
newlib/Makefile.in
1865
newlib/Makefile.in
File diff suppressed because one or more lines are too long
|
@ -623,8 +623,6 @@ HAVE_LIBM_MACHINE_I386_FALSE
|
||||||
HAVE_LIBM_MACHINE_I386_TRUE
|
HAVE_LIBM_MACHINE_I386_TRUE
|
||||||
HAVE_LIBM_MACHINE_ARM_FALSE
|
HAVE_LIBM_MACHINE_ARM_FALSE
|
||||||
HAVE_LIBM_MACHINE_ARM_TRUE
|
HAVE_LIBM_MACHINE_ARM_TRUE
|
||||||
HAVE_LIBM_MACHINE_AMDGCN_FALSE
|
|
||||||
HAVE_LIBM_MACHINE_AMDGCN_TRUE
|
|
||||||
HAVE_LIBM_MACHINE_AARCH64_FALSE
|
HAVE_LIBM_MACHINE_AARCH64_FALSE
|
||||||
HAVE_LIBM_MACHINE_AARCH64_TRUE
|
HAVE_LIBM_MACHINE_AARCH64_TRUE
|
||||||
HAS_NDS32_FPU_DP_FALSE
|
HAS_NDS32_FPU_DP_FALSE
|
||||||
|
@ -6218,14 +6216,6 @@ else
|
||||||
HAVE_LIBM_MACHINE_AARCH64_FALSE=
|
HAVE_LIBM_MACHINE_AARCH64_FALSE=
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if test "${libm_machine_dir}" = "amdgcn"; then
|
|
||||||
HAVE_LIBM_MACHINE_AMDGCN_TRUE=
|
|
||||||
HAVE_LIBM_MACHINE_AMDGCN_FALSE='#'
|
|
||||||
else
|
|
||||||
HAVE_LIBM_MACHINE_AMDGCN_TRUE='#'
|
|
||||||
HAVE_LIBM_MACHINE_AMDGCN_FALSE=
|
|
||||||
fi
|
|
||||||
|
|
||||||
if test "${libm_machine_dir}" = "arm"; then
|
if test "${libm_machine_dir}" = "arm"; then
|
||||||
HAVE_LIBM_MACHINE_ARM_TRUE=
|
HAVE_LIBM_MACHINE_ARM_TRUE=
|
||||||
HAVE_LIBM_MACHINE_ARM_FALSE='#'
|
HAVE_LIBM_MACHINE_ARM_FALSE='#'
|
||||||
|
@ -7885,10 +7875,6 @@ if test -z "${HAVE_LIBM_MACHINE_AARCH64_TRUE}" && test -z "${HAVE_LIBM_MACHINE_A
|
||||||
as_fn_error $? "conditional \"HAVE_LIBM_MACHINE_AARCH64\" was never defined.
|
as_fn_error $? "conditional \"HAVE_LIBM_MACHINE_AARCH64\" was never defined.
|
||||||
Usually this means the macro was only invoked conditionally." "$LINENO" 5
|
Usually this means the macro was only invoked conditionally." "$LINENO" 5
|
||||||
fi
|
fi
|
||||||
if test -z "${HAVE_LIBM_MACHINE_AMDGCN_TRUE}" && test -z "${HAVE_LIBM_MACHINE_AMDGCN_FALSE}"; then
|
|
||||||
as_fn_error $? "conditional \"HAVE_LIBM_MACHINE_AMDGCN\" was never defined.
|
|
||||||
Usually this means the macro was only invoked conditionally." "$LINENO" 5
|
|
||||||
fi
|
|
||||||
if test -z "${HAVE_LIBM_MACHINE_ARM_TRUE}" && test -z "${HAVE_LIBM_MACHINE_ARM_FALSE}"; then
|
if test -z "${HAVE_LIBM_MACHINE_ARM_TRUE}" && test -z "${HAVE_LIBM_MACHINE_ARM_FALSE}"; then
|
||||||
as_fn_error $? "conditional \"HAVE_LIBM_MACHINE_ARM\" was never defined.
|
as_fn_error $? "conditional \"HAVE_LIBM_MACHINE_ARM\" was never defined.
|
||||||
Usually this means the macro was only invoked conditionally." "$LINENO" 5
|
Usually this means the macro was only invoked conditionally." "$LINENO" 5
|
||||||
|
|
|
@ -119,7 +119,6 @@ case "${host_cpu}" in
|
||||||
amdgcn*)
|
amdgcn*)
|
||||||
newlib_cflags="${newlib_cflags} -D__DYNAMIC_REENT__"
|
newlib_cflags="${newlib_cflags} -D__DYNAMIC_REENT__"
|
||||||
machine_dir=amdgcn
|
machine_dir=amdgcn
|
||||||
libm_machine_dir=amdgcn
|
|
||||||
newlib_cv_initfinit_array=yes
|
newlib_cv_initfinit_array=yes
|
||||||
;;
|
;;
|
||||||
arc*)
|
arc*)
|
||||||
|
|
|
@ -51,9 +51,6 @@ include %D%/test/Makefile.inc
|
||||||
if HAVE_LIBM_MACHINE_AARCH64
|
if HAVE_LIBM_MACHINE_AARCH64
|
||||||
include %D%/machine/aarch64/Makefile.inc
|
include %D%/machine/aarch64/Makefile.inc
|
||||||
endif
|
endif
|
||||||
if HAVE_LIBM_MACHINE_AMDGCN
|
|
||||||
include %D%/machine/amdgcn/Makefile.inc
|
|
||||||
endif
|
|
||||||
if HAVE_LIBM_MACHINE_ARM
|
if HAVE_LIBM_MACHINE_ARM
|
||||||
include %D%/machine/arm/Makefile.inc
|
include %D%/machine/arm/Makefile.inc
|
||||||
endif
|
endif
|
||||||
|
|
|
@ -4,7 +4,7 @@ m4_include([libm/machine/nds32/acinclude.m4])
|
||||||
|
|
||||||
dnl Define HAVE_LIBM_MACHINE_<machine> automake conditionals.
|
dnl Define HAVE_LIBM_MACHINE_<machine> automake conditionals.
|
||||||
m4_foreach_w([MACHINE], [
|
m4_foreach_w([MACHINE], [
|
||||||
aarch64 amdgcn arm i386 mips nds32 powerpc pru sparc spu riscv x86_64
|
aarch64 arm i386 mips nds32 powerpc pru sparc spu riscv x86_64
|
||||||
], [dnl
|
], [dnl
|
||||||
AM_CONDITIONAL([HAVE_LIBM_MACHINE_]m4_toupper(MACHINE), test "${libm_machine_dir}" = "MACHINE")
|
AM_CONDITIONAL([HAVE_LIBM_MACHINE_]m4_toupper(MACHINE), test "${libm_machine_dir}" = "MACHINE")
|
||||||
])
|
])
|
||||||
|
|
|
@ -1,91 +0,0 @@
|
||||||
%C%_src = \
|
|
||||||
%D%/v64_mathcnst.c \
|
|
||||||
%D%/v64_reent.c \
|
|
||||||
%D%/v64df_acos.c \
|
|
||||||
%D%/v64df_acosh.c \
|
|
||||||
%D%/v64df_asin.c \
|
|
||||||
%D%/v64df_asine.c \
|
|
||||||
%D%/v64df_asinh.c \
|
|
||||||
%D%/v64df_atan.c \
|
|
||||||
%D%/v64df_atan2.c \
|
|
||||||
%D%/v64df_atangent.c \
|
|
||||||
%D%/v64df_atanh.c \
|
|
||||||
%D%/v64df_copysign.c \
|
|
||||||
%D%/v64df_cos.c \
|
|
||||||
%D%/v64df_cosh.c \
|
|
||||||
%D%/v64df_erf.c \
|
|
||||||
%D%/v64df_exp.c \
|
|
||||||
%D%/v64df_exp2.c \
|
|
||||||
%D%/v64df_finite.c \
|
|
||||||
%D%/v64df_fmod.c \
|
|
||||||
%D%/v64df_gamma.c \
|
|
||||||
%D%/v64df_hypot.c \
|
|
||||||
%D%/v64df_ilogb.c \
|
|
||||||
%D%/v64df_isnan.c \
|
|
||||||
%D%/v64df_ispos.c \
|
|
||||||
%D%/v64df_lgamma.c \
|
|
||||||
%D%/v64df_lgamma_r.c \
|
|
||||||
%D%/v64df_log.c \
|
|
||||||
%D%/v64df_log10.c \
|
|
||||||
%D%/v64df_log2.c \
|
|
||||||
%D%/v64df_modf.c \
|
|
||||||
%D%/v64df_numtest.c \
|
|
||||||
%D%/v64df_pow.c \
|
|
||||||
%D%/v64df_remainder.c \
|
|
||||||
%D%/v64df_rint.c \
|
|
||||||
%D%/v64df_scalb.c \
|
|
||||||
%D%/v64df_scalbn.c \
|
|
||||||
%D%/v64df_signif.c \
|
|
||||||
%D%/v64df_sin.c \
|
|
||||||
%D%/v64df_sine.c \
|
|
||||||
%D%/v64df_sineh.c \
|
|
||||||
%D%/v64df_sinh.c \
|
|
||||||
%D%/v64df_sqrt.c \
|
|
||||||
%D%/v64df_tan.c \
|
|
||||||
%D%/v64df_tanh.c \
|
|
||||||
%D%/v64df_tgamma.c \
|
|
||||||
%D%/v64sf_acos.c \
|
|
||||||
%D%/v64sf_acosh.c \
|
|
||||||
%D%/v64sf_asin.c \
|
|
||||||
%D%/v64sf_asine.c \
|
|
||||||
%D%/v64sf_asinh.c \
|
|
||||||
%D%/v64sf_atan.c \
|
|
||||||
%D%/v64sf_atan2.c \
|
|
||||||
%D%/v64sf_atangent.c \
|
|
||||||
%D%/v64sf_atanh.c \
|
|
||||||
%D%/v64sf_copysign.c \
|
|
||||||
%D%/v64sf_cos.c \
|
|
||||||
%D%/v64sf_cosh.c \
|
|
||||||
%D%/v64sf_erf.c \
|
|
||||||
%D%/v64sf_exp.c \
|
|
||||||
%D%/v64sf_exp2.c \
|
|
||||||
%D%/v64sf_finite.c \
|
|
||||||
%D%/v64sf_fmod.c \
|
|
||||||
%D%/v64sf_gamma.c \
|
|
||||||
%D%/v64sf_hypot.c \
|
|
||||||
%D%/v64sf_ilogb.c \
|
|
||||||
%D%/v64sf_isnan.c \
|
|
||||||
%D%/v64sf_ispos.c \
|
|
||||||
%D%/v64sf_lgamma.c \
|
|
||||||
%D%/v64sf_lgamma_r.c \
|
|
||||||
%D%/v64sf_log.c \
|
|
||||||
%D%/v64sf_log10.c \
|
|
||||||
%D%/v64sf_log2.c \
|
|
||||||
%D%/v64sf_modf.c \
|
|
||||||
%D%/v64sf_numtest.c \
|
|
||||||
%D%/v64sf_pow.c \
|
|
||||||
%D%/v64sf_remainder.c \
|
|
||||||
%D%/v64sf_rint.c \
|
|
||||||
%D%/v64sf_scalb.c \
|
|
||||||
%D%/v64sf_scalbn.c \
|
|
||||||
%D%/v64sf_signif.c \
|
|
||||||
%D%/v64sf_sin.c \
|
|
||||||
%D%/v64sf_sine.c \
|
|
||||||
%D%/v64sf_sineh.c \
|
|
||||||
%D%/v64sf_sinh.c \
|
|
||||||
%D%/v64sf_sqrt.c \
|
|
||||||
%D%/v64sf_tan.c \
|
|
||||||
%D%/v64sf_tanh.c \
|
|
||||||
%D%/v64sf_tgamma.c
|
|
||||||
|
|
||||||
libm_a_SOURCES += $(%C%_src)
|
|
|
@ -1,484 +0,0 @@
|
||||||
/* Optimization at -O2 and above currently results in ICEs when converting
|
|
||||||
between vector types. */
|
|
||||||
#pragma GCC optimize ("O1")
|
|
||||||
|
|
||||||
#include <errno.h>
|
|
||||||
#include <sys/types.h>
|
|
||||||
#include <machine/ieeefp.h>
|
|
||||||
|
|
||||||
typedef float v2sf __attribute__ ((vector_size (8)));
|
|
||||||
typedef float v4sf __attribute__ ((vector_size (16)));
|
|
||||||
typedef float v8sf __attribute__ ((vector_size (32)));
|
|
||||||
typedef float v16sf __attribute__ ((vector_size (64)));
|
|
||||||
typedef float v32sf __attribute__ ((vector_size (128)));
|
|
||||||
typedef float v64sf __attribute__ ((vector_size (256)));
|
|
||||||
|
|
||||||
typedef double v2df __attribute__ ((vector_size (16)));
|
|
||||||
typedef double v4df __attribute__ ((vector_size (32)));
|
|
||||||
typedef double v8df __attribute__ ((vector_size (64)));
|
|
||||||
typedef double v16df __attribute__ ((vector_size (128)));
|
|
||||||
typedef double v32df __attribute__ ((vector_size (256)));
|
|
||||||
typedef double v64df __attribute__ ((vector_size (512)));
|
|
||||||
|
|
||||||
typedef int v2si __attribute__ ((vector_size (8)));
|
|
||||||
typedef int v4si __attribute__ ((vector_size (16)));
|
|
||||||
typedef int v8si __attribute__ ((vector_size (32)));
|
|
||||||
typedef int v16si __attribute__ ((vector_size (64)));
|
|
||||||
typedef int v32si __attribute__ ((vector_size (128)));
|
|
||||||
typedef int v64si __attribute__ ((vector_size (256)));
|
|
||||||
|
|
||||||
typedef unsigned int v64usi __attribute__ ((vector_size (256)));
|
|
||||||
|
|
||||||
typedef long v2di __attribute__ ((vector_size (16)));
|
|
||||||
typedef long v4di __attribute__ ((vector_size (32)));
|
|
||||||
typedef long v8di __attribute__ ((vector_size (64)));
|
|
||||||
typedef long v16di __attribute__ ((vector_size (128)));
|
|
||||||
typedef long v32di __attribute__ ((vector_size (256)));
|
|
||||||
typedef long v64di __attribute__ ((vector_size (512)));
|
|
||||||
|
|
||||||
typedef union {
|
|
||||||
v2sf t_v2sf;
|
|
||||||
v4sf t_v4sf;
|
|
||||||
v8sf t_v8sf;
|
|
||||||
v16sf t_v16sf;
|
|
||||||
v32sf t_v32sf;
|
|
||||||
v64sf t_v64sf;
|
|
||||||
|
|
||||||
v2df t_v2df;
|
|
||||||
v4df t_v4df;
|
|
||||||
v8df t_v8df;
|
|
||||||
v16df t_v16df;
|
|
||||||
v32df t_v32df;
|
|
||||||
v64df t_v64df;
|
|
||||||
|
|
||||||
v2si t_v2si;
|
|
||||||
v4si t_v4si;
|
|
||||||
v8si t_v8si;
|
|
||||||
v16si t_v16si;
|
|
||||||
v32si t_v32si;
|
|
||||||
v64si t_v64si;
|
|
||||||
|
|
||||||
v64usi t_v64usi;
|
|
||||||
|
|
||||||
v2di t_v2di;
|
|
||||||
v4di t_v4di;
|
|
||||||
v8di t_v8di;
|
|
||||||
v16di t_v16di;
|
|
||||||
v32di t_v32di;
|
|
||||||
v64di t_v64di;
|
|
||||||
} vector_union;
|
|
||||||
|
|
||||||
/* Cast between vectors with a different number of elements.

   Only the bytes common to TO_T and FROM are copied.  When TO_T is wider
   than FROM the extra lanes are zero-filled instead of being read from
   beyond the end of the source object, and the copy avoids dereferencing
   FROM through a pointer to an incompatible type.  FROM is evaluated
   exactly once.  */

#define RESIZE_VECTOR(to_t, from) \
({ \
  __auto_type __from = (from); \
  to_t __to = { 0 }; \
  __builtin_memcpy (&__to, &__from, \
                    sizeof (__to) < sizeof (__from) \
                    ? sizeof (__to) : sizeof (__from)); \
  __to; \
})
|
|
||||||
|
|
||||||
/* Bit-wise cast vector FROM to type TO_T. */
|
|
||||||
|
|
||||||
#define CAST_VECTOR(to_t, from) \
|
|
||||||
({ \
|
|
||||||
_Static_assert (sizeof (to_t) == sizeof (from)); \
|
|
||||||
union { \
|
|
||||||
typeof (from) __from; \
|
|
||||||
to_t __to; \
|
|
||||||
} __tmp; \
|
|
||||||
__tmp.__from = (from); \
|
|
||||||
__tmp.__to; \
|
|
||||||
})
|
|
||||||
|
|
||||||
#define EXTRACT_WORDS(hi, lo, x) \
|
|
||||||
do { \
|
|
||||||
vector_union __tmp; \
|
|
||||||
__tmp.t_v64df = (x); \
|
|
||||||
hi = __builtin_convertvector (__tmp.t_v64di >> 32, typeof (hi)); \
|
|
||||||
lo = __builtin_convertvector (__tmp.t_v64di & 0xffffffff, typeof (lo)); \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
#define INSERT_WORDS(x, hi, lo, cond) \
|
|
||||||
do { \
|
|
||||||
vector_union __tmp; \
|
|
||||||
__tmp.t_v64di = __builtin_convertvector (hi, v64di) << 32 | \
|
|
||||||
__builtin_convertvector (lo, v64di) & 0xffffffff; \
|
|
||||||
VECTOR_COND_MOVE (x, __tmp.t_v64df, cond); \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
#define GET_HIGH_WORD(x, y, cond) \
|
|
||||||
do { \
|
|
||||||
vector_union __tmp; \
|
|
||||||
__tmp.t_v64df = (y); \
|
|
||||||
VECTOR_COND_MOVE (x, __builtin_convertvector (__tmp.t_v64di >> 32, v64si), \
|
|
||||||
(cond)); \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
#define GET_LOW_WORD(x, y, cond) \
|
|
||||||
do { \
|
|
||||||
vector_union __tmp; \
|
|
||||||
__tmp.t_v64df = (y); \
|
|
||||||
VECTOR_COND_MOVE (x, __builtin_convertvector (__tmp.t_v64di & 0xffffffff, \
|
|
||||||
v64si), (cond)); \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
#define SET_HIGH_WORD(x, y, cond) \
|
|
||||||
do { \
|
|
||||||
vector_union __tmp; \
|
|
||||||
__tmp.t_v64df = x; \
|
|
||||||
__tmp.t_v64di &= 0xffffffff; \
|
|
||||||
__tmp.t_v64di |= __builtin_convertvector (y, v64di) << 32; \
|
|
||||||
VECTOR_COND_MOVE (x, __tmp.t_v64df, (cond)); \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
/* Replace the low 32 bits of each double in X with the matching lane of
   the 32-bit integer vector Y, for the lanes selected by COND (and by the
   enclosing __mask, via VECTOR_COND_MOVE).  The high 32 bits of each
   double are kept.

   Y is widened with __builtin_convertvector, which preserves the value of
   signed lanes and therefore sign-extends them; the widened value is
   masked to its low 32 bits so that a lane of Y with bit 31 set cannot
   overwrite the preserved high word.  */
#define SET_LOW_WORD(x, y, cond) \
do { \
  vector_union __tmp; \
  __tmp.t_v64df = (x); \
  __tmp.t_v64di &= 0xffffffff00000000ULL; \
  __tmp.t_v64di |= __builtin_convertvector ((y), v64di) & 0xffffffff; \
  VECTOR_COND_MOVE (x, __tmp.t_v64df, (cond)); \
} while (0)
|
|
||||||
|
|
||||||
#define GET_FLOAT_WORD(x, y, cond) \
|
|
||||||
VECTOR_COND_MOVE(x, CAST_VECTOR(v64si, (y)), (cond))
|
|
||||||
|
|
||||||
#define SET_FLOAT_WORD(x, y, cond) \
|
|
||||||
VECTOR_COND_MOVE(x, CAST_VECTOR(v64sf, (y)), (cond))
|
|
||||||
|
|
||||||
#define NO_COND __mask
|
|
||||||
|
|
||||||
/* Note - __mask is _not_ accounted for in VECTOR_MERGE! */
|
|
||||||
#define VECTOR_MERGE(vec1, vec2, cond) \
|
|
||||||
({ \
|
|
||||||
_Static_assert (__builtin_types_compatible_p (typeof (vec1), typeof (vec2))); \
|
|
||||||
union { \
|
|
||||||
typeof (vec1) val; \
|
|
||||||
v64si t_v64si; \
|
|
||||||
v64di t_v64di; \
|
|
||||||
} __vec1, __vec2, __res; \
|
|
||||||
__vec1.val = (vec1); \
|
|
||||||
__vec2.val = (vec2); \
|
|
||||||
__builtin_choose_expr ( \
|
|
||||||
sizeof (vec1) == sizeof (v64si), \
|
|
||||||
({ \
|
|
||||||
v64si __bitmask = __builtin_convertvector ((cond), v64si); \
|
|
||||||
__res.t_v64si = (__vec1.t_v64si & __bitmask) \
|
|
||||||
| (__vec2.t_v64si & ~__bitmask); \
|
|
||||||
}), \
|
|
||||||
({ \
|
|
||||||
v64di __bitmask = __builtin_convertvector ((cond), v64di); \
|
|
||||||
__res.t_v64di = (__vec1.t_v64di & __bitmask) \
|
|
||||||
| (__vec2.t_v64di & ~__bitmask); \
|
|
||||||
})); \
|
|
||||||
__res.val; \
|
|
||||||
})
|
|
||||||
|
|
||||||
#define VECTOR_RETURN(retval, cond) \
|
|
||||||
do { \
|
|
||||||
_Static_assert (__builtin_types_compatible_p (typeof (retval), typeof (__ret))); \
|
|
||||||
__auto_type __cond = __builtin_convertvector ((cond), typeof (__mask)); \
|
|
||||||
__auto_type __retval = (retval); \
|
|
||||||
VECTOR_COND_MOVE (__ret, __retval, __cond); \
|
|
||||||
__mask &= ~__cond; \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
#define VECTOR_COND_MOVE(var, val, cond) \
|
|
||||||
do { \
|
|
||||||
_Static_assert (__builtin_types_compatible_p (typeof (var), typeof (val))); \
|
|
||||||
__auto_type __cond = __builtin_convertvector ((cond), typeof (__mask)); \
|
|
||||||
var = VECTOR_MERGE ((val), var, __cond & __mask); \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
#define VECTOR_IF(cond, cond_var) \
|
|
||||||
{ \
|
|
||||||
__auto_type cond_var = (cond); \
|
|
||||||
__auto_type __inv_cond = ~cond_var; \
|
|
||||||
if (!ALL_ZEROES_P (cond_var)) \
|
|
||||||
{
|
|
||||||
|
|
||||||
#define VECTOR_ELSEIF(cond, cond_var) \
|
|
||||||
} \
|
|
||||||
cond_var = __inv_cond & (cond); \
|
|
||||||
__inv_cond &= ~(cond); \
|
|
||||||
if (!ALL_ZEROES_P (cond_var)) \
|
|
||||||
{
|
|
||||||
|
|
||||||
#define VECTOR_ELSE(cond_var) \
|
|
||||||
} \
|
|
||||||
cond_var = __inv_cond; \
|
|
||||||
if (!ALL_ZEROES_P (cond_var)) \
|
|
||||||
{
|
|
||||||
|
|
||||||
#define VECTOR_IF2(cond, cond_var, prev_cond_var) \
|
|
||||||
{ \
|
|
||||||
__auto_type cond_var = (cond) & __builtin_convertvector (prev_cond_var, typeof (cond)); \
|
|
||||||
__auto_type __inv_cond = ~(cond); \
|
|
||||||
if (!ALL_ZEROES_P (cond_var)) \
|
|
||||||
{
|
|
||||||
|
|
||||||
#define VECTOR_ELSEIF2(cond, cond_var, prev_cond_var) \
|
|
||||||
} \
|
|
||||||
cond_var = (cond) & __inv_cond & __builtin_convertvector (prev_cond_var, typeof (cond)); \
|
|
||||||
__inv_cond &= ~(cond); \
|
|
||||||
if (!ALL_ZEROES_P (cond_var)) \
|
|
||||||
{
|
|
||||||
|
|
||||||
#define VECTOR_ELSE2(cond_var, prev_cond_var) \
|
|
||||||
} \
|
|
||||||
cond_var = __inv_cond & __builtin_convertvector (prev_cond_var, typeof (__inv_cond)); \
|
|
||||||
if (!ALL_ZEROES_P (cond_var)) \
|
|
||||||
{
|
|
||||||
|
|
||||||
|
|
||||||
#define VECTOR_ENDIF \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define VECTOR_INIT_AUX(x, type) \
|
|
||||||
({ \
|
|
||||||
typeof (x) __e = (x); \
|
|
||||||
type __tmp = { \
|
|
||||||
__e, __e, __e, __e, __e, __e, __e, __e, \
|
|
||||||
__e, __e, __e, __e, __e, __e, __e, __e, \
|
|
||||||
__e, __e, __e, __e, __e, __e, __e, __e, \
|
|
||||||
__e, __e, __e, __e, __e, __e, __e, __e, \
|
|
||||||
__e, __e, __e, __e, __e, __e, __e, __e, \
|
|
||||||
__e, __e, __e, __e, __e, __e, __e, __e, \
|
|
||||||
__e, __e, __e, __e, __e, __e, __e, __e, \
|
|
||||||
__e, __e, __e, __e, __e, __e, __e, __e }; \
|
|
||||||
__tmp; \
|
|
||||||
})
|
|
||||||
|
|
||||||
#define VECTOR_INIT(x) \
|
|
||||||
(_Generic ((x), int: VECTOR_INIT_AUX ((x), v64si), \
|
|
||||||
unsigned: VECTOR_INIT_AUX ((x), v64usi), \
|
|
||||||
long: VECTOR_INIT_AUX ((x), v64di), \
|
|
||||||
float: VECTOR_INIT_AUX ((x), v64sf), \
|
|
||||||
double: VECTOR_INIT_AUX ((x), v64df)))
|
|
||||||
|
|
||||||
#define VECTOR_WIDTH(TYPE) (sizeof (TYPE) / (V_SF_SI_P (TYPE) ? 4 : 8))
|
|
||||||
|
|
||||||
#define V_SF_SI_P(TYPE) \
|
|
||||||
(__builtin_types_compatible_p (TYPE, v2sf) \
|
|
||||||
|| __builtin_types_compatible_p (TYPE, v4sf) \
|
|
||||||
|| __builtin_types_compatible_p (TYPE, v8sf) \
|
|
||||||
|| __builtin_types_compatible_p (TYPE, v16sf) \
|
|
||||||
|| __builtin_types_compatible_p (TYPE, v32sf) \
|
|
||||||
|| __builtin_types_compatible_p (TYPE, v64sf) \
|
|
||||||
|| __builtin_types_compatible_p (TYPE, v2si) \
|
|
||||||
|| __builtin_types_compatible_p (TYPE, v4si) \
|
|
||||||
|| __builtin_types_compatible_p (TYPE, v8si) \
|
|
||||||
|| __builtin_types_compatible_p (TYPE, v16si) \
|
|
||||||
|| __builtin_types_compatible_p (TYPE, v32si) \
|
|
||||||
|| __builtin_types_compatible_p (TYPE, v64si))
|
|
||||||
|
|
||||||
/* Build the initial lane mask for vector type TYPE: the first
   VECTOR_WIDTH (TYPE) lanes are all-ones and every remaining lane is zero.
   The result is a v64si when TYPE has 32-bit elements (V_SF_SI_P) and a
   v64di otherwise.

   The lanes are written through the union member that matches the result
   type.  Writing N 64-bit lanes and then reading the mask back as v64si
   would enable up to 2*N 32-bit lanes.  */
#define VECTOR_INIT_MASK(TYPE) \
({ \
  vector_union __mask; \
  __mask.t_v64di = VECTOR_INIT (0L); \
  for (int i = 0; i < VECTOR_WIDTH (TYPE); i++) \
    { \
      if (V_SF_SI_P (TYPE)) \
        __mask.t_v64si[i] = -1; \
      else \
        __mask.t_v64di[i] = -1; \
    } \
  __builtin_choose_expr (V_SF_SI_P (TYPE), __mask.t_v64si, __mask.t_v64di); \
})
|
|
||||||
|
|
||||||
#define ALL_ZEROES_P(x) (COND_TO_BITMASK(x) == 0)
|
|
||||||
|
|
||||||
#define COND_TO_BITMASK(x) \
|
|
||||||
({ \
|
|
||||||
long __tmp = 0; \
|
|
||||||
__auto_type __x = __builtin_convertvector((x), typeof (__mask)) & __mask; \
|
|
||||||
__builtin_choose_expr (sizeof (__mask) == 256, \
|
|
||||||
({ asm ("v_cmp_ne_u32_e64 %0, %1, 0" \
|
|
||||||
: "=Sg" (__tmp) \
|
|
||||||
: "v" (__x)); }), \
|
|
||||||
({ asm ("v_cmp_ne_u64_e64 %0, %1, 0" \
|
|
||||||
: "=Sg" (__tmp) \
|
|
||||||
: "v" (__x)); })); \
|
|
||||||
__tmp; \
|
|
||||||
})
|
|
||||||
|
|
||||||
#define VECTOR_WHILE(cond, cond_var, prev_cond_var) \
|
|
||||||
{ \
|
|
||||||
__auto_type cond_var = prev_cond_var; \
|
|
||||||
for (;;) { \
|
|
||||||
cond_var &= (cond); \
|
|
||||||
if (ALL_ZEROES_P (cond_var)) \
|
|
||||||
break;
|
|
||||||
|
|
||||||
#define VECTOR_ENDWHILE \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define DEF_VS_MATH_FUNC(rettype, name, args...) \
|
|
||||||
rettype v64sf##_##name##_aux (args, v64si __mask)
|
|
||||||
|
|
||||||
#define DEF_VD_MATH_FUNC(rettype, name, args...) \
|
|
||||||
rettype v64df##_##name##_aux (args, v64di __mask)
|
|
||||||
|
|
||||||
/* Use this for predicate functions that take a vector of doubles but
|
|
||||||
return a vector of ints. */
|
|
||||||
#define DEF_VD_MATH_PRED(rettype, name, args...) \
|
|
||||||
rettype v64df##_##name##_aux (args, v64si __mask)
|
|
||||||
|
|
||||||
#define FUNCTION_INIT(rettype) \
|
|
||||||
rettype __ret
|
|
||||||
|
|
||||||
#define FUNCTION_RETURN \
|
|
||||||
return __ret
|
|
||||||
|
|
||||||
#define DEF_VARIANT(FUN, TRET, TARG, COUNT) \
|
|
||||||
v##COUNT##TRET \
|
|
||||||
v##COUNT##TARG##_##FUN (v##COUNT##TARG __arg) \
|
|
||||||
{ \
|
|
||||||
__auto_type __upsized_arg = RESIZE_VECTOR (v64##TARG, __arg); \
|
|
||||||
__auto_type __mask = VECTOR_INIT_MASK (v##COUNT##TRET); \
|
|
||||||
__auto_type __result = v64##TARG##_##FUN##_aux (__upsized_arg, __mask); \
|
|
||||||
return RESIZE_VECTOR (v##COUNT##TRET, __result); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define DEF_VARIANT2(FUN, TRET, TARG, COUNT) \
|
|
||||||
v##COUNT##TRET \
|
|
||||||
v##COUNT##TARG##_##FUN (v##COUNT##TARG __arg1, v##COUNT##TARG __arg2) \
|
|
||||||
{ \
|
|
||||||
__auto_type __upsized_arg1 = RESIZE_VECTOR (v64##TARG, __arg1); \
|
|
||||||
__auto_type __upsized_arg2 = RESIZE_VECTOR (v64##TARG, __arg2); \
|
|
||||||
__auto_type __mask = VECTOR_INIT_MASK (v##COUNT##TRET); \
|
|
||||||
__auto_type __result = v64##TARG##_##FUN##_aux (__upsized_arg1, __upsized_arg2, __mask); \
|
|
||||||
return RESIZE_VECTOR (v##COUNT##TRET, __result); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define DEF_VARIANTS(FUN, RETTYPE, ARGTYPE) \
|
|
||||||
DEF_VARIANT (FUN, RETTYPE, ARGTYPE, 2) \
|
|
||||||
DEF_VARIANT (FUN, RETTYPE, ARGTYPE, 4) \
|
|
||||||
DEF_VARIANT (FUN, RETTYPE, ARGTYPE, 8) \
|
|
||||||
DEF_VARIANT (FUN, RETTYPE, ARGTYPE, 16) \
|
|
||||||
DEF_VARIANT (FUN, RETTYPE, ARGTYPE, 32) \
|
|
||||||
DEF_VARIANT (FUN, RETTYPE, ARGTYPE, 64)
|
|
||||||
|
|
||||||
#define DEF_VARIANTS2(FUN, RETTYPE, ARGTYPE) \
|
|
||||||
DEF_VARIANT2 (FUN, RETTYPE, ARGTYPE, 2) \
|
|
||||||
DEF_VARIANT2 (FUN, RETTYPE, ARGTYPE, 4) \
|
|
||||||
DEF_VARIANT2 (FUN, RETTYPE, ARGTYPE, 8) \
|
|
||||||
DEF_VARIANT2 (FUN, RETTYPE, ARGTYPE, 16) \
|
|
||||||
DEF_VARIANT2 (FUN, RETTYPE, ARGTYPE, 32) \
|
|
||||||
DEF_VARIANT2 (FUN, RETTYPE, ARGTYPE, 64)
|
|
||||||
|
|
||||||
/* From fdlibm.h */
|
|
||||||
|
|
||||||
#ifdef _FLT_LARGEST_EXPONENT_IS_NORMAL
|
|
||||||
#define FLT_UWORD_IS_FINITE(x) ((x) == (x))
|
|
||||||
#define FLT_UWORD_IS_NAN(x) ((x) != (x))
|
|
||||||
#define FLT_UWORD_IS_INFINITE(x) ((x) != (x))
|
|
||||||
#define FLT_UWORD_MAX 0x7fffffff
|
|
||||||
#define FLT_UWORD_EXP_MAX 0x43010000
|
|
||||||
#define FLT_UWORD_LOG_MAX 0x42b2d4fc
|
|
||||||
#define FLT_UWORD_LOG_2MAX 0x42b437e0
|
|
||||||
#define HUGE ((float)0X1.FFFFFEP128)
|
|
||||||
#else
|
|
||||||
#define FLT_UWORD_IS_FINITE(x) ((x)<0x7f800000)
|
|
||||||
#define FLT_UWORD_IS_NAN(x) ((x)>0x7f800000)
|
|
||||||
#define FLT_UWORD_IS_INFINITE(x) ((x)==0x7f800000)
|
|
||||||
#define FLT_UWORD_MAX 0x7f7fffffL
|
|
||||||
#define FLT_UWORD_EXP_MAX 0x43000000
|
|
||||||
#define FLT_UWORD_LOG_MAX 0x42b17217
|
|
||||||
#define FLT_UWORD_LOG_2MAX 0x42b2d4fc
|
|
||||||
#define HUGE ((float)3.40282346638528860e+38)
|
|
||||||
#endif
|
|
||||||
#define FLT_UWORD_HALF_MAX (FLT_UWORD_MAX-(1L<<23))
|
|
||||||
#define FLT_LARGEST_EXP (FLT_UWORD_MAX>>23)
|
|
||||||
|
|
||||||
#ifdef _FLT_NO_DENORMALS
|
|
||||||
#define FLT_UWORD_IS_ZERO(x) ((x)<0x00800000)
|
|
||||||
#define FLT_UWORD_IS_SUBNORMAL(x) ((x) != (x))
|
|
||||||
#define FLT_UWORD_MIN 0x00800000
|
|
||||||
#define FLT_UWORD_EXP_MIN 0x42fc0000
|
|
||||||
#define FLT_UWORD_LOG_MIN 0x42aeac50
|
|
||||||
#define FLT_SMALLEST_EXP 1
|
|
||||||
#else
|
|
||||||
#define FLT_UWORD_IS_ZERO(x) ((x)==0)
|
|
||||||
#define FLT_UWORD_IS_SUBNORMAL(x) ((x)<0x00800000)
|
|
||||||
#define FLT_UWORD_MIN 0x00000001
|
|
||||||
#define FLT_UWORD_EXP_MIN 0x43160000
|
|
||||||
#define FLT_UWORD_LOG_MIN 0x42cff1b5
|
|
||||||
#define FLT_SMALLEST_EXP -22
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* From zmath.h */
|
|
||||||
|
|
||||||
#define NUM 3
|
|
||||||
#define NAN 2
|
|
||||||
#define INF 1
|
|
||||||
|
|
||||||
#define __PI 3.14159265358979323846
|
|
||||||
#define __SQRT_HALF 0.70710678118654752440
|
|
||||||
#define __PI_OVER_TWO 1.57079632679489661923132
|
|
||||||
#define __INV_PI_OVER_TWO_2_24 10680707.430881743590348355907974
|
|
||||||
|
|
||||||
typedef const union
|
|
||||||
{
|
|
||||||
unsigned int l[2];
|
|
||||||
double d;
|
|
||||||
} udouble;
|
|
||||||
|
|
||||||
typedef const union
|
|
||||||
{
|
|
||||||
unsigned int l;
|
|
||||||
float f;
|
|
||||||
} ufloat;
|
|
||||||
|
|
||||||
extern double BIGX;
|
|
||||||
extern double SMALLX;
|
|
||||||
|
|
||||||
extern udouble z_infinity;
|
|
||||||
extern udouble z_notanum;
|
|
||||||
extern double z_rooteps;
|
|
||||||
|
|
||||||
extern ufloat z_infinity_f;
|
|
||||||
extern ufloat z_notanum_f;
|
|
||||||
extern float z_rooteps_f;
|
|
||||||
|
|
||||||
/* From math_errf.c */
|
|
||||||
|
|
||||||
/* Overflow helper: sets errno to ERANGE and returns the product of two
   0x1p97f factors.  The first factor is a bitwise merge that takes
   -0x1p97f where the bits of SIGN are set and 0x1p97f elsewhere.  */
static v64sf v64sf_math_oflowf (v64si sign)
{
  v64sf negative = VECTOR_INIT (-0x1p97f);
  v64sf positive = VECTOR_INIT (0x1p97f);
  v64sf signed_factor;

  errno = ERANGE;
  signed_factor = VECTOR_MERGE (negative, positive, sign);
  return signed_factor * 0x1p97f;
}
|
|
||||||
|
|
||||||
/* Underflow helper: sets errno to ERANGE and returns the product of two
   0x1p-95f factors.  The first factor is a bitwise merge that takes
   -0x1p-95f where the bits of SIGN are set and 0x1p-95f elsewhere.  */
static v64sf v64sf_math_uflowf (v64si sign)
{
  v64sf negative = VECTOR_INIT (-0x1p-95f);
  v64sf positive = VECTOR_INIT (0x1p-95f);
  v64sf signed_factor;

  errno = ERANGE;
  signed_factor = VECTOR_MERGE (negative, positive, sign);
  return signed_factor * 0x1p-95f;
}
|
|
||||||
|
|
||||||
/* From math_config.h */
|
|
||||||
|
|
||||||
/* Per-lane signalling-NaN test on the raw bits of X, using the
   IEEE-754 2008 encoding: all exponent bits set and the most significant
   significand bit clear.  Returns the lane-wise AND of the two vector
   comparisons.  */
static v64si v64sf_issignalingf_inline (v64sf x)
{
  /* The word-access macros consult __mask; enable every lane.  */
  v64si __mask = VECTOR_INIT (-1);
  v64si bits;

  GET_FLOAT_WORD (bits, x, NO_COND);

  v64si exponent_all_ones = (bits & 0x7f800000) == 0x7f800000;
  v64si top_fraction_bit_clear = (bits & 0x00400000) == 0;

  return exponent_all_ones & top_fraction_bit_clear;
}
|
|
||||||
|
|
||||||
/* Vector extensions to sys/reent.h */
|
|
||||||
|
|
||||||
struct v64_reent {
|
|
||||||
v64si _v64si_gamma_signgam;
|
|
||||||
};
|
|
||||||
|
|
||||||
extern struct v64_reent *_v64_reent;
|
|
||||||
#define _V64_REENT _v64_reent
|
|
||||||
|
|
||||||
#define _REENT_V64SI_SIGNGAM(ptr) ((ptr)->_v64si_gamma_signgam)
|
|
||||||
|
|
||||||
/* Vector extensions to math.h */
|
|
||||||
|
|
||||||
#define v64si_signgam (*__v64si_signgam())
|
|
||||||
extern v64si* __v64si_signgam (void);
|
|
||||||
#define __v64si_signgam_r(ptr) _REENT_V64SI_SIGNGAM(ptr)
|
|
||||||
|
|
|
@ -1,23 +0,0 @@
|
||||||
/* Based on newlib/libm/mathfp/s_mathcnst.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
double BIGX = 7.09782712893383973096e+02;
|
|
||||||
double SMALLX = -7.45133219101941108420e+02;
|
|
||||||
double z_rooteps = 7.4505859692e-9;
|
|
||||||
float z_rooteps_f = 1.7263349182589107e-4;
|
|
||||||
|
|
||||||
ufloat z_hugeval_f = { 0x7f800000 };
|
|
||||||
ufloat z_infinity_f = { 0x7f800000 };
|
|
||||||
ufloat z_notanum_f = { 0x7fd00000 };
|
|
||||||
|
|
||||||
#ifdef __IEEE_BIG_ENDIAN
|
|
||||||
udouble z_hugeval = { 0x7ff00000, 0 };
|
|
||||||
udouble z_infinity = { 0x7ff00000, 0 };
|
|
||||||
udouble z_notanum = { 0xeff80000, 0 };
|
|
||||||
#else /* __IEEE_LITTLE_ENDIAN */
|
|
||||||
udouble z_hugeval = { 0, 0x7ff00000 };
|
|
||||||
udouble z_infinity = { 0, 0x7ff00000 };
|
|
||||||
udouble z_notanum = { 0, 0x7ff80000 };
|
|
||||||
#endif /* __IEEE_LITTLE_ENDIAN */
|
|
||||||
|
|
|
@ -1,10 +0,0 @@
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
static struct v64_reent __reent;
|
|
||||||
struct v64_reent *_v64_reent = &__reent;
|
|
||||||
|
|
||||||
v64si*
|
|
||||||
__v64si_signgam (void)
|
|
||||||
{
|
|
||||||
return &_REENT_V64SI_SIGNGAM(_V64_REENT);
|
|
||||||
}
|
|
|
@ -1,13 +0,0 @@
|
||||||
/* Based on newlib/libm/mathfp/s_acos.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64df v64df_asine_aux (v64df x, int acosine, v64di);
|
|
||||||
|
|
||||||
/* Masked vector acos: forwards X and the lane mask to the shared asine
   routine with its acosine argument set to 1.  */
DEF_VD_MATH_FUNC (v64df, acos, v64df x)
{
  const int acosine = 1;
  v64df result = v64df_asine_aux (x, acosine, __mask);

  return result;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (acos, df, df)
|
|
||||||
|
|
|
@ -1,67 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/e_acosh.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64df v64df_log_aux (v64df, v64di);
|
|
||||||
v64df v64df_log1p_aux (v64df, v64di);
|
|
||||||
v64df v64df_sqrt_aux (v64df, v64di);
|
|
||||||
|
|
||||||
/* Masked vector acosh.  Lanes are classified by the high word HX (and low
   word LX) of X and handled case by case:

     x < 1                     (x-x) / (x-x)
     x >= 2**28, inf or NaN    x + x
     x >= 2**28, finite        log (x) + ln2
     x == 1                    0
     2**28 > x > 2             log (2x - 1 / (x + sqrt (x*x - 1)))
     1 < x < 2                 log1p (t + sqrt (2t + t*t)), t = x - 1

   Each VECTOR_RETURN stores the result for its lanes and clears them from
   __mask, so later cases only affect lanes that are still undecided.  */
DEF_VD_MATH_FUNC (v64df, acosh, v64df x)
{
  static const double one = 1.0;
  static const double ln2 = 6.93147180559945286227e-01;  /* 0x3FE62E42, 0xFEFA39EF */

  FUNCTION_INIT (v64df);

  v64si hx, lx;
  EXTRACT_WORDS (hx, lx, x);

  VECTOR_IF (hx < 0x3ff00000, cond)             /* x < 1 */
    VECTOR_RETURN ((x-x) / (x-x), cond);
  VECTOR_ENDIF
  VECTOR_IF (hx >= 0x41b00000, cond)            /* x > 2**28 */
    VECTOR_IF2 (hx >= 0x7ff00000, cond2, cond)  /* x is inf or NaN */
      VECTOR_RETURN (x+x, cond2);
    /* The nested else is required here: plain VECTOR_ELSE would select
       every lane that is not inf/NaN, including lanes below 2**28 that
       belong to the cases further down.  */
    VECTOR_ELSE2 (cond2, cond)
      /* acosh(huge)=log(2x) */
      VECTOR_RETURN (v64df_log_aux (x, __mask) + ln2, cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF
  VECTOR_IF (((hx - 0x3ff00000) | lx) == 0, cond)
    /* acosh(1) = 0 */
    VECTOR_RETURN (VECTOR_INIT (0.0), cond);
  VECTOR_ENDIF
  VECTOR_IF (hx > 0x40000000, cond)             /* 2**28 > x > 2 */
  {
    v64df t = x * x;
    VECTOR_RETURN (v64df_log_aux (2.0*x - one /
                                  (x + v64df_sqrt_aux (t - one, __mask)),
                                  __mask),
                   cond);
  }
  VECTOR_ELSE (cond)                            /* 1<x<2 */
  {
    v64df t = x - one;
    VECTOR_RETURN (v64df_log1p_aux (t + v64df_sqrt_aux (2.0*t + t*t, __mask),
                                    __mask),
                   cond);
  }
  VECTOR_ENDIF

  FUNCTION_RETURN;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (acosh, df, df)
|
|
||||||
|
|
|
@ -1,12 +0,0 @@
|
||||||
/* Based on newlib/libm/mathfp/s_asin.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64df v64df_asine_aux (v64df x, int acosine, v64di __mask);
|
|
||||||
|
|
||||||
/* Masked vector asin: forwards X and the lane mask to the shared asine
   routine with its acosine argument set to 0.  */
DEF_VD_MATH_FUNC (v64df, asin, v64df x)
{
  const int acosine = 0;
  v64df result = v64df_asine_aux (x, acosine, __mask);

  return result;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (asin, df, df)
|
|
|
@ -1,106 +0,0 @@
|
||||||
/******************************************************************
|
|
||||||
* The following routines are coded directly from the algorithms
|
|
||||||
* and coefficients given in "Software Manual for the Elementary
|
|
||||||
* Functions" by William J. Cody, Jr. and William Waite, Prentice
|
|
||||||
* Hall, 1980.
|
|
||||||
******************************************************************/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/s_asine.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64si v64df_numtest (v64df);
|
|
||||||
v64df v64df_sqrt_aux (v64df, v64di);
|
|
||||||
|
|
||||||
static const double p[] = { -0.27368494524164255994e+2,
|
|
||||||
0.57208227877891731407e+2,
|
|
||||||
-0.39688862997404877339e+2,
|
|
||||||
0.10152522233806463645e+2,
|
|
||||||
-0.69674573447350646411 };
|
|
||||||
static const double q[] = { -0.16421096714498560795e+3,
|
|
||||||
0.41714430248260412556e+3,
|
|
||||||
-0.38186303361750149284e+3,
|
|
||||||
0.15095270841030604719e+3,
|
|
||||||
-0.23823859153670238830e+2 };
|
|
||||||
static const double a[] = { 0.0, 0.78539816339744830962 };
|
|
||||||
static const double b[] = { 1.57079632679489661923, 0.78539816339744830962 };
|
|
||||||
|
|
||||||
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsv)

/* Common kernel for the vector asin and acos entry points.

   x        input vector.
   acosine  0 selects the arcsine reconstruction, non-zero the arccosine
            one.

   Lanes classified as NaN or infinite set errno to EDOM (NaN lanes return
   the input, infinite lanes return z_infinity).  Lanes with |x| > 1 set
   errno to ERANGE and return z_notanum.  */
DEF_VD_MATH_FUNC (v64df, asine, v64df x, int acosine)
{
  FUNCTION_INIT (v64df);

  /* Set to -1 in lanes that take the |x| > 0.5 reduction.  */
  v64si branch = VECTOR_INIT (0);

  /* Classify each lane.  The same vector is overwritten further down and
     then serves as the selector into the a[] / b[] tables.  */
  v64si i = v64df_numtest (x);
  VECTOR_IF ((i == NAN) | (i == INF), cond)
    errno = EDOM;
    VECTOR_RETURN (VECTOR_MERGE (x, VECTOR_INIT (z_infinity.d), i == NAN),
                   cond);
  VECTOR_ENDIF

  v64df mag = __builtin_gcn_fabsv (x);
  v64df g, result;

  VECTOR_IF (mag > 0.5, cond)
    VECTOR_COND_MOVE (i, VECTOR_INIT (1 - acosine), cond);

    /* Arguments outside [-1, 1] are a range error.  */
    VECTOR_IF2 (mag > 1.0, cond2, cond)
      errno = ERANGE;
      VECTOR_RETURN (VECTOR_INIT (z_notanum.d), cond2);
    VECTOR_ENDIF

    /* Reduce: g = (1 - |x|) / 2, then continue with -2 * sqrt (g).  */
    VECTOR_COND_MOVE (g, (1.0 - mag) / 2.0, cond);
    VECTOR_COND_MOVE (mag, -2.0 * v64df_sqrt_aux (g, __mask), cond);
    VECTOR_COND_MOVE (branch, VECTOR_INIT (-1), cond);
  VECTOR_ELSE (cond)
    VECTOR_COND_MOVE (i, VECTOR_INIT (acosine), cond);
    VECTOR_IF2 (mag < z_rooteps, cond2, cond)
      /* Below the threshold the argument itself is used as the result.  */
      VECTOR_COND_MOVE (result, mag, cond2);
    VECTOR_ELSE2 (cond2, cond)
      VECTOR_COND_MOVE (g, mag * mag, cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF

  VECTOR_IF ((mag >= z_rooteps) | __builtin_convertvector(branch, v64di), cond)
    {
      /* Rational approximation P(g)/Q(g) built from the p[] and q[]
         coefficient tables.  */
      v64df num = ((((p[4] * g + p[3]) * g + p[2]) * g + p[1]) * g + p[0]) * g;
      v64df den = ((((g + q[4]) * g + q[3]) * g + q[2]) * g + q[1]) * g + q[0];
      v64df ratio = num / den;

      VECTOR_COND_MOVE (result, mag + mag * ratio, cond);
    }
  VECTOR_ENDIF

  /* Per-lane table lookup: element 1 where the selector is non-zero,
     element 0 otherwise.  */
  v64df a_sel = VECTOR_MERGE (VECTOR_INIT (a[1]), VECTOR_INIT (a[0]), i != 0);

  /* Final reconstruction of asin or acos.  */
  if (acosine != 0)
    {
      v64df b_sel = VECTOR_MERGE (VECTOR_INIT(b[1]), VECTOR_INIT(b[0]), i != 0);

      VECTOR_IF (x < 0.0, cond)
        VECTOR_COND_MOVE (result, (b_sel + result) + b_sel, cond);
      VECTOR_ELSE (cond)
        VECTOR_COND_MOVE (result, (a_sel - result) + a_sel, cond);
      VECTOR_ENDIF
    }
  else
    {
      VECTOR_COND_MOVE (result, (a_sel + result) + a_sel, NO_COND);
      /* Negative inputs get the negated result.  */
      VECTOR_IF (x < 0.0, cond)
        VECTOR_COND_MOVE (result, -result, cond);
      VECTOR_ENDIF
    }

  VECTOR_RETURN (result, NO_COND);

  FUNCTION_RETURN;
}

#endif
|
|
|
@ -1,67 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/s_asinh.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64df v64df_log_aux (v64df, v64di);
|
|
||||||
v64df v64df_log1p_aux (v64df, v64di);
|
|
||||||
v64df v64df_sqrt_aux (v64df, v64di);
|
|
||||||
|
|
||||||
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsv)

/* Vector inverse hyperbolic sine.  The magnitude of the result is
   computed from |x| with one of three formulas chosen by the size of
   |x|; lanes whose high word is not positive return the negated value.  */
DEF_VD_MATH_FUNC (v64df, asinh, v64df x)
{
  static const double one = 1.00000000000000000000e+00; /* 0x3FF00000, 0x00000000 */
  static const double ln2 = 6.93147180559945286227e-01; /* 0x3FE62E42, 0xFEFA39EF */
  static const double huge = 1.00000000000000000000e+300;

  FUNCTION_INIT (v64df);

  v64si hx;
  GET_HIGH_WORD (hx, x, NO_COND);
  v64si ix = hx & 0x7fffffff;	/* high word with the sign bit cleared */
  v64df magnitude;

  VECTOR_IF (ix >=0x7ff00000, cond)	/* x is inf or NaN */
    VECTOR_RETURN (x + x, cond);
  VECTOR_ENDIF

  VECTOR_IF (ix < 0x3e300000, cond)	/* |x|<2**-28 */
    VECTOR_IF2 (__builtin_convertvector(huge+x > one, v64si), cond2, cond)	/* return x inexact except 0 */
      VECTOR_RETURN (x, cond);
    VECTOR_ENDIF
  VECTOR_ENDIF

  VECTOR_IF (ix > 0x41b00000, cond)	/* x > 2**28 */
    v64df big_log = v64df_log_aux (__builtin_gcn_fabsv (x), __mask);
    VECTOR_COND_MOVE (magnitude, big_log + ln2, cond);
  VECTOR_ELSEIF (ix > 0x40000000, cond)	/* 2**28 > |x| > 2.0 */
    v64df ax = __builtin_gcn_fabsv (x);
    v64df root = v64df_sqrt_aux (x*x + one, __mask);
    v64df arg = 2.0 * ax + one / (root + ax);
    VECTOR_COND_MOVE (magnitude, v64df_log_aux (arg, __mask), cond);
  VECTOR_ELSE (cond)			/* 2.0 > |x| > 2**-28 */
    v64df xsq = x * x;
    v64df root = v64df_sqrt_aux (one + xsq, __mask);
    v64df arg = __builtin_gcn_fabsv (x) + xsq / (one + root);
    VECTOR_COND_MOVE (magnitude, v64df_log1p_aux (arg, __mask), cond);
  VECTOR_ENDIF

  /* Restore the sign of the input.  */
  VECTOR_IF (hx > 0, cond)
    VECTOR_RETURN (magnitude, cond);
  VECTOR_ELSE (cond)
    VECTOR_RETURN (-magnitude, cond);
  VECTOR_ENDIF

  FUNCTION_RETURN;
}

DEF_VARIANTS (asinh, df, df)

#endif
|
|
|
@ -1,14 +0,0 @@
|
||||||
/* Based on newlib/libm/mathfp/s_atan.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64df v64df_atangent_aux (v64df, v64df, v64df, int, v64di);
|
|
||||||
|
|
||||||
DEF_VD_MATH_FUNC (v64df, atan, v64df x)
|
|
||||||
{
|
|
||||||
return v64df_atangent_aux (x,
|
|
||||||
VECTOR_INIT (0.0),
|
|
||||||
VECTOR_INIT (0.0), 0, __mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (atan, df, df)
|
|
|
@ -1,12 +0,0 @@
|
||||||
/* Based on newlib/libm/mathfp/s_atan2.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64df v64df_atangent_aux (v64df, v64df, v64df, int, v64di);
|
|
||||||
|
|
||||||
DEF_VD_MATH_FUNC (v64df, atan2, v64df v, v64df u)
|
|
||||||
{
|
|
||||||
return (v64df_atangent_aux (VECTOR_INIT (0.0), v, u, 1, __mask));
|
|
||||||
}
|
|
||||||
|
|
||||||
DEF_VARIANTS2 (atan2, df, df)
|
|
|
@ -1,132 +0,0 @@
|
||||||
/******************************************************************
|
|
||||||
* The following routines are coded directly from the algorithms
|
|
||||||
* and coefficients given in "Software Manual for the Elementary
|
|
||||||
* Functions" by William J. Cody, Jr. and William Waite, Prentice
|
|
||||||
* Hall, 1980.
|
|
||||||
******************************************************************/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/s_atangent.c in Newlib. */
|
|
||||||
|
|
||||||
#include <float.h>
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
#if defined (__has_builtin) \
    && __has_builtin (__builtin_gcn_fabsv) \
    && __has_builtin (__builtin_gcn_frexpv_exp)

/* Common kernel for the vector atan and atan2 entry points.

   x        argument used when arctan2 is zero.
   v, u     numerator and denominator used when arctan2 is non-zero.
   arctan2  selects between the one- and two-argument forms.

   In the two-argument form, lanes with v == 0 and u == 0 set errno to
   ERANGE and return 0.  */
DEF_VD_MATH_FUNC (v64df, atangent, v64df x, v64df v, v64df u, int arctan2)
{
  static const double ROOT3 = 1.73205080756887729353;
  static const double a[] = { 0.0, 0.52359877559829887308, 1.57079632679489661923,
                              1.04719755119659774615 };
  static const double q[] = { 0.41066306682575781263e+2,
                              0.86157349597130242515e+2,
                              0.59578436142597344465e+2,
                              0.15024001160028576121e+2 };
  static const double p[] = { -0.13688768894191926929e+2,
                              -0.20505855195861651981e+2,
                              -0.84946240351320683534e+1,
                              -0.83758299368150059274 };
  static const float z_rooteps = 7.4505859692e-9;

  FUNCTION_INIT (v64df);

  v64df zero = VECTOR_INIT (0.0);
  v64df pi = VECTOR_INIT (__PI);
  v64df pi_over_two = VECTOR_INIT (__PI_OVER_TWO);
  v64df res;
  /* -1 in lanes whose result has already been fixed by a special case.  */
  v64si branch = VECTOR_INIT (0);

  /* Two-argument form: settle the lanes where v / u cannot be formed
     safely before attempting the division.  */
  if (arctan2)
    {
      VECTOR_IF (u == 0.0, cond)
        VECTOR_IF2 (v == 0.0, cond2, cond)
          errno = ERANGE;
          VECTOR_RETURN (VECTOR_INIT (0.0), cond2);
        VECTOR_ELSE2 (cond2, cond)
          VECTOR_COND_MOVE (branch, VECTOR_INIT (-1), cond2);
          VECTOR_COND_MOVE (res, pi_over_two, cond2);
        VECTOR_ENDIF
      VECTOR_ENDIF

      VECTOR_IF (~branch, cond)
        /* Compare the binary exponents of the two operands.  */
        v64si expv = __builtin_gcn_frexpv_exp (v);
        v64si expu = __builtin_gcn_frexpv_exp (u);
        v64si e = expv - expu;

        /* The quotient would overflow.  */
        VECTOR_IF2 (e > DBL_MAX_EXP, cond2, cond)
          VECTOR_COND_MOVE (branch, VECTOR_INIT (-1), cond2);
          VECTOR_COND_MOVE (res, pi_over_two, cond2);
        VECTOR_ENDIF

        /* The quotient would underflow.  */
        VECTOR_IF2 (e < DBL_MIN_EXP, cond2, cond)
          VECTOR_COND_MOVE (branch, VECTOR_INIT (-1), cond2);
          VECTOR_COND_MOVE (res, zero, cond2);
        VECTOR_ENDIF
      VECTOR_ENDIF
    }

  VECTOR_IF (~branch, cond)
    /* N records which reductions were applied; it selects the a[]
       constant added back at the end.  */
    v64si N = VECTOR_INIT (0);
    v64df f;

    if (arctan2)
      f = __builtin_gcn_fabsv (v / u);
    else
      f = __builtin_gcn_fabsv (x);

    /* Arguments above 1 are replaced by their reciprocal.  */
    VECTOR_IF2 (__builtin_convertvector(f > 1.0, v64si), cond2, cond)
      VECTOR_COND_MOVE (f, 1.0 / f, cond2);
      VECTOR_COND_MOVE (N, VECTOR_INIT (2), cond2);
    VECTOR_ENDIF

    /* Second reduction for arguments above 2 - sqrt (3).  */
    VECTOR_IF2 (__builtin_convertvector(f > (2.0 - ROOT3), v64si), cond2, cond)
      double A = ROOT3 - 1.0;
      VECTOR_COND_MOVE (f, (((A * f - 0.5) - 0.5) + f) / (ROOT3 + f),
                        cond2);
      N += cond2 & 1;
    VECTOR_ENDIF

    /* Very small arguments are returned as they are.  */
    VECTOR_IF2 (__builtin_convertvector((-z_rooteps < f) & (f < z_rooteps), v64si), cond2, cond)
      VECTOR_COND_MOVE (res, f, cond2);

    /* Otherwise evaluate the rational approximation P(g)/Q(g).  */
    VECTOR_ELSE2 (cond2, cond)
      v64df g = f * f;
      v64df num = (((p[3] * g + p[2]) * g + p[1]) * g + p[0]) * g;
      v64df den = (((g + q[3]) * g + q[2]) * g + q[1]) * g + q[0];
      v64df ratio = num / den;

      VECTOR_COND_MOVE (res, f + f * ratio, cond2);
    VECTOR_ENDIF

    VECTOR_COND_MOVE (res, -res, cond & (N > 1));

    /* Add a[N]; a[0] is zero, so only N = 1, 2, 3 need handling.  */
    res += VECTOR_MERGE (VECTOR_INIT (a[1]), zero, cond & (N == 1));
    res += VECTOR_MERGE (VECTOR_INIT (a[2]), zero, cond & (N == 2));
    res += VECTOR_MERGE (VECTOR_INIT (a[3]), zero, cond & (N == 3));
  VECTOR_ENDIF

  /* Sign / quadrant fix-up.  */
  if (arctan2)
    {
      /* u < 0.0: reflect about pi.  */
      VECTOR_COND_MOVE (res, pi - res, u < 0.0);
      /* v < 0.0: negate.  */
      VECTOR_COND_MOVE (res, -res, v < 0.0);
    }
  /* Single-argument form: negate for x < 0.0.  */
  else
    VECTOR_COND_MOVE (res, -res, x < 0.0);

  VECTOR_RETURN (res, NO_COND);

  FUNCTION_RETURN;
}

#endif
|
|
|
@ -1,61 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/e_atanh.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64df v64df_log1p_aux (v64df, v64di);
|
|
||||||
|
|
||||||
DEF_VD_MATH_FUNC (v64df, atanh, v64df x)
|
|
||||||
{
|
|
||||||
static const double zero = 0.0;
|
|
||||||
static const double one = 1.0, huge = 1e300;
|
|
||||||
|
|
||||||
FUNCTION_INIT (v64df);
|
|
||||||
|
|
||||||
v64df t;
|
|
||||||
v64si hx, lx;
|
|
||||||
EXTRACT_WORDS (hx, lx, x);
|
|
||||||
v64si ix = hx & 0x7fffffff;
|
|
||||||
|
|
||||||
VECTOR_IF ((ix | ((lx | (-lx)) >> 31)) > 0x3ff00000, cond) // |x|>1
|
|
||||||
VECTOR_RETURN ((x - x)/(x - x), cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
VECTOR_IF (ix == 0x3ff00000, cond)
|
|
||||||
VECTOR_RETURN (x / zero, cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
VECTOR_IF ((ix < 0x3e300000) & __builtin_convertvector((huge + x) > zero, v64si), cond) // x<2**-28
|
|
||||||
VECTOR_RETURN (x, cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
SET_HIGH_WORD (x, ix, NO_COND);
|
|
||||||
|
|
||||||
VECTOR_IF (ix < 0x3fe00000, cond) // x < 0.5 */
|
|
||||||
v64df t2 = x + x;
|
|
||||||
VECTOR_COND_MOVE (t, 0.5 * v64df_log1p_aux (t2 + t2 * x / (one - x), __mask), cond);
|
|
||||||
VECTOR_ELSE (cond)
|
|
||||||
VECTOR_COND_MOVE (t, 0.5 * v64df_log1p_aux ((x + x) / (one - x), __mask), cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
VECTOR_IF (hx >= 0, cond)
|
|
||||||
VECTOR_RETURN (t, cond);
|
|
||||||
VECTOR_ELSE (cond)
|
|
||||||
VECTOR_RETURN (-t, cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
FUNCTION_RETURN;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (atanh, df, df)
|
|
|
@ -1,29 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/common/s_copysign.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
DEF_VD_MATH_FUNC (v64df, copysign, v64df x, v64df y)
|
|
||||||
{
|
|
||||||
FUNCTION_INIT (v64df);
|
|
||||||
|
|
||||||
v64si hx, hy;
|
|
||||||
GET_HIGH_WORD(hx, x, NO_COND);
|
|
||||||
GET_HIGH_WORD(hy, y, NO_COND);
|
|
||||||
SET_HIGH_WORD(x, (hx & 0x7fffffff) | (hy & 0x80000000), NO_COND);
|
|
||||||
VECTOR_RETURN (x, NO_COND);
|
|
||||||
|
|
||||||
FUNCTION_RETURN;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEF_VARIANTS2 (copysign, df, df)
|
|
|
@ -1,12 +0,0 @@
|
||||||
/* Based on newlib/libm/mathfp/s_cos.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64df v64df_sine_aux (v64df, int, v64di);
|
|
||||||
|
|
||||||
DEF_VD_MATH_FUNC (v64df, cos, v64df x)
|
|
||||||
{
|
|
||||||
return v64df_sine_aux (x, 1, __mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (cos, df, df)
|
|
|
@ -1,12 +0,0 @@
|
||||||
/* Based on newlib/libm/mathfp/s_cosh.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64df v64df_sineh_aux (v64df, int, v64di);
|
|
||||||
|
|
||||||
DEF_VD_MATH_FUNC (v64df, cosh, v64df x)
|
|
||||||
{
|
|
||||||
return v64df_sineh_aux (x, 1, __mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (cosh, df, df)
|
|
|
@ -1,171 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/s_erf.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64df v64df_exp_aux (v64df, v64di);
|
|
||||||
|
|
||||||
static const double
|
|
||||||
tiny = 1e-300,
|
|
||||||
half= 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
|
|
||||||
one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
|
|
||||||
two = 2.00000000000000000000e+00, /* 0x40000000, 0x00000000 */
|
|
||||||
/* c = (float)0.84506291151 */
|
|
||||||
erx = 8.45062911510467529297e-01, /* 0x3FEB0AC1, 0x60000000 */
|
|
||||||
/*
|
|
||||||
* Coefficients for approximation to erf on [0,0.84375]
|
|
||||||
*/
|
|
||||||
efx = 1.28379167095512586316e-01, /* 0x3FC06EBA, 0x8214DB69 */
|
|
||||||
efx8= 1.02703333676410069053e+00, /* 0x3FF06EBA, 0x8214DB69 */
|
|
||||||
pp0 = 1.28379167095512558561e-01, /* 0x3FC06EBA, 0x8214DB68 */
|
|
||||||
pp1 = -3.25042107247001499370e-01, /* 0xBFD4CD7D, 0x691CB913 */
|
|
||||||
pp2 = -2.84817495755985104766e-02, /* 0xBF9D2A51, 0xDBD7194F */
|
|
||||||
pp3 = -5.77027029648944159157e-03, /* 0xBF77A291, 0x236668E4 */
|
|
||||||
pp4 = -2.37630166566501626084e-05, /* 0xBEF8EAD6, 0x120016AC */
|
|
||||||
qq1 = 3.97917223959155352819e-01, /* 0x3FD97779, 0xCDDADC09 */
|
|
||||||
qq2 = 6.50222499887672944485e-02, /* 0x3FB0A54C, 0x5536CEBA */
|
|
||||||
qq3 = 5.08130628187576562776e-03, /* 0x3F74D022, 0xC4D36B0F */
|
|
||||||
qq4 = 1.32494738004321644526e-04, /* 0x3F215DC9, 0x221C1A10 */
|
|
||||||
qq5 = -3.96022827877536812320e-06, /* 0xBED09C43, 0x42A26120 */
|
|
||||||
/*
|
|
||||||
* Coefficients for approximation to erf in [0.84375,1.25]
|
|
||||||
*/
|
|
||||||
pa0 = -2.36211856075265944077e-03, /* 0xBF6359B8, 0xBEF77538 */
|
|
||||||
pa1 = 4.14856118683748331666e-01, /* 0x3FDA8D00, 0xAD92B34D */
|
|
||||||
pa2 = -3.72207876035701323847e-01, /* 0xBFD7D240, 0xFBB8C3F1 */
|
|
||||||
pa3 = 3.18346619901161753674e-01, /* 0x3FD45FCA, 0x805120E4 */
|
|
||||||
pa4 = -1.10894694282396677476e-01, /* 0xBFBC6398, 0x3D3E28EC */
|
|
||||||
pa5 = 3.54783043256182359371e-02, /* 0x3FA22A36, 0x599795EB */
|
|
||||||
pa6 = -2.16637559486879084300e-03, /* 0xBF61BF38, 0x0A96073F */
|
|
||||||
qa1 = 1.06420880400844228286e-01, /* 0x3FBB3E66, 0x18EEE323 */
|
|
||||||
qa2 = 5.40397917702171048937e-01, /* 0x3FE14AF0, 0x92EB6F33 */
|
|
||||||
qa3 = 7.18286544141962662868e-02, /* 0x3FB2635C, 0xD99FE9A7 */
|
|
||||||
qa4 = 1.26171219808761642112e-01, /* 0x3FC02660, 0xE763351F */
|
|
||||||
qa5 = 1.36370839120290507362e-02, /* 0x3F8BEDC2, 0x6B51DD1C */
|
|
||||||
qa6 = 1.19844998467991074170e-02, /* 0x3F888B54, 0x5735151D */
|
|
||||||
/*
|
|
||||||
* Coefficients for approximation to erfc in [1.25,1/0.35]
|
|
||||||
*/
|
|
||||||
ra0 = -9.86494403484714822705e-03, /* 0xBF843412, 0x600D6435 */
|
|
||||||
ra1 = -6.93858572707181764372e-01, /* 0xBFE63416, 0xE4BA7360 */
|
|
||||||
ra2 = -1.05586262253232909814e+01, /* 0xC0251E04, 0x41B0E726 */
|
|
||||||
ra3 = -6.23753324503260060396e+01, /* 0xC04F300A, 0xE4CBA38D */
|
|
||||||
ra4 = -1.62396669462573470355e+02, /* 0xC0644CB1, 0x84282266 */
|
|
||||||
ra5 = -1.84605092906711035994e+02, /* 0xC067135C, 0xEBCCABB2 */
|
|
||||||
ra6 = -8.12874355063065934246e+01, /* 0xC0545265, 0x57E4D2F2 */
|
|
||||||
ra7 = -9.81432934416914548592e+00, /* 0xC023A0EF, 0xC69AC25C */
|
|
||||||
sa1 = 1.96512716674392571292e+01, /* 0x4033A6B9, 0xBD707687 */
|
|
||||||
sa2 = 1.37657754143519042600e+02, /* 0x4061350C, 0x526AE721 */
|
|
||||||
sa3 = 4.34565877475229228821e+02, /* 0x407B290D, 0xD58A1A71 */
|
|
||||||
sa4 = 6.45387271733267880336e+02, /* 0x40842B19, 0x21EC2868 */
|
|
||||||
sa5 = 4.29008140027567833386e+02, /* 0x407AD021, 0x57700314 */
|
|
||||||
sa6 = 1.08635005541779435134e+02, /* 0x405B28A3, 0xEE48AE2C */
|
|
||||||
sa7 = 6.57024977031928170135e+00, /* 0x401A47EF, 0x8E484A93 */
|
|
||||||
sa8 = -6.04244152148580987438e-02, /* 0xBFAEEFF2, 0xEE749A62 */
|
|
||||||
/*
|
|
||||||
* Coefficients for approximation to erfc in [1/.35,28]
|
|
||||||
*/
|
|
||||||
rb0 = -9.86494292470009928597e-03, /* 0xBF843412, 0x39E86F4A */
|
|
||||||
rb1 = -7.99283237680523006574e-01, /* 0xBFE993BA, 0x70C285DE */
|
|
||||||
rb2 = -1.77579549177547519889e+01, /* 0xC031C209, 0x555F995A */
|
|
||||||
rb3 = -1.60636384855821916062e+02, /* 0xC064145D, 0x43C5ED98 */
|
|
||||||
rb4 = -6.37566443368389627722e+02, /* 0xC083EC88, 0x1375F228 */
|
|
||||||
rb5 = -1.02509513161107724954e+03, /* 0xC0900461, 0x6A2E5992 */
|
|
||||||
rb6 = -4.83519191608651397019e+02, /* 0xC07E384E, 0x9BDC383F */
|
|
||||||
sb1 = 3.03380607434824582924e+01, /* 0x403E568B, 0x261D5190 */
|
|
||||||
sb2 = 3.25792512996573918826e+02, /* 0x40745CAE, 0x221B9F0A */
|
|
||||||
sb3 = 1.53672958608443695994e+03, /* 0x409802EB, 0x189D5118 */
|
|
||||||
sb4 = 3.19985821950859553908e+03, /* 0x40A8FFB7, 0x688C246A */
|
|
||||||
sb5 = 2.55305040643316442583e+03, /* 0x40A3F219, 0xCEDF3BE6 */
|
|
||||||
sb6 = 4.74528541206955367215e+02, /* 0x407DA874, 0xE79FE763 */
|
|
||||||
sb7 = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */
|
|
||||||
|
|
||||||
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsv)

/* Vector error function.

   The input range is split on the high word of |x|:
     NaN / inf          -> NaN, or +-1 for +-inf
     |x| < 0.84375      -> x + x * (pp / qq rational), with shortcuts for
                           |x| < 2**-28
     |x| < 1.25         -> +-(erx + pa / qa rational in |x| - 1)
     |x| >= 6           -> +-(1 - tiny)
     otherwise          -> +-(1 - exp (...) * exp (...) / |x|) using the
                           ra/sa or rb/sb rational in 1 / x**2.  */
DEF_VD_MATH_FUNC (v64df, erf, v64df x)
{
  FUNCTION_INIT (v64df);

  v64si hx;
  GET_HIGH_WORD (hx, x, NO_COND);
  v64si ix = hx & 0x7fffffff;

  VECTOR_IF (ix >= 0x7ff00000, cond)	/* erf(nan)=nan */
    /* i is 2 for negative lanes and 0 otherwise.  hx is a signed vector,
       so the sign bit is isolated with "& 1" after the shift; a plain
       (hx >> 31) << 1 would give -2 for negative lanes and turn
       erf(-inf) into 3.  */
    v64si i = ((hx >> 31) & 1) << 1;
    /* erf(+-inf)=+-1 */
    VECTOR_RETURN (__builtin_convertvector (1 - i, v64df) + one / x, cond);
  VECTOR_ENDIF

  VECTOR_IF (ix < 0x3feb0000, cond)	/* |x|<0.84375 */
    VECTOR_IF2 (ix < 0x3e300000, cond2, cond)	/* |x|<2**-28 */
      VECTOR_IF2 (ix < 0x00800000, cond3, cond2)	/* avoid underflow */
        VECTOR_RETURN (0.125*(8.0*x + efx8*x), cond3);
      VECTOR_ENDIF
      VECTOR_RETURN (x + efx*x, cond2);
    VECTOR_ENDIF

    v64df z = x*x;
    v64df r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
    v64df s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
    v64df y = r/s;

    VECTOR_RETURN (x + x*y, cond);
  VECTOR_ENDIF

  VECTOR_IF (ix < 0x3ff40000, cond)	/* 0.84375 <= |x| < 1.25 */
    v64df s = __builtin_gcn_fabsv (x) - one;
    v64df P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
    v64df Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
    VECTOR_IF2 (hx >= 0, cond2, cond)
      VECTOR_RETURN (erx + P/Q, cond2);
    VECTOR_ELSE2 (cond2, cond)
      VECTOR_RETURN (-erx - P/Q, cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF

  VECTOR_IF (ix >= 0x40180000, cond)	/* inf>|x|>=6 */
    VECTOR_IF2 (hx >= 0, cond2, cond)
      VECTOR_RETURN (VECTOR_INIT (1.0 - tiny), cond2);
    VECTOR_ELSE2 (cond2, cond)
      VECTOR_RETURN (VECTOR_INIT (tiny - 1.0), cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF

  /* From here on x holds |x|; the sign is still available in hx.  */
  x = __builtin_gcn_fabsv(x);
  v64df s = 1.0 / (x*x);
  v64df R, S;
  VECTOR_IF (ix < 0x4006DB6E, cond)	/* |x| < 1/0.35 */
    VECTOR_COND_MOVE (R, ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(
                         ra5+s*(ra6+s*ra7)))))), cond);
    VECTOR_COND_MOVE (S, one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(
                         sa5+s*(sa6+s*(sa7+s*sa8))))))), cond);
  VECTOR_ELSE (cond)			/* |x| >= 1/0.35 */
    VECTOR_COND_MOVE (R, rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(
                         rb5+s*rb6))))), cond);
    VECTOR_COND_MOVE (S, one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(
                         sb5+s*(sb6+s*sb7)))))), cond);
  VECTOR_ENDIF

  /* z is |x| with the low 32 bits cleared.  It must start out as a copy
     of x: SET_LOW_WORD only replaces the low word, so an uninitialised z
     would leave the high word (sign, exponent, top of the mantissa)
     indeterminate.  */
  v64df z = x;
  SET_LOW_WORD (z, VECTOR_INIT(0), NO_COND);
  v64df r = v64df_exp_aux (-z*z - 0.5625, __mask)
            * v64df_exp_aux ((z-x)*(z+x) + R/S, __mask);
  VECTOR_RETURN (one - r/x, hx >= 0);
  VECTOR_RETURN (r/x - one, hx < 0);

  FUNCTION_RETURN;
}

DEF_VARIANTS (erf, df, df)

#endif
|
|
|
@ -1,74 +0,0 @@
|
||||||
/******************************************************************
|
|
||||||
* The following routines are coded directly from the algorithms
|
|
||||||
* and coefficients given in "Software Manual for the Elementary
|
|
||||||
* Functions" by William J. Cody, Jr. and William Waite, Prentice
|
|
||||||
* Hall, 1980.
|
|
||||||
******************************************************************/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/s_exp.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64si v64df_ispos (v64df);
|
|
||||||
v64si v64df_numtest (v64df);
|
|
||||||
|
|
||||||
static const double INV_LN2 = 1.4426950408889634074;
|
|
||||||
static const double LN2 = 0.6931471805599453094172321;
|
|
||||||
static const double p[] = { 0.25, 0.75753180159422776666e-2,
|
|
||||||
0.31555192765684646356e-4 };
|
|
||||||
static const double q[] = { 0.5, 0.56817302698551221787e-1,
|
|
||||||
0.63121894374398504557e-3,
|
|
||||||
0.75104028399870046114e-6 };
|
|
||||||
|
|
||||||
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_ldexpv)

/* Vector exponential.

   Special lanes:
     NaN                  -> errno = EDOM, returns the input
     +inf / -inf          -> errno = ERANGE, returns z_infinity / 0.0
     numtest () == 0      -> 1.0
     x > BIGX, x < SMALLX -> errno = ERANGE, returns the input
     |x| < z_rooteps      -> 1.0

   All other lanes compute N = round (x / ln 2), reduce to
   g = x - N * ln 2, evaluate a rational approximation R (g) and return
   ldexp (R, N + 1).  */
DEF_VD_MATH_FUNC (v64df, exp, v64df x)
{
  FUNCTION_INIT (v64df);

  v64si num_type = v64df_numtest (x);
  VECTOR_IF (num_type == NAN, cond)
    errno = EDOM;
    VECTOR_RETURN (x, cond);
  VECTOR_ELSEIF (num_type == INF, cond)
    errno = ERANGE;
    VECTOR_RETURN (VECTOR_MERGE (VECTOR_INIT (z_infinity.d),
                                 VECTOR_INIT (0.0),
                                 v64df_ispos (x)),
                   cond);
  VECTOR_ELSEIF (num_type == 0, cond)
    VECTOR_RETURN (VECTOR_INIT (1.0), cond);
  VECTOR_ENDIF

  /* Check for out of bounds. */
  VECTOR_IF ((x > BIGX) | (x < SMALLX), cond)
    errno = ERANGE;
    VECTOR_RETURN (x, cond);
  VECTOR_ENDIF

  /* Check for a value too small to calculate.  This is the
     double-precision routine, so the threshold is z_rooteps (the constant
     the double-precision asine kernel uses), not the z_rooteps_f variant:
     the latter is presumably the single-precision root-epsilon and would
     make every lane below it return exactly 1.0 -- NOTE(review): confirm
     both definitions in amdgcnmach.h.  */
  VECTOR_RETURN (VECTOR_INIT (1.0),
                 (-z_rooteps < x) & (x < z_rooteps));

  /* Calculate the exponent: round x / ln 2 to nearest, away from zero
     on ties, by biasing with +-0.5 before the truncating conversion.  */
  v64si Nneg = __builtin_convertvector (x * INV_LN2 - 0.5, v64si);
  v64si Npos = __builtin_convertvector (x * INV_LN2 + 0.5, v64si);
  v64si N = VECTOR_MERGE (Nneg, Npos, x < 0.0);

  /* Construct the mantissa. */
  v64df g = x - __builtin_convertvector (N, v64df) * LN2;
  v64df z = g * g;
  v64df P = g * ((p[2] * z + p[1]) * z + p[0]);
  v64df Q = ((q[3] * z + q[2]) * z + q[1]) * z + q[0];
  v64df R = 0.5 + P / (Q - P);

  /* Return the floating point value.  N is incremented before scaling,
     so the result is ldexp (R, N + 1).  */
  N++;
  VECTOR_RETURN (__builtin_gcn_ldexpv (R, N), NO_COND);

  FUNCTION_RETURN;
}

DEF_VARIANTS (exp, df, df)

#endif
|
|
|
@ -1,18 +0,0 @@
|
||||||
/* Copyright (C) 2002 by Red Hat, Incorporated. All rights reserved.
|
|
||||||
*
|
|
||||||
* Permission to use, copy, modify, and distribute this software
|
|
||||||
* is freely granted, provided that this notice is preserved.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/s_exp2.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64df v64df_pow_aux (v64df, v64df, v64di);
|
|
||||||
|
|
||||||
DEF_VD_MATH_FUNC (v64df, exp2, v64df x)
|
|
||||||
{
|
|
||||||
return v64df_pow_aux (VECTOR_INIT (2.0), x, __mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (exp2, df, df)
|
|
|
@ -1,24 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/common/s_finite.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
DEF_VD_MATH_PRED (v64si, finite, v64df x)
|
|
||||||
{
|
|
||||||
FUNCTION_INIT (v64si);
|
|
||||||
v64si hx;
|
|
||||||
GET_HIGH_WORD (hx, x, NO_COND);
|
|
||||||
return (((hx & 0x7fffffff) - 0x7ff00000) >> 31) != 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (finite, si, df)
|
|
|
@ -1,185 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/s_fmod.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
DEF_VD_MATH_FUNC (v64df, fmod, v64df x, v64df y)
|
|
||||||
{
|
|
||||||
FUNCTION_INIT(v64df);
|
|
||||||
|
|
||||||
v64si hx, hy, hz;
|
|
||||||
v64usi lx, ly, lz;
|
|
||||||
EXTRACT_WORDS (hx, lx, x);
|
|
||||||
EXTRACT_WORDS (hy, ly, y);
|
|
||||||
v64si sx = hx & 0x80000000; /* sign of x */
|
|
||||||
hx ^=sx; /* |x| */
|
|
||||||
hy &= 0x7fffffff; /* |y| */
|
|
||||||
|
|
||||||
v64df zeroes = VECTOR_MERGE (VECTOR_INIT (-0.0),
|
|
||||||
VECTOR_INIT (0.0),
|
|
||||||
sx != 0);
|
|
||||||
|
|
||||||
/* purge off exception values */
|
|
||||||
VECTOR_IF (((hy | ly) == 0) | (hx >= 0x7ff00000)
|
|
||||||
| ((hy | ((ly | -ly) >> 31)) > 0x7ff00000), cond) // y=0, or x not finite or y is NaN
|
|
||||||
VECTOR_RETURN ((x * y) / (x * y), cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_IF (hx <= hy, cond) // |x|<|y| return x
|
|
||||||
VECTOR_IF2 ((hx < hy) | (lx < ly), cond2, cond)
|
|
||||||
VECTOR_RETURN (x, cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_IF2 (lx == ly, cond2, cond)
|
|
||||||
VECTOR_RETURN (zeroes, cond2);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
/* determine ix = ilogb(x) */
|
|
||||||
v64si ix;
|
|
||||||
VECTOR_IF (hx < 0x00100000, cond) // subnormal x
|
|
||||||
VECTOR_IF2 (hx == 0, cond2, cond)
|
|
||||||
ix = VECTOR_INIT (-1043);
|
|
||||||
for (v64si i = __builtin_convertvector (lx, v64si);
|
|
||||||
!ALL_ZEROES_P (cond2 & (i > 0));
|
|
||||||
i <<= 1)
|
|
||||||
VECTOR_COND_MOVE (ix, ix - 1, cond2 & (i > 0));
|
|
||||||
VECTOR_ELSE2 (cond2, cond)
|
|
||||||
ix = VECTOR_INIT (-1022);
|
|
||||||
for (v64si i = __builtin_convertvector (hx << 11, v64si);
|
|
||||||
!ALL_ZEROES_P (cond2 & (i > 0));
|
|
||||||
i <<= 1)
|
|
||||||
VECTOR_COND_MOVE (ix, ix - 1, cond2 & (i > 0));
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_ELSE (cond)
|
|
||||||
VECTOR_COND_MOVE (ix, (hx >> 20) - 1023, cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
/* determine iy = ilogb(y) */
|
|
||||||
v64si iy;
|
|
||||||
VECTOR_IF (hy < 0x00100000, cond) // subnormal y
|
|
||||||
VECTOR_IF2 (hy == 0, cond2, cond)
|
|
||||||
iy = VECTOR_INIT (-1043);
|
|
||||||
for (v64si i = __builtin_convertvector (ly, v64si);
|
|
||||||
!ALL_ZEROES_P (cond2 & (i > 0));
|
|
||||||
i <<= 1)
|
|
||||||
VECTOR_COND_MOVE (iy, iy - 1, cond2 & (i > 0));
|
|
||||||
VECTOR_ELSE2 (cond2, cond)
|
|
||||||
iy = VECTOR_INIT (-1022);
|
|
||||||
for (v64si i = __builtin_convertvector (hy << 11, v64si);
|
|
||||||
!ALL_ZEROES_P (cond2 & (i > 0));
|
|
||||||
i <<= 1)
|
|
||||||
VECTOR_COND_MOVE (iy, iy - 1, cond2 & (i > 0));
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_ELSE (cond)
|
|
||||||
VECTOR_COND_MOVE (iy, (hy >> 20) - 1023, cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
|
|
||||||
/* set up {hx,lx}, {hy,ly} and align y to x */
|
|
||||||
VECTOR_IF (ix >= -1022, cond)
|
|
||||||
VECTOR_COND_MOVE (hx, 0x00100000 | (0x000fffff & hx), cond);
|
|
||||||
VECTOR_ELSE (cond) // subnormal x, shift x to normal
|
|
||||||
{
|
|
||||||
v64si n = -1022 - ix;
|
|
||||||
VECTOR_IF2 (n <= 31, cond2, cond)
|
|
||||||
VECTOR_COND_MOVE (hx, (hx << n) | (lx >> (32 - n)), cond2);
|
|
||||||
VECTOR_COND_MOVE (lx, lx << n, cond2);
|
|
||||||
VECTOR_ELSE2 (cond2, cond)
|
|
||||||
VECTOR_COND_MOVE (hx, __builtin_convertvector (lx << (n - 32), v64si), cond2);
|
|
||||||
VECTOR_COND_MOVE (lx, VECTOR_INIT (0U), cond2);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
}
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_IF (iy >= -1022, cond)
|
|
||||||
VECTOR_COND_MOVE (hy, 0x00100000 | (0x000fffff & hy), cond);
|
|
||||||
VECTOR_ELSE (cond) // subnormal y, shift y to normal
|
|
||||||
{
|
|
||||||
v64si n = -1022 - iy;
|
|
||||||
VECTOR_IF2 (n <= 31, cond2, cond)
|
|
||||||
VECTOR_COND_MOVE (hy, (hy << n) | (ly >> (32 - n)), cond2);
|
|
||||||
VECTOR_COND_MOVE (ly, ly << n, cond2);
|
|
||||||
VECTOR_ELSE2 (cond2, cond)
|
|
||||||
VECTOR_COND_MOVE (hy, __builtin_convertvector (ly << (n - 32), v64si), cond2);
|
|
||||||
VECTOR_COND_MOVE (ly, VECTOR_INIT (0U), cond2);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
}
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
/* fix point fmod */
|
|
||||||
v64si n = ix - iy;
|
|
||||||
v64si cond = n != 0;
|
|
||||||
|
|
||||||
while (!ALL_ZEROES_P (cond))
|
|
||||||
{
|
|
||||||
hz = hx - hy;
|
|
||||||
lz = lx - ly;
|
|
||||||
VECTOR_IF2 (lx < ly, cond2, cond)
|
|
||||||
VECTOR_COND_MOVE (hz, hz - 1, cond2);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_IF2 (hz < 0, cond2, cond)
|
|
||||||
VECTOR_COND_MOVE (hx, hx + hx + (__builtin_convertvector(lx, v64usi) >> 31), cond2);
|
|
||||||
VECTOR_COND_MOVE (lx, lx + lx, cond2);
|
|
||||||
VECTOR_ELSE2 (cond2, cond)
|
|
||||||
VECTOR_IF2 ((hz | lz) == 0, cond3, cond2) // return sign(x)*0
|
|
||||||
VECTOR_RETURN (zeroes, cond3);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_COND_MOVE (hx, hz + hz + (__builtin_convertvector(lz, v64usi) >> 31), cond2);
|
|
||||||
VECTOR_COND_MOVE (lx, lz + lz, cond2);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
n += cond; // Active lanes should be -1
|
|
||||||
cond &= (n != 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
hz = hx - hy;
|
|
||||||
lz = lx - ly;
|
|
||||||
VECTOR_COND_MOVE (hz, hz - 1, lx < ly);
|
|
||||||
VECTOR_IF (hz >= 0, cond)
|
|
||||||
VECTOR_COND_MOVE (hx, hz, cond);
|
|
||||||
VECTOR_COND_MOVE (lx, lz, cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
/* convert back to floating value and restore the sign */
|
|
||||||
VECTOR_RETURN (zeroes, (hx | lx) == 0); // return sign(x)*0
|
|
||||||
cond = hx < 0x00100000;
|
|
||||||
while (!ALL_ZEROES_P (cond)) // normalize x
|
|
||||||
{
|
|
||||||
VECTOR_COND_MOVE (hx, hx + hx + (lx >> 31), cond);
|
|
||||||
VECTOR_COND_MOVE (lx, lx + lx, cond);
|
|
||||||
iy += cond; // Active lanes should be -1
|
|
||||||
|
|
||||||
cond &= (hx < 0x00100000);
|
|
||||||
}
|
|
||||||
VECTOR_IF (iy >= -1022, cond) // normalize output
|
|
||||||
VECTOR_COND_MOVE (hx, (hx - 0x00100000) | ((iy + 1023) << 20), cond);
|
|
||||||
INSERT_WORDS (x, hx | sx, lx, cond);
|
|
||||||
VECTOR_ELSE (cond) // subnormal output */
|
|
||||||
n = -1022 - iy;
|
|
||||||
VECTOR_IF2 (n <= 20, cond2, cond)
|
|
||||||
VECTOR_COND_MOVE (lx, (lx >> n) | (hx << (32 - n)), cond2);
|
|
||||||
VECTOR_COND_MOVE (hx, hx >> n, cond2);
|
|
||||||
VECTOR_ELSEIF2 (n <= 31, cond2, cond)
|
|
||||||
VECTOR_COND_MOVE (lx, __builtin_convertvector ((hx << (32 - n)) | (lx >> n), v64usi), cond2);
|
|
||||||
VECTOR_COND_MOVE (hx, sx, cond2);
|
|
||||||
VECTOR_ELSE2 (cond2, cond)
|
|
||||||
VECTOR_COND_MOVE (lx, __builtin_convertvector (hx >> (n - 32), v64usi), cond2);
|
|
||||||
VECTOR_COND_MOVE (hx, sx, cond2);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
INSERT_WORDS (x, hx | sx, lx, cond);
|
|
||||||
x *= VECTOR_INIT (1.0); /* create necessary signal */
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
VECTOR_RETURN (x, NO_COND); /* exact output */
|
|
||||||
FUNCTION_RETURN;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEF_VARIANTS2 (fmod, df, df)
|
|
|
@ -1,10 +0,0 @@
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64df v64df_lgamma_aux (v64df x, v64di __mask);
|
|
||||||
|
|
||||||
DEF_VD_MATH_FUNC (v64df, gamma, v64df x)
|
|
||||||
{
|
|
||||||
return v64df_lgamma_aux(x, __mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (gamma, df, df)
|
|
|
@ -1,113 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/e_hypot.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64df v64df_sqrt_aux (v64df, v64di);
|
|
||||||
|
|
||||||
DEF_VD_MATH_FUNC (v64df, hypot, v64df x, v64df y)
|
|
||||||
{
|
|
||||||
FUNCTION_INIT (v64df);
|
|
||||||
|
|
||||||
v64df a = x;
|
|
||||||
v64df b = y;
|
|
||||||
|
|
||||||
v64si ha;
|
|
||||||
GET_HIGH_WORD (ha, x, NO_COND);
|
|
||||||
ha &= 0x7fffffffL;
|
|
||||||
v64si hb;
|
|
||||||
GET_HIGH_WORD (hb, y, NO_COND);
|
|
||||||
hb &= 0x7fffffffL;
|
|
||||||
|
|
||||||
VECTOR_IF (hb > ha, cond)
|
|
||||||
VECTOR_COND_MOVE (a, y, cond);
|
|
||||||
VECTOR_COND_MOVE (b, x, cond);
|
|
||||||
v64si j = ha;
|
|
||||||
VECTOR_COND_MOVE (ha, hb, cond);
|
|
||||||
VECTOR_COND_MOVE (hb, j, cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
SET_HIGH_WORD (a, ha, NO_COND); /* a <- |a| */
|
|
||||||
SET_HIGH_WORD (b, hb, NO_COND); /* b <- |b| */
|
|
||||||
VECTOR_IF((ha - hb) > 0x3c00000L, cond) // x/y > 2**60 */
|
|
||||||
VECTOR_RETURN (a + b, cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
v64si k = VECTOR_INIT (0);
|
|
||||||
|
|
||||||
VECTOR_IF (ha > 0x5f300000L, cond) /* a>2**500 */
|
|
||||||
VECTOR_IF2 (ha >= 0x7ff00000L, cond2, cond) /* Inf or NaN */
|
|
||||||
v64df w = a + b; // for sNaN */
|
|
||||||
v64si low;
|
|
||||||
GET_LOW_WORD (low, a, cond2);
|
|
||||||
VECTOR_COND_MOVE (w, a, cond2 & (((ha & 0xfffff) | low) == 0));
|
|
||||||
GET_LOW_WORD (low, b, cond2);
|
|
||||||
VECTOR_COND_MOVE (w, b, cond2 & (((hb & 0xfffff) | low) == 0));
|
|
||||||
VECTOR_RETURN (w, cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
/* scale a and b by 2**-600 */
|
|
||||||
VECTOR_COND_MOVE (ha, ha - 0x25800000, cond);
|
|
||||||
VECTOR_COND_MOVE (hb, hb - 0x25800000, cond);
|
|
||||||
VECTOR_COND_MOVE (k, k + 600, cond);
|
|
||||||
SET_HIGH_WORD (a, ha, cond);
|
|
||||||
SET_HIGH_WORD (b, hb, cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_IF (hb < 0x20b00000, cond) /* b < 2**-500 */
|
|
||||||
VECTOR_IF2 (hb <= 0x000fffff, cond2, cond) /* subnormal b or 0 */
|
|
||||||
v64si low;
|
|
||||||
GET_LOW_WORD (low, b, cond);
|
|
||||||
VECTOR_RETURN (a, cond2 & ((hb | low) == 0));
|
|
||||||
/* t1=2^1022 */
|
|
||||||
v64df t1 = VECTOR_INIT (0.0);
|
|
||||||
SET_HIGH_WORD (t1, VECTOR_INIT (0x7fd00000), cond2);
|
|
||||||
VECTOR_COND_MOVE (b, b * t1, cond2);
|
|
||||||
VECTOR_COND_MOVE (a, a * t1, cond2);
|
|
||||||
VECTOR_COND_MOVE (k, k - 1022, cond2);
|
|
||||||
VECTOR_ELSE2 (cond2, cond) /* scale a and b by 2^600 */
|
|
||||||
VECTOR_COND_MOVE (ha, ha + 0x25800000, cond2); /* a *= 2^600 */
|
|
||||||
VECTOR_COND_MOVE (hb, hb + 0x25800000, cond2); /* b *= 2^600 */
|
|
||||||
VECTOR_COND_MOVE (k, k - 600, cond2);
|
|
||||||
SET_HIGH_WORD (a, ha, cond2);
|
|
||||||
SET_HIGH_WORD (b, hb, cond2);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_ENDIF
|
|
||||||
/* medium size a and b */
|
|
||||||
v64df w = a - b;
|
|
||||||
VECTOR_IF (w > b, cond)
|
|
||||||
v64df t1 = VECTOR_INIT (0.0);
|
|
||||||
SET_HIGH_WORD (t1, ha, cond);
|
|
||||||
v64df t2 = a - t1;
|
|
||||||
VECTOR_COND_MOVE (w, v64df_sqrt_aux (t1*t1 - (b*(-b) - t2 * (a + t1)), __mask), cond);
|
|
||||||
VECTOR_ELSE (cond)
|
|
||||||
VECTOR_COND_MOVE (a, a+a, cond);
|
|
||||||
v64df y1 = VECTOR_INIT (0.0);
|
|
||||||
SET_HIGH_WORD (y1, hb, cond);
|
|
||||||
v64df y2 = b - y1;
|
|
||||||
v64df t1;
|
|
||||||
SET_HIGH_WORD (t1, ha + 0x00100000, cond);
|
|
||||||
v64df t2 = a - t1;
|
|
||||||
VECTOR_COND_MOVE (w, v64df_sqrt_aux (t1*y1 - (w*(-w) - (t1*y2 + t2*b)), __mask), cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_IF (k != 0, cond)
|
|
||||||
v64si high;
|
|
||||||
v64df t1 = VECTOR_INIT (1.0);
|
|
||||||
GET_HIGH_WORD (high, t1, cond);
|
|
||||||
SET_HIGH_WORD (t1, high + (k << 20), cond);
|
|
||||||
VECTOR_RETURN (t1 * w, cond);
|
|
||||||
VECTOR_ELSE (cond)
|
|
||||||
VECTOR_RETURN (w, cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
FUNCTION_RETURN;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEF_VARIANTS2 (hypot, df, df)
|
|
|
@ -1,45 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/common/s_ilogb.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
DEF_VD_MATH_PRED (v64si, ilogb, v64df x)
|
|
||||||
{
|
|
||||||
FUNCTION_INIT(v64si);
|
|
||||||
v64si hx, lx, ix;
|
|
||||||
EXTRACT_WORDS (hx, lx, x);
|
|
||||||
hx &= 0x7fffffff;
|
|
||||||
VECTOR_IF (hx < 0x00100000, cond)
|
|
||||||
VECTOR_RETURN (VECTOR_INIT (-__INT_MAX__), cond & ((hx | lx) == 0)); // FP_ILOGB0
|
|
||||||
VECTOR_IF2 (hx == 0, cond2, cond)
|
|
||||||
ix = VECTOR_INIT (-1043);
|
|
||||||
for (v64si i = lx;
|
|
||||||
!ALL_ZEROES_P (cond2 & (i > 0));
|
|
||||||
i <<= 1)
|
|
||||||
VECTOR_COND_MOVE (ix, ix - 1, cond2 & (i > 0));
|
|
||||||
VECTOR_ELSE2 (cond2, cond)
|
|
||||||
ix = VECTOR_INIT (-1022);
|
|
||||||
for (v64si i = (hx << 11);
|
|
||||||
!ALL_ZEROES_P (cond2 & (i > 0));
|
|
||||||
i <<= 1)
|
|
||||||
VECTOR_COND_MOVE (ix, ix - 1, cond2 & (i > 0));
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_RETURN (ix, cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_RETURN ((hx >> 20) - 1023, hx < 0x7ff00000);
|
|
||||||
VECTOR_RETURN (VECTOR_INIT (__INT_MAX__), NO_COND);
|
|
||||||
|
|
||||||
FUNCTION_RETURN;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (ilogb, si, df)
|
|
|
@ -1,27 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/common/s_isnan.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
DEF_VD_MATH_PRED (v64si, isnan, v64df x)
|
|
||||||
{
|
|
||||||
v64si hx, lx;
|
|
||||||
EXTRACT_WORDS (hx, lx, x);
|
|
||||||
hx &= 0x7fffffff;
|
|
||||||
hx |= (lx | (-lx)) >> 31;
|
|
||||||
hx = 0x7ff00000 - hx;
|
|
||||||
|
|
||||||
return (hx >> 31) != 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (isnan, si, df)
|
|
|
@ -1,18 +0,0 @@
|
||||||
/* Based on newlib/libm/mathfp/s_ispos.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64si
|
|
||||||
v64df_ispos (v64df x)
|
|
||||||
{
|
|
||||||
// Explicitly create mask for internal function.
|
|
||||||
v64si __mask = VECTOR_INIT (-1);
|
|
||||||
FUNCTION_INIT (v64si);
|
|
||||||
|
|
||||||
v64si hx;
|
|
||||||
GET_HIGH_WORD (hx, x, NO_COND);
|
|
||||||
|
|
||||||
VECTOR_RETURN ((hx & 0x80000000) == 0, NO_COND);
|
|
||||||
|
|
||||||
FUNCTION_RETURN;
|
|
||||||
}
|
|
|
@ -1,30 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/math/w_lgamma.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64si v64df_finite_aux (v64df x, v64di __mask);
|
|
||||||
v64df v64df_lgamma_r_aux (v64df x, v64si *signgamp, v64di __mask);
|
|
||||||
|
|
||||||
DEF_VD_MATH_FUNC (v64df, lgamma, v64df x)
|
|
||||||
{
|
|
||||||
v64df y = v64df_lgamma_r_aux(x, &(_REENT_V64SI_SIGNGAM(_V64_REENT)), __mask);
|
|
||||||
if (ALL_ZEROES_P(v64df_finite_aux(y, __mask)) & !ALL_ZEROES_P(v64df_finite_aux(x, __mask))) {
|
|
||||||
/* lgamma(finite) overflow */
|
|
||||||
errno = ERANGE;
|
|
||||||
}
|
|
||||||
return y;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (lgamma, df, df)
|
|
|
@ -1,286 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/math/er_lgamma.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
/* Scalar constants shared by v64df_sin_pi and the lgamma_r kernel.
   The hexadecimal pairs are the high and low words of each double.  */

/* General-purpose values.  */
static const double two52 = 4.50359962737049600000e+15; /* 0x43300000, 0x00000000 */
static const double half = 5.00000000000000000000e-01; /* 0x3FE00000, 0x00000000 */
static const double one = 1.00000000000000000000e+00; /* 0x3FF00000, 0x00000000 */
static const double pi = 3.14159265358979311600e+00; /* 0x400921FB, 0x54442D18 */
static const double zero = 0.00000000000000000000e+00;

/* a0..a11: polynomial used by the i == 0 sub-case of the x < 2 branch.  */
static const double a0 = 7.72156649015328655494e-02; /* 0x3FB3C467, 0xE37DB0C8 */
static const double a1 = 3.22467033424113591611e-01; /* 0x3FD4A34C, 0xC4A60FAD */
static const double a2 = 6.73523010531292681824e-02; /* 0x3FB13E00, 0x1A5562A7 */
static const double a3 = 2.05808084325167332806e-02; /* 0x3F951322, 0xAC92547B */
static const double a4 = 7.38555086081402883957e-03; /* 0x3F7E404F, 0xB68FEFE8 */
static const double a5 = 2.89051383673415629091e-03; /* 0x3F67ADD8, 0xCCB7926B */
static const double a6 = 1.19270763183362067845e-03; /* 0x3F538A94, 0x116F3F5D */
static const double a7 = 5.10069792153511336608e-04; /* 0x3F40B6C6, 0x89B99C00 */
static const double a8 = 2.20862790713908385557e-04; /* 0x3F2CF2EC, 0xED10E54D */
static const double a9 = 1.08011567247583939954e-04; /* 0x3F1C5088, 0x987DFB07 */
static const double a10 = 2.52144565451257326939e-05; /* 0x3EFA7074, 0x428CFA52 */
static const double a11 = 4.48640949618915160150e-05; /* 0x3F07858E, 0x90A45837 */

/* tc, tf, tt and t0..t14: used by the i == 1 sub-case of the x < 2
   branch.  */
static const double tc = 1.46163214496836224576e+00; /* 0x3FF762D8, 0x6356BE3F */
static const double tf = -1.21486290535849611461e-01; /* 0xBFBF19B9, 0xBCC38A42 */
/* tt = -(tail of tf) */
static const double tt = -3.63867699703950536541e-18; /* 0xBC50C7CA, 0xA48A971F */
static const double t0 = 4.83836122723810047042e-01; /* 0x3FDEF72B, 0xC8EE38A2 */
static const double t1 = -1.47587722994593911752e-01; /* 0xBFC2E427, 0x8DC6C509 */
static const double t2 = 6.46249402391333854778e-02; /* 0x3FB08B42, 0x94D5419B */
static const double t3 = -3.27885410759859649565e-02; /* 0xBFA0C9A8, 0xDF35B713 */
static const double t4 = 1.79706750811820387126e-02; /* 0x3F9266E7, 0x970AF9EC */
static const double t5 = -1.03142241298341437450e-02; /* 0xBF851F9F, 0xBA91EC6A */
static const double t6 = 6.10053870246291332635e-03; /* 0x3F78FCE0, 0xE370E344 */
static const double t7 = -3.68452016781138256760e-03; /* 0xBF6E2EFF, 0xB3E914D7 */
static const double t8 = 2.25964780900612472250e-03; /* 0x3F6282D3, 0x2E15C915 */
static const double t9 = -1.40346469989232843813e-03; /* 0xBF56FE8E, 0xBF2D1AF1 */
static const double t10 = 8.81081882437654011382e-04; /* 0x3F4CDF0C, 0xEF61A8E9 */
static const double t11 = -5.38595305356740546715e-04; /* 0xBF41A610, 0x9C73E0EC */
static const double t12 = 3.15632070903625950361e-04; /* 0x3F34AF6D, 0x6C0EBBF7 */
static const double t13 = -3.12754168375120860518e-04; /* 0xBF347F24, 0xECC38C38 */
static const double t14 = 3.35529192635519073543e-04; /* 0x3F35FD3E, 0xE8C2D3F4 */

/* u0..u5 (numerator) and v1..v5 (denominator): rational term of the
   i == 2 sub-case of the x < 2 branch.  */
static const double u0 = -7.72156649015328655494e-02; /* 0xBFB3C467, 0xE37DB0C8 */
static const double u1 = 6.32827064025093366517e-01; /* 0x3FE4401E, 0x8B005DFF */
static const double u2 = 1.45492250137234768737e+00; /* 0x3FF7475C, 0xD119BD6F */
static const double u3 = 9.77717527963372745603e-01; /* 0x3FEF4976, 0x44EA8450 */
static const double u4 = 2.28963728064692451092e-01; /* 0x3FCD4EAE, 0xF6010924 */
static const double u5 = 1.33810918536787660377e-02; /* 0x3F8B678B, 0xBF2BAB09 */
static const double v1 = 2.45597793713041134822e+00; /* 0x4003A5D7, 0xC2BD619C */
static const double v2 = 2.12848976379893395361e+00; /* 0x40010725, 0xA42B18F5 */
static const double v3 = 7.69285150456672783825e-01; /* 0x3FE89DFB, 0xE45050AF */
static const double v4 = 1.04222645593369134254e-01; /* 0x3FBAAE55, 0xD6537C88 */
static const double v5 = 3.21709242282423911810e-03; /* 0x3F6A5ABB, 0x57D0CF61 */

/* s0..s6 (numerator) and r1..r6 (denominator): rational term of the
   2 <= x < 8 branch.  */
static const double s0 = -7.72156649015328655494e-02; /* 0xBFB3C467, 0xE37DB0C8 */
static const double s1 = 2.14982415960608852501e-01; /* 0x3FCB848B, 0x36E20878 */
static const double s2 = 3.25778796408930981787e-01; /* 0x3FD4D98F, 0x4F139F59 */
static const double s3 = 1.46350472652464452805e-01; /* 0x3FC2BB9C, 0xBEE5F2F7 */
static const double s4 = 2.66422703033638609560e-02; /* 0x3F9B481C, 0x7E939961 */
static const double s5 = 1.84028451407337715652e-03; /* 0x3F5E26B6, 0x7368F239 */
static const double s6 = 3.19475326584100867617e-05; /* 0x3F00BFEC, 0xDD17E945 */
static const double r1 = 1.39200533467621045958e+00; /* 0x3FF645A7, 0x62C4AB74 */
static const double r2 = 7.21935547567138069525e-01; /* 0x3FE71A18, 0x93D3DCDC */
static const double r3 = 1.71933865632803078993e-01; /* 0x3FC601ED, 0xCCFBDF27 */
static const double r4 = 1.86459191715652901344e-02; /* 0x3F9317EA, 0x742ED475 */
static const double r5 = 7.77942496381893596434e-04; /* 0x3F497DDA, 0xCA41A95B */
static const double r6 = 7.32668430744625636189e-06; /* 0x3EDEBAF7, 0xA5B38140 */

/* w0..w6: series used by the 8 <= x < 2**58 branch.  */
static const double w0 = 4.18938533204672725052e-01; /* 0x3FDACFE3, 0x90C97D69 */
static const double w1 = 8.33333333333329678849e-02; /* 0x3FB55555, 0x5555553B */
static const double w2 = -2.77777777728775536470e-03; /* 0xBF66C16C, 0x16B02E5C */
static const double w3 = 7.93650558643019558500e-04; /* 0x3F4A019F, 0x98CF38B6 */
static const double w4 = -5.95187557450339963135e-04; /* 0xBF4380CB, 0x8C0FE741 */
static const double w5 = 8.36339918996282139126e-04; /* 0x3F4B67BA, 0x4CDAD5D1 */
static const double w6 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */
|
|
||||||
|
|
||||||
v64df v64df_cos_aux (v64df x, v64di __mask);
|
|
||||||
v64df v64df_log_aux (v64df x, v64di __mask);
|
|
||||||
v64df v64df_sin_aux (v64df x, v64di __mask);
|
|
||||||
|
|
||||||
#if defined (__has_builtin) \
|
|
||||||
&& __has_builtin (__builtin_gcn_floorv) \
|
|
||||||
&& __has_builtin (__builtin_gcn_fabsv)
|
|
||||||
|
|
||||||
static v64df
|
|
||||||
v64df_sin_pi (v64df x)
|
|
||||||
{
|
|
||||||
// Explicitly create mask for internal function.
|
|
||||||
v64di __mask = VECTOR_INIT (-1L);
|
|
||||||
FUNCTION_INIT (v64df);
|
|
||||||
|
|
||||||
v64df y, z;
|
|
||||||
v64si n, ix;
|
|
||||||
|
|
||||||
GET_HIGH_WORD (ix, x, NO_COND);
|
|
||||||
ix &= 0x7fffffff;
|
|
||||||
|
|
||||||
VECTOR_IF (ix < 0x3fd00000, cond)
|
|
||||||
VECTOR_RETURN (v64df_sin_aux (pi * x, __mask), cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
y = -x; /* x is assume negative */
|
|
||||||
|
|
||||||
/*
|
|
||||||
* argument reduction, make sure inexact flag not raised if input
|
|
||||||
* is an integer
|
|
||||||
*/
|
|
||||||
z = __builtin_gcn_floorv (y);
|
|
||||||
VECTOR_IF (z != y, cond)
|
|
||||||
/* inexact anyway */
|
|
||||||
VECTOR_COND_MOVE(y, y * 0.5, cond);
|
|
||||||
VECTOR_COND_MOVE(y, 2.0 * (y - __builtin_gcn_floorv (y)), cond); /* y = |x| mod 2.0 */
|
|
||||||
VECTOR_COND_MOVE(n, __builtin_convertvector(y * 4.0, v64si), cond);
|
|
||||||
VECTOR_ELSE (cond)
|
|
||||||
VECTOR_IF2 (__builtin_convertvector(ix >= 0x43400000, v64di), cond2, cond)
|
|
||||||
VECTOR_COND_MOVE(y, VECTOR_INIT(zero), cond2);
|
|
||||||
VECTOR_COND_MOVE(n, VECTOR_INIT(0), cond2); /* y must be even */
|
|
||||||
VECTOR_ELSE2 (cond2, cond)
|
|
||||||
VECTOR_COND_MOVE(z, y + two52 /* exact */, cond2 & __builtin_convertvector(ix < 0x43300000, v64di));
|
|
||||||
GET_LOW_WORD (n, z, cond2);
|
|
||||||
VECTOR_COND_MOVE(n, n & 1, cond2);
|
|
||||||
VECTOR_COND_MOVE(y, __builtin_convertvector(n, v64df), cond2);
|
|
||||||
VECTOR_COND_MOVE(n, n << 2, cond2);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
VECTOR_IF (n == 0, cond)
|
|
||||||
VECTOR_COND_MOVE(y, v64df_sin_aux (pi * y, __mask), cond);
|
|
||||||
VECTOR_ELSEIF (n == 1 | n == 2, cond)
|
|
||||||
VECTOR_COND_MOVE(y, v64df_cos_aux (pi * (0.5 - y), __mask), cond);
|
|
||||||
VECTOR_ELSEIF (n == 3 | n == 4, cond)
|
|
||||||
VECTOR_COND_MOVE(y, v64df_sin_aux (pi * (VECTOR_INIT(one) - y), __mask), cond);
|
|
||||||
VECTOR_ELSEIF (n == 5 | n == 6, cond)
|
|
||||||
VECTOR_COND_MOVE(y, -v64df_cos_aux (pi * (y - 1.5), __mask), cond);
|
|
||||||
VECTOR_ELSE (cond)
|
|
||||||
VECTOR_COND_MOVE(y, v64df_sin_aux (pi * (y - 2.0), __mask), cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
VECTOR_RETURN(-y, NO_COND);
|
|
||||||
FUNCTION_RETURN;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* lgamma_r: lane-wise log(|Gamma(x)|), with the sign of Gamma(x) stored
   through signgamp (1 or -1 per lane).

   x         input vector.
   signgamp  output: set to 1 for every lane on entry, then to -1 for
             lanes where x is -0, where x is a tiny negative value, or
             where sin_pi(x) is negative.
   returns   per lane: x*x for Inf/NaN; one/(x-x) for +-0 and negative
             integers; -log(|x|) for |x| < 2**-70; otherwise the value
             from the range-specific approximation, reflected for
             negative x through nadj = log(pi / |x * sin_pi(x)|).

   Fixes relative to the previous version:
   - in the |x| < 2**-70 branch *signgamp was set to -1 under the outer
     mask, so tiny positive lanes were given a negative sign;
   - in the x <= 0.9 sub-branch "r = -log(x)" was a plain assignment to
     every lane, overwriting r = 0 already stored for lanes with x == 1
     or x == 2 whenever both kinds of lane were present.  */
DEF_VD_MATH_FUNC (v64df, lgamma_r, v64df x, v64si *signgamp)
{
  FUNCTION_INIT (v64df);

  v64df t,y,z,nadj = VECTOR_INIT(0.0),p,p1,p2,p3,q,r,w;
  v64si i,hx,lx,ix;

  EXTRACT_WORDS(hx,lx,x);

  /* purge off +-inf, NaN, +-0, and negative arguments */
  *signgamp = VECTOR_INIT(1);
  ix = hx&0x7fffffff;
  VECTOR_IF(ix>=0x7ff00000, cond)
    VECTOR_RETURN (x*x, cond);
  VECTOR_ENDIF
  VECTOR_IF((ix|lx)==0, cond)
    VECTOR_COND_MOVE(*signgamp, VECTOR_INIT(-1), cond & (hx<0));
    VECTOR_RETURN(one/(x-x), cond);
  VECTOR_ENDIF
  VECTOR_IF (ix < 0x3b900000, cond)	/* |x|<2**-70, return -log(|x|) */
    VECTOR_IF2(hx<0, cond2, cond)
      /* Only the negative tiny lanes get a negative sign.  */
      VECTOR_COND_MOVE(*signgamp, VECTOR_INIT(-1), cond2);
      VECTOR_RETURN (-v64df_log_aux(-x, __mask), cond2);
    VECTOR_ELSE2(cond2, cond)
      VECTOR_RETURN (-v64df_log_aux(x, __mask), cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF
  VECTOR_IF (hx < 0, cond)
    VECTOR_IF2(ix>=0x43300000, cond2, cond)	/* |x|>=2**52, must be -integer */
      VECTOR_RETURN(one/(x-x), cond2);	/* -integer */
    VECTOR_ENDIF
    VECTOR_COND_MOVE (t, v64df_sin_pi (x), cond);
    VECTOR_IF2(__builtin_convertvector(t==zero, v64si), cond2, cond)
      VECTOR_RETURN(one/(x-x), cond2);	/* -integer */
    VECTOR_ENDIF
    VECTOR_COND_MOVE(nadj, v64df_log_aux(VECTOR_INIT(pi)/__builtin_gcn_fabsv(t*x), __mask), cond);
    VECTOR_COND_MOVE(*signgamp, VECTOR_INIT(-1), cond & __builtin_convertvector(t < zero, v64si));
    /* Continue with |x|; the reflection is applied at the end.  */
    VECTOR_COND_MOVE(x, -x, cond);
  VECTOR_ENDIF

  /* purge off 1 and 2 */
  VECTOR_IF((((ix-0x3ff00000)|lx)==0)|(((ix-0x40000000)|lx)==0), cond)
    VECTOR_COND_MOVE(r, VECTOR_INIT(0.0), cond);
  /* for x < 2.0 */
  VECTOR_ELSEIF(ix<0x40000000, cond)
    VECTOR_IF2(ix<=0x3feccccc, cond2, cond)
      /* lgamma(x) = lgamma(x+1)-log(x) */
      /* Masked so that r of lanes outside this sub-branch is kept.  */
      VECTOR_COND_MOVE(r, -v64df_log_aux(x, __mask), cond2);
      VECTOR_IF2(ix>=0x3FE76944, cond3, cond2)
        VECTOR_COND_MOVE(y, one-x, cond3);
        VECTOR_COND_MOVE(i, VECTOR_INIT(0), cond3);
      VECTOR_ELSEIF2(ix>=0x3FCDA661, cond3, cond2)
        VECTOR_COND_MOVE(y, x-(tc-one), cond3);
        VECTOR_COND_MOVE(i, VECTOR_INIT(1), cond3);
      VECTOR_ELSE2(cond3, cond2)
        VECTOR_COND_MOVE(y, x, cond3);
        VECTOR_COND_MOVE(i, VECTOR_INIT(2), cond3);
      VECTOR_ENDIF
    VECTOR_ELSE2(cond2, cond)
      VECTOR_COND_MOVE(r, VECTOR_INIT(zero), cond2);
      VECTOR_IF2(ix>=0x3FFBB4C3, cond3, cond2)	/* [1.7316,2] */
        VECTOR_COND_MOVE(y, VECTOR_INIT(2.0)-x, cond3);
        VECTOR_COND_MOVE(i, VECTOR_INIT(0), cond3);
      VECTOR_ELSEIF2(ix>=0x3FF3B4C4, cond3, cond2)	/* [1.23,1.73] */
        VECTOR_COND_MOVE(y, x-tc, cond3);
        VECTOR_COND_MOVE(i, VECTOR_INIT(1), cond3);
      VECTOR_ELSE2(cond3, cond2)
        VECTOR_COND_MOVE(y, x-one, cond3);
        VECTOR_COND_MOVE(i, VECTOR_INIT(2), cond3);
      VECTOR_ENDIF
    VECTOR_ENDIF

    /* i selects which of the three approximations is evaluated.  */
    VECTOR_IF2(i==0, cond2, cond)
      VECTOR_COND_MOVE(z, y*y, cond2);
      VECTOR_COND_MOVE(p1, a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10)))), cond2);
      VECTOR_COND_MOVE(p2, z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11))))), cond2);
      VECTOR_COND_MOVE(p, y*p1+p2, cond2);
      VECTOR_COND_MOVE(r, r + (p-0.5*y), cond2);
    VECTOR_ELSEIF2(i==1, cond2, cond)
      VECTOR_COND_MOVE(z, y*y, cond2);
      VECTOR_COND_MOVE(w, z*y, cond2);
      VECTOR_COND_MOVE(p1, t0+w*(t3+w*(t6+w*(t9 +w*t12))), cond2);	/* parallel comp */
      VECTOR_COND_MOVE(p2, t1+w*(t4+w*(t7+w*(t10+w*t13))), cond2);
      VECTOR_COND_MOVE(p3, t2+w*(t5+w*(t8+w*(t11+w*t14))), cond2);
      VECTOR_COND_MOVE(p, z*p1-(tt-w*(p2+y*p3)), cond2);
      VECTOR_COND_MOVE(r, r + (tf + p), cond2);
    VECTOR_ELSEIF2(i==2, cond2, cond)
      VECTOR_COND_MOVE(p1, y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5))))), cond2);
      VECTOR_COND_MOVE(p2, one+y*(v1+y*(v2+y*(v3+y*(v4+y*v5)))), cond2);
      VECTOR_COND_MOVE(r, r + (-0.5*y + p1/p2), cond2);
    VECTOR_ENDIF
  VECTOR_ELSEIF(ix<0x40200000, cond)
    /* x < 8.0 */
    VECTOR_COND_MOVE(i, __builtin_convertvector(x, v64si), cond);
    VECTOR_COND_MOVE(t, VECTOR_INIT(zero), cond);
    VECTOR_COND_MOVE(y, x-__builtin_convertvector(i, v64df), cond);
    VECTOR_COND_MOVE(p, y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6)))))), cond);
    VECTOR_COND_MOVE(q, one+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6))))), cond);
    VECTOR_COND_MOVE(r, half*y+p/q, cond);
    VECTOR_COND_MOVE(z, VECTOR_INIT(one), cond);	/* lgamma(1+s) = log(s) + lgamma(s) */
    /* Cascade equivalent to a fall-through switch on i from 7 down to 3.  */
    VECTOR_IF2(i==7, cond2, cond)
      VECTOR_COND_MOVE(z, z * (y+6.0), cond2);
    VECTOR_ENDIF
    VECTOR_IF2((i==7) | (i==6), cond2, cond)
      VECTOR_COND_MOVE(z, z * (y+5.0), cond2);
    VECTOR_ENDIF
    VECTOR_IF2((i<=7) & (i>=5), cond2, cond)
      VECTOR_COND_MOVE(z, z * (y+4.0), cond2);
    VECTOR_ENDIF
    VECTOR_IF2((i<=7) & (i>=4), cond2, cond)
      VECTOR_COND_MOVE(z, z * (y+3.0), cond2);
    VECTOR_ENDIF
    VECTOR_IF2((i<=7) & (i>=3), cond2, cond)
      VECTOR_COND_MOVE(z, z * (y+2.0), cond2);
      VECTOR_COND_MOVE(r, r + v64df_log_aux(z, __mask), cond2);
    VECTOR_ENDIF
  /* 8.0 <= x < 2**58 */
  VECTOR_ELSEIF(ix < 0x43900000, cond)
    VECTOR_COND_MOVE(t, v64df_log_aux(x, __mask), cond);
    VECTOR_COND_MOVE(z, one/x, cond);
    VECTOR_COND_MOVE(y, z*z, cond);
    VECTOR_COND_MOVE(w, w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*w6))))), cond);
    VECTOR_COND_MOVE(r, (x-half)*(t-one)+w, cond);
  VECTOR_ELSE(cond)
    /* 2**58 <= x <= inf */
    VECTOR_COND_MOVE(r, x*(v64df_log_aux(x, __mask)-one), cond);
  VECTOR_ENDIF
  /* Reflection for lanes whose original argument was negative.  */
  VECTOR_IF(hx<0, cond)
    VECTOR_COND_MOVE(r, nadj - r, cond);
  VECTOR_ENDIF

  VECTOR_RETURN(r, NO_COND);
  FUNCTION_RETURN;
}
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,82 +0,0 @@
|
||||||
/******************************************************************
|
|
||||||
* The following routines are coded directly from the algorithms
|
|
||||||
* and coefficients given in "Software Manual for the Elementary
|
|
||||||
* Functions" by William J. Cody, Jr. and William Waite, Prentice
|
|
||||||
* Hall, 1980.
|
|
||||||
******************************************************************/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/s_logarithm.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64si v64df_finite (v64df);
|
|
||||||
v64si v64df_isnan (v64df);
|
|
||||||
|
|
||||||
/* Numerator coefficients of the rational term in log: evaluated as
   (a[2]*w + a[1])*w + a[0].  */
static const double a[3] = {
  -0.64124943423745581147e+02,
  0.16383943563021534222e+02,
  -0.78956112887481257267
};

/* Denominator coefficients of the rational term in log: evaluated as
   ((w + b[2])*w + b[1])*w + b[0].  */
static const double b[3] = {
  -0.76949932108494879777e+03,
  0.31203222091924532844e+03,
  -0.35667977739034646171e+02
};

/* Two-part multiplier applied to the binary exponent N in log; C1 is
   the exactly representable leading part and C2 the small correction
   (C1 + C2 is approximately ln 2).  */
static const double C1 = 22713.0 / 32768.0;
static const double C2 = 1.428606820309417232e-06;
|
|
||||||
|
|
||||||
/* Build the routines below only when the compiler provides both GCN
   frexp vector builtins that the log implementation calls.  The last
   line deliberately has no trailing backslash, so the directive no
   longer depends on a blank line following it.  */
#if defined (__has_builtin) \
        && __has_builtin (__builtin_gcn_frexpv_mant) \
        && __has_builtin (__builtin_gcn_frexpv_exp)
|
|
||||||
|
|
||||||
/* Lane-wise natural logarithm.

   Special lanes are resolved first:
     x == 0      -> errno = ERANGE, result -z_infinity.d
     x <  0      -> errno = EDOM,   result z_notanum.d
     non-finite  -> z_notanum.d where v64df_isnan (x), else z_infinity.d
   The remaining lanes are split into mantissa and exponent and run
   through the rational approximation built from the a[] / b[] tables.  */
DEF_VD_MATH_FUNC (v64df, log, v64df x)
{
  FUNCTION_INIT (v64df);

  /* Domain and range errors are handled before any arithmetic.  */
  VECTOR_IF (x == 0.0, special)
    errno = ERANGE;
    VECTOR_RETURN (VECTOR_INIT (-z_infinity.d), special);
  VECTOR_ELSEIF (x < 0.0, special)
    errno = EDOM;
    VECTOR_RETURN (VECTOR_INIT (z_notanum.d), special);
  VECTOR_ELSEIF (__builtin_convertvector (~v64df_finite (x), v64di), special)
    VECTOR_RETURN (VECTOR_MERGE (VECTOR_INIT (z_notanum.d),
                                 VECTOR_INIT (z_infinity.d),
                                 v64df_isnan (x)),
                   special);
  VECTOR_ENDIF

  /* Decompose x = mant * 2^expo.  */
  v64df mant = __builtin_gcn_frexpv_mant (x);
  v64si expo = __builtin_gcn_frexpv_exp (x);

  v64df z = mant - 0.5;

  VECTOR_IF (mant > __SQRT_HALF, upper)
    VECTOR_COND_MOVE (z, (z - 0.5) / (mant * 0.5 + 0.5), upper);
  VECTOR_ELSE (upper)
    /* Mantissa at or below sqrt(1/2): lower the exponent by one.  */
    VECTOR_COND_MOVE (expo, expo - 1, upper);
    VECTOR_COND_MOVE (z, z / (z * 0.5 + 0.5), upper);
  VECTOR_ENDIF

  v64df w = z * z;

  /* Rational correction term: numerator from a[], denominator from b[].  */
  v64df numer = (a[2] * w + a[1]) * w + a[0];
  v64df denom = ((w + b[2]) * w + b[1]) * w + b[0];
  z += z * w * numer / denom;

  /* Add expo * (C1 + C2) on the lanes whose exponent is non-zero.  */
  v64df expo_f = __builtin_convertvector (expo, v64df);
  VECTOR_COND_MOVE (z, (expo_f * C2 + z) + expo_f * C1, expo != 0);

  VECTOR_RETURN (z, NO_COND);

  FUNCTION_RETURN;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (log, df, df)
|
|
||||||
|
|
||||||
/* Lane-wise log (1 + x), computed by forwarding 1 + x to the log
   routine under the caller's mask.  */
DEF_VD_MATH_FUNC (v64df, log1p, v64df x)
{
  /* TODO: Implement algorithm with better precision.  */
  v64df shifted = 1 + x;
  return v64df_log_aux (shifted, __mask);
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (log1p, df, df)
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,21 +0,0 @@
|
||||||
/******************************************************************
|
|
||||||
* The following routines are coded directly from the algorithms
|
|
||||||
* and coefficients given in "Software Manual for the Elementary
|
|
||||||
* Functions" by William J. Cody, Jr. and William Waite, Prentice
|
|
||||||
* Hall, 1980.
|
|
||||||
******************************************************************/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/s_logarithm.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64df v64df_log_aux (v64df, v64di);
|
|
||||||
|
|
||||||
static const double C3 = 0.43429448190325182765;
|
|
||||||
|
|
||||||
/* Lane-wise base-10 logarithm: the natural logarithm of x scaled by
   the file-local constant C3.  */
DEF_VD_MATH_FUNC (v64df, log10, v64df x)
{
  v64df natural = v64df_log_aux (x, __mask);
  return natural * C3;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (log10, df, df)
|
|
|
@ -1,12 +0,0 @@
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64df v64df_log_aux (v64df, v64di);
|
|
||||||
|
|
||||||
static const double C3 = 1.4426950408889634073599246810019;
|
|
||||||
|
|
||||||
/* Lane-wise base-2 logarithm: the natural logarithm of x scaled by
   the file-local constant C3.  */
DEF_VD_MATH_FUNC (v64df, log2, v64df x)
{
  v64df natural = v64df_log_aux (x, __mask);
  return natural * C3;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (log2, df, df)
|
|
|
@ -1,66 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/common/s_modf.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64si v64df_numtest (v64df);
|
|
||||||
|
|
||||||
/* Lane-wise modf: returns the fractional part of x and stores the
   integral part through iptr.  Both parts carry the sign of x.

   The routine works on the two 32-bit words of each double:
     j0 <  20        the integer/fraction boundary lies in the high word
     20 <= j0 <= 51  the boundary lies in the low word
     j0 >  51        there is no fraction (huge, Inf or NaN)  */
DEF_VD_MATH_FUNC (v64df, modf, v64df x, v64df *iptr)
{
  FUNCTION_INIT (v64df);
  /* Give ret_i a defined starting value: the conditional moves below
     presumably merge into it lane by lane (NOTE(review): confirm against
     VECTOR_COND_MOVE), and the whole vector is stored through iptr.  */
  v64df ret_i = VECTOR_INIT (0.0);

  v64si i0, i1;
  EXTRACT_WORDS(i0, i1, x);
  v64si j0 = ((i0 >> 20) & 0x7ff) - 0x3ff;	/* exponent of x */
  v64df zero;
  v64si i;
  /* zero = +-0.0 with the sign of x.  */
  INSERT_WORDS (zero, i0 & 0x80000000, VECTOR_INIT (0), NO_COND);

  VECTOR_IF (j0 < 20, cond)			/* integer part in x*/
    VECTOR_IF2 (j0 < 0, cond2, cond)		/* |x|<1 */
      VECTOR_COND_MOVE (ret_i, zero, cond2);
      VECTOR_RETURN (x, cond2);
    VECTOR_ELSE2 (cond2, cond)
      i = (0x000fffff) >> j0;			/* fraction bits of the high word */

      VECTOR_IF2 (((i0 & i) | i1) == 0, cond3, cond2)	/* x is integral */
        VECTOR_COND_MOVE (ret_i, x, cond3);
        VECTOR_RETURN (zero, cond3);
      VECTOR_ELSE2 (cond3, cond2)
        INSERT_WORDS (ret_i, i0 & ~i, VECTOR_INIT (0), cond3);
        VECTOR_RETURN (x - ret_i, cond3);
      VECTOR_ENDIF
    VECTOR_ENDIF
  VECTOR_ELSEIF (j0 > 51, cond)			/* no fraction part */
    VECTOR_COND_MOVE (ret_i, x, cond);
    VECTOR_IF2 (v64df_numtest (x) == NAN, cond2, cond)
      VECTOR_COND_MOVE (ret_i, x + x, cond2);
      VECTOR_RETURN (ret_i, cond2);		/* x is NaN, return NaN */
    VECTOR_ENDIF
    VECTOR_RETURN (zero, cond);			/* return +- 0 */
  VECTOR_ELSE (cond)
    /* Fraction-bit mask for the low word.  The shift must be a logical
       (unsigned) one; a bare 0xffffffff mixed with a signed vector risks
       being shifted arithmetically, which would leave every bit set.
       This is the same form rint uses for the identical mask.  */
    i = CAST_VECTOR (v64si, VECTOR_INIT (0xffffffff) >> (j0 - 20));
    VECTOR_IF2 ((i1 & i) == 0, cond2, cond)	/* x is integral */
      VECTOR_COND_MOVE (ret_i, x, cond2);
      INSERT_WORDS (x, i0 & 0x80000000, VECTOR_INIT (0), cond2);
      VECTOR_RETURN (x, cond2);
    VECTOR_ELSE2 (cond2, cond)
      INSERT_WORDS (ret_i, i0, i1 & ~i, cond2);
      VECTOR_RETURN (x - ret_i, cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF

  *iptr = ret_i;
  FUNCTION_RETURN;
}
|
|
|
@ -1,31 +0,0 @@
|
||||||
/* Based on newlib/libm/mathfp/s_numtest.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64si
|
|
||||||
v64df_numtest (v64df x)
|
|
||||||
{
|
|
||||||
// Explicitly create mask for internal function.
|
|
||||||
v64si __mask = VECTOR_INIT (-1);
|
|
||||||
FUNCTION_INIT (v64si);
|
|
||||||
|
|
||||||
v64si hx, lx;
|
|
||||||
EXTRACT_WORDS (hx, lx, x);
|
|
||||||
v64si exp = (hx & 0x7ff00000) >> 20;
|
|
||||||
|
|
||||||
/* Check for a zero input. */
|
|
||||||
VECTOR_RETURN (VECTOR_INIT (0), x == 0.0);
|
|
||||||
|
|
||||||
/* Check for not a number or infinity. */
|
|
||||||
VECTOR_IF (exp == 0x7ff, cond)
|
|
||||||
VECTOR_RETURN (VECTOR_MERGE (VECTOR_INIT (NAN),
|
|
||||||
VECTOR_INIT (INF),
|
|
||||||
((hx & 0xf0000) != 0) | (lx != 0)),
|
|
||||||
cond);
|
|
||||||
/* Otherwise it's a finite value. */
|
|
||||||
VECTOR_ELSE (cond)
|
|
||||||
VECTOR_RETURN (VECTOR_INIT (NUM), cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
FUNCTION_RETURN;
|
|
||||||
}
|
|
|
@ -1,322 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/math/e_pow.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
static const double
|
|
||||||
bp[] = {1.0, 1.5,},
|
|
||||||
dp_h[] = { 0.0, 5.84962487220764160156e-01,}, /* 0x3FE2B803, 0x40000000 */
|
|
||||||
dp_l[] = { 0.0, 1.35003920212974897128e-08,}, /* 0x3E4CFDEB, 0x43CFD006 */
|
|
||||||
zero = 0.0,
|
|
||||||
one = 1.0,
|
|
||||||
two = 2.0,
|
|
||||||
two53 = 9007199254740992.0, /* 0x43400000, 0x00000000 */
|
|
||||||
/* poly coefs for (3/2)*(log(x)-2s-2/3*s**3 */
|
|
||||||
L1 = 5.99999999999994648725e-01, /* 0x3FE33333, 0x33333303 */
|
|
||||||
L2 = 4.28571428578550184252e-01, /* 0x3FDB6DB6, 0xDB6FABFF */
|
|
||||||
L3 = 3.33333329818377432918e-01, /* 0x3FD55555, 0x518F264D */
|
|
||||||
L4 = 2.72728123808534006489e-01, /* 0x3FD17460, 0xA91D4101 */
|
|
||||||
L5 = 2.30660745775561754067e-01, /* 0x3FCD864A, 0x93C9DB65 */
|
|
||||||
L6 = 2.06975017800338417784e-01, /* 0x3FCA7E28, 0x4A454EEF */
|
|
||||||
P1 = 1.66666666666666019037e-01, /* 0x3FC55555, 0x5555553E */
|
|
||||||
P2 = -2.77777777770155933842e-03, /* 0xBF66C16C, 0x16BEBD93 */
|
|
||||||
P3 = 6.61375632143793436117e-05, /* 0x3F11566A, 0xAF25DE2C */
|
|
||||||
P4 = -1.65339022054652515390e-06, /* 0xBEBBBD41, 0xC5D26BF1 */
|
|
||||||
P5 = 4.13813679705723846039e-08, /* 0x3E663769, 0x72BEA4D0 */
|
|
||||||
lg2 = 6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */
|
|
||||||
lg2_h = 6.93147182464599609375e-01, /* 0x3FE62E43, 0x00000000 */
|
|
||||||
lg2_l = -1.90465429995776804525e-09, /* 0xBE205C61, 0x0CA86C39 */
|
|
||||||
ovt = 8.0085662595372944372e-0017, /* -(1024-log2(ovfl+.5ulp)) */
|
|
||||||
cp = 9.61796693925975554329e-01, /* 0x3FEEC709, 0xDC3A03FD =2/(3ln2) */
|
|
||||||
cp_h = 9.61796700954437255859e-01, /* 0x3FEEC709, 0xE0000000 =(float)cp */
|
|
||||||
cp_l = -7.02846165095275826516e-09, /* 0xBE3E2FE0, 0x145B01F5 =tail of cp_h*/
|
|
||||||
ivln2 = 1.44269504088896338700e+00, /* 0x3FF71547, 0x652B82FE =1/ln2 */
|
|
||||||
ivln2_h = 1.44269502162933349609e+00, /* 0x3FF71547, 0x60000000 =24b 1/ln2*/
|
|
||||||
ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
|
|
||||||
|
|
||||||
v64df v64df_sqrt_aux (v64df, v64di);
|
|
||||||
v64df v64df_scalbn_aux (v64df, v64si, v64di);
|
|
||||||
|
|
||||||
/* Overflow result: sets errno to ERANGE and returns, per lane,
   (+-0x1p769) * 0x1p769, using the negative factor where SIGN is set.  */
static v64df v64df_math_oflow (v64di sign)
{
  errno = ERANGE;
  v64df signed_big = VECTOR_MERGE (VECTOR_INIT (-0x1p769),
                                   VECTOR_INIT (0x1p769), sign);
  return signed_big * 0x1p769;
}
|
|
||||||
|
|
||||||
/* Underflow result: sets errno to ERANGE and returns, per lane,
   (+-0x1p-767) * 0x1p-767, using the negative factor where SIGN is set.  */
static v64df v64df_math_uflow (v64di sign)
{
  errno = ERANGE;
  v64df signed_small = VECTOR_MERGE (VECTOR_INIT (-0x1p-767),
                                     VECTOR_INIT (0x1p-767), sign);
  return signed_small * 0x1p-767;
}
|
|
||||||
|
|
||||||
/* Per-lane test on the high word of x: true where the exponent bits are
   all ones and bit 0x00080000 (the significand MSB) is clear.  */
static v64si v64df_issignaling_inline (v64df x)
{
  v64si __mask = VECTOR_INIT (-1);
  v64si hi;
  GET_HIGH_WORD (hi, x, NO_COND);
  /* Use IEEE-754 2008 encoding - i.e. exponent bits all 1, MSB of
     significand is 0 for signalling NaN.  */
  v64si exp_all_ones = (hi & 0x7ff00000) == 0x7ff00000;
  v64si msb_clear = (hi & 0x00080000) == 0;
  return exp_all_ones & msb_clear;
}
|
|
||||||
|
|
||||||
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsv)
|
|
||||||
|
|
||||||
/* Lane-wise x ** y.

   Structure:
     1. special cases for y == 0, NaN operands, y = +-inf, +-1, 2, 0.5,
        and x = +-0, +-inf, +-1;
     2. log2 (|x|) computed as t1 + t2 (t1 has its low word cleared),
        by a short series when |y| is huge and by the table-driven
        path otherwise;
     3. y * log2 (|x|) split into p_h + p_l, with overflow/underflow
        checks;
     4. 2 ** (p_h + p_l), scaled by n and given the sign s.

   Lane-merged temporaries (t1, t2, k) are initialised so that no
   indeterminate lane is ever read.  */
DEF_VD_MATH_FUNC (v64df, pow, v64df x, v64df y)
{
  FUNCTION_INIT (v64df);

  v64si hx, hy, lx, ly;
  EXTRACT_WORDS(hx,lx,x);
  EXTRACT_WORDS(hy,ly,y);
  v64si ix = hx&0x7fffffff;
  v64si iy = hy&0x7fffffff;

  /* y==zero: x**0 = 1 unless x is snan */
  VECTOR_IF ((iy|ly)==0, cond)
    VECTOR_RETURN (x + y, cond & v64df_issignaling_inline(x));
    VECTOR_RETURN (VECTOR_INIT (1.0), cond);
  VECTOR_ENDIF

  /* x|y==NaN return NaN unless x==1 then return 1 */
  VECTOR_IF ((ix > 0x7ff00000) | ((ix==0x7ff00000)&(lx!=0))
             | (iy > 0x7ff00000) | ((iy==0x7ff00000)&(ly!=0)), cond)
    VECTOR_RETURN (VECTOR_INIT (1.0), cond & (((hx-0x3ff00000)|lx)==0)
                                      & ~v64df_issignaling_inline(y));
    VECTOR_RETURN (x + y, cond);
  VECTOR_ENDIF

  /* determine if y is an odd int when x < 0
   * yisint = 0	... y is not an integer
   * yisint = 1	... y is an odd int
   * yisint = 2	... y is an even int
   */
  v64si yisint = VECTOR_INIT (0);

  VECTOR_IF (hx < 0, cond)
    VECTOR_IF2(iy>=0x43400000, cond2, cond)
      VECTOR_COND_MOVE (yisint, VECTOR_INIT (2), cond2); /* even integer y */
    VECTOR_ELSEIF2 (iy>=0x3ff00000, cond2, cond)
      v64si k = (iy>>20)-0x3ff;	/* exponent */
      VECTOR_IF2 (k>20, cond3, cond2)
        v64si j = ly>>(52-k);
        VECTOR_COND_MOVE (yisint, 2-(j&1), cond3 & ((j<<(52-k))==ly));
      VECTOR_ELSEIF2 (ly==0, cond3, cond2)
        v64si j = iy>>(20-k);
        VECTOR_COND_MOVE (yisint, 2-(j&1), cond3 & ((j<<(20-k))==iy));
      VECTOR_ENDIF
    VECTOR_ENDIF
  VECTOR_ENDIF

  /* special value of y */
  VECTOR_IF (ly==0, cond)
    VECTOR_IF2 (iy==0x7ff00000, cond2, cond)	/* y is +-inf */
      VECTOR_IF2 (((ix-0x3ff00000)|lx)==0, cond3, cond2)
        VECTOR_RETURN (VECTOR_INIT (1.0), cond3);	/* +-1**+-inf = 1 */
      VECTOR_ELSEIF2 (ix >= 0x3ff00000, cond3, cond2) /* (|x|>1)**+-inf = inf,0 */
        VECTOR_RETURN (y, cond3 & (hy>=0));
        VECTOR_RETURN (VECTOR_INIT (0.0), cond3);
      VECTOR_ELSE2 (cond3, cond2)	/* (|x|<1)**-,+inf = inf,0 */
        VECTOR_RETURN (-y, cond3 & (hy<0));
        VECTOR_RETURN (VECTOR_INIT (0.0), cond3);
      VECTOR_ENDIF
    VECTOR_ENDIF
    VECTOR_IF2 (iy==0x3ff00000, cond2, cond)	/* y is +-1 */
      VECTOR_RETURN (VECTOR_INIT (1.0) / x, cond2 & (hy<0));
      VECTOR_RETURN (x, cond2);
    VECTOR_ENDIF
    VECTOR_RETURN (x*x, cond & (hy==0x40000000));	/* y is 2 */
    /* y is 0.5 */
    /* x >= +0 */
    VECTOR_RETURN (v64df_sqrt_aux (x, __mask), cond & (hy==0x3fe00000) & (hx>=0));
  VECTOR_ENDIF

  v64df ax = __builtin_gcn_fabsv(x);
  /* special value of x */
  VECTOR_IF (lx==0, cond)
    VECTOR_IF2 ((ix==0x7ff00000)|(ix==0)|(ix==0x3ff00000), cond2, cond)
      v64df z = ax;	/*x is +-0,+-inf,+-1*/
      VECTOR_COND_MOVE (z, VECTOR_INIT (1.0) / z, cond2 & (hy<0)); /* z = (1/|x|) */
      VECTOR_IF2 (hx<0, cond3, cond2)
        VECTOR_IF2 (((ix-0x3ff00000)|yisint)==0, cond4, cond3)
          VECTOR_COND_MOVE (z, (z-z)/(z-z), cond4); /* (-1)**non-int is NaN */
        VECTOR_ELSEIF2 (yisint==1, cond4, cond3)
          VECTOR_COND_MOVE (z, -z, cond4); /* (x<0)**odd = -(|x|**odd) */
        VECTOR_ENDIF
      VECTOR_ENDIF
      VECTOR_RETURN (z, cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF

  /* (x<0)**(non-int) is NaN */
  VECTOR_RETURN ((x-x)/(x-x), ((((hx >> 31) & 1) - 1)|yisint)==0);

  /* t1 + t2 receives log2 (|x|); both are filled by conditional moves in
     the branches below, so start them from a defined value.  */
  v64df t1 = VECTOR_INIT (0.0);
  v64df t2 = VECTOR_INIT (0.0);

  /* |y| is huge */
  VECTOR_IF(iy>0x41e00000, cond) /* if |y| > 2**31 */
    VECTOR_IF2 (iy>0x43f00000, cond2, cond)	/* if |y| > 2**64, must o/uflow */
      VECTOR_IF2 (ix<=0x3fefffff, cond3, cond2)
        VECTOR_RETURN (v64df_math_oflow (VECTOR_INIT (0L)), cond3 & (hy<0));
        VECTOR_RETURN (v64df_math_uflow (VECTOR_INIT (0L)), cond3);
      VECTOR_ENDIF
      VECTOR_IF2 (ix>=0x3ff00000, cond3, cond2)
        VECTOR_RETURN (v64df_math_oflow (VECTOR_INIT (0L)), cond3 & (hy>0));
        VECTOR_RETURN (v64df_math_uflow (VECTOR_INIT (0L)), cond3);
      VECTOR_ENDIF
    VECTOR_ENDIF
    /* over/underflow if x is not close to one */
    VECTOR_IF2 (ix<0x3fefffff, cond2, cond)
      VECTOR_RETURN (v64df_math_oflow (VECTOR_INIT (0L)), cond2 & (hy<0));
      VECTOR_RETURN (v64df_math_uflow (VECTOR_INIT (0L)), cond2);
    VECTOR_ENDIF
    VECTOR_IF2 (ix>0x3ff00000, cond2, cond)
      VECTOR_RETURN (v64df_math_oflow (VECTOR_INIT (0L)), cond2 & (hy>0));
      VECTOR_RETURN (v64df_math_uflow (VECTOR_INIT (0L)), cond2);
    VECTOR_ENDIF
    /* now |1-x| is tiny <= 2**-20, suffice to compute
       log(x) by x-x^2/2+x^3/3-x^4/4 */
    v64df t = ax-1;		/* t has 20 trailing zeros */
    v64df w = (t*t)*(0.5-t*(0.3333333333333333333333-t*0.25));
    v64df u = ivln2_h*t;	/* ivln2_h has 21 sig. bits */
    v64df v = t*ivln2_l-w*ivln2;
    VECTOR_COND_MOVE (t1, u+v, cond);
    SET_LOW_WORD (t1, VECTOR_INIT (0), cond);
    VECTOR_COND_MOVE (t2, v-(t1-u), cond);
  VECTOR_ELSE (cond)
    v64si n = VECTOR_INIT (0);
    /* take care subnormal number */
    VECTOR_IF2 (ix<0x00100000, cond2, cond)
      VECTOR_COND_MOVE (ax, ax * two53, cond2);
      VECTOR_COND_MOVE (n, n - 53, cond2);
      GET_HIGH_WORD (ix, ax, cond2);
    VECTOR_ENDIF
    n += ((ix)>>20)-0x3ff;
    v64si j = ix&0x000fffff;
    /* determine interval */
    ix = j|0x3ff00000;		/* normalize ix */
    /* k is read in full below (k == 1, k << 18), so give every lane a
       defined value before the conditional moves.  */
    v64si k = VECTOR_INIT (0);
    VECTOR_IF2 (j<=0x3988E, cond2, cond)
      VECTOR_COND_MOVE (k, VECTOR_INIT (0), cond2); /* |x|<sqrt(3/2) */
    VECTOR_ELSEIF2 (j<0xBB67A, cond2, cond)
      VECTOR_COND_MOVE (k, VECTOR_INIT (1), cond2); /* |x|<sqrt(3)   */
    VECTOR_ELSE2 (cond2, cond)
      VECTOR_COND_MOVE (k, VECTOR_INIT (0), cond2);
      VECTOR_COND_MOVE (n, n + 1, cond2);
      VECTOR_COND_MOVE (ix, ix - 0x00100000, cond2);
    VECTOR_ENDIF
    SET_HIGH_WORD (ax, ix, cond);

    /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
    v64df bp_k = VECTOR_MERGE (VECTOR_INIT (bp[1]), VECTOR_INIT (bp[0]), k == 1);
    v64df u = ax-bp_k;		/* bp[0]=1.0, bp[1]=1.5 */
    v64df v = 1.0/(ax+bp_k);
    v64df s = u*v;
    v64df s_h = s;
    SET_LOW_WORD (s_h, VECTOR_INIT (0), cond);
    /* t_h=ax+bp[k] High */
    v64df t_h = VECTOR_INIT (0.0);
    SET_HIGH_WORD (t_h,((ix>>1)|0x20000000)+0x00080000+(k<<18), cond);
    v64df t_l = ax - (t_h-bp_k);
    v64df s_l = v*((u-s_h*t_h)-s_h*t_l);
    /* compute log(ax) */
    v64df s2 = s*s;
    v64df r = s2*s2*(L1+s2*(L2+s2*(L3+s2*(L4+s2*(L5+s2*L6)))));
    r += s_l*(s_h+s);
    s2  = s_h*s_h;
    t_h = 3.0+s2+r;
    SET_LOW_WORD (t_h, VECTOR_INIT (0), cond);
    t_l = r-((t_h-3.0)-s2);
    /* u+v = s*(1+...) */
    u = s_h*t_h;
    v = s_l*t_h+t_l*s;
    /* 2/(3log2)*(s+...) */
    v64df p_h = u+v;
    SET_LOW_WORD (p_h, VECTOR_INIT (0), cond);
    v64df p_l = v-(p_h-u);
    v64df z_h = cp_h*p_h;		/* cp_h+cp_l = 2/(3*log2) */
    v64df dp_l_k = VECTOR_MERGE (VECTOR_INIT (dp_l[1]), VECTOR_INIT (dp_l[0]), k == 1);
    v64df z_l = cp_l*p_h+p_l*cp+dp_l_k;
    /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
    v64df t = __builtin_convertvector (n, v64df);
    v64df dp_h_k = VECTOR_MERGE (VECTOR_INIT (dp_h[1]), VECTOR_INIT (dp_h[0]), k == 1);
    VECTOR_COND_MOVE (t1, ((z_h+z_l)+dp_h_k)+t, cond);
    SET_LOW_WORD (t1, VECTOR_INIT (0), cond);
    VECTOR_COND_MOVE (t2, z_l-(((t1-t)-dp_h_k)-z_h), cond);
  VECTOR_ENDIF

  v64df s = VECTOR_INIT (1.0); /* s (sign of result -ve**odd) = -1 else = 1 */
  VECTOR_COND_MOVE (s, VECTOR_INIT (-1.0), /* (-ve)**(odd int) */
                    ((hx>>31) != 0)&(yisint == 1));

  /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
  v64df y1 = y;
  SET_LOW_WORD (y1, VECTOR_INIT (0), NO_COND);
  v64df p_l = (y-y1)*t1+y*t2;
  v64df p_h = y1*t1;
  v64df z = p_l+p_h;
  v64si i, j;
  EXTRACT_WORDS(j, i, z);
  VECTOR_IF (j>=0x40900000, cond)			/* z >= 1024 */
    /* if z > 1024 */
    v64di cond_di = __builtin_convertvector (cond, v64di);
    VECTOR_RETURN (v64df_math_oflow(s<0), cond & (((j-0x40900000)|i)!=0));	/* overflow */
    VECTOR_RETURN (v64df_math_oflow(s<0), cond_di & (p_l+ovt>z-p_h));	/* overflow */
  VECTOR_ELSEIF ((j&0x7fffffff)>=0x4090cc00, cond)	/* z <= -1075 */
    /* z < -1075 */
    v64di cond_di = __builtin_convertvector (cond, v64di);
    VECTOR_RETURN (v64df_math_uflow(s<0), cond & (((j-0xc090cc00)|i)!=0));	/* underflow */
    VECTOR_RETURN (v64df_math_uflow(s<0), cond_di & (p_l<=z-p_h));	/* underflow */
  VECTOR_ENDIF

  /*
   * compute 2**(p_h+p_l)
   */
  i = j&0x7fffffff;
  v64si k = (i>>20)-0x3ff;
  v64si n = VECTOR_INIT (0);
  VECTOR_IF (i>0x3fe00000, cond)		/* if |z| > 0.5, set n = [z+0.5] */
    VECTOR_COND_MOVE (n, j+(0x00100000>>(k+1)), cond);
    k = ((n&0x7fffffff)>>20)-0x3ff;	/* new k for n */
    v64df t = VECTOR_INIT (0.0);
    SET_HIGH_WORD(t, n&~(0x000fffff>>k), cond);
    VECTOR_COND_MOVE (n, ((n&0x000fffff)|0x00100000)>>(20-k), cond);
    VECTOR_COND_MOVE (n, -n, cond & (j<0));
    VECTOR_COND_MOVE (p_h, p_h - t, cond);
  VECTOR_ENDIF
  v64df t = p_l+p_h;
  SET_LOW_WORD(t, VECTOR_INIT (0), NO_COND);
  v64df u = t*lg2_h;
  v64df v = (p_l-(t-p_h))*lg2+t*lg2_l;
  z = u+v;
  v64df w = v-(z-u);
  t  = z*z;
  t1  = z - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
  v64df r  = (z*t1)/(t1-two)-(w+z*w);
  z  = VECTOR_INIT (1.0)-(r-z);
  GET_HIGH_WORD(j,z, NO_COND);
  j += (n<<20);
  VECTOR_IF ((j>>20)<=0, cond)
    VECTOR_COND_MOVE (z, v64df_scalbn_aux (z, n, __mask), cond);	/* subnormal output */
  VECTOR_ELSE (cond)
    SET_HIGH_WORD(z,j, cond);
  VECTOR_ENDIF
  VECTOR_RETURN (s*z, NO_COND);

  FUNCTION_RETURN;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS2 (pow, df, df)
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,68 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/e_remainder.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64df v64df_fmod_aux (v64df, v64df, v64di);
|
|
||||||
|
|
||||||
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsv)
|
|
||||||
|
|
||||||
/* Lane-wise remainder of x with respect to p.

   Exceptional lanes (p == 0, x not finite, p NaN) return (x*p)/(x*p).
   Otherwise x is first reduced with fmod against 2p (when that does not
   overflow), then folded toward zero by up to two subtractions of |p|,
   and finally the sign bit of the original x is XORed back in.  */
DEF_VD_MATH_FUNC (v64df, remainder, v64df x, v64df p)
{
  FUNCTION_INIT (v64df);

  v64si hx, lx;
  v64si hp, lp;
  EXTRACT_WORDS (hx, lx, x);
  EXTRACT_WORDS (hp, lp, p);
  v64si sign_x = hx & 0x80000000;	/* sign bit of x, restored at the end */
  hp = hp & 0x7fffffff;
  hx = hx & 0x7fffffff;

  /* purge off exception values: p is zero, x is not finite, or p is NaN */
  VECTOR_RETURN ((x * p) / (x * p),
                 ((hp | lp) == 0)
                 | ((hx >= 0x7ff00000)
                    | ((hp >= 0x7ff00000)
                       & (((hp - 0x7ff00000) | lp) != 0))));

  /* Reduce so that x < 2p, on the lanes where p + p cannot overflow.  */
  VECTOR_COND_MOVE (x, v64df_fmod_aux (x, p + p, __mask), hp <= 0x7fdfffff);

  VECTOR_RETURN (0.0 * x, ((hx - hp) | (lx - lp)) == 0);

  x = __builtin_gcn_fabsv (x);
  p = __builtin_gcn_fabsv (p);

  VECTOR_IF (hp < 0x00200000, small_p)
    /* For very small p, compare x + x against p rather than forming 0.5 * p.  */
    VECTOR_IF2 (x + x > p, fold, __builtin_convertvector(small_p, v64di))
      VECTOR_COND_MOVE (x, x - p, fold);
      VECTOR_COND_MOVE (x, x - p, fold & (x + x >= p));
    VECTOR_ENDIF
  VECTOR_ELSE (small_p)
    v64df half_p = 0.5 * p;
    VECTOR_IF2 (x > half_p, fold, __builtin_convertvector(small_p, v64di))
      VECTOR_COND_MOVE (x, x - p, fold);
      VECTOR_COND_MOVE (x, x - p, fold & (x >= half_p));
    VECTOR_ENDIF
  VECTOR_ENDIF

  /* Re-apply the sign of the original x.  */
  GET_HIGH_WORD (hx, x, NO_COND);
  SET_HIGH_WORD (x, hx ^ sign_x, NO_COND);

  VECTOR_RETURN (x, NO_COND);

  FUNCTION_RETURN;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS2 (remainder, df, df)
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,69 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/common/s_rint.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
static const double TWO52[2] = {
|
|
||||||
4.50359962737049600000e+15, /* 0x43300000, 0x00000000 */
|
|
||||||
-4.50359962737049600000e+15, /* 0xC3300000, 0x00000000 */
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Lane-wise rint: round x to an integral value by adding and then
   subtracting +-2^52 (TWO52[], sign chosen to match x).

   Before that addition the fraction bits are reduced to a form that
   avoids double rounding:
     j0 <  0         |x| < 1
     0 <= j0 < 20    integer/fraction boundary in the high word
     20 <= j0 <= 51  boundary in the low word
     j0 >  51        already integral, or Inf/NaN (j0 == 0x400)  */
DEF_VD_MATH_FUNC (v64df, rint, v64df x)
{
  FUNCTION_INIT (v64df);

  v64si i0, i1;
  EXTRACT_WORDS (i0, i1, x);
  v64si sx = (i0 >> 31) & 1;
  v64df two52 = VECTOR_MERGE (VECTOR_INIT (TWO52[1]), VECTOR_INIT (TWO52[0]), sx != 0);
  v64si j0 = ((i0 >> 20) & 0x7ff) - 0x3ff;
  v64si i;
  VECTOR_IF (j0 < 20, cond)
    VECTOR_IF2 (j0 < 0, cond2, cond)
      VECTOR_RETURN (x, cond2 & (((i0 & 0x7fffffff) | i1) == 0));
      VECTOR_COND_MOVE (i1, i1 | (i0 & 0x0fffff), cond2);
      VECTOR_COND_MOVE (i0, i0 & 0xfffe0000, cond2);
      VECTOR_COND_MOVE (i0, i0 | (((i1 | -i1) >> 12) & 0x80000), cond2);
      SET_HIGH_WORD (x, i0, cond2);
      v64df w = two52 + x;
      v64df t = w - two52;
      GET_HIGH_WORD (i0, t, cond2);
      /* Restore the sign of x (keeps -0.0 for small negative inputs).  */
      SET_HIGH_WORD (t, (i0&0x7fffffff)|(sx<<31), cond2);
      VECTOR_RETURN (t, cond2);
    VECTOR_ELSE2 (cond2, cond)
      i = (0x000fffff) >> j0;
      VECTOR_RETURN (x, cond2 & (((i0 & i) | i1) == 0)); /* x is integral */
      i >>= 1;
      VECTOR_IF2 (((i0 & i) | i1) != 0, cond3, cond2)
        VECTOR_COND_MOVE (i1, CAST_VECTOR(v64si, VECTOR_INIT (0x80000000)), cond3 & (j0 == 19));
        VECTOR_COND_MOVE (i1, VECTOR_INIT (0), cond3 & (j0 != 19));
        VECTOR_COND_MOVE (i0, (i0 & (~i)) | ((0x40000) >> j0), cond3);
      VECTOR_ENDIF
    VECTOR_ENDIF
  VECTOR_ELSEIF (j0 > 51, cond)
    VECTOR_RETURN (x + x, cond & (j0 == 0x400));	/* inf or NaN */
    VECTOR_RETURN (x, cond);				/* x is integral */
  VECTOR_ELSE (cond)
    i = CAST_VECTOR (v64si, VECTOR_INIT (0xffffffff) >> (j0 - 20));
    VECTOR_RETURN (x, cond & ((i1 & i) == 0));		/* x is integral */
    /* The mask must be halved with a logical shift.  i is a signed
       vector, so for j0 == 20 (i == 0xffffffff) a plain `i >>= 1' would
       keep bit 31 set; an exact half (i1 == 0x80000000) would then be
       rewritten to a quarter and rounded down instead of to even.
       Clearing the top bit after the shift gives the unsigned result.  */
    i = (i >> 1) & 0x7fffffff;
    VECTOR_COND_MOVE (i1, (i1 & (~i)) | (0x40000000 >> (j0 - 20)), cond & ((i1 & i) != 0));
  VECTOR_ENDIF
  INSERT_WORDS (x, i0, i1, NO_COND);
  v64df w = two52 + x;
  VECTOR_RETURN (w - two52, NO_COND);

  FUNCTION_RETURN;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (rint, df, df)
|
|
|
@ -1,50 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/e_scalb.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64si v64df_isnan (v64df);
|
|
||||||
v64si v64df_finite (v64df);
|
|
||||||
v64df v64df_rint_aux (v64df, v64di);
|
|
||||||
v64df v64df_scalbn_aux (v64df, v64si, v64di);
|
|
||||||
|
|
||||||
/* Lane-wise scalb (x, fn), delegating the actual scaling to scalbn.

   Lanes are resolved in this order:
     x or fn is NaN        -> x * fn
     fn is not finite      -> x * fn when fn > 0, else x / (-fn)
     fn is not an integer  -> (fn-fn)/(fn-fn)
     fn >  65000           -> scalbn (x,  65000)
     -fn > 65000           -> scalbn (x, -65000)
     otherwise             -> scalbn (x, fn converted to int)  */
DEF_VD_MATH_FUNC (v64df, scalb, v64df x, v64df fn)
{
  FUNCTION_INIT (v64df);

  VECTOR_IF (v64df_isnan(x) | v64df_isnan(fn), any_nan)
    VECTOR_RETURN (x * fn, any_nan);
  VECTOR_ENDIF

  VECTOR_IF (~v64df_finite (fn), fn_inf)
    VECTOR_IF2 (fn > 0.0, fn_pos, fn_inf)
      VECTOR_RETURN (x * fn, fn_pos);
    VECTOR_ELSE2 (fn_pos, fn_inf)
      VECTOR_RETURN (x / (-fn), fn_pos);
    VECTOR_ENDIF
  VECTOR_ENDIF

  VECTOR_IF (v64df_rint_aux (fn, __mask) != fn, fn_fractional)
    VECTOR_RETURN ((fn-fn)/(fn-fn), fn_fractional);
  VECTOR_ENDIF

  /* Clamp very large exponents before they are converted to int.  */
  VECTOR_IF (fn > 65000.0, too_big)
    VECTOR_RETURN (v64df_scalbn_aux (x, VECTOR_INIT (65000), __mask), too_big);
  VECTOR_ENDIF
  VECTOR_IF (-fn > 65000.0, too_small)
    VECTOR_RETURN (v64df_scalbn_aux (x, VECTOR_INIT (-65000), __mask), too_small);
  VECTOR_ENDIF

  VECTOR_RETURN (v64df_scalbn_aux (x, __builtin_convertvector (fn, v64si), __mask),
                 NO_COND);

  FUNCTION_RETURN;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS2 (scalb, df, df)
|
|
|
@ -1,58 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/common/s_scalbn.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
#include <limits.h>
|
|
||||||
#include <float.h>
|
|
||||||
|
|
||||||
static const double
|
|
||||||
two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
|
|
||||||
twom54 = 5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */
|
|
||||||
huge = 1.0e+300,
|
|
||||||
tiny = 1.0e-300;
|
|
||||||
|
|
||||||
v64df v64df_copysign_aux (v64df, v64df, v64di);
|
|
||||||
|
|
||||||
/* Lane-wise scalbn: x * 2^n, done by editing the exponent field of x.

   Zero, Inf and NaN lanes are returned directly (x, or x + x);
   subnormal inputs are pre-scaled by 2^54; results that leave the
   exponent range return huge*huge or tiny*tiny carrying the sign of x;
   subnormal results are produced by a final multiply with 2^-54.  */
DEF_VD_MATH_FUNC (v64df, scalbn, v64df x, v64si n)
{
  FUNCTION_INIT (v64df);

  const v64df huge_v = VECTOR_INIT ((double) huge);
  const v64df tiny_v = VECTOR_INIT ((double) tiny);

  v64si hx, lx;
  EXTRACT_WORDS (hx, lx, x);
  v64si expo = (hx & 0x7ff00000) >> 20;	/* extract exponent */

  VECTOR_IF (expo == 0, denorm)		/* 0 or subnormal x */
    VECTOR_RETURN (x, denorm & ((lx | (hx & 0x7fffffff)) == 0));	/* +- 0 */
    VECTOR_COND_MOVE (x, x * two54, denorm);
    GET_HIGH_WORD (hx, x, denorm);
    VECTOR_COND_MOVE (expo, ((hx & 0x7ff00000) >> 20) - 54, denorm);
    VECTOR_RETURN (tiny*x, denorm & (n < -50000));	/*underflow*/
  VECTOR_ENDIF

  VECTOR_RETURN (x+x, expo == 0x7ff);	/* NaN or Inf */

  /* in case integer overflow in n+k */
  VECTOR_RETURN (huge_v * v64df_copysign_aux (huge_v, x, __mask), n > 50000);

  expo += n;
  VECTOR_RETURN (huge_v * v64df_copysign_aux (huge_v, x, __mask), expo > 0x7fe);

  VECTOR_IF (expo > 0, normal)		/* normal result */
    SET_HIGH_WORD (x, (hx & 0x800fffff) | (expo << 20), normal);
    VECTOR_RETURN (x, normal);
  VECTOR_ENDIF

  VECTOR_RETURN (tiny_v * v64df_copysign_aux (tiny_v, x, __mask), expo <= -54);	/*underflow*/

  expo = expo + 54;			/* subnormal result */
  SET_HIGH_WORD (x, (hx & 0x800fffff) | (expo << 20), NO_COND);
  VECTOR_RETURN (x * twom54, NO_COND);

  FUNCTION_RETURN;
}
|
|
|
@ -1,25 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/s_signif.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64df v64df_scalb_aux (v64df x, v64df fn, v64di);
|
|
||||||
v64si v64df_ilogb_aux (v64df x, v64si);
|
|
||||||
|
|
||||||
/* Vectorized significand: computes scalb (x, -ilogb (x)) per lane using
   the masked helper routines declared above.  */
DEF_VD_MATH_FUNC (v64df, significand, v64df x)
{
  /* v64df_ilogb_aux takes its mask as v64si, so narrow __mask first.  */
  const v64si lanes = __builtin_convertvector (__mask, v64si);
  const v64si exponent = v64df_ilogb_aux (x, lanes);
  const v64df neg_exponent = -__builtin_convertvector (exponent, v64df);

  return v64df_scalb_aux (x, neg_exponent, __mask);
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (significand, df, df)
|
|
|
@ -1,12 +0,0 @@
|
||||||
/* Based on newlib/libm/mathfp/s_sin.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64df v64df_sine_aux (v64df, int, v64di);
|
|
||||||
|
|
||||||
/* Vectorized sin: forwards to v64df_sine_aux with its integer selector
   argument set to 0 (presumably the "cosine" flag of the shared
   sine/cosine kernel -- confirm against its definition).  */
DEF_VD_MATH_FUNC (v64df, sin, v64df x)
{
  const int cosine = 0;

  return v64df_sine_aux (x, cosine, __mask);
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (sin, df, df)
|
|
|
@ -1,97 +0,0 @@
|
||||||
/******************************************************************
|
|
||||||
* The following routines are coded directly from the algorithms
|
|
||||||
* and coefficients given in "Software Manual for the Elementary
|
|
||||||
* Functions" by William J. Cody, Jr. and William Waite, Prentice
|
|
||||||
* Hall, 1980.
|
|
||||||
******************************************************************/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/s_sine.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64si v64df_numtest (v64df x);
|
|
||||||
|
|
||||||
static const double HALF_PI = 1.57079632679489661923;
|
|
||||||
static const double ONE_OVER_PI = 0.31830988618379067154;
|
|
||||||
static const double r[] = { -0.16666666666666665052,
|
|
||||||
0.83333333333331650314e-02,
|
|
||||||
-0.19841269841201840457e-03,
|
|
||||||
0.27557319210152756119e-05,
|
|
||||||
-0.25052106798274584544e-07,
|
|
||||||
0.16058936490371589114e-09,
|
|
||||||
-0.76429178068910467734e-12,
|
|
||||||
0.27204790957888846175e-14 };
|
|
||||||
|
|
||||||
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsv)
|
|
||||||
|
|
||||||
/* Shared kernel for vectorized sine and cosine.
   X is the argument vector; a nonzero COSINE selects cosine, zero
   selects sine.  NaN and infinite lanes set errno to EDOM; lanes whose
   reduced magnitude exceeds YMAX set errno to ERANGE and return X.  */
DEF_VD_MATH_FUNC(v64df, sine, v64df x, int cosine)
{
  const double YMAX = 210828714.0;

  FUNCTION_INIT (v64df);

  /* Dispose of NaN and infinity first.  */
  v64si kind = v64df_numtest (x);
  VECTOR_IF (kind == NAN, lanes)
    errno = EDOM;
    VECTOR_RETURN (x, lanes);
  VECTOR_ELSEIF (kind == INF, lanes)
    errno = EDOM;
    VECTOR_RETURN (VECTOR_INIT (z_notanum.d), lanes);
  VECTOR_ENDIF

  /* Work on a magnitude Y and remember which lanes need their result
     negated.  Cosine is evaluated as the sine of |x| + pi/2.  */
  v64di negate;
  v64df y;

  if (cosine)
    {
      negate = VECTOR_INIT (0L);
      y = __builtin_gcn_fabsv (x) + HALF_PI;
    }
  else
    {
      negate = x < 0.0;
      y = VECTOR_MERGE (-x, x, negate);
    }

  /* Reject values of y that would overflow below.  */
  VECTOR_IF (y > YMAX, lanes)
    errno = ERANGE;
    VECTOR_RETURN (x, lanes);
  VECTOR_ENDIF

  /* Nearest multiple of pi: round y / pi away from zero.  */
  v64si n_below = __builtin_convertvector (y * ONE_OVER_PI - 0.5, v64si);
  v64si n_above = __builtin_convertvector (y * ONE_OVER_PI + 0.5, v64si);
  v64si N = VECTOR_MERGE (n_below, n_above, y < 0.0);
  v64df XN = __builtin_convertvector (N, v64df);

  /* An odd multiple of pi flips the sign of the result.  */
  VECTOR_COND_MOVE (negate, ~negate, (N & 1) != 0);

  if (cosine)
    XN -= 0.5;

  /* Reduced argument.  */
  y = __builtin_gcn_fabsv (x) - XN * __PI;

  v64df res;

  VECTOR_IF ((-z_rooteps < y) & (y < z_rooteps), lanes)
    /* Tiny reduced argument: the result is the argument itself.  */
    VECTOR_COND_MOVE (res, y, lanes);
  VECTOR_ELSE (lanes)
    v64df g = y * y;

    /* Horner evaluation of the series in g.
       NOTE(review): the table r[] has eight entries but only r[0]..r[6]
       are used here, as in the original.  */
    v64df R = r[6] * g + r[5];
    R = R * g + r[4];
    R = R * g + r[3];
    R = R * g + r[2];
    R = R * g + r[1];
    R = R * g + r[0];
    R = R * g;

    VECTOR_COND_MOVE (res, y + y * R, lanes);
  VECTOR_ENDIF

  VECTOR_COND_MOVE (res, -res, negate);

  VECTOR_RETURN (res, NO_COND);

  FUNCTION_RETURN;
}
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,101 +0,0 @@
|
||||||
/******************************************************************
|
|
||||||
* The following routines are coded directly from the algorithms
|
|
||||||
* and coefficients given in "Software Manual for the Elementary
|
|
||||||
* Functions" by William J. Cody, Jr. and William Waite, Prentice
|
|
||||||
* Hall, 1980.
|
|
||||||
******************************************************************/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/s_sineh.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64df v64df_exp_aux (v64df, v64di);
|
|
||||||
v64si v64df_numtest (v64df);
|
|
||||||
v64si v64df_ispos (v64df);
|
|
||||||
|
|
||||||
static const double q[] = { -0.21108770058106271242e+7,
|
|
||||||
0.36162723109421836460e+5,
|
|
||||||
-0.27773523119650701667e+3 };
|
|
||||||
static const double p[] = { -0.35181283430177117881e+6,
|
|
||||||
-0.11563521196851768270e+5,
|
|
||||||
-0.16375798202630751372e+3,
|
|
||||||
-0.78966127417357099479 };
|
|
||||||
static const double LNV = 0.6931610107421875000;
|
|
||||||
static const double INV_V2 = 0.24999308500451499336;
|
|
||||||
static const double V_OVER2_MINUS1 = 0.13830277879601902638e-4;
|
|
||||||
|
|
||||||
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsv)
|
|
||||||
|
|
||||||
/* Shared kernel for vectorized hyperbolic sine and cosine.
   X is the argument vector; a nonzero COSINEH selects cosh, zero
   selects sinh.  NaN lanes set errno to EDOM and return X; infinite
   lanes and lanes that would overflow set errno to ERANGE.  */
DEF_VD_MATH_FUNC (v64df, sineh, v64df x, int cosineh)
{
  const double WBAR = 18.55;

  FUNCTION_INIT (v64df);

  /* Lanes whose exponential-path result must be negated.  */
  v64si negate = VECTOR_INIT (0);
  /* All-ones in every lane when cosh is requested, else all-zeros.  */
  v64di want_cosh = VECTOR_INIT (cosineh ? -1L : 0L);

  /* Special values.  */
  v64si kind = v64df_numtest (x);
  VECTOR_IF (kind == NAN, lanes)
    errno = EDOM;
    VECTOR_RETURN (x, lanes);
  VECTOR_ELSEIF (kind == INF, lanes)
    errno = ERANGE;
    VECTOR_RETURN (VECTOR_MERGE (VECTOR_INIT (z_infinity.d),
                                 VECTOR_INIT (-z_infinity.d),
                                 v64df_ispos (x)),
                   lanes);
  VECTOR_ENDIF

  v64df y = __builtin_gcn_fabsv (x);

  if (!cosineh)
    {
      VECTOR_COND_MOVE (negate, VECTOR_INIT (-1), x < 0.0);
    }

  v64df res;

  /* cosh always uses the exponential path; sinh only for |x| > 1.  */
  VECTOR_IF (((y > 1.0) & ~want_cosh) | want_cosh, lanes)
    VECTOR_IF2 (y > BIGX, sub, lanes)
      v64df w = y - LNV;

      /* Still too large after the shift: give up on these lanes.  */
      VECTOR_IF2 (w > BIGX, sub2, sub)
        errno = ERANGE;
        VECTOR_RETURN (x, sub2);
      VECTOR_ENDIF

      v64df z = v64df_exp_aux (w, __mask);

      VECTOR_COND_MOVE (res, z * (V_OVER2_MINUS1 + 1.0),
                        sub & (w > WBAR));
    VECTOR_ELSE2 (sub, lanes)
      v64df z = v64df_exp_aux (y, __mask);
      if (cosineh)
        {
          VECTOR_COND_MOVE (res, (z + 1 / z) * 0.5, sub);
        }
      else
        {
          VECTOR_COND_MOVE (res, (z - 1 / z) * 0.5, sub);
        }
    VECTOR_ENDIF

    VECTOR_COND_MOVE (res, -res, negate);
  VECTOR_ELSE (lanes)
    /* Small-argument sinh.  */
    VECTOR_IF2 (y < z_rooteps, sub, lanes)
      /* Too small for the series to matter.  */
      VECTOR_COND_MOVE (res, x, sub);
    VECTOR_ELSE2 (sub, lanes)
      /* Rational approximation in f = x * x.  */
      v64df f = x * x;
      v64df Q = ((f + q[2]) * f + q[1]) * f + q[0];
      v64df P = ((p[3] * f + p[2]) * f + p[1]) * f + p[0];
      v64df R = f * (P / Q);

      VECTOR_COND_MOVE (res, x + x * R, sub);
    VECTOR_ENDIF
  VECTOR_ENDIF

  VECTOR_RETURN (res, NO_COND);

  FUNCTION_RETURN;
}
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,12 +0,0 @@
|
||||||
/* Based on newlib/libm/mathfp/s_sinh.c. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64df v64df_sineh_aux (v64df, int, v64di);
|
|
||||||
|
|
||||||
/* Vectorized sinh: forwards to v64df_sineh_aux with its integer selector
   argument set to 0 (presumably the "cosineh" flag of the shared
   sinh/cosh kernel -- confirm against its definition).  */
DEF_VD_MATH_FUNC (v64df, sinh, v64df x)
{
  const int cosineh = 0;

  return v64df_sineh_aux (x, cosineh, __mask);
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (sinh, df, df)
|
|
|
@ -1,75 +0,0 @@
|
||||||
/*****************************************************************
|
|
||||||
* The following routines are coded directly from the algorithms
|
|
||||||
* and coefficients given in "Software Manual for the Elementary
|
|
||||||
* Functions" by William J. Cody, Jr. and William Waite, Prentice
|
|
||||||
* Hall, 1980.
|
|
||||||
*****************************************************************/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/s_sqrt.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64si v64df_numtest (v64df);
|
|
||||||
v64si v64df_ispos (v64df);
|
|
||||||
|
|
||||||
#if defined (__has_builtin) \
|
|
||||||
&& __has_builtin (__builtin_gcn_frexpv_mant) \
|
|
||||||
&& __has_builtin (__builtin_gcn_frexpv_exp) \
|
|
||||||
&& __has_builtin (__builtin_gcn_ldexpv)
|
|
||||||
|
|
||||||
/* Vectorized square root.
   Special cases, per lane:
     NaN      -> the argument, errno = EDOM
     +Inf     -> +Inf
     -Inf     -> NaN, errno = EDOM
     +-0      -> the argument (sign preserved)
     x < 0    -> NaN, errno = EDOM
   Otherwise x is split into mantissa and exponent, the square root of
   the mantissa is approximated with three Newton iterations, and the
   result is rebuilt with half the exponent.  */
DEF_VD_MATH_FUNC (v64df, sqrt, v64df x)
{
  FUNCTION_INIT (v64df);

  /* Check for special values.  */
  v64si num_type = v64df_numtest (x);
  VECTOR_IF (num_type == NAN, cond)
    errno = EDOM;
    VECTOR_RETURN (x, cond);
  VECTOR_ELSEIF (num_type == INF, cond)
    VECTOR_IF2 (v64df_ispos (x), cond2, cond)
      /* sqrt (+Inf) is +Inf and is not an error.  */
      VECTOR_RETURN (x, cond2);
    VECTOR_ELSE2 (cond2, cond)
      /* sqrt (-Inf) is a domain error.  */
      errno = EDOM;
      VECTOR_RETURN (VECTOR_INIT (z_notanum.d), cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF

  /* Initial checks are performed here.  */
  VECTOR_IF (x == 0.0, cond)
    /* Return the argument itself so that sqrt (-0.0) stays -0.0.  */
    VECTOR_RETURN (x, cond);
  VECTOR_ENDIF
  VECTOR_IF (x < 0.0, cond)
    errno = EDOM;
    VECTOR_RETURN (VECTOR_INIT (z_notanum.d), cond);
  VECTOR_ENDIF

  /* Find the exponent and mantissa for the form x = f * 2^exp.  */
  v64df f = __builtin_gcn_frexpv_mant (x);
  v64si exp = __builtin_gcn_frexpv_exp (x);
  v64si odd = (exp & 1) != 0;

  /* Get the initial approximation.  */
  v64df y = 0.41731 + 0.59016 * f;

  f *= 0.5;
  /* Three Newton steps: y = (y + f_orig / y) / 2, with f pre-halved.  */
  y = y * 0.5 + f / y;
  y = y * 0.5 + f / y;
  y = y * 0.5 + f / y;

  /* For an odd exponent fold a factor of sqrt (1/2) into the mantissa
     and round the exponent up so that it halves exactly.  */
  VECTOR_COND_MOVE (y, y * __SQRT_HALF, odd);
  VECTOR_COND_MOVE (exp, exp + 1, odd);
  exp >>= 1;
  y = __builtin_gcn_ldexpv (y, exp);

  VECTOR_RETURN (y, NO_COND);

  FUNCTION_RETURN;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (sqrt, df, df)
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,142 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/math/kf_tan.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
static const double
|
|
||||||
pio4 = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
|
|
||||||
pio4lo= 3.06161699786838301793e-17, /* 0x3C81A626, 0x33145C07 */
|
|
||||||
T[] = {
|
|
||||||
3.33333333333334091986e-01, /* 0x3FD55555, 0x55555563 */
|
|
||||||
1.33333333333201242699e-01, /* 0x3FC11111, 0x1110FE7A */
|
|
||||||
5.39682539762260521377e-02, /* 0x3FABA1BA, 0x1BB341FE */
|
|
||||||
2.18694882948595424599e-02, /* 0x3F9664F4, 0x8406D637 */
|
|
||||||
8.86323982359930005737e-03, /* 0x3F8226E3, 0xE96E8493 */
|
|
||||||
3.59207910759131235356e-03, /* 0x3F6D6D22, 0xC9560328 */
|
|
||||||
1.45620945432529025516e-03, /* 0x3F57DBC8, 0xFEE08315 */
|
|
||||||
5.88041240820264096874e-04, /* 0x3F4344D8, 0xF2F26501 */
|
|
||||||
2.46463134818469906812e-04, /* 0x3F3026F7, 0x1A8D1068 */
|
|
||||||
7.81794442939557092300e-05, /* 0x3F147E88, 0xA03792A6 */
|
|
||||||
7.14072491382608190305e-05, /* 0x3F12B80F, 0x32F0A7E9 */
|
|
||||||
-1.85586374855275456654e-05, /* 0xBEF375CB, 0xDB605373 */
|
|
||||||
2.59073051863633712884e-05, /* 0x3EFB2A70, 0x74BF7AD4 */
|
|
||||||
};
|
|
||||||
|
|
||||||
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsv)
|
|
||||||
|
|
||||||
static v64df
|
|
||||||
v64df_kernel_tan (v64df x, v64df y, v64si iy, v64di __mask)
|
|
||||||
{
|
|
||||||
FUNCTION_INIT (v64df);
|
|
||||||
|
|
||||||
v64si hx;
|
|
||||||
GET_HIGH_WORD(hx, x, NO_COND);
|
|
||||||
v64si ix = hx & 0x7fffffff; /* high word of |x| */
|
|
||||||
|
|
||||||
VECTOR_IF (ix < 0x3e300000, cond) /* x < 2**-28 */
|
|
||||||
VECTOR_IF2(__builtin_convertvector (x, v64si)==0, cond2, cond) /* generate inexact */
|
|
||||||
v64si low;
|
|
||||||
GET_LOW_WORD (low, x, cond2);
|
|
||||||
VECTOR_RETURN (1.0 / __builtin_gcn_fabsv (x), ((ix|low)|(iy+1))==0);
|
|
||||||
VECTOR_RETURN (x, cond2 & (iy == 1));
|
|
||||||
v64df z, w;
|
|
||||||
z = w = x + y;
|
|
||||||
SET_LOW_WORD (z, VECTOR_INIT (0.0), cond2);
|
|
||||||
v64df v = y - (z - x);
|
|
||||||
v64df t, a;
|
|
||||||
t = a = -1.0 / w;
|
|
||||||
SET_LOW_WORD(t, VECTOR_INIT (0.0), cond2);
|
|
||||||
v64df s = 1.0 + t * z;
|
|
||||||
VECTOR_RETURN ( t + a * (s + t * v), cond2);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_IF(ix>=0x3FE59428, cond) /* |x|>=0.6744 */
|
|
||||||
VECTOR_COND_MOVE (x, -x, cond & (hx < 0));
|
|
||||||
VECTOR_COND_MOVE (y, -y, cond & (hx < 0));
|
|
||||||
v64df z = pio4-x;
|
|
||||||
v64df w = pio4lo-y;
|
|
||||||
VECTOR_COND_MOVE (x, z+w, cond);
|
|
||||||
VECTOR_COND_MOVE (y, VECTOR_INIT (0.0), cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
v64df z = x*x;
|
|
||||||
v64df w = z*z;
|
|
||||||
/* Break x^5*(T[1]+x^2*T[2]+...) into
|
|
||||||
* x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
|
|
||||||
* x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
|
|
||||||
*/
|
|
||||||
v64df r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11]))));
|
|
||||||
v64df v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12])))));
|
|
||||||
v64df s = z*x;
|
|
||||||
r = y + z*(s*(r+v)+y);
|
|
||||||
r += T[0]*s;
|
|
||||||
w = x+r;
|
|
||||||
VECTOR_IF(ix>=0x3FE59428, cond)
|
|
||||||
v = __builtin_convertvector (iy, v64df);
|
|
||||||
VECTOR_RETURN (__builtin_convertvector (1-((hx>>30)&2), v64df)
|
|
||||||
* (v-2.0*(x-(w*w/(w+v)-r))), cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_RETURN (w, iy == 1);
|
|
||||||
/* if allow error up to 2 ulp,
|
|
||||||
simply return -1.0/(x+r) here */
|
|
||||||
/* compute -1.0/(x+r) accurately */
|
|
||||||
z = w;
|
|
||||||
SET_LOW_WORD (z, VECTOR_INIT (0), NO_COND);
|
|
||||||
v = r - (z - x); /* z+v = r+x */
|
|
||||||
v64df a, t;
|
|
||||||
t = a = -1.0/w; /* a = -1.0/w */
|
|
||||||
SET_LOW_WORD(t, VECTOR_INIT (0), NO_COND);
|
|
||||||
s = 1.0+t*z;
|
|
||||||
VECTOR_RETURN (t+a*(s+t*v), NO_COND);
|
|
||||||
|
|
||||||
FUNCTION_RETURN;
|
|
||||||
}
|
|
||||||
|
|
||||||
static v64si
|
|
||||||
v64df_rem_pio2 (v64df x, v64df *y)
|
|
||||||
{
|
|
||||||
v64df r = x * __INV_PI_OVER_TWO_2_24;
|
|
||||||
v64si n = (__builtin_convertvector (r, v64si) + 0x800000) >> 24;
|
|
||||||
x = x - __builtin_convertvector (n, v64df) * __PI_OVER_TWO;
|
|
||||||
|
|
||||||
y[0] = x;
|
|
||||||
y[1] = x - y[0];
|
|
||||||
return n;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Vectorized tan.  Small arguments go straight to the kernel, Inf/NaN
   lanes return NaN, and everything else is first reduced with
   v64df_rem_pio2.  */
DEF_VD_MATH_FUNC (v64df, tan, v64df x)
{
  FUNCTION_INIT (v64df);

  /* High word of |x|.  */
  v64si ix;
  GET_HIGH_WORD (ix, x, NO_COND);
  ix &= 0x7fffffff;

  /* |x| ~< pi/4: no reduction needed.  */
  VECTOR_RETURN (v64df_kernel_tan (x, VECTOR_INIT (0.0), VECTOR_INIT (1), __mask),
                 ix <= 0x3fe921fb);

  /* tan(Inf or NaN) is NaN */
  VECTOR_RETURN (x - x, ix >= 0x7ff00000);

  /* Argument reduction needed.  */
  v64df y[2];
  v64si n = v64df_rem_pio2 (x, y);

  /* Kernel selector: 1 when n is even, -1 when n is odd.  */
  v64si selector = 1 - ((n & 1) << 1);
  VECTOR_RETURN (v64df_kernel_tan (y[0], y[1], selector, __mask), NO_COND);

  FUNCTION_RETURN;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (tan, df, df)
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,63 +0,0 @@
|
||||||
/*****************************************************************
|
|
||||||
* The following routines are coded directly from the algorithms
|
|
||||||
* and coefficients given in "Software Manual for the Elementary
|
|
||||||
* Functions" by William J. Cody, Jr. and William Waite, Prentice
|
|
||||||
* Hall, 1980.
|
|
||||||
*****************************************************************/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/s_tanh.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64df v64df_exp_aux (v64df, v64di);
|
|
||||||
|
|
||||||
static const double LN3_OVER2 = 0.54930614433405484570;
|
|
||||||
static const double p[] = { -0.16134119023996228053e+4,
|
|
||||||
-0.99225929672236083313e+2,
|
|
||||||
-0.96437492777225469787 };
|
|
||||||
static const double q[] = { 0.48402357071988688686e+4,
|
|
||||||
0.22337720718962312926e+4,
|
|
||||||
0.11274474380534949335e+3 };
|
|
||||||
|
|
||||||
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsv)
|
|
||||||
|
|
||||||
/* Vectorized tanh.  Works on f = |x| and picks one of four methods per
   lane by magnitude, then restores the sign of x.  */
DEF_VD_MATH_FUNC (v64df, tanh, v64df x)
{
  FUNCTION_INIT (v64df);

  v64df f = __builtin_gcn_fabsv (x);
  v64df res;

  VECTOR_IF (f > BIGX, lanes)
    /* Input too big: the result saturates at 1.  */
    VECTOR_COND_MOVE (res, VECTOR_INIT (1.0), lanes);

  VECTOR_ELSEIF (f > LN3_OVER2, lanes)
    /* Exponential form.  */
    VECTOR_COND_MOVE (res, 1.0 - 2.0 / (v64df_exp_aux (2 * f, __mask) + 1.0),
                      lanes);

  VECTOR_ELSEIF (f < z_rooteps, lanes)
    /* Input too small: the result is the argument.  */
    VECTOR_COND_MOVE (res, f, lanes);

  VECTOR_ELSE (lanes)
    /* Rational approximation in g = f * f.  */
    v64df g = f * f;

    v64df P = (p[2] * g + p[1]) * g + p[0];
    v64df Q = ((g + q[2]) * g + q[1]) * g + q[0];
    v64df R = g * (P / Q);

    VECTOR_COND_MOVE (res, f + f * R, lanes);
  VECTOR_ENDIF

  /* tanh is odd: negate the result where x was negative.  */
  VECTOR_COND_MOVE (res, -res, x < 0.0);

  VECTOR_RETURN (res, NO_COND);

  FUNCTION_RETURN;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (tanh, df, df)
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,28 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/math/e_tgamma.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64df v64df_exp_aux (v64df x, v64di __mask);
|
|
||||||
v64df v64df_lgamma_r_aux (v64df x, v64si *signgamp, v64di __mask);
|
|
||||||
|
|
||||||
/* Vectorized tgamma: exponentiates the result of v64df_lgamma_r_aux and
   negates the lanes for which that helper reported a negative sign.  */
DEF_VD_MATH_FUNC (v64df, tgamma, v64df x)
{
  v64si signgam_local;
  const v64df log_gamma = v64df_lgamma_r_aux (x, &signgam_local, __mask);
  v64df y = v64df_exp_aux (log_gamma, __mask);

  VECTOR_COND_MOVE (y, -y, signgam_local < 0);
  return y;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (tgamma, df, df)
|
|
|
@ -1,12 +0,0 @@
|
||||||
/* Based on newlib/libm/mathfp/sf_acos.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64sf v64sf_asinef_aux (v64sf, int, v64si);
|
|
||||||
|
|
||||||
/* Vectorized acosf: forwards to v64sf_asinef_aux with its integer
   selector argument set to 1 (presumably the "acosine" flag of the
   shared asin/acos kernel -- confirm against its definition).  */
DEF_VS_MATH_FUNC (v64sf, acosf, v64sf x)
{
  const int acosine = 1;

  return v64sf_asinef_aux (x, acosine, __mask);
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (acosf, sf, sf)
|
|
|
@ -1,66 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/ef_acosh.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64sf v64sf_logf_aux (v64sf, v64si);
|
|
||||||
v64sf v64sf_log1pf_aux (v64sf, v64si);
|
|
||||||
v64sf v64sf_sqrtf_aux (v64sf, v64si);
|
|
||||||
|
|
||||||
/* Vectorized acoshf.  Per lane:
     x < 1            -> NaN
     x is Inf or NaN  -> x + x
     x >= 2**28       -> log (x) + ln2
     x == 1           -> 0
     2 < x < 2**28    -> log (2x - 1 / (x + sqrt (x*x - 1)))
     1 < x <= 2       -> log1p (t + sqrt (2t + t*t)) with t = x - 1  */
DEF_VS_MATH_FUNC (v64sf, acoshf, v64sf x)
{
  static const float ln2 = 6.9314718246e-01;  /* 0x3f317218 */

  FUNCTION_INIT (v64sf);

  v64si hx;
  GET_FLOAT_WORD (hx, x, NO_COND);

  VECTOR_IF (hx < 0x3f800000, cond)     /* x < 1 */
    VECTOR_RETURN ((x-x) / (x-x), cond);
  VECTOR_ENDIF
  VECTOR_IF (hx >= 0x4d800000, cond)    /* x > 2**28 */
    VECTOR_IF2 (hx >= 0x7f800000, cond2, cond)  /* x is inf or NaN */
      VECTOR_RETURN (x+x, cond2);
    /* Use the nested form of else so that this arm stays restricted to
       the lanes selected by the enclosing condition.  */
    VECTOR_ELSE2 (cond2, cond)
      /* acosh(huge)=log(2x) */
      VECTOR_RETURN (v64sf_logf_aux (x, __mask) + ln2, cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF
  VECTOR_IF (hx == 0x3f800000, cond)
    /* acosh(1) = 0 */
    VECTOR_RETURN (VECTOR_INIT (0.0f), cond);
  VECTOR_ENDIF
  VECTOR_IF (hx > 0x40000000, cond)     /* 2**28 > x > 2 */
  {
    v64sf t = x * x;
    VECTOR_RETURN (v64sf_logf_aux (2.0f*x - 1.0f /
                                   (x + v64sf_sqrtf_aux (t - 1.0f, __mask)),
                                   __mask),
                   cond);
  }
  VECTOR_ELSE (cond)    /* 1<x<2 */
  {
    v64sf t = x - 1.0f;
    VECTOR_RETURN (v64sf_log1pf_aux (t + v64sf_sqrtf_aux (2.0f*t + t*t, __mask),
                                     __mask),
                   cond);
  }
  VECTOR_ENDIF

  FUNCTION_RETURN;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (acoshf, sf, sf)
|
|
|
@ -1,12 +0,0 @@
|
||||||
/* Based on newlib/libm/mathfp/sf_asin.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64sf v64sf_asinef_aux (v64sf, int, v64si);
|
|
||||||
|
|
||||||
/* Vectorized asinf: forwards to v64sf_asinef_aux with its integer
   selector argument set to 0 (presumably the "acosine" flag of the
   shared asin/acos kernel -- confirm against its definition).  */
DEF_VS_MATH_FUNC (v64sf, asinf, v64sf x)
{
  const int acosine = 0;

  return v64sf_asinef_aux (x, acosine, __mask);
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (asinf, sf, sf)
|
|
|
@ -1,98 +0,0 @@
|
||||||
/******************************************************************
|
|
||||||
* The following routines are coded directly from the algorithms
|
|
||||||
* and coefficients given in "Software Manual for the Elementary
|
|
||||||
* Functions" by William J. Cody, Jr. and William Waite, Prentice
|
|
||||||
* Hall, 1980.
|
|
||||||
******************************************************************/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/sf_asine.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64si v64sf_numtestf (v64sf);
|
|
||||||
v64sf v64sf_sqrtf (v64sf);
|
|
||||||
|
|
||||||
static const float p[] = { 0.933935835, -0.504400557 };
|
|
||||||
static const float q[] = { 0.560363004e+1, -0.554846723e+1 };
|
|
||||||
static const float a[] = { 0.0, 0.785398163 };
|
|
||||||
static const float b[] = { 1.570796326, 0.785398163 };
|
|
||||||
|
|
||||||
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsvf)
|
|
||||||
|
|
||||||
/* Shared kernel for vectorized asinf and acosf.
   X is the argument vector; a nonzero ACOSINE selects arc cosine, zero
   selects arc sine.  NaN and infinite lanes set errno to EDOM; lanes
   with |x| > 1 set errno to ERANGE and return NaN.  */
DEF_VS_MATH_FUNC (v64sf, asinef, v64sf x, int acosine)
{
  FUNCTION_INIT (v64sf);

  /* All-ones in lanes that took the |x| > 0.5 path.  */
  v64si branch = VECTOR_INIT (0);

  /* Holds the number class first and is reused below as the index into
     the a[] / b[] constant tables.  */
  v64si i = v64sf_numtestf (x);
  VECTOR_IF ((i == NAN) | (i == INF), lanes)
    errno = EDOM;
    VECTOR_RETURN (VECTOR_MERGE (x, VECTOR_INIT (z_infinity_f.f),
                                 i == NAN),
                   lanes);
  VECTOR_ENDIF

  v64sf y = __builtin_gcn_fabsvf (x);
  v64sf g, res;

  VECTOR_IF (y > 0.5f, lanes)
    VECTOR_COND_MOVE (i, VECTOR_INIT (1 - acosine), lanes);

    /* Range error.  */
    VECTOR_IF2 (y > 1.0f, sub, lanes)
      errno = ERANGE;
      VECTOR_RETURN (VECTOR_INIT (z_notanum_f.f), sub);
    VECTOR_ENDIF

    VECTOR_COND_MOVE (g, (1.0f - y) / 2.0f, lanes);
    VECTOR_COND_MOVE (y, -2.0f * v64sf_sqrtf (g), lanes);
    VECTOR_COND_MOVE (branch, VECTOR_INIT (-1), lanes);
  VECTOR_ELSE (lanes)
    VECTOR_COND_MOVE (i, VECTOR_INIT (acosine), lanes);
    VECTOR_IF2 (y < z_rooteps_f, sub, lanes)
      /* Tiny argument: no series needed.  */
      VECTOR_COND_MOVE (res, y, sub);
    VECTOR_ELSE2 (sub, lanes)
      VECTOR_COND_MOVE (g, y * y, sub);
    VECTOR_ENDIF
  VECTOR_ENDIF

  VECTOR_IF ((y >= z_rooteps_f) | branch, lanes)
  {
    /* Rational approximation in g.  */
    v64sf P = (p[1] * g + p[0]) * g;
    v64sf Q = (g + q[1]) * g + q[0];
    v64sf R = P / Q;

    VECTOR_COND_MOVE (res, y + y * R, lanes);
  }
  VECTOR_ENDIF

  /* Per-lane pick of a[1] where i is nonzero, a[0] elsewhere.  */
  v64sf a_i = VECTOR_MERGE (VECTOR_INIT (a[1]), VECTOR_INIT (a[0]), i != 0);

  /* Assemble the arc sine or arc cosine.  */
  if (!acosine)
    {
      VECTOR_COND_MOVE (res, (a_i + res) + a_i, NO_COND);
      VECTOR_IF (x < 0.0f, lanes)
        VECTOR_COND_MOVE (res, -res, lanes);
      VECTOR_ENDIF
    }
  else
    {
      v64sf b_i = VECTOR_MERGE (VECTOR_INIT(b[1]), VECTOR_INIT(b[0]), i != 0);

      VECTOR_IF (x < 0.0f, lanes)
        VECTOR_COND_MOVE (res, (b_i + res) + b_i, lanes);
      VECTOR_ELSE (lanes)
        VECTOR_COND_MOVE (res, (a_i - res) + a_i, lanes);
      VECTOR_ENDIF
    }

  VECTOR_RETURN (res, NO_COND);

  FUNCTION_RETURN;
}
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,68 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/sf_asinh.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64sf v64sf_logf_aux (v64sf, v64si);
|
|
||||||
v64sf v64sf_log1pf_aux (v64sf, v64si);
|
|
||||||
v64sf v64sf_sqrtf_aux (v64sf, v64si);
|
|
||||||
|
|
||||||
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsvf)
|
|
||||||
|
|
||||||
/* Vectorized asinhf.  Computes w = asinh (|x|) with a method chosen per
   lane by magnitude, then returns w or -w according to the sign of x.  */
DEF_VS_MATH_FUNC (v64sf, asinhf, v64sf x)
{
  static const float one = 1.0000000000e+00;  /* 0x3F800000 */
  static const float ln2 = 6.9314718246e-01;  /* 0x3f317218 */
  static const float huge = 1.0000000000e+30;

  FUNCTION_INIT (v64sf);

  v64sf w;
  v64si hx;
  GET_FLOAT_WORD (hx, x, NO_COND);
  /* Bit pattern of |x|.  */
  v64si ix = hx & 0x7fffffff;

  VECTOR_IF (ix >= 0x7f800000, lanes)   /* x is inf or NaN */
    VECTOR_RETURN (x + x, lanes);
  VECTOR_ENDIF
  VECTOR_IF (ix < 0x31800000, lanes)    /* |x|<2**-28 */
    VECTOR_IF2 (huge + x > one, sub, lanes)     /* return x inexact except 0 */
      /* NOTE(review): the return is keyed on the outer mask rather than
         the inner one; kept as in the original.  */
      VECTOR_RETURN (x, lanes);
    VECTOR_ENDIF
  VECTOR_ENDIF

  VECTOR_IF (ix > 0x4d800000, lanes)    /* x > 2**28 */
    VECTOR_COND_MOVE (w, v64sf_logf_aux (__builtin_gcn_fabsvf (x), __mask) +
                         ln2,
                      lanes);
  VECTOR_ELSEIF (ix > 0x40000000, lanes)        /* 2**28 > |x| > 2.0 */
    v64sf t = __builtin_gcn_fabsvf (x);
    VECTOR_COND_MOVE (w, v64sf_logf_aux (2.0f * t + 1.0f / (v64sf_sqrtf_aux (x*x + 1.0f, __mask) + t), __mask),
                      lanes);
  VECTOR_ELSE (lanes)                   /* 2.0 > |x| > 2**-28 */
    v64sf t = x * x;
    VECTOR_COND_MOVE (w, v64sf_log1pf_aux (__builtin_gcn_fabsvf (x) + t / (1.0f + v64sf_sqrtf_aux (1.0f + t, __mask)), __mask),
                      lanes);
  VECTOR_ENDIF

  /* asinh is odd: positive bit pattern keeps w, everything else gets -w.  */
  VECTOR_IF (hx > 0, lanes)
    VECTOR_RETURN (w, lanes);
  VECTOR_ELSE (lanes)
    VECTOR_RETURN (-w, lanes);
  VECTOR_ENDIF

  FUNCTION_RETURN;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (asinhf, sf, sf)
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,13 +0,0 @@
|
||||||
/* Based on newlib/libm/mathfp/sf_atan.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64sf v64sf_atangentf_aux (v64sf, v64sf, v64sf, int, v64si);
|
|
||||||
|
|
||||||
/* Vectorized atanf: forwards to v64sf_atangentf_aux with zero vectors
   for the two unused operands and the integer selector set to 0
   (presumably the "arctan2" flag of the shared kernel -- confirm
   against its definition).  */
DEF_VS_MATH_FUNC (v64sf, atanf, v64sf x)
{
  const int arctan2 = 0;

  return v64sf_atangentf_aux (x, VECTOR_INIT (0.0f), VECTOR_INIT (0.0f),
                              arctan2, __mask);
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (atanf, sf, sf)
|
|
|
@ -1,12 +0,0 @@
|
||||||
/* Based on newlib/libm/mathfp/sf_atan2.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64sf v64sf_atangentf_aux (v64sf, v64sf, v64sf, int, v64si);
|
|
||||||
|
|
||||||
/* Vectorized atan2f: forwards V and U to v64sf_atangentf_aux with a zero
   vector for the single-argument operand and the integer selector set
   to 1 (presumably the "arctan2" flag of the shared kernel -- confirm
   against its definition).  */
DEF_VS_MATH_FUNC (v64sf, atan2f, v64sf v, v64sf u)
{
  const int arctan2 = 1;

  return v64sf_atangentf_aux (VECTOR_INIT (0.0f), v, u, arctan2, __mask);
}
|
|
||||||
|
|
||||||
DEF_VARIANTS2 (atan2f, sf, sf)
|
|
|
@ -1,123 +0,0 @@
|
||||||
/******************************************************************
|
|
||||||
* The following routines are coded directly from the algorithms
|
|
||||||
* and coefficients given in "Software Manual for the Elementary
|
|
||||||
* Functions" by William J. Cody, Jr. and William Waite, Prentice
|
|
||||||
* Hall, 1980.
|
|
||||||
******************************************************************/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/sf_atangent.c in Newlib. */
|
|
||||||
|
|
||||||
#include <float.h>
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
/* sqrt (3), used by the second range-reduction step.  */
static const float ROOT3 = 1.732050807;

/* Offsets added back after range reduction, indexed by the region
   number N: 0, pi/6, pi/2 and pi/3.  */
static const float a[] = {
  0.0,
  0.523598775,
  1.570796326,
  1.047197551
};

/* Denominator and numerator coefficients of the rational
   approximation evaluated on the reduced argument.  */
static const float q[] = { 0.1412500740e+1 };
static const float p[] = { -0.4708325141, -0.5090958253e-1 };
|
|
||||||
|
|
||||||
#if defined (__has_builtin) \
|
|
||||||
&& __has_builtin (__builtin_gcn_frexpvf_exp) \
|
|
||||||
&& __has_builtin (__builtin_gcn_fabsvf)
|
|
||||||
|
|
||||||
/* Vector arctangent kernel shared by atanf and atan2f.

   x        argument, read only when ARCTAN2 is zero.
   v, u     numerator and denominator, read only when ARCTAN2 is non-zero.
   arctan2  non-zero selects the two-argument form.

   In two-argument mode, lanes with u == 0 and v == 0 return 0 and errno
   is set to ERANGE.  */
DEF_VS_MATH_FUNC (v64sf, atangentf, v64sf x, v64sf v, v64sf u, int arctan2)
{
  FUNCTION_INIT (v64sf);

  v64sf zero = VECTOR_INIT (0.0f);
  v64sf res;
  /* All-ones in lanes whose result was fixed by a special case.  */
  v64si settled = VECTOR_INIT (0);

  /* Two-argument mode: handle lanes where v / u cannot be formed.  */
  if (arctan2)
    {
      VECTOR_IF (u == 0.0f, cond)
        VECTOR_IF2 (v == 0.0f, cond2, cond)
          errno = ERANGE;
          VECTOR_RETURN (zero, cond2);
        VECTOR_ELSE2 (cond2, cond)
          VECTOR_COND_MOVE (settled, VECTOR_INIT (-1), cond2);
          VECTOR_COND_MOVE (res, VECTOR_INIT ((float) __PI_OVER_TWO), cond2);
        VECTOR_ENDIF
      VECTOR_ENDIF

      VECTOR_IF (~settled, cond)
        /* Difference of the binary exponents of v and u.  */
        v64si exp_diff = __builtin_gcn_frexpvf_exp (v)
                         - __builtin_gcn_frexpvf_exp (u);

        /* The quotient would overflow.  */
        VECTOR_IF2 (exp_diff > FLT_MAX_EXP, cond2, cond)
          VECTOR_COND_MOVE (settled, VECTOR_INIT (-1), cond2);
          VECTOR_COND_MOVE (res, VECTOR_INIT ((float) __PI_OVER_TWO), cond2);
        VECTOR_ENDIF

        /* The quotient would underflow.  */
        VECTOR_IF2 (exp_diff < FLT_MIN_EXP, cond2, cond)
          VECTOR_COND_MOVE (settled, VECTOR_INIT (-1), cond2);
          VECTOR_COND_MOVE (res, zero, cond2);
        VECTOR_ENDIF
      VECTOR_ENDIF
    }

  VECTOR_IF (~settled, cond)
    v64sf f;
    /* Region index selecting the offset a[N] added at the end.  */
    v64si N = VECTOR_INIT (0);

    if (arctan2)
      f = __builtin_gcn_fabsvf (v / u);
    else
      f = __builtin_gcn_fabsvf (x);

    /* First reduction: fold f > 1 onto its reciprocal.  */
    VECTOR_IF2 (f > 1.0f, cond2, cond)
      VECTOR_COND_MOVE (f, 1.0f / f, cond2);
      VECTOR_COND_MOVE (N, VECTOR_INIT (2), cond2);
    VECTOR_ENDIF

    /* Second reduction for f > 2 - sqrt (3).  */
    VECTOR_IF2 (f > (2.0f - ROOT3), cond2, cond)
      VECTOR_COND_MOVE (f,
                        ((((ROOT3 - 1.0f) * f - 0.5f) - 0.5f) + f)
                        / (ROOT3 + f),
                        cond2);
      /* Active lanes of cond2 are all-ones, so this adds 1 there.  */
      N += cond2 & 1;
    VECTOR_ENDIF

    /* Tiny arguments are returned unchanged ...  */
    VECTOR_IF2 ((-z_rooteps_f < f) & (f < z_rooteps_f), cond2, cond)
      VECTOR_COND_MOVE (res, f, cond2);

    /* ... everything else goes through the rational approximation.  */
    VECTOR_ELSE2 (cond2, cond)
      v64sf f_sq = f * f;
      v64sf num = (p[1] * f_sq + p[0]) * f_sq;
      v64sf den = f_sq + q[0];
      v64sf ratio = num / den;

      VECTOR_COND_MOVE (res, f + f * ratio, cond2);
    VECTOR_ENDIF

    VECTOR_COND_MOVE (res, -res, cond & (N > 1));

    /* Add a[N]; a[0] is zero, so N == 0 needs no term.  */
    res += VECTOR_MERGE (VECTOR_INIT (a[1]), zero, cond & (N == 1));
    res += VECTOR_MERGE (VECTOR_INIT (a[2]), zero, cond & (N == 2));
    res += VECTOR_MERGE (VECTOR_INIT (a[3]), zero, cond & (N == 3));
  VECTOR_ENDIF

  /* Restore the quadrant (two-argument mode) or the sign.  */
  if (arctan2)
    {
      VECTOR_COND_MOVE (res, (float) __PI - res, u < 0.0f);
      VECTOR_COND_MOVE (res, -res, v < 0.0f);
    }
  else
    {
      VECTOR_COND_MOVE (res, -res, x < 0.0f);
    }

  VECTOR_RETURN (res, NO_COND);

  FUNCTION_RETURN;
}
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,61 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/ef_atanh.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64sf v64sf_log1pf_aux (v64sf, v64si);
|
|
||||||
|
|
||||||
DEF_VS_MATH_FUNC (v64sf, atanhf, v64sf x)
|
|
||||||
{
|
|
||||||
static const float zero = 0.0;
|
|
||||||
static const float one = 1.0, huge = 1e30;
|
|
||||||
|
|
||||||
FUNCTION_INIT (v64sf);
|
|
||||||
|
|
||||||
v64sf t;
|
|
||||||
v64si hx;
|
|
||||||
GET_FLOAT_WORD (hx, x, NO_COND);
|
|
||||||
v64si ix = hx & 0x7fffffff;
|
|
||||||
|
|
||||||
VECTOR_IF (ix > 0x3f800000, cond) // |x|>1
|
|
||||||
VECTOR_RETURN ((x - x)/(x - x), cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
VECTOR_IF (ix == 0x3f800000, cond)
|
|
||||||
VECTOR_RETURN (x / zero, cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
VECTOR_IF ((ix < 0x31800000) & ((huge + x) > zero), cond) // x<2**-28
|
|
||||||
VECTOR_RETURN (x, cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
SET_FLOAT_WORD (x, ix, NO_COND);
|
|
||||||
|
|
||||||
VECTOR_IF (ix < 0x3f000000, cond) // x < 0.5 */
|
|
||||||
v64sf t2 = x + x;
|
|
||||||
VECTOR_COND_MOVE (t, 0.5f * v64sf_log1pf_aux (t2 + t2 * x / (one - x), __mask), cond);
|
|
||||||
VECTOR_ELSE (cond)
|
|
||||||
VECTOR_COND_MOVE (t, 0.5f * v64sf_log1pf_aux ((x + x) / (one - x), __mask), cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
VECTOR_IF (hx >= 0, cond)
|
|
||||||
VECTOR_RETURN (t, cond);
|
|
||||||
VECTOR_ELSE (cond)
|
|
||||||
VECTOR_RETURN (-t, cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
FUNCTION_RETURN;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (atanhf, sf, sf)
|
|
|
@ -1,29 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/common/sf_copysign.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
DEF_VS_MATH_FUNC (v64sf, copysignf, v64sf x, v64sf y)
|
|
||||||
{
|
|
||||||
FUNCTION_INIT (v64sf);
|
|
||||||
|
|
||||||
v64si ix, iy;
|
|
||||||
GET_FLOAT_WORD (ix, x, NO_COND);
|
|
||||||
GET_FLOAT_WORD (iy, y, NO_COND);
|
|
||||||
SET_FLOAT_WORD (x, (ix & 0x7fffffff) | (iy & 0x80000000), NO_COND);
|
|
||||||
VECTOR_RETURN (x, NO_COND);
|
|
||||||
|
|
||||||
FUNCTION_RETURN;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEF_VARIANTS2 (copysignf, sf, sf)
|
|
|
@ -1,12 +0,0 @@
|
||||||
/* Based on newlib/libm/mathfp/sf_cos.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64sf v64sf_sinef_aux (v64sf, int, v64si);
|
|
||||||
|
|
||||||
DEF_VS_MATH_FUNC (v64sf, cosf, v64sf x)
|
|
||||||
{
|
|
||||||
return v64sf_sinef_aux (x, 1, __mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (cosf, sf, sf)
|
|
|
@ -1,12 +0,0 @@
|
||||||
/* Based on newlib/libm/mathfp/sf_cosh.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64sf v64sf_sinehf_aux (v64sf, int, v64si);
|
|
||||||
|
|
||||||
DEF_VS_MATH_FUNC (v64sf, coshf, v64sf x)
|
|
||||||
{
|
|
||||||
return v64sf_sinehf_aux (x, 1, __mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (coshf, sf, sf)
|
|
|
@ -1,172 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/sf_erf.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64sf v64sf_expf_aux (v64sf, v64si);
|
|
||||||
|
|
||||||
/* General-purpose constants.  */
static const float tiny = 1e-30;
static const float half = 5.0000000000e-01;	/* 0x3F000000 */
static const float one  = 1.0000000000e+00;	/* 0x3F800000 */
static const float two  = 2.0000000000e+00;	/* 0x40000000 */
/* c = (subfloat)0.84506291151 */
static const float erx  = 8.4506291151e-01;	/* 0x3f58560b */

/* Coefficients for approximation to erf on [0,0.84375].  */
static const float efx  =  1.2837916613e-01;	/* 0x3e0375d4 */
static const float efx8 =  1.0270333290e+00;	/* 0x3f8375d4 */
static const float pp0  =  1.2837916613e-01;	/* 0x3e0375d4 */
static const float pp1  = -3.2504209876e-01;	/* 0xbea66beb */
static const float pp2  = -2.8481749818e-02;	/* 0xbce9528f */
static const float pp3  = -5.7702702470e-03;	/* 0xbbbd1489 */
static const float pp4  = -2.3763017452e-05;	/* 0xb7c756b1 */
static const float qq1  =  3.9791721106e-01;	/* 0x3ecbbbce */
static const float qq2  =  6.5022252500e-02;	/* 0x3d852a63 */
static const float qq3  =  5.0813062117e-03;	/* 0x3ba68116 */
static const float qq4  =  1.3249473704e-04;	/* 0x390aee49 */
static const float qq5  = -3.9602282413e-06;	/* 0xb684e21a */

/* Coefficients for approximation to erf in [0.84375,1.25].  */
static const float pa0  = -2.3621185683e-03;	/* 0xbb1acdc6 */
static const float pa1  =  4.1485610604e-01;	/* 0x3ed46805 */
static const float pa2  = -3.7220788002e-01;	/* 0xbebe9208 */
static const float pa3  =  3.1834661961e-01;	/* 0x3ea2fe54 */
static const float pa4  = -1.1089469492e-01;	/* 0xbde31cc2 */
static const float pa5  =  3.5478305072e-02;	/* 0x3d1151b3 */
static const float pa6  = -2.1663755178e-03;	/* 0xbb0df9c0 */
static const float qa1  =  1.0642088205e-01;	/* 0x3dd9f331 */
static const float qa2  =  5.4039794207e-01;	/* 0x3f0a5785 */
static const float qa3  =  7.1828655899e-02;	/* 0x3d931ae7 */
static const float qa4  =  1.2617121637e-01;	/* 0x3e013307 */
static const float qa5  =  1.3637083583e-02;	/* 0x3c5f6e13 */
static const float qa6  =  1.1984500103e-02;	/* 0x3c445aa3 */

/* Coefficients for approximation to erfc in [1.25,1/0.35].  */
static const float ra0  = -9.8649440333e-03;	/* 0xbc21a093 */
static const float ra1  = -6.9385856390e-01;	/* 0xbf31a0b7 */
static const float ra2  = -1.0558626175e+01;	/* 0xc128f022 */
static const float ra3  = -6.2375331879e+01;	/* 0xc2798057 */
static const float ra4  = -1.6239666748e+02;	/* 0xc322658c */
static const float ra5  = -1.8460508728e+02;	/* 0xc3389ae7 */
static const float ra6  = -8.1287437439e+01;	/* 0xc2a2932b */
static const float ra7  = -9.8143291473e+00;	/* 0xc11d077e */
static const float sa1  =  1.9651271820e+01;	/* 0x419d35ce */
static const float sa2  =  1.3765776062e+02;	/* 0x4309a863 */
static const float sa3  =  4.3456588745e+02;	/* 0x43d9486f */
static const float sa4  =  6.4538726807e+02;	/* 0x442158c9 */
static const float sa5  =  4.2900814819e+02;	/* 0x43d6810b */
static const float sa6  =  1.0863500214e+02;	/* 0x42d9451f */
static const float sa7  =  6.5702495575e+00;	/* 0x40d23f7c */
static const float sa8  = -6.0424413532e-02;	/* 0xbd777f97 */

/* Coefficients for approximation to erfc in [1/.35,28].  */
static const float rb0  = -9.8649431020e-03;	/* 0xbc21a092 */
static const float rb1  = -7.9928326607e-01;	/* 0xbf4c9dd4 */
static const float rb2  = -1.7757955551e+01;	/* 0xc18e104b */
static const float rb3  = -1.6063638306e+02;	/* 0xc320a2ea */
static const float rb4  = -6.3756646729e+02;	/* 0xc41f6441 */
static const float rb5  = -1.0250950928e+03;	/* 0xc480230b */
static const float rb6  = -4.8351919556e+02;	/* 0xc3f1c275 */
static const float sb1  =  3.0338060379e+01;	/* 0x41f2b459 */
static const float sb2  =  3.2579251099e+02;	/* 0x43a2e571 */
static const float sb3  =  1.5367296143e+03;	/* 0x44c01759 */
static const float sb4  =  3.1998581543e+03;	/* 0x4547fdbb */
static const float sb5  =  2.5530502930e+03;	/* 0x451f90ce */
static const float sb6  =  4.7452853394e+02;	/* 0x43ed43a7 */
static const float sb7  = -2.2440952301e+01;	/* 0xc1b38712 */
|
|
||||||
|
|
||||||
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsvf)
|
|
||||||
|
|
||||||
DEF_VS_MATH_FUNC (v64sf, erff, v64sf x)
|
|
||||||
{
|
|
||||||
FUNCTION_INIT (v64sf);
|
|
||||||
|
|
||||||
v64si hx;
|
|
||||||
GET_FLOAT_WORD (hx, x, NO_COND);
|
|
||||||
v64si ix = hx & 0x7fffffff;
|
|
||||||
|
|
||||||
VECTOR_IF (ix >= 0x7f800000, cond) /* erf(nan)=nan */
|
|
||||||
v64si i = (hx >> 31) << 1;
|
|
||||||
/* erf(+-inf)=+-1 */
|
|
||||||
VECTOR_RETURN (__builtin_convertvector (1 - i, v64sf) + 1.0f / x, cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
VECTOR_IF (ix < 0x3f580000, cond) /* |x|<0.84375 */
|
|
||||||
VECTOR_IF2 (ix < 0x31800000, cond2, cond) /* |x|<2**-28 */
|
|
||||||
VECTOR_IF2 (ix < 0x04000000, cond3, cond2) /* avoid underflow */
|
|
||||||
VECTOR_RETURN (0.125f*(8.0f*x + efx8*x), cond3);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_RETURN (x + efx*x, cond2);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
v64sf z = x*x;
|
|
||||||
v64sf r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
|
|
||||||
v64sf s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
|
|
||||||
v64sf y = r/s;
|
|
||||||
|
|
||||||
VECTOR_RETURN (x + x*y, cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
VECTOR_IF (ix < 0x3fa00000, cond) /* 0.84375 <= |x| < 1.25 */
|
|
||||||
v64sf s = __builtin_gcn_fabsvf (x) - 1.0f;
|
|
||||||
v64sf P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
|
|
||||||
v64sf Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
|
|
||||||
VECTOR_IF2 (hx >= 0, cond2, cond)
|
|
||||||
VECTOR_RETURN (erx + P/Q, cond2);
|
|
||||||
VECTOR_ELSE2 (cond2, cond)
|
|
||||||
VECTOR_RETURN (-erx - P/Q, cond2);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
VECTOR_IF (ix >= 0x40c00000, cond) /* inf>|x|>=6 */
|
|
||||||
VECTOR_IF2 (hx >= 0, cond2, cond)
|
|
||||||
VECTOR_RETURN (VECTOR_INIT (1.0f - tiny), cond2);
|
|
||||||
VECTOR_ELSE2 (cond2, cond)
|
|
||||||
VECTOR_RETURN (VECTOR_INIT (tiny - 1.0f), cond2);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
x = __builtin_gcn_fabsvf(x);
|
|
||||||
v64sf s = 1.0f / (x*x);
|
|
||||||
v64sf R, S;
|
|
||||||
VECTOR_IF (ix < 0x4036DB6E, cond) /* |x| < 1/0.35 */
|
|
||||||
VECTOR_COND_MOVE (R, ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(
|
|
||||||
ra5+s*(ra6+s*ra7)))))), cond);
|
|
||||||
VECTOR_COND_MOVE (S, one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(
|
|
||||||
sa5+s*(sa6+s*(sa7+s*sa8))))))), cond);
|
|
||||||
VECTOR_ELSE (cond) /* |x| >= 1/0.35 */
|
|
||||||
VECTOR_COND_MOVE (R, rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(
|
|
||||||
rb5+s*rb6))))), cond);
|
|
||||||
VECTOR_COND_MOVE (S, one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(
|
|
||||||
sb5+s*(sb6+s*sb7)))))), cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
GET_FLOAT_WORD (ix, x, NO_COND);
|
|
||||||
v64sf z;
|
|
||||||
SET_FLOAT_WORD (z, ix & 0xfffff000, NO_COND);
|
|
||||||
v64sf r = v64sf_expf_aux (-z*z - 0.5625f, __mask)
|
|
||||||
* v64sf_expf_aux ((z-x)*(z+x) + R/S, __mask);
|
|
||||||
VECTOR_RETURN (one - r/x, hx >= 0);
|
|
||||||
VECTOR_RETURN (r/x - one, hx < 0);
|
|
||||||
|
|
||||||
FUNCTION_RETURN;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (erff, sf, sf)
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,88 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/math/ef_exp.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
static const float huge = 1.0e+30;
static const float twom100 = 7.8886090522e-31;	/* 2**-100=0x0d800000 */

/* High and low parts of ln2; element 1 is the negation of element 0.  */
static const float ln2HI[2] = {
   6.9313812256e-01,		/* 0x3f317180 */
  -6.9313812256e-01,		/* 0xbf317180 */
};
static const float ln2LO[2] = {
   9.0580006145e-06,		/* 0x3717f7d1 */
  -9.0580006145e-06,		/* 0xb717f7d1 */
};

static const float invln2 = 1.4426950216e+00;	/* 0x3fb8aa3b */

/* Polynomial coefficients used on the reduced argument.  */
static const float P1 =  1.6666667163e-01;	/* 0x3e2aaaab */
static const float P2 = -2.7777778450e-03;	/* 0xbb360b61 */
static const float P3 =  6.6137559770e-05;	/* 0x388ab355 */
static const float P4 = -1.6533901999e-06;	/* 0xb5ddea0e */
static const float P5 =  4.1381369442e-08;	/* 0x3331bb4c */
|
|
||||||
|
|
||||||
DEF_VS_MATH_FUNC (v64sf, expf, v64sf x)
|
|
||||||
{
|
|
||||||
FUNCTION_INIT (v64sf);
|
|
||||||
|
|
||||||
v64si k = VECTOR_INIT (0);
|
|
||||||
v64si sx;
|
|
||||||
GET_FLOAT_WORD(sx, x, NO_COND);
|
|
||||||
v64si xsb = (sx>>31)&1; /* sign bit of x */
|
|
||||||
v64si hx = sx & 0x7fffffff; /* high word of |x| */
|
|
||||||
|
|
||||||
/* filter out non-finite argument */
|
|
||||||
VECTOR_RETURN (x+x, FLT_UWORD_IS_NAN(hx)); /* NaN */
|
|
||||||
VECTOR_RETURN (x, FLT_UWORD_IS_INFINITE(hx) & (xsb == 0));
|
|
||||||
VECTOR_RETURN (VECTOR_INIT (0.0f), FLT_UWORD_IS_INFINITE (hx)); /* exp(+-inf)={inf,0} */
|
|
||||||
VECTOR_RETURN (v64sf_math_oflowf (VECTOR_INIT (0)), sx > FLT_UWORD_LOG_MAX); /* overflow */
|
|
||||||
VECTOR_RETURN (v64sf_math_uflowf (VECTOR_INIT (0)), (sx < 0) & (hx > FLT_UWORD_LOG_MIN)); /* underflow */
|
|
||||||
|
|
||||||
/* argument reduction */
|
|
||||||
v64sf hi, lo;
|
|
||||||
VECTOR_IF (hx > 0x3eb17218, cond) /* if |x| > 0.5 ln2 */
|
|
||||||
VECTOR_IF2 (hx < 0x3F851592, cond2, cond) /* and |x| < 1.5 ln2 */
|
|
||||||
VECTOR_COND_MOVE (hi, x-ln2HI[0], cond2 & (xsb == 0));
|
|
||||||
VECTOR_COND_MOVE (hi, x-ln2HI[1], cond2 & (xsb == 1));
|
|
||||||
VECTOR_COND_MOVE (lo, VECTOR_INIT (ln2LO[0]), cond2 & (xsb == 0));
|
|
||||||
VECTOR_COND_MOVE (lo, VECTOR_INIT (ln2LO[1]), cond2 & (xsb == 1));
|
|
||||||
VECTOR_COND_MOVE (k, 1-xsb-xsb, cond2);
|
|
||||||
VECTOR_ELSE2 (cond2, cond)
|
|
||||||
VECTOR_COND_MOVE (k, __builtin_convertvector (invln2*x + 0.5f, v64si), cond2 & (xsb == 0));
|
|
||||||
VECTOR_COND_MOVE (k, __builtin_convertvector (invln2*x - 0.5f, v64si), cond2 & (xsb == 1));
|
|
||||||
v64sf t = __builtin_convertvector (k, v64sf);
|
|
||||||
VECTOR_COND_MOVE (hi, x - t*ln2HI[0], cond2); /* t*ln2HI is exact here */
|
|
||||||
VECTOR_COND_MOVE (lo, t*ln2LO[0], cond2);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_COND_MOVE (x, hi - lo, cond);
|
|
||||||
VECTOR_ELSEIF (hx < 0x34000000, cond) /* when |x|<2**-23 */
|
|
||||||
VECTOR_RETURN (1.0f+x, cond & (huge+x > 1.0f)); /* trigger inexact */
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
/* x is now in primary range */
|
|
||||||
v64sf t = x*x;
|
|
||||||
v64sf c = x - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
|
|
||||||
VECTOR_RETURN (1.0f - ((x*c)/(c-2.0f)-x), k==0);
|
|
||||||
v64sf y = 1.0f - ((lo-(x*c)/(2.0f-c))-hi);
|
|
||||||
VECTOR_IF (k >= -125, cond)
|
|
||||||
v64si hy;
|
|
||||||
GET_FLOAT_WORD(hy, y, cond);
|
|
||||||
SET_FLOAT_WORD(y,hy+(k<<23), cond); /* add k to y's exponent */
|
|
||||||
VECTOR_RETURN (y, cond);
|
|
||||||
VECTOR_ELSE (cond)
|
|
||||||
v64si hy;
|
|
||||||
GET_FLOAT_WORD(hy, y, cond);
|
|
||||||
SET_FLOAT_WORD(y, hy+((k+100)<<23), cond); /* add k to y's exponent */
|
|
||||||
VECTOR_RETURN (y*twom100, cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
FUNCTION_RETURN;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (expf, sf, sf)
|
|
|
@ -1,18 +0,0 @@
|
||||||
/* Copyright (C) 2002 by Red Hat, Incorporated. All rights reserved.
|
|
||||||
*
|
|
||||||
* Permission to use, copy, modify, and distribute this software
|
|
||||||
* is freely granted, provided that this notice is preserved.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/sf_exp2.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64sf v64sf_powf_aux (v64sf, v64sf, v64si);
|
|
||||||
|
|
||||||
DEF_VS_MATH_FUNC (v64sf, exp2f, v64sf x)
|
|
||||||
{
|
|
||||||
return v64sf_powf_aux (VECTOR_INIT (2.0f), x, __mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (exp2f, sf, sf)
|
|
|
@ -1,23 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/common/sf_finite.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
DEF_VS_MATH_FUNC (v64si, finitef, v64sf x)
|
|
||||||
{
|
|
||||||
v64si ix = CAST_VECTOR (v64si, x);
|
|
||||||
ix &= 0x7fffffff;
|
|
||||||
return FLT_UWORD_IS_FINITE(ix);
|
|
||||||
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (finitef, si, sf)
|
|
|
@ -1,133 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/sf_fmod.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
DEF_VS_MATH_FUNC (v64sf, fmodf, v64sf x, v64sf y)
|
|
||||||
{
|
|
||||||
FUNCTION_INIT(v64sf);
|
|
||||||
|
|
||||||
v64si hx, hy, hz;
|
|
||||||
GET_FLOAT_WORD (hx, x, NO_COND);
|
|
||||||
GET_FLOAT_WORD (hy, y, NO_COND);
|
|
||||||
v64si sx = hx & 0x80000000; /* sign of x */
|
|
||||||
hx ^=sx; /* |x| */
|
|
||||||
hy &= 0x7fffffff; /* |y| */
|
|
||||||
|
|
||||||
v64sf zeroes = VECTOR_MERGE (VECTOR_INIT (-0.0f),
|
|
||||||
VECTOR_INIT (0.0f),
|
|
||||||
sx != 0);
|
|
||||||
|
|
||||||
/* purge off exception values */
|
|
||||||
VECTOR_IF ((hy == 0) | (hx >= 0x7f800000)
|
|
||||||
| (hy > 0x7f800000), cond) // y=0, or x not finite or y is NaN
|
|
||||||
VECTOR_RETURN ((x * y) / (x * y), cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_IF (hx < hy, cond) // |x|<|y| return x
|
|
||||||
VECTOR_RETURN (x, cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_IF (hx == hy, cond)
|
|
||||||
VECTOR_RETURN (zeroes, hx == hy); // |x|=|y| return x*0
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
/* determine ix = ilogb(x) */
|
|
||||||
v64si ix;
|
|
||||||
VECTOR_IF (hx < 0x00800000, cond) // subnormal x
|
|
||||||
ix = VECTOR_INIT (-126);
|
|
||||||
for (v64si i = (hx << 8);
|
|
||||||
!ALL_ZEROES_P (cond & (i > 0));
|
|
||||||
i <<= 1)
|
|
||||||
VECTOR_COND_MOVE (ix, ix - 1, cond & (i > 0));
|
|
||||||
VECTOR_ELSE (cond)
|
|
||||||
VECTOR_COND_MOVE (ix, (hx >> 23) - 127, cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
/* determine iy = ilogb(y) */
|
|
||||||
v64si iy;
|
|
||||||
VECTOR_IF (hy < 0x00800000, cond) // subnormal y
|
|
||||||
iy = VECTOR_INIT (-126);
|
|
||||||
for (v64si i = (hy << 8); !ALL_ZEROES_P (cond & (i >= 0)); i <<= 1)
|
|
||||||
VECTOR_COND_MOVE (iy, iy - 1, cond & (i >= 0));
|
|
||||||
VECTOR_ELSE (cond)
|
|
||||||
VECTOR_COND_MOVE (iy, (hy >> 23) - 127, cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
/* set up {hx,lx}, {hy,ly} and align y to x */
|
|
||||||
VECTOR_IF (ix >= -126, cond)
|
|
||||||
VECTOR_COND_MOVE (hx, 0x00800000 | (0x007fffff & hx), cond);
|
|
||||||
VECTOR_ELSE (cond) // subnormal x, shift x to normal
|
|
||||||
{
|
|
||||||
v64si n = -126 - ix;
|
|
||||||
VECTOR_COND_MOVE (hx, hx << n, cond);
|
|
||||||
}
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_IF (iy >= -126, cond)
|
|
||||||
VECTOR_COND_MOVE (hy, 0x00800000 | (0x007fffff & hy), cond);
|
|
||||||
VECTOR_ELSE (cond) // subnormal y, shift y to normal
|
|
||||||
{
|
|
||||||
v64si n = -126 - iy;
|
|
||||||
VECTOR_COND_MOVE (hy, hy << n, cond);
|
|
||||||
}
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
/* fix point fmod */
|
|
||||||
v64si n = ix - iy;
|
|
||||||
v64si cond = n != 0;
|
|
||||||
|
|
||||||
while (!ALL_ZEROES_P (cond))
|
|
||||||
{
|
|
||||||
hz = hx - hy;
|
|
||||||
VECTOR_IF2 (hz < 0, cond2, cond)
|
|
||||||
VECTOR_COND_MOVE (hx, hx + hx, cond2);
|
|
||||||
VECTOR_ELSE2 (cond2, cond)
|
|
||||||
VECTOR_IF2 (hz == 0, cond3, cond2) // return sign(x)*0
|
|
||||||
VECTOR_RETURN (zeroes, cond3);
|
|
||||||
VECTOR_ELSE2 (cond3, cond2)
|
|
||||||
VECTOR_COND_MOVE (hx, hz + hz, cond2);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
n += cond; // Active lanes should be -1
|
|
||||||
cond &= (n != 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
hz = hx - hy;
|
|
||||||
VECTOR_COND_MOVE (hx, hz, hz >= 0);
|
|
||||||
|
|
||||||
/* convert back to floating value and restore the sign */
|
|
||||||
VECTOR_RETURN (zeroes, hx == 0); // return sign(x)*0
|
|
||||||
|
|
||||||
cond = hx < 0x00800000;
|
|
||||||
while (!ALL_ZEROES_P (cond)) // normalize x
|
|
||||||
{
|
|
||||||
VECTOR_COND_MOVE (hx, hx + hx, cond);
|
|
||||||
iy += cond; // Active lanes should be -1
|
|
||||||
|
|
||||||
cond &= (hx < 0x00800000);
|
|
||||||
}
|
|
||||||
VECTOR_IF (iy >= -126, cond) // normalize output
|
|
||||||
VECTOR_COND_MOVE (hx, (hx - 0x00800000) | ((iy + 127) << 23), cond);
|
|
||||||
SET_FLOAT_WORD (x, hx | sx, cond);
|
|
||||||
VECTOR_ELSE (cond) // subnormal output */
|
|
||||||
n = -126 - iy;
|
|
||||||
hx >>= n;
|
|
||||||
SET_FLOAT_WORD (x, hx | sx, cond);
|
|
||||||
x *= VECTOR_INIT (1.0f); /* create necessary signal */
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
VECTOR_RETURN (x, NO_COND); /* exact output */
|
|
||||||
|
|
||||||
FUNCTION_RETURN;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEF_VARIANTS2 (fmodf, sf, sf)
|
|
|
@ -1,10 +0,0 @@
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64sf v64sf_lgammaf_aux (v64sf x, v64si __mask);
|
|
||||||
|
|
||||||
DEF_VS_MATH_FUNC (v64sf, gammaf, v64sf x)
|
|
||||||
{
|
|
||||||
return v64sf_lgammaf_aux(x, __mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (gammaf, sf, sf)
|
|
|
@ -1,104 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/ef_hypot.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64sf v64sf_sqrtf_aux (v64sf, v64si);
|
|
||||||
|
|
||||||
DEF_VS_MATH_FUNC (v64sf, hypotf, v64sf x, v64sf y)
|
|
||||||
{
|
|
||||||
FUNCTION_INIT (v64sf);
|
|
||||||
|
|
||||||
v64sf a = x;
|
|
||||||
v64sf b = y;
|
|
||||||
|
|
||||||
v64si ha;
|
|
||||||
GET_FLOAT_WORD (ha, x, NO_COND);
|
|
||||||
ha &= 0x7fffffffL;
|
|
||||||
v64si hb;
|
|
||||||
GET_FLOAT_WORD (hb, y, NO_COND);
|
|
||||||
hb &= 0x7fffffffL;
|
|
||||||
|
|
||||||
VECTOR_IF (hb > ha, cond)
|
|
||||||
v64si j = ha;
|
|
||||||
VECTOR_COND_MOVE (ha, hb, cond);
|
|
||||||
VECTOR_COND_MOVE (hb, j, cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
SET_FLOAT_WORD (a, ha, NO_COND); /* a <- |a| */
|
|
||||||
SET_FLOAT_WORD (b, hb, NO_COND); /* b <- |b| */
|
|
||||||
VECTOR_IF((ha - hb) > 0xf000000L, cond) // x/y > 2**30 */
|
|
||||||
VECTOR_RETURN (a + b, cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
v64si k = VECTOR_INIT (0);
|
|
||||||
|
|
||||||
VECTOR_IF (ha > 0x58800000L, cond) /* a>2**50 */
|
|
||||||
VECTOR_IF2 (ha >= 0x7f800000L, cond2, cond) /* Inf or NaN */
|
|
||||||
v64sf w = a + b; // for sNaN */
|
|
||||||
VECTOR_COND_MOVE (w, a, cond2 & (ha == 0x7f800000));
|
|
||||||
VECTOR_COND_MOVE (w, b, cond2 & (hb == 0x7f800000));
|
|
||||||
VECTOR_RETURN (w, cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
/* scale a and b by 2**-60 */
|
|
||||||
VECTOR_COND_MOVE (ha, ha - 0x5d800000, cond);
|
|
||||||
VECTOR_COND_MOVE (hb, hb - 0x5d800000, cond);
|
|
||||||
VECTOR_COND_MOVE (k, k + 60, cond);
|
|
||||||
SET_FLOAT_WORD (a, ha, cond);
|
|
||||||
SET_FLOAT_WORD (b, hb, cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_IF (hb < 0x26800000, cond) /* b < 2**-50 */
|
|
||||||
VECTOR_IF2 (hb <= 0x007fffff, cond2, cond) /* subnormal b or 0 */
|
|
||||||
VECTOR_RETURN (a, cond2 & (hb == 0));
|
|
||||||
/* t1=2^126 */
|
|
||||||
v64sf t1;
|
|
||||||
SET_FLOAT_WORD (t1, VECTOR_INIT (0x3f000000), cond2);
|
|
||||||
VECTOR_COND_MOVE (b, b * t1, cond2);
|
|
||||||
VECTOR_COND_MOVE (a, a * t1, cond2);
|
|
||||||
VECTOR_COND_MOVE (k, k - 126, cond2);
|
|
||||||
VECTOR_ELSE2 (cond2, cond) /* scale a and b by 2^60 */
|
|
||||||
VECTOR_COND_MOVE (ha, ha + 0x5d800000, cond2); /* a *= 2^60 */
|
|
||||||
VECTOR_COND_MOVE (hb, hb + 0x5d800000, cond2); /* b *= 2^60 */
|
|
||||||
VECTOR_COND_MOVE (k, k - 60, cond2);
|
|
||||||
SET_FLOAT_WORD (a, ha, cond2);
|
|
||||||
SET_FLOAT_WORD (b, hb, cond2);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_ENDIF
|
|
||||||
/* medium size a and b */
|
|
||||||
v64sf w = a - b;
|
|
||||||
VECTOR_IF (w > b, cond)
|
|
||||||
v64sf t1;
|
|
||||||
SET_FLOAT_WORD (t1, ha & 0xfffff000, cond);
|
|
||||||
v64sf t2 = a - t1;
|
|
||||||
VECTOR_COND_MOVE (w, v64sf_sqrtf_aux (t1*t1 - (b*(-b) - t2 * (a + t1)), __mask), cond);
|
|
||||||
VECTOR_ELSE (cond)
|
|
||||||
VECTOR_COND_MOVE (a, a+a, cond);
|
|
||||||
v64sf y1;
|
|
||||||
SET_FLOAT_WORD (y1, hb & 0xfffff000, cond);
|
|
||||||
v64sf y2 = b - y1;
|
|
||||||
v64sf t1;
|
|
||||||
SET_FLOAT_WORD (t1, ha + 0x00800000, cond);
|
|
||||||
v64sf t2 = a - t1;
|
|
||||||
VECTOR_COND_MOVE (w, v64sf_sqrtf_aux (t1*y1 - (w*(-w) - (t1*y2 + t2*b)), __mask), cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_IF (k != 0, cond)
|
|
||||||
v64sf t1;
|
|
||||||
SET_FLOAT_WORD (t1, 0x3f800000 + (k << 23), cond);
|
|
||||||
VECTOR_RETURN (t1 * w, cond);
|
|
||||||
VECTOR_ELSE (cond)
|
|
||||||
VECTOR_RETURN (w, cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
FUNCTION_RETURN;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEF_VARIANTS2 (hypotf, sf, sf)
|
|
|
@ -1,42 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/common/sf_ilogb.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
DEF_VS_MATH_FUNC (v64si, ilogbf, v64sf x)
|
|
||||||
{
|
|
||||||
FUNCTION_INIT(v64si);
|
|
||||||
|
|
||||||
v64si hx, ix;
|
|
||||||
GET_FLOAT_WORD (hx, x, NO_COND);
|
|
||||||
hx &= 0x7fffffff;
|
|
||||||
VECTOR_IF (FLT_UWORD_IS_ZERO (hx), cond)
|
|
||||||
VECTOR_RETURN (VECTOR_INIT (-__INT_MAX__), cond); // FP_ILOGB0
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_IF (FLT_UWORD_IS_SUBNORMAL (hx), cond)
|
|
||||||
ix = VECTOR_INIT (-126);
|
|
||||||
for (v64si i = (hx << 8);
|
|
||||||
!ALL_ZEROES_P (cond & (i > 0));
|
|
||||||
i <<= 1)
|
|
||||||
VECTOR_COND_MOVE (ix, ix - 1, cond & (i > 0));
|
|
||||||
VECTOR_RETURN (ix, cond);
|
|
||||||
VECTOR_ELSEIF (~FLT_UWORD_IS_FINITE (hx), cond)
|
|
||||||
VECTOR_RETURN (VECTOR_INIT (__INT_MAX__), cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
VECTOR_RETURN ((hx >> 23) - 127, NO_COND);
|
|
||||||
|
|
||||||
FUNCTION_RETURN;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (ilogbf, si, sf)
|
|
|
@ -1,23 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/common/sf_isnan.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
/* Vector isnanf: per-lane NaN test performed on the raw bit pattern.  */
DEF_VS_MATH_FUNC (v64si, isnanf, v64sf x)
{
  /* Reinterpret each lane as an integer and discard the sign bit;
     FLT_UWORD_IS_NAN then classifies the remaining magnitude bits.  */
  v64si magnitude = CAST_VECTOR (v64si, x);
  magnitude = magnitude & 0x7fffffff;
  return FLT_UWORD_IS_NAN (magnitude);
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (isnanf, si, sf)
|
|
|
@ -1,11 +0,0 @@
|
||||||
/* Based on newlib/libm/mathfp/sf_ispos.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64si
|
|
||||||
v64sf_isposf (v64sf x)
|
|
||||||
{
|
|
||||||
v64si wx = CAST_VECTOR (v64si, x);
|
|
||||||
|
|
||||||
return (wx & 0x80000000) == 0;
|
|
||||||
}
|
|
|
@ -1,30 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/math/wf_lgamma.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64si v64sf_finitef_aux (v64sf x, v64si __mask);
|
|
||||||
v64sf v64sf_lgammaf_r_aux (v64sf x, v64si *signgamp, v64si __mask);
|
|
||||||
|
|
||||||
/* Vector lgammaf wrapper: evaluates v64sf_lgammaf_r_aux, storing the sign
   of gamma in the reentrancy structure, and reports overflow via errno.  */
DEF_VS_MATH_FUNC (v64sf, lgammaf, v64sf x)
{
  v64sf y = v64sf_lgammaf_r_aux (x, &(_REENT_V64SI_SIGNGAM (_V64_REENT)),
                                 __mask);

  /* A lane overflowed when its input is finite but its result is not.
     The test is made per lane so that one overflowing lane is reported
     even if other lanes produced finite results.
     NOTE(review): relies on v64sf_finitef_aux returning an all-ones /
     all-zeros lane mask, as its use under `~' elsewhere in this library
     suggests -- confirm.  */
  v64si overflowed = ~v64sf_finitef_aux (y, __mask)
                     & v64sf_finitef_aux (x, __mask);
  if (!ALL_ZEROES_P (overflowed))
    {
      /* lgamma(finite) overflow */
      errno = ERANGE;
    }
  return y;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (lgammaf, sf, sf)
|
|
|
@ -1,286 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/math/erf_lgamma.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
/* Single-precision constants for the lgammaf_r kernel.  The hex value in
   each trailing comment is the IEEE-754 bit pattern of the constant.  */
static const float
  two23 = 8.3886080000e+06,   /* 0x4b000000 */
  half  = 5.0000000000e-01,   /* 0x3f000000 */
  one   = 1.0000000000e+00,   /* 0x3f800000 */
  pi    = 3.1415927410e+00,   /* 0x40490fdb */
  /* a0..a11: polynomial used by the i==0 case for x < 2.  */
  a0    = 7.7215664089e-02,   /* 0x3d9e233f */
  a1    = 3.2246702909e-01,   /* 0x3ea51a66 */
  a2    = 6.7352302372e-02,   /* 0x3d89f001 */
  a3    = 2.0580807701e-02,   /* 0x3ca89915 */
  a4    = 7.3855509982e-03,   /* 0x3bf2027e */
  a5    = 2.8905137442e-03,   /* 0x3b3d6ec6 */
  a6    = 1.1927076848e-03,   /* 0x3a9c54a1 */
  a7    = 5.1006977446e-04,   /* 0x3a05b634 */
  a8    = 2.2086278477e-04,   /* 0x39679767 */
  a9    = 1.0801156895e-04,   /* 0x38e28445 */
  a10   = 2.5214456400e-05,   /* 0x37d383a2 */
  a11   = 4.4864096708e-05,   /* 0x383c2c75 */
  /* tc, tf, tt and t0..t14: used by the i==1 case for x < 2.  */
  tc    = 1.4616321325e+00,   /* 0x3fbb16c3 */
  tf    = -1.2148628384e-01,  /* 0xbdf8cdcd */
  /* tt = -(tail of tf) */
  tt    = 6.6971006518e-09,   /* 0x31e61c52 */
  t0    = 4.8383611441e-01,   /* 0x3ef7b95e */
  t1    = -1.4758771658e-01,  /* 0xbe17213c */
  t2    = 6.4624942839e-02,   /* 0x3d845a15 */
  t3    = -3.2788541168e-02,  /* 0xbd064d47 */
  t4    = 1.7970675603e-02,   /* 0x3c93373d */
  t5    = -1.0314224288e-02,  /* 0xbc28fcfe */
  t6    = 6.1005386524e-03,   /* 0x3bc7e707 */
  t7    = -3.6845202558e-03,  /* 0xbb7177fe */
  t8    = 2.2596477065e-03,   /* 0x3b141699 */
  t9    = -1.4034647029e-03,  /* 0xbab7f476 */
  t10   = 8.8108185446e-04,   /* 0x3a66f867 */
  t11   = -5.3859531181e-04,  /* 0xba0d3085 */
  t12   = 3.1563205994e-04,   /* 0x39a57b6b */
  t13   = -3.1275415677e-04,  /* 0xb9a3f927 */
  t14   = 3.3552918467e-04,   /* 0x39afe9f7 */
  /* u0..u5 / v1..v5: numerator / denominator of the i==2 case.  */
  u0    = -7.7215664089e-02,  /* 0xbd9e233f */
  u1    = 6.3282704353e-01,   /* 0x3f2200f4 */
  u2    = 1.4549225569e+00,   /* 0x3fba3ae7 */
  u3    = 9.7771751881e-01,   /* 0x3f7a4bb2 */
  u4    = 2.2896373272e-01,   /* 0x3e6a7578 */
  u5    = 1.3381091878e-02,   /* 0x3c5b3c5e */
  v1    = 2.4559779167e+00,   /* 0x401d2ebe */
  v2    = 2.1284897327e+00,   /* 0x4008392d */
  v3    = 7.6928514242e-01,   /* 0x3f44efdf */
  v4    = 1.0422264785e-01,   /* 0x3dd572af */
  v5    = 3.2170924824e-03,   /* 0x3b52d5db */
  /* s0..s6 / r1..r6: numerator / denominator used for x < 8.  */
  s0    = -7.7215664089e-02,  /* 0xbd9e233f */
  s1    = 2.1498242021e-01,   /* 0x3e5c245a */
  s2    = 3.2577878237e-01,   /* 0x3ea6cc7a */
  s3    = 1.4635047317e-01,   /* 0x3e15dce6 */
  s4    = 2.6642270386e-02,   /* 0x3cda40e4 */
  s5    = 1.8402845599e-03,   /* 0x3af135b4 */
  s6    = 3.1947532989e-05,   /* 0x3805ff67 */
  r1    = 1.3920053244e+00,   /* 0x3fb22d3b */
  r2    = 7.2193557024e-01,   /* 0x3f38d0c5 */
  r3    = 1.7193385959e-01,   /* 0x3e300f6e */
  r4    = 1.8645919859e-02,   /* 0x3c98bf54 */
  r5    = 7.7794247773e-04,   /* 0x3a4beed6 */
  r6    = 7.3266842264e-06,   /* 0x36f5d7bd */
  /* w0..w6: series used for 8 <= x < 2**58.  */
  w0    = 4.1893854737e-01,   /* 0x3ed67f1d */
  w1    = 8.3333335817e-02,   /* 0x3daaaaab */
  w2    = -2.7777778450e-03,  /* 0xbb360b61 */
  w3    = 7.9365057172e-04,   /* 0x3a500cfd */
  w4    = -5.9518753551e-04,  /* 0xba1c065c */
  w5    = 8.3633989561e-04,   /* 0x3a5b3dd2 */
  w6    = -1.6309292987e-03;  /* 0xbad5c4e8 */

static const float zero = 0.0000000000e+00;
|
|
||||||
|
|
||||||
v64sf v64sf_cosf_aux (v64sf x, v64si __mask);
|
|
||||||
v64sf v64sf_logf_aux (v64sf x, v64si __mask);
|
|
||||||
v64sf v64sf_sinf_aux (v64sf x, v64si __mask);
|
|
||||||
|
|
||||||
#if defined (__has_builtin) \
|
|
||||||
&& __has_builtin (__builtin_gcn_floorvf) \
|
|
||||||
&& __has_builtin (__builtin_gcn_fabsvf)
|
|
||||||
|
|
||||||
static v64sf
|
|
||||||
v64sf_sin_pif (v64sf x)
|
|
||||||
{
|
|
||||||
// Explicitly create mask for internal function.
|
|
||||||
v64si __mask = VECTOR_INIT (-1);
|
|
||||||
FUNCTION_INIT (v64sf);
|
|
||||||
|
|
||||||
v64sf y, z;
|
|
||||||
v64si n, ix;
|
|
||||||
|
|
||||||
GET_FLOAT_WORD (ix, x, NO_COND);
|
|
||||||
ix &= 0x7fffffff;
|
|
||||||
|
|
||||||
VECTOR_IF (ix < 0x3e800000, cond)
|
|
||||||
VECTOR_RETURN (v64sf_sinf_aux (pi * x, __mask), cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
y = -x; /* x is assume negative */
|
|
||||||
|
|
||||||
/*
|
|
||||||
* argument reduction, make sure inexact flag not raised if input
|
|
||||||
* is an integer
|
|
||||||
*/
|
|
||||||
z = __builtin_gcn_floorvf (y);
|
|
||||||
VECTOR_IF (z != y, cond)
|
|
||||||
/* inexact anyway */
|
|
||||||
VECTOR_COND_MOVE(y, y * 0.5F, cond);
|
|
||||||
VECTOR_COND_MOVE(y, 2.0F * (y - __builtin_gcn_floorvf (y)), cond); /* y = |x| mod 2.0 */
|
|
||||||
VECTOR_COND_MOVE(n, __builtin_convertvector(y * 4.0F, v64si), cond);
|
|
||||||
VECTOR_ELSE (cond)
|
|
||||||
VECTOR_IF2 (ix >= 0x4b800000, cond2, cond)
|
|
||||||
VECTOR_COND_MOVE(y, VECTOR_INIT(zero), cond2);
|
|
||||||
VECTOR_COND_MOVE(n, VECTOR_INIT(0), cond2); /* y must be even */
|
|
||||||
VECTOR_ELSE2 (cond2, cond)
|
|
||||||
VECTOR_COND_MOVE(z, y + two23 /* exact */, cond2 & (ix < 0x4b000000));
|
|
||||||
GET_FLOAT_WORD (n, z, cond2);
|
|
||||||
VECTOR_COND_MOVE(n, n & 1, cond2);
|
|
||||||
VECTOR_COND_MOVE(y, __builtin_convertvector(n, v64sf), cond2);
|
|
||||||
VECTOR_COND_MOVE(n, n << 2, cond2);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
VECTOR_IF (n == 0, cond)
|
|
||||||
VECTOR_COND_MOVE(y, v64sf_sinf_aux (pi * y, __mask), cond);
|
|
||||||
VECTOR_ELSEIF (n == 1 | n == 2, cond)
|
|
||||||
VECTOR_COND_MOVE(y, v64sf_cosf_aux (pi * (0.5F - y), __mask), cond);
|
|
||||||
VECTOR_ELSEIF (n == 3 | n == 4, cond)
|
|
||||||
VECTOR_COND_MOVE(y, v64sf_sinf_aux (pi * (VECTOR_INIT(one) - y), __mask), cond);
|
|
||||||
VECTOR_ELSEIF (n == 5 | n == 6, cond)
|
|
||||||
VECTOR_COND_MOVE(y, -v64sf_cosf_aux (pi * (y - 1.5F), __mask), cond);
|
|
||||||
VECTOR_ELSE (cond)
|
|
||||||
VECTOR_COND_MOVE(y, v64sf_sinf_aux (pi * (y - 2.0F), __mask), cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
VECTOR_RETURN(-y, NO_COND);
|
|
||||||
FUNCTION_RETURN;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Vector reentrant lgammaf: returns log(|gamma(x)|) per lane and stores
   the sign of gamma(x) (+1 or -1) for each lane through SIGNGAMP.

   Fixes relative to the previous revision:
   - for tiny negative x the sign was written under the outer condition,
     which also flagged tiny positive lanes as negative;
   - the "x <= 0.9" path assigned r for every lane, overwriting the zero
     already stored for lanes with x == 2.  */
DEF_VS_MATH_FUNC (v64sf, lgammaf_r, v64sf x, v64si *signgamp)
{
  FUNCTION_INIT (v64sf);

  v64sf t, y, z, nadj = VECTOR_INIT (0.0F), p, p1, p2, p3, q, r, w;
  v64si i, hx, ix;

  GET_FLOAT_WORD (hx, x, NO_COND);

  /* purge off +-inf, NaN, +-0, and negative arguments */
  *signgamp = VECTOR_INIT (1);
  ix = hx & 0x7fffffff;
  VECTOR_IF (ix >= 0x7f800000, cond)
    VECTOR_RETURN (x * x, cond);
  VECTOR_ENDIF
  VECTOR_IF (ix == 0, cond)
    VECTOR_COND_MOVE (*signgamp, VECTOR_INIT (-1), cond & (hx < 0));
    VECTOR_RETURN (one / (x - x), cond);
  VECTOR_ENDIF
  VECTOR_IF (ix < 0x30800000, cond)  /* |x|<2**-30, return -log(|x|) */
    VECTOR_IF2 (hx < 0, cond2, cond)
      /* Only the negative lanes get a negative sign.  */
      VECTOR_COND_MOVE (*signgamp, VECTOR_INIT (-1), cond2);
      VECTOR_RETURN (-v64sf_logf_aux (-x, __mask), cond2);
    VECTOR_ELSE2 (cond2, cond)
      VECTOR_RETURN (-v64sf_logf_aux (x, __mask), cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF
  VECTOR_IF (hx < 0, cond)
    VECTOR_IF2 (ix >= 0x4b000000, cond2, cond)  /* |x|>=2**23, must be -integer */
      VECTOR_RETURN (one / (x - x), cond2);
    VECTOR_ENDIF
    VECTOR_COND_MOVE (t, v64sf_sin_pif (x), cond);
    VECTOR_IF2 (t == zero, cond2, cond)
      /* tgamma wants NaN instead of INFINITY */
      VECTOR_RETURN (one / (x - x), cond2);  /* -integer */
    VECTOR_ENDIF
    /* Reflection term; r is computed below for -x and combined at the end.  */
    VECTOR_COND_MOVE (nadj, v64sf_logf_aux (VECTOR_INIT (pi) / __builtin_gcn_fabsvf (t * x), __mask), cond);
    VECTOR_COND_MOVE (*signgamp, VECTOR_INIT (-1), cond & (t < zero));
    VECTOR_COND_MOVE (x, -x, cond);
  VECTOR_ENDIF

  /* purge off 1 and 2 */
  VECTOR_IF (ix == 0x3f800000 | ix == 0x40000000, cond)
    VECTOR_COND_MOVE (r, VECTOR_INIT (0.0F), cond);
  /* for x < 2.0 */
  VECTOR_ELSEIF (ix < 0x40000000, cond)
    VECTOR_IF2 (ix <= 0x3f666666, cond2, cond)
      /* lgamma(x) = lgamma(x+1)-log(x) */
      /* Masked so that lanes handled by other branches keep their r.  */
      VECTOR_COND_MOVE (r, -v64sf_logf_aux (x, __mask), cond2);
      VECTOR_IF2 (ix >= 0x3f3b4a20, cond3, cond2)
        VECTOR_COND_MOVE (y, one - x, cond3);
        VECTOR_COND_MOVE (i, VECTOR_INIT (0), cond3);
      VECTOR_ELSEIF2 (ix >= 0x3e6d3308, cond3, cond2)
        VECTOR_COND_MOVE (y, x - (tc - one), cond3);
        VECTOR_COND_MOVE (i, VECTOR_INIT (1), cond3);
      VECTOR_ELSE2 (cond3, cond2)
        VECTOR_COND_MOVE (y, x, cond3);
        VECTOR_COND_MOVE (i, VECTOR_INIT (2), cond3);
      VECTOR_ENDIF
    VECTOR_ELSE2 (cond2, cond)
      VECTOR_COND_MOVE (r, VECTOR_INIT (zero), cond2);
      VECTOR_IF2 (ix >= 0x3fdda618, cond3, cond2)  /* [1.7316,2] */
        VECTOR_COND_MOVE (y, VECTOR_INIT (2.0F) - x, cond3);
        VECTOR_COND_MOVE (i, VECTOR_INIT (0), cond3);
      VECTOR_ELSEIF2 (ix >= 0x3F9da620, cond3, cond2)  /* [1.23,1.73] */
        VECTOR_COND_MOVE (y, x - tc, cond3);
        VECTOR_COND_MOVE (i, VECTOR_INIT (1), cond3);
      VECTOR_ELSE2 (cond3, cond2)
        VECTOR_COND_MOVE (y, x - one, cond3);
        VECTOR_COND_MOVE (i, VECTOR_INIT (2), cond3);
      VECTOR_ENDIF
    VECTOR_ENDIF

    /* Evaluate the approximation selected by i on the reduced argument y.  */
    VECTOR_IF2 (i == 0, cond2, cond)
      VECTOR_COND_MOVE (z, y * y, cond2);
      VECTOR_COND_MOVE (p1, a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10)))), cond2);
      VECTOR_COND_MOVE (p2, z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11))))), cond2);
      VECTOR_COND_MOVE (p, y*p1+p2, cond2);
      VECTOR_COND_MOVE (r, r + (p-(float)0.5*y), cond2);
    VECTOR_ELSEIF2 (i == 1, cond2, cond)
      VECTOR_COND_MOVE (z, y * y, cond2);
      VECTOR_COND_MOVE (w, z * y, cond2);
      VECTOR_COND_MOVE (p1, t0+w*(t3+w*(t6+w*(t9 +w*t12))), cond2);  /* parallel comp */
      VECTOR_COND_MOVE (p2, t1+w*(t4+w*(t7+w*(t10+w*t13))), cond2);
      VECTOR_COND_MOVE (p3, t2+w*(t5+w*(t8+w*(t11+w*t14))), cond2);
      VECTOR_COND_MOVE (p, z*p1-(tt-w*(p2+y*p3)), cond2);
      VECTOR_COND_MOVE (r, r + (tf + p), cond2);
    VECTOR_ELSEIF2 (i == 2, cond2, cond)
      VECTOR_COND_MOVE (p1, y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5))))), cond2);
      VECTOR_COND_MOVE (p2, one+y*(v1+y*(v2+y*(v3+y*(v4+y*v5)))), cond2);
      VECTOR_COND_MOVE (r, r + (-(float)0.5*y + p1/p2), cond2);
    VECTOR_ENDIF
  VECTOR_ELSEIF (ix < 0x41000000, cond)
    /* x < 8.0 */
    VECTOR_COND_MOVE (i, __builtin_convertvector (x, v64si), cond);
    VECTOR_COND_MOVE (t, VECTOR_INIT (zero), cond);
    VECTOR_COND_MOVE (y, x - __builtin_convertvector (i, v64sf), cond);
    VECTOR_COND_MOVE (p, y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6)))))), cond);
    VECTOR_COND_MOVE (q, one+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6))))), cond);
    VECTOR_COND_MOVE (r, half*y+p/q, cond);
    VECTOR_COND_MOVE (z, VECTOR_INIT (one), cond);  /* lgamma(1+s) = log(s) + lgamma(s) */
    /* Vector form of a fall-through switch on i: each test below also
       covers every larger value of i.  */
    VECTOR_IF2 (i == 7, cond2, cond)
      VECTOR_COND_MOVE (z, z * (y+(float)6.0), cond2);
    VECTOR_ENDIF
    VECTOR_IF2 (i == 7 | i == 6, cond2, cond)
      VECTOR_COND_MOVE (z, z * (y+(float)5.0), cond2);
    VECTOR_ENDIF
    VECTOR_IF2 (i <= 7 & i >= 5, cond2, cond)
      VECTOR_COND_MOVE (z, z * (y+(float)4.0), cond2);
    VECTOR_ENDIF
    VECTOR_IF2 (i <= 7 & i >= 4, cond2, cond)
      VECTOR_COND_MOVE (z, z * (y+(float)3.0), cond2);
    VECTOR_ENDIF
    VECTOR_IF2 (i <= 7 & i >= 3, cond2, cond)
      VECTOR_COND_MOVE (z, z * (y+(float)2.0), cond2);
      VECTOR_COND_MOVE (r, r + v64sf_logf_aux (z, __mask), cond2);
    VECTOR_ENDIF
  /* 8.0 <= x < 2**58 */
  VECTOR_ELSEIF (ix < 0x5c800000, cond)
    VECTOR_COND_MOVE (t, v64sf_logf_aux (x, __mask), cond);
    VECTOR_COND_MOVE (z, one / x, cond);
    VECTOR_COND_MOVE (y, z * z, cond);
    VECTOR_COND_MOVE (w, w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*w6))))), cond);
    VECTOR_COND_MOVE (r, (x-half)*(t-one)+w, cond);
  VECTOR_ELSE (cond)
    /* 2**58 <= x <= inf */
    VECTOR_COND_MOVE (r, x*(v64sf_logf_aux (x, __mask)-one), cond);
  VECTOR_ENDIF

  /* Originally negative lanes: apply the reflection adjustment.  */
  VECTOR_IF (hx < 0, cond)
    VECTOR_COND_MOVE (r, nadj - r, cond);
  VECTOR_ENDIF

  VECTOR_RETURN (r, NO_COND);
  FUNCTION_RETURN;
}
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,82 +0,0 @@
|
||||||
/******************************************************************
|
|
||||||
* The following routines are coded directly from the algorithms
|
|
||||||
* and coefficients given in "Software Manual for the Elementary
|
|
||||||
* Functions" by William J. Cody, Jr. and William Waite, Prentice
|
|
||||||
* Hall, 1980.
|
|
||||||
******************************************************************/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/sf_logarithm.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64si v64sf_finitef_aux (v64sf, v64si);
|
|
||||||
v64si v64sf_isnanf_aux (v64sf, v64si);
|
|
||||||
|
|
||||||
/* Numerator coefficients of the rational correction term in logf.  */
static const float a[] = {
  -0.64124943423745581147e+02,
  0.16383943563021534222e+02,
  -0.78956112887481257267
};

/* Denominator coefficients of the same rational term.  */
static const float b[] = {
  -0.76949932108494879777e+03,
  0.31203222091924532844e+03,
  -0.35667977739034646171e+02
};

/* Two-part multiplier applied to the binary exponent in logf.  */
static const float C1 = 0.693145752;
static const float C2 = 1.428606820e-06;
|
|
||||||
|
|
||||||
#if defined (__has_builtin) \
|
|
||||||
&& __has_builtin (__builtin_gcn_frexpvf_mant) \
|
|
||||||
&& __has_builtin (__builtin_gcn_frexpvf_exp)
|
|
||||||
|
|
||||||
/* Vector natural logarithm (single precision).
   Zero lanes return -infinity and set errno to ERANGE, negative lanes
   return NaN and set errno to EDOM, and non-finite lanes return NaN or
   +infinity.  All other lanes are split into mantissa and exponent and
   evaluated with a rational approximation.  */
DEF_VS_MATH_FUNC (v64sf, logf, v64sf x)
{
  FUNCTION_INIT (v64sf);

  /* Domain/range screening.  */
  VECTOR_IF (x == 0.0f, cond)
    errno = ERANGE;
    VECTOR_RETURN (VECTOR_INIT (-z_infinity_f.f), cond);
  VECTOR_ELSEIF (x < 0.0f, cond)
    errno = EDOM;
    VECTOR_RETURN (VECTOR_INIT (z_notanum_f.f), cond);
  VECTOR_ELSEIF (~v64sf_finitef_aux (x, __mask), cond)
    VECTOR_RETURN (VECTOR_MERGE (VECTOR_INIT (z_notanum_f.f),
                                 VECTOR_INIT (z_infinity_f.f),
                                 v64sf_isnanf_aux (x, __mask)),
                   cond);
  VECTOR_ENDIF

  /* Decompose x = mant * 2^expo.  */
  v64sf mant = __builtin_gcn_frexpvf_mant (x);
  v64si expo = __builtin_gcn_frexpvf_exp (x);

  v64sf z = mant - 0.5f;

  /* Pick the reduced argument depending on which side of sqrt(1/2) the
     mantissa lies; the lower side also borrows one from the exponent.  */
  VECTOR_IF (mant > (float) __SQRT_HALF, cond)
    VECTOR_COND_MOVE (z, (z - 0.5f) / (mant * 0.5f + 0.5f), cond);
  VECTOR_ELSE (cond)
    VECTOR_COND_MOVE (expo, expo - 1, cond);
    VECTOR_COND_MOVE (z, z / (z * 0.5f + 0.5f), cond);
  VECTOR_ENDIF

  v64sf zsq = z * z;

  /* Rational correction: z += z * zsq * A(zsq) / B(zsq).  */
  z += z * zsq * ((a[2] * zsq + a[1]) * zsq + a[0])
       / (((zsq + b[2]) * zsq + b[1]) * zsq + b[0]);

  /* Add expo * (C1 + C2) for lanes with a non-zero exponent.  */
  v64sf expo_f = __builtin_convertvector (expo, v64sf);
  VECTOR_COND_MOVE (z, (expo_f * C2 + z) + expo_f * C1, expo != 0);

  VECTOR_RETURN (z, NO_COND);

  FUNCTION_RETURN;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (logf, sf, sf)
|
|
||||||
|
|
||||||
/* Vector log1pf: currently evaluated as logf (1 + x), which loses
   precision for x close to zero.  */
DEF_VS_MATH_FUNC (v64sf, log1pf, v64sf x)
{
  /* TODO: Implement algorithm with better precision. */
  v64sf shifted = 1 + x;
  return v64sf_logf_aux (shifted, __mask);
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (log1pf, sf, sf)
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,14 +0,0 @@
|
||||||
/* Based on newlib/libm/mathfp/sf_log10.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64sf v64sf_logf_aux (v64sf, v64si);
|
|
||||||
|
|
||||||
/* Multiplier applied to the natural logarithm by log10f; numerically
   log10(e).  */
static const float C3 = 0.4342944819;
|
|
||||||
|
|
||||||
/* Vector log10f: natural logarithm of each lane scaled by C3.  */
DEF_VS_MATH_FUNC (v64sf, log10f, v64sf x)
{
  v64sf natural_log = v64sf_logf_aux (x, __mask);
  return natural_log * C3;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (log10f, sf, sf)
|
|
|
@ -1,12 +0,0 @@
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64sf v64sf_logf_aux (v64sf, v64si);
|
|
||||||
|
|
||||||
/* Multiplier applied to the natural logarithm by log2f; numerically
   log2(e) = 1/ln(2).  */
static const float C3 = 1.4426950408889634073599246810019;
|
|
||||||
|
|
||||||
/* Vector log2f: natural logarithm of each lane scaled by C3.  */
DEF_VS_MATH_FUNC (v64sf, log2f, v64sf x)
{
  v64sf natural_log = v64sf_logf_aux (x, __mask);
  return natural_log * C3;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (log2f, sf, sf)
|
|
|
@ -1,55 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/common/sf_modf.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64si v64sf_numtestf (v64sf);
|
|
||||||
|
|
||||||
/* Vector modff: splits each lane of X into a fractional part (returned)
   and an integral part (stored through IPTR), both carrying the sign of
   the input lane.  */
DEF_VS_MATH_FUNC (v64sf, modff, v64sf x, v64sf *iptr)
{
  FUNCTION_INIT (v64sf);
  v64sf int_part;

  v64si bits;
  GET_FLOAT_WORD (bits, x, NO_COND);

  /* Unbiased exponent of x.  */
  v64si expo = ((bits >> 23) & 0xff) - 0x7f;

  /* Zero carrying the sign of x.  */
  v64sf signed_zero;
  SET_FLOAT_WORD (signed_zero, bits & 0x80000000, NO_COND);

  VECTOR_IF (expo < 23, cond)  /* integer part in x */
    VECTOR_IF2 (expo < 0, cond2, cond)  /* |x|<1 */
      VECTOR_COND_MOVE (int_part, signed_zero, cond2);
      VECTOR_RETURN (x, cond2);
    VECTOR_ELSE2 (cond2, cond)
      /* Mask covering the mantissa bits that hold the fraction.  */
      v64si frac_mask = (0x007fffff) >> expo;

      VECTOR_IF2 ((bits & frac_mask) == 0, cond3, cond2)  /* x is integral */
        VECTOR_COND_MOVE (int_part, x, cond3);
        VECTOR_RETURN (signed_zero, cond3);
      VECTOR_ELSE2 (cond3, cond2)
        /* Clear the fraction bits to obtain the integral part.  */
        SET_FLOAT_WORD (int_part, bits & ~frac_mask, cond3);
        VECTOR_RETURN (x - int_part, cond3);
      VECTOR_ENDIF
    VECTOR_ENDIF
  VECTOR_ELSE (cond)  /* no fraction part */
    VECTOR_COND_MOVE (int_part, x, cond);
    VECTOR_IF2 (v64sf_numtestf (x) == NAN, cond2, cond)
      VECTOR_COND_MOVE (int_part, x + x, cond2);
      VECTOR_RETURN (int_part, cond2);  /* x is NaN, return NaN */
    VECTOR_ENDIF
    VECTOR_RETURN (signed_zero, cond);  /* return +- 0 */
  VECTOR_ENDIF

  *iptr = int_part;
  FUNCTION_RETURN;
}
|
|
|
@ -1,30 +0,0 @@
|
||||||
/* Based on newlib/libm/mathfp/sf_numtest.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64si
|
|
||||||
v64sf_numtestf (v64sf x)
|
|
||||||
{
|
|
||||||
// Explicitly create mask for internal function.
|
|
||||||
v64si __mask = VECTOR_INIT (-1);
|
|
||||||
FUNCTION_INIT (v64si);
|
|
||||||
|
|
||||||
v64si wx;
|
|
||||||
GET_FLOAT_WORD (wx, x, NO_COND);
|
|
||||||
v64si exp = (wx & 0x7f800000) >> 23;
|
|
||||||
|
|
||||||
/* Check for a zero input. */
|
|
||||||
VECTOR_RETURN (VECTOR_INIT (0), x == 0.0);
|
|
||||||
|
|
||||||
/* Check for not a number or infinity. */
|
|
||||||
VECTOR_IF (exp == 0xff, cond)
|
|
||||||
VECTOR_RETURN (VECTOR_MERGE (VECTOR_INIT (NAN), VECTOR_INIT (INF),
|
|
||||||
wx & 0x7fffff),
|
|
||||||
cond);
|
|
||||||
/* Otherwise it's a finite value. */
|
|
||||||
VECTOR_ELSE (cond)
|
|
||||||
VECTOR_RETURN (VECTOR_INIT (NUM), cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
|
|
||||||
FUNCTION_RETURN;
|
|
||||||
}
|
|
|
@ -1,292 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/math/ef_pow.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
/* Single-precision constants for powf.  The hex value in each trailing
   comment is the IEEE-754 bit pattern of the constant.  */
static const float
  bp[]    = { 1.0, 1.5, },
  dp_h[]  = { 0.0, 5.84960938e-01, },  /* 0x3f15c000 */
  dp_l[]  = { 0.0, 1.56322085e-06, },  /* 0x35d1cfdc */
  zero    = 0.0,
  one     = 1.0,
  two     = 2.0,
  two24   = 16777216.0,         /* 0x4b800000 */
  /* poly coefs for (3/2)*(log(x)-2s-2/3*s**3) */
  L1      = 6.0000002384e-01,   /* 0x3f19999a */
  L2      = 4.2857143283e-01,   /* 0x3edb6db7 */
  L3      = 3.3333334327e-01,   /* 0x3eaaaaab */
  L4      = 2.7272811532e-01,   /* 0x3e8ba305 */
  L5      = 2.3066075146e-01,   /* 0x3e6c3255 */
  L6      = 2.0697501302e-01,   /* 0x3e53f142 */
  P1      = 1.6666667163e-01,   /* 0x3e2aaaab */
  P2      = -2.7777778450e-03,  /* 0xbb360b61 */
  P3      = 6.6137559770e-05,   /* 0x388ab355 */
  P4      = -1.6533901999e-06,  /* 0xb5ddea0e */
  P5      = 4.1381369442e-08,   /* 0x3331bb4c */
  lg2     = 6.9314718246e-01,   /* 0x3f317218 */
  lg2_h   = 6.93145752e-01,     /* 0x3f317200 */
  lg2_l   = 1.42860654e-06,     /* 0x35bfbe8c */
  ovt     = 4.2995665694e-08,   /* -(128-log2(ovfl+.5ulp)) */
  cp      = 9.6179670095e-01,   /* 0x3f76384f =2/(3ln2) */
  cp_h    = 9.6179199219e-01,   /* 0x3f763800 =head of cp */
  cp_l    = 4.7017383622e-06,   /* 0x369dc3a0 =tail of cp_h */
  ivln2   = 1.4426950216e+00,   /* 0x3fb8aa3b =1/ln2 */
  ivln2_h = 1.4426879883e+00,   /* 0x3fb8aa00 =16b 1/ln2 */
  ivln2_l = 7.0526075433e-06;   /* 0x36eca570 =1/ln2 tail */
|
|
||||||
|
|
||||||
v64sf v64sf_sqrtf_aux (v64sf, v64si);
|
|
||||||
v64sf v64sf_scalbnf_aux (v64sf, v64si, v64si);
|
|
||||||
|
|
||||||
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsvf)
|
|
||||||
|
|
||||||
/* Vector powf: computes x**y per lane.

   Outline: special operands (y == 0, NaN, +-inf, +-1, 2, 0.5 and
   x == +-0, +-inf, +-1) are returned early.  Otherwise log2(|x|) is
   computed as t1 + t2 in extra precision, multiplied by y (split into
   y1 + y2), and 2**(product) is evaluated, with the sign restored for
   negative x raised to an odd integer.

   Fixes relative to the previous revision:
   - the "|y| is huge" branch ended with a placeholder return of
     0.123456f; those lanes now fall through to the common tail with the
     t1/t2 they just computed;
   - the subnormal rescale of ax used the outer condition instead of the
     subnormal-only condition.  */
DEF_VS_MATH_FUNC (v64sf, powf, v64sf x, v64sf y)
{
  FUNCTION_INIT (v64sf);

  v64si hx, hy;
  GET_FLOAT_WORD (hx,x, NO_COND);
  GET_FLOAT_WORD (hy,y, NO_COND);
  v64si ix = hx&0x7fffffff;
  v64si iy = hy&0x7fffffff;

  /* y==zero: x**0 = 1 */
  VECTOR_IF (FLT_UWORD_IS_ZERO(iy), cond)
    VECTOR_RETURN (x + y, cond & v64sf_issignalingf_inline(x));
    VECTOR_RETURN (VECTOR_INIT (1.0f), cond);
  VECTOR_ENDIF

  /* x|y==NaN return NaN unless x==1 then return 1 */
  VECTOR_IF (FLT_UWORD_IS_NAN(ix) | FLT_UWORD_IS_NAN(iy), cond)
    VECTOR_IF2 (hx==0x3f800000 & ~v64sf_issignalingf_inline(y), cond2, cond)
      VECTOR_RETURN (VECTOR_INIT (1.0f), cond2);
    VECTOR_ELSE2 (cond2, cond)
      VECTOR_RETURN (x + y, cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF

  /* determine if y is an odd int when x < 0
   * yisint = 0	... y is not an integer
   * yisint = 1	... y is an odd int
   * yisint = 2	... y is an even int
   */
  v64si yisint = VECTOR_INIT (0);

  VECTOR_IF (hx < 0, cond)
    VECTOR_IF2 (iy >= 0x4b800000, cond2, cond)
      VECTOR_COND_MOVE (yisint, VECTOR_INIT (2), cond2);  /* even integer y */
    VECTOR_ELSEIF2 (iy >= 0x3f800000, cond2, cond)
      v64si k = (iy>>23)-0x7f;  /* exponent */
      v64si j = iy>>(23-k);
      VECTOR_COND_MOVE (yisint, 2-(j&1), cond2 & ((j<<(23-k))==iy));
    VECTOR_ENDIF
  VECTOR_ENDIF

  /* special value of y */
  VECTOR_IF (FLT_UWORD_IS_INFINITE(iy), cond)  /* y is +-inf */
    VECTOR_IF2 (ix==0x3f800000, cond2, cond)
      VECTOR_RETURN (VECTOR_INIT (1.0f), cond2);  /* +-1**+-inf = 1 */
    VECTOR_ELSEIF2 (ix > 0x3f800000, cond2, cond)  /* (|x|>1)**+-inf = inf,0 */
      VECTOR_RETURN (y, cond2 & (hy >= 0));
      VECTOR_RETURN (VECTOR_INIT (0.0f), cond2);
    VECTOR_ELSE2 (cond2, cond)  /* (|x|<1)**-,+inf = inf,0 */
      VECTOR_RETURN (-y, cond2 & (hy<0));
      VECTOR_RETURN (VECTOR_INIT (0.0f), cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF
  VECTOR_IF (iy==0x3f800000, cond)  /* y is +-1 */
    VECTOR_RETURN (VECTOR_INIT (1.0f) / x, cond & (hy<0));
    VECTOR_RETURN (x, cond);
  VECTOR_ENDIF

  VECTOR_RETURN (x*x, hy==0x40000000);  /* y is 2 */
  /* y is 0.5 */
  /* x >= +0 */
  VECTOR_RETURN (v64sf_sqrtf_aux (x, __mask), (hy==0x3f000000) & (hx >= 0));

  v64sf ax = __builtin_gcn_fabsvf(x);
  /* special value of x */
  VECTOR_IF (FLT_UWORD_IS_INFINITE(ix)|FLT_UWORD_IS_ZERO(ix)|ix==0x3f800000, cond)
    v64sf z = ax;  /*x is +-0,+-inf,+-1*/
    VECTOR_COND_MOVE (z, VECTOR_INIT (1.0f) / z, cond & (hy < 0));  /* z = (1/|x|) */
    VECTOR_IF2 (hx<0, cond2, cond)
      VECTOR_IF2 (((ix-0x3f800000)|yisint)==0, cond3, cond2)
        /* (-1)**non-int is NaN */
        VECTOR_COND_MOVE (z, (z-z)/(z-z), cond3);
      VECTOR_ELSEIF2 (yisint==1, cond3, cond2)
        /* (x<0)**odd = -(|x|**odd) */
        VECTOR_COND_MOVE (z, -z, cond3);
      VECTOR_ENDIF
    VECTOR_ENDIF
    VECTOR_RETURN (z, cond);
  VECTOR_ENDIF

  /* (x<0)**(non-int) is NaN */
  VECTOR_RETURN ((x-x)/(x-x), ((((hx >> 31) & 1) - 1)|yisint)==0);

  /* log2(|x|) is produced below as the head/tail pair t1 + t2.  */
  v64sf t1, t2;

  /* |y| is huge */
  VECTOR_IF (iy>0x4d000000, cond)  /* if |y| > 2**27 */
    /* over/underflow if x is not close to one */
    VECTOR_IF2(ix<0x3f7ffff4, cond2, cond)
      VECTOR_RETURN (v64sf_math_oflowf(VECTOR_INIT (0)), cond2 & (hy < 0));
      VECTOR_RETURN (v64sf_math_uflowf(VECTOR_INIT (0)), cond2);
    VECTOR_ENDIF
    VECTOR_IF2(ix>0x3f800007, cond2, cond)
      VECTOR_RETURN (v64sf_math_oflowf(VECTOR_INIT (0)), cond2 & (hy > 0));
      VECTOR_RETURN (v64sf_math_uflowf(VECTOR_INIT (0)), cond2);
    VECTOR_ENDIF
    /* now |1-x| is tiny <= 2**-20, suffice to compute
       log(x) by x-x^2/2+x^3/3-x^4/4 */
    v64sf t = ax-1;  /* t has 20 trailing zeros */
    v64sf w = (t*t)*(0.5f-t*(0.333333333333f-t*0.25f));
    v64sf u = ivln2_h*t;  /* ivln2_h has 16 sig. bits */
    v64sf v = t*ivln2_l-w*ivln2;
    VECTOR_COND_MOVE (t1, u+v, cond);
    v64si is;
    GET_FLOAT_WORD(is,t1, cond);
    SET_FLOAT_WORD(t1,is&0xfffff000, cond);
    VECTOR_COND_MOVE (t2, v-(t1-u), cond);
    /* These lanes continue to the common tail below with t1/t2 set.  */
  VECTOR_ELSE (cond)
    v64si n = VECTOR_INIT (0);
    /* take care subnormal number */
    VECTOR_IF2 (FLT_UWORD_IS_SUBNORMAL(ix), cond2, cond)
      VECTOR_COND_MOVE (ax, ax * two24, cond2);
      VECTOR_COND_MOVE (n, n - 24, cond2);
      GET_FLOAT_WORD(ix,ax, cond2);
    VECTOR_ENDIF
    n += (ix>>23)-0x7f;
    v64si j = ix&0x007fffff;
    /* determine interval */
    /* NOTE: this declaration shadows the function-level ix for the rest
       of this branch; the uses above refer to the outer variable.  */
    v64si ix = j|0x3f800000;  /* normalize ix */
    v64si k;
    VECTOR_IF2 (j<=0x1cc471, cond2, cond)
      VECTOR_COND_MOVE (k, VECTOR_INIT (0), cond2);  /* |x|<sqrt(3/2) */
    VECTOR_ELSEIF2 (j<0x5db3d7, cond2, cond)
      VECTOR_COND_MOVE (k, VECTOR_INIT (1), cond2);  /* |x|<sqrt(3) */
    VECTOR_ELSE2 (cond2, cond)
      VECTOR_COND_MOVE (k, VECTOR_INIT (0), cond2);
      VECTOR_COND_MOVE (n, n + 1, cond2);
      VECTOR_COND_MOVE (ix, ix - 0x00800000, cond2);
    VECTOR_ENDIF
    SET_FLOAT_WORD(ax,ix, cond);

    /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
    /* bp[0]=1.0, bp[1]=1.5 */
    v64sf bp_k = VECTOR_MERGE (VECTOR_INIT (bp[1]), VECTOR_INIT (bp[0]), k == 1);
    v64sf u = ax-bp_k;
    v64sf v = 1.0f/(ax+bp_k);
    v64sf s = u*v;
    v64sf s_h = s;
    v64si is;
    GET_FLOAT_WORD(is,s_h, cond);
    SET_FLOAT_WORD(s_h,is&0xfffff000, cond);
    /* t_h=ax+bp[k] High */
    v64sf t_h;
    SET_FLOAT_WORD(t_h,((ix>>1)|0x20000000)+0x0040000+(k<<21), cond);
    v64sf t_l = ax - (t_h-bp_k);
    v64sf s_l = v*((u-s_h*t_h)-s_h*t_l);
    /* compute log(ax) */
    v64sf s2 = s*s;
    v64sf r = s2*s2*(L1+s2*(L2+s2*(L3+s2*(L4+s2*(L5+s2*L6)))));
    r += s_l*(s_h+s);
    s2 = s_h*s_h;
    t_h = __builtin_convertvector(3.0f+s2+r, v64sf);
    GET_FLOAT_WORD(is,t_h, cond);
    SET_FLOAT_WORD(t_h,is&0xfffff000, cond);
    t_l = r-((t_h-3.0f)-s2);
    /* u+v = s*(1+...) */
    u = s_h*t_h;
    v = s_l*t_h+t_l*s;
    /* 2/(3log2)*(s+...) */
    v64sf p_h = u+v;
    GET_FLOAT_WORD(is,p_h, cond);
    SET_FLOAT_WORD(p_h,is&0xfffff000, cond);
    v64sf p_l = v-(p_h-u);
    v64sf z_h = cp_h*p_h;  /* cp_h+cp_l = 2/(3*log2) */
    v64sf dp_l_k = VECTOR_MERGE (VECTOR_INIT (dp_l[1]), VECTOR_INIT (dp_l[0]), k == 1);
    v64sf z_l = cp_l*p_h+p_l*cp+dp_l_k;
    /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
    v64sf t = __builtin_convertvector (n, v64sf);
    v64sf dp_h_k = VECTOR_MERGE (VECTOR_INIT (dp_h[1]), VECTOR_INIT (dp_h[0]), k == 1);
    VECTOR_COND_MOVE (t1, (((z_h+z_l)+dp_h_k)+t), cond);
    GET_FLOAT_WORD(is,t1, cond);
    SET_FLOAT_WORD(t1,is&0xfffff000, cond);
    VECTOR_COND_MOVE (t2, z_l-(((t1-t)-dp_h_k)-z_h), cond);
  VECTOR_ENDIF

  v64sf s = VECTOR_INIT (1.0f);  /* s (sign of result -ve**odd) = -1 else = 1 */
  VECTOR_COND_MOVE (s, VECTOR_INIT (-1.0f),  /* (-ve)**(odd int) */
                    ((hx>>31) != 0)&(yisint == 1));

  /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
  v64si is;
  GET_FLOAT_WORD(is,y, NO_COND);
  v64sf y1;
  SET_FLOAT_WORD(y1,is&0xfffff000, NO_COND);
  v64sf p_l = (y-y1)*t1+y*t2;
  v64sf p_h = y1*t1;
  v64sf z = p_l+p_h;
  v64si j;
  GET_FLOAT_WORD(j,z, NO_COND);
  v64si i = j&0x7fffffff;
  VECTOR_IF (j>0, cond)
    VECTOR_RETURN (v64sf_math_oflowf(s<0), cond & (i>FLT_UWORD_EXP_MAX));  /* overflow */
    VECTOR_RETURN (v64sf_math_oflowf(s<0), cond & (i==FLT_UWORD_EXP_MAX)
                                           & (p_l+ovt>z-p_h));  /* overflow */
  VECTOR_ELSE (cond)
    VECTOR_RETURN (v64sf_math_uflowf(s<0), cond & (i>FLT_UWORD_EXP_MIN));  /* underflow */
    VECTOR_RETURN (v64sf_math_uflowf(s<0), cond & (i==FLT_UWORD_EXP_MIN)
                                           & (p_l<=z-p_h));  /* underflow */
  VECTOR_ENDIF
  /*
   * compute 2**(p_h+p_l)
   */
  v64si k = (i>>23)-0x7f;
  v64si n = VECTOR_INIT (0);
  VECTOR_IF (i>0x3f000000, cond)  /* if |z| > 0.5, set n = [z+0.5] */
    VECTOR_COND_MOVE (n, j+(0x00800000>>(k+1)), cond);
    k = ((n&0x7fffffff)>>23)-0x7f;  /* new k for n */
    v64sf t;
    SET_FLOAT_WORD(t,n&~(0x007fffff>>k), cond);
    VECTOR_COND_MOVE (n, ((n&0x007fffff)|0x00800000)>>(23-k), cond);
    VECTOR_COND_MOVE (n, -n, cond & (j<0));
    VECTOR_COND_MOVE (p_h, p_h - t, cond);
  VECTOR_ENDIF
  v64sf t = p_l+p_h;
  GET_FLOAT_WORD(is,t, NO_COND);
  SET_FLOAT_WORD(t,is&0xfffff000, NO_COND);
  v64sf u = t*lg2_h;
  v64sf v = (p_l-(t-p_h))*lg2+t*lg2_l;
  z = u+v;
  v64sf w = v-(z-u);
  t = z*z;
  t1 = z - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
  v64sf r = (z*t1)/(t1-2.0f)-(w+z*w);
  z = VECTOR_INIT (1.0f)-(r-z);
  GET_FLOAT_WORD(j,z, NO_COND);
  /* Scale by 2**n by adding n to the exponent field.  */
  j += (n<<23);
  VECTOR_IF ((j>>23)<=0, cond)
    VECTOR_COND_MOVE (z, v64sf_scalbnf_aux(z, n, __mask), cond);  /* subnormal output */
  VECTOR_ELSE (cond)
    SET_FLOAT_WORD(z, j, cond);
  VECTOR_ENDIF
  VECTOR_RETURN (s*z, NO_COND);

  FUNCTION_RETURN;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS2 (powf, sf, sf)
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,71 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/ef_remainder.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64sf v64sf_fmodf_aux (v64sf, v64sf, v64si);
|
|
||||||
|
|
||||||
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsvf)
|
|
||||||
|
|
||||||
DEF_VS_MATH_FUNC (v64sf, remainderf, v64sf x, v64sf p)
{
  FUNCTION_INIT (v64sf);

  /* Fetch the raw bit patterns of both operands.  */
  v64si hx, hp;
  GET_FLOAT_WORD (hx, x, NO_COND);
  GET_FLOAT_WORD (hp, p, NO_COND);

  /* Keep the sign bit of x aside, then reduce both words to magnitudes.  */
  v64si sx = hx & 0x80000000;
  hx &= 0x7fffffff;
  hp &= 0x7fffffff;

  /* Purge off exception values: lanes with p == 0, x not finite, or
     p NaN all return (x*p)/(x*p).  */
  v64si p_is_zero = (hp == 0);
  v64si x_not_finite = (hx >= 0x7f800000);
  v64si p_is_nan = (hp > 0x7f800000);
  VECTOR_RETURN ((x * p) / (x * p), p_is_zero | x_not_finite | p_is_nan);

  /* Where hp <= 0x7effffff, replace x by fmod (x, 2p): now |x| < 2|p|.  */
  VECTOR_COND_MOVE (x, v64sf_fmodf_aux (x, p + p, __mask), hp <= 0x7effffff);

  /* Equal magnitude words (as captured before the fmod step) give a zero
     that carries the sign of x.  */
  VECTOR_RETURN (0.0f * x, hx == hp);

  x = __builtin_gcn_fabsvf (x);
  p = __builtin_gcn_fabsvf (p);

  VECTOR_IF (hp < 0x01000000, outer)
    /* Very small p: compare x + x against p.  */
    VECTOR_IF2 (x + x > p, inner, outer)
      VECTOR_COND_MOVE (x, x - p, inner);
      VECTOR_COND_MOVE (x, x - p, inner & (x + x >= p));
    VECTOR_ENDIF
  VECTOR_ELSE (outer)
    /* Otherwise compare x against p/2.  */
    v64sf p_half = 0.5f * p;
    VECTOR_IF2 (x > p_half, inner, outer)
      VECTOR_COND_MOVE (x, x - p, inner);
      VECTOR_COND_MOVE (x, x - p, inner & (x >= p_half));
    VECTOR_ENDIF
  VECTOR_ENDIF

  /* Fold the saved sign of the original x back into the result.  */
  GET_FLOAT_WORD (hx, x, NO_COND);
  SET_FLOAT_WORD (x, hx ^ sx, NO_COND);

  VECTOR_RETURN (x, NO_COND);

  FUNCTION_RETURN;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS2 (remainderf, sf, sf)
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,62 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/common/sf_rint.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
static const float TWO23[2] = {
|
|
||||||
8.3886080000e+06, /* 0x4b000000 */
|
|
||||||
-8.3886080000e+06, /* 0xcb000000 */
|
|
||||||
};
|
|
||||||
|
|
||||||
DEF_VS_MATH_FUNC (v64sf, rintf, v64sf x)
{
  FUNCTION_INIT (v64sf);

  v64si i0;
  GET_FLOAT_WORD (i0, x, NO_COND);

  v64si sx = (i0 >> 31) & 1;		/* 1 on lanes with the sign bit set */
  v64si ix = (i0 & 0x7fffffff);		/* magnitude bits */
  v64si j0 = (ix >> 23) - 0x7f;		/* unbiased exponent */

  /* Per-lane rounding bias, chosen by sign.
     NOTE(review): the bias is TWO23[sx] + x, not the bare TWO23[sx] that
     the scalar sf_rint.c uses; kept exactly as found -- confirm intent.  */
  v64sf two23 = VECTOR_MERGE (TWO23[1] + x, TWO23[0] + x, sx != 0);

  VECTOR_IF (j0 < 23, outer)
    /* Zero is returned unchanged.  */
    VECTOR_RETURN (x, outer & FLT_UWORD_IS_ZERO (ix));
    VECTOR_IF2 (j0 < 0, inner, outer)
      /* |x| < 1: fold the low mantissa bits into a single bit, then let
	 the add/subtract of the bias do the rounding.  */
      v64si low = (i0 & 0x07fffff);
      v64si folded = ((low | -low) >> 9) & 0x400000;
      VECTOR_COND_MOVE (i0, (i0 & 0xfff00000) | folded, inner);
      SET_FLOAT_WORD (x, i0, inner);
      v64sf biased = two23 + x;
      v64sf rounded = biased - two23;
      /* Put the original sign back onto the rounded value.  */
      GET_FLOAT_WORD (i0, rounded, inner);
      SET_FLOAT_WORD (rounded, (i0&0x7fffffff)|(sx<<31), inner);
      VECTOR_RETURN (rounded, inner);
    VECTOR_ELSE2 (inner, outer)
      v64si i = (0x007fffff) >> j0;
      VECTOR_RETURN (x, inner & ((i0 & i) == 0));	/* x is integral */
      i >>= 1;
      VECTOR_COND_MOVE (i0, (i0 & (~i)) | (0x200000 >> j0),
			inner & ((i0 & i) != 0));
    VECTOR_ENDIF
  VECTOR_ELSE (outer)
    VECTOR_RETURN (x + x, outer & ~FLT_UWORD_IS_FINITE (ix));	/* inf or NaN */
    VECTOR_RETURN (x, outer);					/* x is integral */
  VECTOR_ENDIF

  /* Remaining lanes: round through the bias.  */
  SET_FLOAT_WORD (x, i0, NO_COND);
  v64sf sum = two23 + x;
  VECTOR_RETURN (sum - two23, NO_COND);

  FUNCTION_RETURN;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (rintf, sf, sf)
|
|
|
@ -1,59 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/ef_scalb.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64si v64sf_isnanf (v64sf);
|
|
||||||
v64si v64sf_finitef (v64sf);
|
|
||||||
v64sf v64sf_rintf_aux (v64sf, v64si);
|
|
||||||
v64sf v64sf_scalbnf_aux (v64sf, v64si, v64si);
|
|
||||||
|
|
||||||
DEF_VS_MATH_FUNC (v64sf, scalbf, v64sf x, v64sf fn)
{
  FUNCTION_INIT (v64sf);

  /* Magnitude at which fn is clamped before it is handed to scalbnf.  */
#if INT_MAX > 65000
  enum { FN_CLAMP = 65000 };
#else
  enum { FN_CLAMP = 32000 };
#endif

  /* Either operand NaN: the product propagates the NaN.  */
  VECTOR_RETURN (x * fn, v64sf_isnanf (x) | v64sf_isnanf (fn));

  /* fn is not finite: x * fn for positive fn, x / (-fn) otherwise.  */
  v64si fn_not_finite = ~v64sf_finitef (fn);
  v64si fn_positive = fn > 0.0f;
  VECTOR_RETURN (x * fn, fn_not_finite & fn_positive);
  VECTOR_RETURN (x / (-fn), fn_not_finite & ~fn_positive);

  /* fn is not an integer: return (fn-fn)/(fn-fn).  */
  VECTOR_RETURN ((fn - fn) / (fn - fn), v64sf_rintf_aux (fn, __mask) != fn);

  /* Clamp out-of-range exponents.  */
  VECTOR_IF (fn > (float) FN_CLAMP, too_big)
    VECTOR_RETURN (v64sf_scalbnf_aux (x, VECTOR_INIT (FN_CLAMP), __mask),
		   too_big);
  VECTOR_ENDIF
  VECTOR_IF (-fn > (float) FN_CLAMP, too_small)
    VECTOR_RETURN (v64sf_scalbnf_aux (x, VECTOR_INIT (-FN_CLAMP), __mask),
		   too_small);
  VECTOR_ENDIF

  /* Ordinary case: convert fn to an integer vector and scale.  */
  v64si n = __builtin_convertvector (fn, v64si);
  VECTOR_RETURN (v64sf_scalbnf_aux (x, n, __mask), NO_COND);

  FUNCTION_RETURN;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS2 (scalbf, sf, sf)
|
|
|
@ -1,78 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/common/sf_scalbn.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
#include <limits.h>
|
|
||||||
#include <float.h>
|
|
||||||
|
|
||||||
#if INT_MAX > 50000
|
|
||||||
#define OVERFLOW_INT 50000
|
|
||||||
#else
|
|
||||||
#define OVERFLOW_INT 30000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static const float
|
|
||||||
two25 = 3.355443200e+07, /* 0x4c000000 */
|
|
||||||
twom25 = 2.9802322388e-08, /* 0x33000000 */
|
|
||||||
huge = 1.0e+30,
|
|
||||||
tiny = 1.0e-30;
|
|
||||||
|
|
||||||
v64sf v64sf_copysignf_aux (v64sf, v64sf, v64si);
|
|
||||||
|
|
||||||
DEF_VS_MATH_FUNC (v64sf, scalbnf, v64sf x, v64si n)
{
  FUNCTION_INIT (v64sf);

  const v64sf huge_v = VECTOR_INIT ((float) huge);
  const v64sf tiny_v = VECTOR_INIT ((float) tiny);

  v64si ix;
  GET_FLOAT_WORD (ix, x, NO_COND);
  v64si hx = ix & 0x7fffffff;
  v64si k = hx >> 23;		/* extract exponent */

  /* Zero is returned unchanged.  */
  VECTOR_RETURN (x, FLT_UWORD_IS_ZERO (hx));

  /* NaN or Inf.  */
  VECTOR_RETURN (x + x, ~FLT_UWORD_IS_FINITE (hx));

  /* Subnormal input: scale up by 2**25 and compensate in k.  */
  VECTOR_IF (FLT_UWORD_IS_SUBNORMAL (hx), subnormal)
    VECTOR_COND_MOVE (x, x * two25, subnormal);
    GET_FLOAT_WORD (ix, x, subnormal);
    VECTOR_COND_MOVE (k, ((ix & 0x7f800000) >> 23) - 25, subnormal);
    VECTOR_RETURN (tiny * x, subnormal & (n < -50000));	/* underflow */
  VECTOR_ENDIF

  /* Return early in case of integer overflow in n+k.  */
  VECTOR_IF (n > OVERFLOW_INT, n_too_big)
    VECTOR_RETURN (huge_v * v64sf_copysignf_aux (huge_v, x, __mask),
		   n_too_big);
  VECTOR_ENDIF

  k = k + n;

  VECTOR_IF (k > FLT_LARGEST_EXP, overflow)
    VECTOR_RETURN (huge_v * v64sf_copysignf_aux (huge_v, x, __mask),
		   overflow);
  VECTOR_ENDIF

  /* Normal result: splice the new exponent into the word.  */
  VECTOR_IF (k > 0, normal)
    SET_FLOAT_WORD (x, (ix & 0x807fffff) | (k << 23), normal);
    VECTOR_RETURN (x, normal);
  VECTOR_ENDIF

  VECTOR_IF (k < FLT_SMALLEST_EXP, underflow)
    VECTOR_RETURN (tiny_v * v64sf_copysignf_aux (tiny_v, x, __mask),
		   underflow);
  VECTOR_ENDIF

  /* Subnormal result: build it 25 binades up, then scale back down.  */
  k += 25;
  SET_FLOAT_WORD (x, (ix & 0x807fffff) | (k << 23), NO_COND);
  VECTOR_RETURN (x * twom25, NO_COND);

  FUNCTION_RETURN;
}
|
|
|
@ -1,24 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/sf_signif.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64sf v64sf_scalbf_aux (v64sf x, v64sf fn, v64si);
|
|
||||||
v64si v64sf_ilogbf_aux (v64sf x, v64si);
|
|
||||||
|
|
||||||
DEF_VS_MATH_FUNC (v64sf, significandf, v64sf x)
{
  /* significand (x) is computed as scalb (x, -ilogb (x)).  */
  v64si exponent = v64sf_ilogbf_aux (x, __mask);
  v64sf neg_exponent = -__builtin_convertvector (exponent, v64sf);

  return v64sf_scalbf_aux (x, neg_exponent, __mask);
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (significandf, sf, sf)
|
|
|
@ -1,12 +0,0 @@
|
||||||
/* Based on newlib/libm/mathfp/sf_sin.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64sf v64sf_sinef_aux (v64sf, int, v64si);
|
|
||||||
|
|
||||||
DEF_VS_MATH_FUNC (v64sf, sinf, v64sf x)
{
  /* Delegate to the shared sine/cosine kernel with its cosine flag
     cleared.  */
  v64sf result = v64sf_sinef_aux (x, 0, __mask);

  return result;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (sinf, sf, sf)
|
|
|
@ -1,93 +0,0 @@
|
||||||
/******************************************************************
|
|
||||||
* The following routines are coded directly from the algorithms
|
|
||||||
* and coefficients given in "Software Manual for the Elementary
|
|
||||||
* Functions" by William J. Cody, Jr. and William Waite, Prentice
|
|
||||||
* Hall, 1980.
|
|
||||||
******************************************************************/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/sf_sine.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64si v64sf_numtestf (v64sf);
|
|
||||||
|
|
||||||
static const float HALF_PI = 1.570796326;
|
|
||||||
static const float ONE_OVER_PI = 0.318309886;
|
|
||||||
static const float r[] = { -0.1666665668,
|
|
||||||
0.8333025139e-02,
|
|
||||||
-0.1980741872e-03,
|
|
||||||
0.2601903036e-5 };
|
|
||||||
|
|
||||||
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsvf)
|
|
||||||
|
|
||||||
DEF_VS_MATH_FUNC (v64sf, sinef, v64sf x, int cosine)
{
  /* Arguments larger than this are rejected with ERANGE.  */
  const float YMAX = 210828714.0;

  FUNCTION_INIT (v64sf);

  /* NaN lanes are handed back unchanged and infinite lanes become NaN;
     both report EDOM.  */
  v64si num_type = v64sf_numtestf (x);
  VECTOR_IF (num_type == NAN, special)
    errno = EDOM;
    VECTOR_RETURN (x, special);
  VECTOR_ELSEIF (num_type == INF, special)
    errno = EDOM;
    VECTOR_RETURN (VECTOR_INIT (z_notanum_f.f), special);
  VECTOR_ENDIF

  /* Use sin and cos properties to ease computations: NEGATE is a per-lane
     mask of results that must have their sign flipped, and Y is the
     non-negative argument (offset by pi/2 when computing the cosine).  */
  v64si negate;
  v64sf y;

  if (cosine)
    {
      negate = VECTOR_INIT (0);
      y = __builtin_gcn_fabsvf (x) + HALF_PI;
    }
  else
    {
      v64si x_negative = x < 0.0f;
      negate = x_negative;
      y = VECTOR_MERGE (-x, x, x_negative);
    }

  /* Check for values of y that will overflow here.  */
  VECTOR_IF (y > YMAX, too_large)
    errno = ERANGE;
    VECTOR_RETURN (x, too_large);
  VECTOR_ENDIF

  /* N is y/pi rounded to the nearest integer (away from zero on ties).  */
  v64si n_below = __builtin_convertvector (y * ONE_OVER_PI - 0.5f, v64si);
  v64si n_above = __builtin_convertvector (y * ONE_OVER_PI + 0.5f, v64si);
  v64si N = VECTOR_MERGE (n_below, n_above, y < 0.0f);
  v64sf XN = __builtin_convertvector (N, v64sf);

  /* An odd multiple of pi flips the sign of the result.  */
  VECTOR_COND_MOVE (negate, ~negate, (N & 1) != 0);

  if (cosine)
    XN -= 0.5;

  /* Reduced argument.  */
  y = __builtin_gcn_fabsvf (x) - XN * (float) __PI;

  v64sf res;

  VECTOR_IF ((-z_rooteps_f < y) & (y < z_rooteps_f), small_y)
    /* The reduced argument is small enough to be its own sine.  */
    VECTOR_COND_MOVE (res, y, small_y);
  VECTOR_ELSE (small_y)
    v64sf g = y * y;

    /* Polynomial in g = y*y with the coefficients from r[].  */
    v64sf R = (((r[3] * g + r[2]) * g + r[1]) * g + r[0]) * g;

    VECTOR_COND_MOVE (res, y + y * R, small_y);
  VECTOR_ENDIF

  VECTOR_COND_MOVE (res, -res, negate);

  VECTOR_RETURN (res, NO_COND);

  FUNCTION_RETURN;
}
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,98 +0,0 @@
|
||||||
/******************************************************************
|
|
||||||
* The following routines are coded directly from the algorithms
|
|
||||||
* and coefficients given in "Software Manual for the Elementary
|
|
||||||
* Functions" by William J. Cody, Jr. and William Waite, Prentice
|
|
||||||
* Hall, 1980.
|
|
||||||
******************************************************************/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/sf_sineh.c in Newlib.  */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64sf v64sf_expf_aux (v64sf, v64si);
|
|
||||||
v64si v64sf_numtestf (v64sf);
|
|
||||||
v64si v64sf_isposf (v64sf);
|
|
||||||
|
|
||||||
static const float q[] = { -0.428277109e+2 };
|
|
||||||
static const float p[] = { -0.713793159e+1,
|
|
||||||
-0.190333399 };
|
|
||||||
static const float LNV = 0.6931610107;
|
|
||||||
static const float INV_V2 = 0.2499930850;
|
|
||||||
static const float V_OVER2_MINUS1 = 0.1383027787e-4;
|
|
||||||
|
|
||||||
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsvf)
|
|
||||||
|
|
||||||
/* Shared kernel for sinhf and coshf.  X is the argument vector; a non-zero
   COSINEH selects the hyperbolic cosine, zero selects the hyperbolic sine.
   NaN lanes are returned unchanged with errno = EDOM.  Infinite lanes
   return an infinity with errno = ERANGE: always +Inf for the cosine
   (cosh is even), and an infinity of the same sign as X for the sine.  */
DEF_VS_MATH_FUNC (v64sf, sinehf, v64sf x, int cosineh)
{
  const float WBAR = 18.55;

  FUNCTION_INIT (v64sf);

  v64si sgn = VECTOR_INIT (0);
  /* All-ones mask on every lane when the cosine is requested.  */
  v64si v_cosineh = VECTOR_INIT (cosineh ? -1 : 0);

  /* Check for special values.  */
  v64si num_type = v64sf_numtestf (x);
  VECTOR_IF (num_type == NAN, cond)
    errno = EDOM;
    VECTOR_RETURN (x, cond);
  VECTOR_ELSEIF (num_type == INF, cond)
    errno = ERANGE;
    /* cosh (-Inf) is +Inf, so the sign of x only matters for sinh.  */
    VECTOR_RETURN (VECTOR_MERGE (VECTOR_INIT (z_infinity_f.f),
				 VECTOR_INIT (-z_infinity_f.f),
				 v64sf_isposf (x) | v_cosineh),
		   cond);
  VECTOR_ENDIF

  v64sf y = __builtin_gcn_fabsvf (x);

  /* sinh is odd: remember which lanes need their result negated.  */
  if (!cosineh)
    {
      VECTOR_COND_MOVE (sgn, VECTOR_INIT (-1), x < 0.0f);
    }

  v64sf res;

  /* The exponential form is used for every cosine lane and for sine lanes
     with |x| > 1.  */
  VECTOR_IF ((y > 1.0f) | v_cosineh, cond)
    VECTOR_IF2 (y > (float) BIGX, cond2, cond)
      v64sf w = y - LNV;

      /* Check for w > maximum here.  */
      VECTOR_IF2 (w > (float) BIGX, cond3, cond2)
	errno = ERANGE;
	VECTOR_RETURN (x, cond3);
      VECTOR_ENDIF

      v64sf z = v64sf_expf_aux (w, __mask);

      VECTOR_COND_MOVE (res, z * (V_OVER2_MINUS1 + 1.0f),
			cond2 & (w > WBAR));
    VECTOR_ELSE2 (cond2, cond)
      v64sf z = v64sf_expf_aux (y, __mask);
      if (cosineh) {
	VECTOR_COND_MOVE (res, (z + 1 / z) * 0.5f, cond2);
      } else {
	VECTOR_COND_MOVE (res, (z - 1 / z) * 0.5f, cond2);
      }
    VECTOR_ENDIF

    VECTOR_COND_MOVE (res, -res, sgn);
  VECTOR_ELSE (cond)
    /* Check for y being too small.  */
    VECTOR_IF2 (y < z_rooteps_f, cond2, cond)
      VECTOR_COND_MOVE (res, x, cond2);
    VECTOR_ELSE2 (cond2, cond)
      /* Rational approximation in f = x*x using p[] and q[].  */
      v64sf f = x * x;
      v64sf Q = f + q[0];
      v64sf P = p[1] * f + p[0];
      v64sf R = f * (P / Q);

      VECTOR_COND_MOVE (res, x + x * R, cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF

  VECTOR_RETURN (res, NO_COND);

  FUNCTION_RETURN;
}
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,12 +0,0 @@
|
||||||
/* Based on newlib/libm/mathfp/sf_sinh.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64sf v64sf_sinehf_aux (v64sf, int, v64si);
|
|
||||||
|
|
||||||
DEF_VS_MATH_FUNC (v64sf, sinhf, v64sf x)
{
  /* Delegate to the shared sinh/cosh kernel with its cosine flag
     cleared.  */
  v64sf result = v64sf_sinehf_aux (x, 0, __mask);

  return result;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (sinhf, sf, sf)
|
|
|
@ -1,74 +0,0 @@
|
||||||
/*****************************************************************
|
|
||||||
* The following routines are coded directly from the algorithms
|
|
||||||
* and coefficients given in "Software Manual for the Elementary
|
|
||||||
* Functions" by William J. Cody, Jr. and William Waite, Prentice
|
|
||||||
* Hall, 1980.
|
|
||||||
*****************************************************************/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/sf_sqrt.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64si v64sf_numtestf (v64sf);
|
|
||||||
v64si v64sf_isposf (v64sf);
|
|
||||||
|
|
||||||
#if defined (__has_builtin) \
|
|
||||||
&& __has_builtin (__builtin_gcn_frexpvf_mant) \
|
|
||||||
&& __has_builtin (__builtin_gcn_frexpvf_exp) \
|
|
||||||
&& __has_builtin (__builtin_gcn_ldexpvf)
|
|
||||||
|
|
||||||
/* Vectorised square root.
   Special cases, per lane:
     NaN      -> the input NaN, errno = EDOM
     +Inf     -> +Inf (no error)
     +-0      -> the input zero, sign preserved
     x < 0    -> NaN, errno = EDOM (this includes -Inf)
   Other lanes are split into mantissa and exponent, the mantissa root is
   refined with two Newton iterations, and the halved exponent is applied
   with ldexp.  */
DEF_VS_MATH_FUNC (v64sf, sqrtf, v64sf x)
{
  FUNCTION_INIT (v64sf);

  /* Check for special values.  */
  v64si num_type = v64sf_numtestf (x);
  VECTOR_IF (num_type == NAN, cond)
    errno = EDOM;
    VECTOR_RETURN (x, cond);
  VECTOR_ELSEIF (num_type == INF, cond)
    /* sqrt (+Inf) is +Inf and is not an error.  -Inf lanes stay active so
       that the x < 0 check below turns them into NaN with EDOM.  */
    VECTOR_RETURN (x, cond & (x > 0.0f));
  VECTOR_ENDIF

  /* Initial checks are performed here.  */
  VECTOR_IF (x == 0.0f, cond)
    /* Hand the input back so that sqrt (-0) stays -0.  */
    VECTOR_RETURN (x, cond);
  VECTOR_ENDIF
  VECTOR_IF (x < 0.0f, cond)
    errno = EDOM;
    VECTOR_RETURN (VECTOR_INIT (z_notanum_f.f), cond);
  VECTOR_ENDIF

  /* Find the exponent and mantissa for the form x = f * 2^exp.  */
  v64sf f = __builtin_gcn_frexpvf_mant (x);
  v64si exp = __builtin_gcn_frexpvf_exp (x);
  v64si odd = (exp & 1) != 0;

  /* Get the initial approximation.  */
  v64sf y = 0.41731f + 0.59016f * f;

  f *= 0.5f;
  /* Calculate the remaining iterations.  */
  y = y * 0.5f + f / y;
  y = y * 0.5f + f / y;

  /* Calculate the final value.  An odd exponent is made even by folding a
     factor of sqrt (1/2) into y and bumping exp before it is halved.  */
  VECTOR_COND_MOVE (y, y * (float) __SQRT_HALF, odd);
  VECTOR_COND_MOVE (exp, exp + 1, odd);
  exp >>= 1;
  y = __builtin_gcn_ldexpvf (y, exp);

  VECTOR_RETURN (y, NO_COND);

  FUNCTION_RETURN;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (sqrtf, sf, sf)
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,138 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/math/kf_tan.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
static const float
|
|
||||||
one = 1.0000000000e+00, /* 0x3f800000 */
|
|
||||||
pio4 = 7.8539812565e-01, /* 0x3f490fda */
|
|
||||||
pio4lo= 3.7748947079e-08, /* 0x33222168 */
|
|
||||||
T[] = {
|
|
||||||
3.3333334327e-01, /* 0x3eaaaaab */
|
|
||||||
1.3333334029e-01, /* 0x3e088889 */
|
|
||||||
5.3968254477e-02, /* 0x3d5d0dd1 */
|
|
||||||
2.1869488060e-02, /* 0x3cb327a4 */
|
|
||||||
8.8632395491e-03, /* 0x3c11371f */
|
|
||||||
3.5920790397e-03, /* 0x3b6b6916 */
|
|
||||||
1.4562094584e-03, /* 0x3abede48 */
|
|
||||||
5.8804126456e-04, /* 0x3a1a26c8 */
|
|
||||||
2.4646313977e-04, /* 0x398137b9 */
|
|
||||||
7.8179444245e-05, /* 0x38a3f445 */
|
|
||||||
7.1407252108e-05, /* 0x3895c07a */
|
|
||||||
-1.8558637748e-05, /* 0xb79bae5f */
|
|
||||||
2.5907305826e-05, /* 0x37d95384 */
|
|
||||||
};
|
|
||||||
|
|
||||||
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsvf)
|
|
||||||
|
|
||||||
static v64sf
|
|
||||||
v64sf_kernel_tanf (v64sf x, v64sf y, v64si iy, v64si __mask)
|
|
||||||
{
|
|
||||||
FUNCTION_INIT (v64sf);
|
|
||||||
|
|
||||||
v64si hx;
|
|
||||||
GET_FLOAT_WORD(hx, x, NO_COND);
|
|
||||||
v64si ix = hx & 0x7fffffff; /* high word of |x| */
|
|
||||||
|
|
||||||
VECTOR_IF(ix<0x31800000, cond) /* x < 2**-28 */
|
|
||||||
VECTOR_IF2(__builtin_convertvector (x, v64si)==0, cond2, cond) /* generate inexact */
|
|
||||||
VECTOR_RETURN (1.0f / __builtin_gcn_fabsvf (x), (ix|(iy+1))==0);
|
|
||||||
VECTOR_RETURN (x, cond2 & (iy == 1));
|
|
||||||
VECTOR_RETURN (-1.0f / x, cond2);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_IF(ix>=0x3f2ca140, cond) /* |x|>=0.6744 */
|
|
||||||
VECTOR_COND_MOVE (x, -x, cond & (hx < 0));
|
|
||||||
VECTOR_COND_MOVE (y, -y, cond & (hx < 0));
|
|
||||||
v64sf z = pio4-x;
|
|
||||||
v64sf w = pio4lo-y;
|
|
||||||
VECTOR_COND_MOVE (x, z+w, cond);
|
|
||||||
VECTOR_COND_MOVE (y, VECTOR_INIT (0.0f), cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
v64sf z = x*x;
|
|
||||||
v64sf w = z*z;
|
|
||||||
/* Break x^5*(T[1]+x^2*T[2]+...) into
|
|
||||||
* x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
|
|
||||||
* x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
|
|
||||||
*/
|
|
||||||
v64sf r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11]))));
|
|
||||||
v64sf v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12])))));
|
|
||||||
v64sf s = z*x;
|
|
||||||
r = y + z*(s*(r+v)+y);
|
|
||||||
r += T[0]*s;
|
|
||||||
w = x+r;
|
|
||||||
VECTOR_IF(ix>=0x3f2ca140, cond)
|
|
||||||
v = __builtin_convertvector (iy, v64sf);
|
|
||||||
VECTOR_RETURN (__builtin_convertvector (1-((hx>>30)&2), v64sf)
|
|
||||||
* (v-2.0f*(x-(w*w/(w+v)-r))), cond);
|
|
||||||
VECTOR_ENDIF
|
|
||||||
VECTOR_RETURN (w, iy == 1);
|
|
||||||
/* if allow error up to 2 ulp,
|
|
||||||
simply return -1.0/(x+r) here */
|
|
||||||
/* compute -1.0/(x+r) accurately */
|
|
||||||
z = w;
|
|
||||||
v64si i;
|
|
||||||
GET_FLOAT_WORD(i,z, NO_COND);
|
|
||||||
SET_FLOAT_WORD(z,i&0xfffff000, NO_COND);
|
|
||||||
v = r - (z - x); /* z+v = r+x */
|
|
||||||
v64sf a, t;
|
|
||||||
t = a = -1.0f/w; /* a = -1.0/w */
|
|
||||||
GET_FLOAT_WORD(i,t, NO_COND);
|
|
||||||
SET_FLOAT_WORD(t,i&0xfffff000, NO_COND);
|
|
||||||
s = 1.0f+t*z;
|
|
||||||
VECTOR_RETURN (t+a*(s+t*v), NO_COND);
|
|
||||||
|
|
||||||
FUNCTION_RETURN;
|
|
||||||
}
|
|
||||||
|
|
||||||
static v64si
|
|
||||||
v64sf_rem_pio2f (v64sf x, v64sf *y)
|
|
||||||
{
|
|
||||||
/* Work in double-precision for better accuracy. */
|
|
||||||
v64df dx = __builtin_convertvector (x, v64df);
|
|
||||||
v64df r = dx * __INV_PI_OVER_TWO_2_24;
|
|
||||||
v64si n = (__builtin_convertvector (r, v64si) + 0x800000) >> 24;
|
|
||||||
dx = dx - __builtin_convertvector (n, v64df) * __PI_OVER_TWO;
|
|
||||||
|
|
||||||
y[0] = __builtin_convertvector (dx, v64sf);
|
|
||||||
y[1] = __builtin_convertvector (dx, v64sf) - y[0];
|
|
||||||
return n;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEF_VS_MATH_FUNC (v64sf, tanf, v64sf x)
{
  FUNCTION_INIT (v64sf);

  /* Magnitude bits of x.  */
  v64si ix;
  GET_FLOAT_WORD (ix, x, NO_COND);
  ix &= 0x7fffffff;

  /* |x| ~< pi/4: the kernel is applied directly, with a zero tail.  */
  v64si no_reduction = ix <= 0x3f490fda;
  VECTOR_RETURN (v64sf_kernel_tanf (x, VECTOR_INIT (0.0f), VECTOR_INIT (1),
				    __mask),
		 no_reduction);

  /* tan(Inf or NaN) is NaN */
  VECTOR_RETURN (x - x, ~FLT_UWORD_IS_FINITE (ix));

  /* argument reduction needed */
  v64sf y[2];
  v64si n = v64sf_rem_pio2f (x, y);

  /* Kernel selector: 1 when n is even, -1 when n is odd.  */
  v64si iy = 1 - ((n & 1) << 1);
  VECTOR_RETURN (v64sf_kernel_tanf (y[0], y[1], iy, __mask), NO_COND);

  FUNCTION_RETURN;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (tanf, sf, sf)
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,63 +0,0 @@
|
||||||
/*****************************************************************
|
|
||||||
* The following routines are coded directly from the algorithms
|
|
||||||
* and coefficients given in "Software Manual for the Elementary
|
|
||||||
* Functions" by William J. Cody, Jr. and William Waite, Prentice
|
|
||||||
* Hall, 1980.
|
|
||||||
*****************************************************************/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/mathfp/sf_tanh.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64sf v64sf_expf_aux (v64sf, v64si);
|
|
||||||
|
|
||||||
static const float LN3_OVER2 = 0.54930614433405484570;
|
|
||||||
static const float p[] = { -0.16134119023996228053e+4,
|
|
||||||
-0.99225929672236083313e+2,
|
|
||||||
-0.96437492777225469787 };
|
|
||||||
static const float q[] = { 0.48402357071988688686e+4,
|
|
||||||
0.22337720718962312926e+4,
|
|
||||||
0.11274474380534949335e+3 };
|
|
||||||
|
|
||||||
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsvf)
|
|
||||||
|
|
||||||
DEF_VS_MATH_FUNC (v64sf, tanhf, v64sf x)
{
  FUNCTION_INIT (v64sf);

  /* Work on |x|; the sign is reapplied at the end.  */
  v64sf f = __builtin_gcn_fabsvf (x);
  v64sf res;

  VECTOR_IF (f > (float) BIGX, range)
    /* Input too big: the result is 1.  */
    VECTOR_COND_MOVE (res, VECTOR_INIT (1.0f), range);

  VECTOR_ELSEIF (f > LN3_OVER2, range)
    /* Exponential form: 1 - 2 / (exp (2f) + 1).  */
    v64sf exp_2f = v64sf_expf_aux (2.0f * f, __mask);
    VECTOR_COND_MOVE (res, 1.0f - 2.0f / (exp_2f + 1.0f), range);

  VECTOR_ELSEIF (f < z_rooteps_f, range)
    /* Input too small: the result is the argument itself.  */
    VECTOR_COND_MOVE (res, f, range);

  VECTOR_ELSE (range)
    /* Rational approximation in g = f*f using p[] and q[].  */
    v64sf g = f * f;

    v64sf P = (p[2] * g + p[1]) * g + p[0];
    v64sf Q = ((g + q[2]) * g + q[1]) * g + q[0];
    v64sf R = g * (P / Q);

    VECTOR_COND_MOVE (res, f + f * R, range);
  VECTOR_ENDIF

  /* tanh is odd.  */
  VECTOR_COND_MOVE (res, -res, x < 0.0f);

  VECTOR_RETURN (res, NO_COND);

  FUNCTION_RETURN;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (tanhf, sf, sf)
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,28 +0,0 @@
|
||||||
/*
|
|
||||||
* ====================================================
|
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
||||||
*
|
|
||||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
||||||
* Permission to use, copy, modify, and distribute this
|
|
||||||
* software is freely granted, provided that this notice
|
|
||||||
* is preserved.
|
|
||||||
* ====================================================
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Based on newlib/libm/math/ef_tgamma.c in Newlib. */
|
|
||||||
|
|
||||||
#include "amdgcnmach.h"
|
|
||||||
|
|
||||||
v64sf v64sf_expf_aux (v64sf x, v64si __mask);
|
|
||||||
v64sf v64sf_lgammaf_r_aux (v64sf x, v64si *signgamp, v64si __mask);
|
|
||||||
|
|
||||||
DEF_VS_MATH_FUNC (v64sf, tgammaf, v64sf x)
{
  /* Computed as exp (lgamma (x)), negated on the lanes where lgammaf_r
     reports a negative sign.  */
  v64si signgam_local;
  v64sf log_gamma = v64sf_lgammaf_r_aux (x, &signgam_local, __mask);
  v64sf y = v64sf_expf_aux (log_gamma, __mask);

  VECTOR_COND_MOVE (y, -y, signgam_local < 0);
  return y;
}
|
|
||||||
|
|
||||||
DEF_VARIANTS (tgammaf, sf, sf)
|
|
Loading…
Reference in New Issue