4
0
mirror of git://sourceware.org/git/newlib-cygwin.git synced 2025-02-21 00:07:36 +08:00

amdgcn: Add vectorized math routines

This implements a set of vectorized math routines to be used by the
compiler auto-vectorizer.  Versions for vectors with 2 lanes up to
64 lanes (in powers of 2) are provided.

These routines are based on the scalar versions of the math routines in
libm/common, libm/math and libm/mathfp.  They make extensive use of the GCC
C vector extensions and GCN-specific builtins in GCC.
This commit is contained in:
Kwok Cheung Yeung 2022-12-02 20:00:11 +00:00 committed by Jeff Johnston
parent 31302b267b
commit e18743072b
96 changed files with 9990 additions and 46 deletions

File diff suppressed because one or more lines are too long

14
newlib/configure vendored
View File

@ -623,6 +623,8 @@ HAVE_LIBM_MACHINE_I386_FALSE
HAVE_LIBM_MACHINE_I386_TRUE
HAVE_LIBM_MACHINE_ARM_FALSE
HAVE_LIBM_MACHINE_ARM_TRUE
HAVE_LIBM_MACHINE_AMDGCN_FALSE
HAVE_LIBM_MACHINE_AMDGCN_TRUE
HAVE_LIBM_MACHINE_AARCH64_FALSE
HAVE_LIBM_MACHINE_AARCH64_TRUE
HAS_NDS32_FPU_DP_FALSE
@ -6216,6 +6218,14 @@ else
HAVE_LIBM_MACHINE_AARCH64_FALSE=
fi
if test "${libm_machine_dir}" = "amdgcn"; then
HAVE_LIBM_MACHINE_AMDGCN_TRUE=
HAVE_LIBM_MACHINE_AMDGCN_FALSE='#'
else
HAVE_LIBM_MACHINE_AMDGCN_TRUE='#'
HAVE_LIBM_MACHINE_AMDGCN_FALSE=
fi
if test "${libm_machine_dir}" = "arm"; then
HAVE_LIBM_MACHINE_ARM_TRUE=
HAVE_LIBM_MACHINE_ARM_FALSE='#'
@ -7875,6 +7885,10 @@ if test -z "${HAVE_LIBM_MACHINE_AARCH64_TRUE}" && test -z "${HAVE_LIBM_MACHINE_A
as_fn_error $? "conditional \"HAVE_LIBM_MACHINE_AARCH64\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
if test -z "${HAVE_LIBM_MACHINE_AMDGCN_TRUE}" && test -z "${HAVE_LIBM_MACHINE_AMDGCN_FALSE}"; then
as_fn_error $? "conditional \"HAVE_LIBM_MACHINE_AMDGCN\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
if test -z "${HAVE_LIBM_MACHINE_ARM_TRUE}" && test -z "${HAVE_LIBM_MACHINE_ARM_FALSE}"; then
as_fn_error $? "conditional \"HAVE_LIBM_MACHINE_ARM\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5

View File

@ -119,6 +119,7 @@ case "${host_cpu}" in
amdgcn*)
newlib_cflags="${newlib_cflags} -D__DYNAMIC_REENT__"
machine_dir=amdgcn
libm_machine_dir=amdgcn
newlib_cv_initfinit_array=yes
;;
arc*)

View File

@ -51,6 +51,9 @@ include %D%/test/Makefile.inc
if HAVE_LIBM_MACHINE_AARCH64
include %D%/machine/aarch64/Makefile.inc
endif
if HAVE_LIBM_MACHINE_AMDGCN
include %D%/machine/amdgcn/Makefile.inc
endif
if HAVE_LIBM_MACHINE_ARM
include %D%/machine/arm/Makefile.inc
endif

View File

@ -4,7 +4,7 @@ m4_include([libm/machine/nds32/acinclude.m4])
dnl Define HAVE_LIBM_MACHINE_<machine> automake conditionals.
m4_foreach_w([MACHINE], [
aarch64 arm i386 mips nds32 powerpc pru sparc spu riscv x86_64
aarch64 amdgcn arm i386 mips nds32 powerpc pru sparc spu riscv x86_64
], [dnl
AM_CONDITIONAL([HAVE_LIBM_MACHINE_]m4_toupper(MACHINE), test "${libm_machine_dir}" = "MACHINE")
])

View File

@ -0,0 +1,91 @@
# Sources of the AMD GCN vectorized math routines: two shared support files
# (constants and re-entrancy data) followed by the double-precision
# (v64df_*) and single-precision (v64sf_*) implementations.  The whole list
# is appended to libm_a_SOURCES below.
%C%_src = \
%D%/v64_mathcnst.c \
%D%/v64_reent.c \
%D%/v64df_acos.c \
%D%/v64df_acosh.c \
%D%/v64df_asin.c \
%D%/v64df_asine.c \
%D%/v64df_asinh.c \
%D%/v64df_atan.c \
%D%/v64df_atan2.c \
%D%/v64df_atangent.c \
%D%/v64df_atanh.c \
%D%/v64df_copysign.c \
%D%/v64df_cos.c \
%D%/v64df_cosh.c \
%D%/v64df_erf.c \
%D%/v64df_exp.c \
%D%/v64df_exp2.c \
%D%/v64df_finite.c \
%D%/v64df_fmod.c \
%D%/v64df_gamma.c \
%D%/v64df_hypot.c \
%D%/v64df_ilogb.c \
%D%/v64df_isnan.c \
%D%/v64df_ispos.c \
%D%/v64df_lgamma.c \
%D%/v64df_lgamma_r.c \
%D%/v64df_log.c \
%D%/v64df_log10.c \
%D%/v64df_log2.c \
%D%/v64df_modf.c \
%D%/v64df_numtest.c \
%D%/v64df_pow.c \
%D%/v64df_remainder.c \
%D%/v64df_rint.c \
%D%/v64df_scalb.c \
%D%/v64df_scalbn.c \
%D%/v64df_signif.c \
%D%/v64df_sin.c \
%D%/v64df_sine.c \
%D%/v64df_sineh.c \
%D%/v64df_sinh.c \
%D%/v64df_sqrt.c \
%D%/v64df_tan.c \
%D%/v64df_tanh.c \
%D%/v64df_tgamma.c \
%D%/v64sf_acos.c \
%D%/v64sf_acosh.c \
%D%/v64sf_asin.c \
%D%/v64sf_asine.c \
%D%/v64sf_asinh.c \
%D%/v64sf_atan.c \
%D%/v64sf_atan2.c \
%D%/v64sf_atangent.c \
%D%/v64sf_atanh.c \
%D%/v64sf_copysign.c \
%D%/v64sf_cos.c \
%D%/v64sf_cosh.c \
%D%/v64sf_erf.c \
%D%/v64sf_exp.c \
%D%/v64sf_exp2.c \
%D%/v64sf_finite.c \
%D%/v64sf_fmod.c \
%D%/v64sf_gamma.c \
%D%/v64sf_hypot.c \
%D%/v64sf_ilogb.c \
%D%/v64sf_isnan.c \
%D%/v64sf_ispos.c \
%D%/v64sf_lgamma.c \
%D%/v64sf_lgamma_r.c \
%D%/v64sf_log.c \
%D%/v64sf_log10.c \
%D%/v64sf_log2.c \
%D%/v64sf_modf.c \
%D%/v64sf_numtest.c \
%D%/v64sf_pow.c \
%D%/v64sf_remainder.c \
%D%/v64sf_rint.c \
%D%/v64sf_scalb.c \
%D%/v64sf_scalbn.c \
%D%/v64sf_signif.c \
%D%/v64sf_sin.c \
%D%/v64sf_sine.c \
%D%/v64sf_sineh.c \
%D%/v64sf_sinh.c \
%D%/v64sf_sqrt.c \
%D%/v64sf_tan.c \
%D%/v64sf_tanh.c \
%D%/v64sf_tgamma.c
libm_a_SOURCES += $(%C%_src)

View File

@ -0,0 +1,321 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/* Macro library used to help during conversion of scalar math functions to
vectorized SIMD equivalents on AMD GCN. */
/* Optimization at -O2 and above currently results in ICEs (internal
compiler errors) when converting between vector types. */
/* Applies to every function defined after this point in the including
   translation unit.  */
#pragma GCC optimize ("O1")
/* Single-precision float vectors, 2 to 64 lanes (4 bytes per lane).  */
typedef float v2sf __attribute__ ((vector_size (8)));
typedef float v4sf __attribute__ ((vector_size (16)));
typedef float v8sf __attribute__ ((vector_size (32)));
typedef float v16sf __attribute__ ((vector_size (64)));
typedef float v32sf __attribute__ ((vector_size (128)));
typedef float v64sf __attribute__ ((vector_size (256)));
/* Double-precision vectors, 2 to 64 lanes (8 bytes per lane).  */
typedef double v2df __attribute__ ((vector_size (16)));
typedef double v4df __attribute__ ((vector_size (32)));
typedef double v8df __attribute__ ((vector_size (64)));
typedef double v16df __attribute__ ((vector_size (128)));
typedef double v32df __attribute__ ((vector_size (256)));
typedef double v64df __attribute__ ((vector_size (512)));
/* int vectors, 2 to 64 lanes; the byte sizes assume a 4-byte int.  */
typedef int v2si __attribute__ ((vector_size (8)));
typedef int v4si __attribute__ ((vector_size (16)));
typedef int v8si __attribute__ ((vector_size (32)));
typedef int v16si __attribute__ ((vector_size (64)));
typedef int v32si __attribute__ ((vector_size (128)));
typedef int v64si __attribute__ ((vector_size (256)));
/* The only unsigned vector type declared here.  */
typedef unsigned int v64usi __attribute__ ((vector_size (256)));
/* long vectors, 2 to 64 lanes; the byte sizes assume an 8-byte long.  */
typedef long v2di __attribute__ ((vector_size (16)));
typedef long v4di __attribute__ ((vector_size (32)));
typedef long v8di __attribute__ ((vector_size (64)));
typedef long v16di __attribute__ ((vector_size (128)));
typedef long v32di __attribute__ ((vector_size (256)));
typedef long v64di __attribute__ ((vector_size (512)));
/* Union overlaying every vector type declared above, so that a value stored
   through one member can be read back through another (bit-level
   reinterpretation).  All members start at offset zero, so a narrower
   member aliases the leading bytes of a wider one.  */
typedef union {
v2sf t_v2sf;
v4sf t_v4sf;
v8sf t_v8sf;
v16sf t_v16sf;
v32sf t_v32sf;
v64sf t_v64sf;
v2df t_v2df;
v4df t_v4df;
v8df t_v8df;
v16df t_v16df;
v32df t_v32df;
v64df t_v64df;
v2si t_v2si;
v4si t_v4si;
v8si t_v8si;
v16si t_v16si;
v32si t_v32si;
v64si t_v64si;
v64usi t_v64usi;
v2di t_v2di;
v4di t_v4di;
v8di t_v8di;
v16di t_v16di;
v32di t_v32di;
v64di t_v64di;
} vector_union;
/* Cast between vectors with a different number of elements. */
#define RESIZE_VECTOR(to_t, from) \
({ \
__auto_type __from = (from); \
*((to_t *) &__from); \
})
/* Bit-wise cast vector FROM to type TO_T. */
#define CAST_VECTOR(to_t, from) \
({ \
_Static_assert (sizeof (to_t) == sizeof (from)); \
union { \
typeof (from) __from; \
to_t __to; \
} __tmp; \
__tmp.__from = (from); \
__tmp.__to; \
})
/* "No additional condition": passes the current lane mask as the
   condition argument.  */
#define NO_COND __mask
/* Note - __mask is _not_ accounted for in VECTOR_MERGE! */
/* Bitwise lane select: yields (VEC1 & COND) | (VEC2 & ~COND), so each lane
   of COND is expected to be all-ones or all-zeroes.  VEC1 and VEC2 must
   have compatible types (checked at compile time).  COND is converted to
   v64si when VEC1 has the size of v64si and to v64di otherwise; the result
   has the type of VEC1.  */
#define VECTOR_MERGE(vec1, vec2, cond) \
({ \
_Static_assert (__builtin_types_compatible_p (typeof (vec1), typeof (vec2))); \
union { \
typeof (vec1) val; \
v64si t_v64si; \
v64di t_v64di; \
} __vec1, __vec2, __res; \
__vec1.val = (vec1); \
__vec2.val = (vec2); \
__builtin_choose_expr ( \
sizeof (vec1) == sizeof (v64si), \
({ \
v64si __bitmask = __builtin_convertvector ((cond), v64si); \
__res.t_v64si = (__vec1.t_v64si & __bitmask) \
| (__vec2.t_v64si & ~__bitmask); \
}), \
({ \
v64di __bitmask = __builtin_convertvector ((cond), v64di); \
__res.t_v64di = (__vec1.t_v64di & __bitmask) \
| (__vec2.t_v64di & ~__bitmask); \
})); \
__res.val; \
})
/* Vector analogue of "return RETVAL" for the lanes selected by COND: the
   value is stored into __ret for lanes that are set in both COND and
   __mask, and those COND lanes are then cleared from __mask so that later
   masked operations leave them alone.  Control flow continues; the actual
   return happens at FUNCTION_RETURN.  Requires __ret and __mask in scope.  */
#define VECTOR_RETURN(retval, cond) \
do { \
_Static_assert (__builtin_types_compatible_p (typeof (retval), typeof (__ret))); \
__auto_type __cond = __builtin_convertvector ((cond), typeof (__mask)); \
__auto_type __retval = (retval); \
VECTOR_COND_MOVE (__ret, __retval, __cond); \
__mask &= ~__cond; \
} while (0)
/* Assign VAL to VAR in the lanes set in both COND and __mask; every other
   lane of VAR keeps its previous value.  VAR and VAL must have compatible
   types.  COND is converted to the type of __mask.  */
#define VECTOR_COND_MOVE(var, val, cond) \
do { \
_Static_assert (__builtin_types_compatible_p (typeof (var), typeof (val))); \
__auto_type __cond = __builtin_convertvector ((cond), typeof (__mask)); \
var = VECTOR_MERGE ((val), var, __cond & __mask); \
} while (0)
/* Open a vector "if".  COND_VAR is declared as a new variable holding COND
   and __inv_cond holds its complement.  The body that follows only runs
   when at least one active lane (per ALL_ZEROES_P, which applies __mask)
   satisfies COND; statements in the body must still be predicated on
   COND_VAR by the caller.  Close with VECTOR_ENDIF.  */
#define VECTOR_IF(cond, cond_var) \
{ \
__auto_type cond_var = (cond); \
__auto_type __inv_cond = ~cond_var; \
if (!ALL_ZEROES_P (cond_var)) \
{
/* "else if" for a chain opened with VECTOR_IF: COND_VAR becomes the lanes
   satisfying COND that no earlier branch of the chain claimed, and those
   lanes are removed from __inv_cond.  */
#define VECTOR_ELSEIF(cond, cond_var) \
} \
cond_var = __inv_cond & (cond); \
__inv_cond &= ~(cond); \
if (!ALL_ZEROES_P (cond_var)) \
{
/* "else" for a chain opened with VECTOR_IF: COND_VAR becomes every lane
   not claimed by an earlier branch.  No enclosing condition is applied, so
   inside a nested chain use VECTOR_ELSE2 instead.  */
#define VECTOR_ELSE(cond_var) \
} \
cond_var = __inv_cond; \
if (!ALL_ZEROES_P (cond_var)) \
{
/* Nested variant of VECTOR_IF: COND_VAR is COND restricted to the lanes of
   the enclosing condition PREV_COND_VAR.  Note that __inv_cond is the
   complement of COND alone, not of COND_VAR.  */
#define VECTOR_IF2(cond, cond_var, prev_cond_var) \
{ \
__auto_type cond_var = (cond) & __builtin_convertvector (prev_cond_var, typeof (cond)); \
__auto_type __inv_cond = ~(cond); \
if (!ALL_ZEROES_P (cond_var)) \
{
/* Nested "else if": like VECTOR_ELSEIF but additionally restricted to the
   lanes of PREV_COND_VAR.  */
#define VECTOR_ELSEIF2(cond, cond_var, prev_cond_var) \
} \
cond_var = (cond) & __inv_cond & __builtin_convertvector (prev_cond_var, typeof (cond)); \
__inv_cond &= ~(cond); \
if (!ALL_ZEROES_P (cond_var)) \
{
/* Nested "else": the lanes of PREV_COND_VAR not claimed by an earlier
   branch of the nested chain.  */
#define VECTOR_ELSE2(cond_var, prev_cond_var) \
} \
cond_var = __inv_cond & __builtin_convertvector (prev_cond_var, typeof (__inv_cond)); \
if (!ALL_ZEROES_P (cond_var)) \
{
/* Close the innermost branch body and the scope opened by VECTOR_IF or
   VECTOR_IF2.  */
#define VECTOR_ENDIF \
} \
}
/* Build a 64-lane vector of type TYPE with every lane set to X.  X is
   evaluated once.  */
#define VECTOR_INIT_AUX(x, type) \
({ \
typeof (x) __e = (x); \
type __tmp = { \
__e, __e, __e, __e, __e, __e, __e, __e, \
__e, __e, __e, __e, __e, __e, __e, __e, \
__e, __e, __e, __e, __e, __e, __e, __e, \
__e, __e, __e, __e, __e, __e, __e, __e, \
__e, __e, __e, __e, __e, __e, __e, __e, \
__e, __e, __e, __e, __e, __e, __e, __e, \
__e, __e, __e, __e, __e, __e, __e, __e, \
__e, __e, __e, __e, __e, __e, __e, __e }; \
__tmp; \
})
/* Broadcast scalar X to a 64-lane vector whose element type is chosen from
   the type of X: int -> v64si, unsigned -> v64usi, long -> v64di,
   float -> v64sf, double -> v64df.  Any other scalar type has no matching
   _Generic association and is rejected at compile time, so the suffix of a
   literal argument matters (0 vs 0L, 1.0f vs 1.0).  */
#define VECTOR_INIT(x) \
(_Generic ((x), int: VECTOR_INIT_AUX ((x), v64si), \
unsigned: VECTOR_INIT_AUX ((x), v64usi), \
long: VECTOR_INIT_AUX ((x), v64di), \
float: VECTOR_INIT_AUX ((x), v64sf), \
double: VECTOR_INIT_AUX ((x), v64df)))
/* Number of lanes in vector type TYPE: its size divided by 4 for the
   float/int vector types recognised by V_SF_SI_P and by 8 for everything
   else.  */
#define VECTOR_WIDTH(TYPE) (sizeof (TYPE) / (V_SF_SI_P (TYPE) ? 4 : 8))
/* Non-zero when TYPE is one of the float (vNsf) or int (vNsi) vector
   types.  v64usi is not in the list, so it is classified with the 8-byte
   types - NOTE(review): confirm that no caller passes v64usi here.  */
#define V_SF_SI_P(TYPE) \
(__builtin_types_compatible_p (TYPE, v2sf) \
|| __builtin_types_compatible_p (TYPE, v4sf) \
|| __builtin_types_compatible_p (TYPE, v8sf) \
|| __builtin_types_compatible_p (TYPE, v16sf) \
|| __builtin_types_compatible_p (TYPE, v32sf) \
|| __builtin_types_compatible_p (TYPE, v64sf) \
|| __builtin_types_compatible_p (TYPE, v2si) \
|| __builtin_types_compatible_p (TYPE, v4si) \
|| __builtin_types_compatible_p (TYPE, v8si) \
|| __builtin_types_compatible_p (TYPE, v16si) \
|| __builtin_types_compatible_p (TYPE, v32si) \
|| __builtin_types_compatible_p (TYPE, v64si))
/* Build the initial lane mask for a routine operating on vector type TYPE:
   the first VECTOR_WIDTH (TYPE) lanes are all-ones and the remaining lanes
   are zero.  The result is a v64si for the float/int vector types (see
   V_SF_SI_P) and a v64di otherwise.

   The lanes are written through the same union member that is returned.
   Writing 64-bit lanes and then reading the mask back as v64si would
   enable two 32-bit lanes per requested lane.  */
#define VECTOR_INIT_MASK(TYPE) \
({ \
  vector_union __mask; \
  /* t_v64di is the widest member, so this clears the whole union.  */ \
  __mask.t_v64di = VECTOR_INIT (0L); \
  for (int i = 0; i < VECTOR_WIDTH (TYPE); i++) \
    { \
      if (V_SF_SI_P (TYPE)) \
	__mask.t_v64si[i] = -1; \
      else \
	__mask.t_v64di[i] = -1; \
    } \
  __builtin_choose_expr (V_SF_SI_P (TYPE), __mask.t_v64si, __mask.t_v64di); \
})
/* Non-zero when no active lane (per __mask) of vector condition X is set.  */
#define ALL_ZEROES_P(x) (COND_TO_BITMASK(x) == 0)
/* Reduce vector condition X, restricted to the lanes active in __mask, to a
   scalar long using a GCN vector compare-not-equal-to-zero instruction.
   The 32-bit compare is used when __mask is 256 bytes (v64si) and the
   64-bit compare otherwise.  The result is presumably one bit per lane -
   NOTE(review): confirm against the GCN ISA description of v_cmp_ne_*.
   Requires __mask in scope.  */
#define COND_TO_BITMASK(x) \
({ \
long __tmp = 0; \
__auto_type __x = __builtin_convertvector((x), typeof (__mask)) & __mask; \
__builtin_choose_expr (sizeof (__mask) == 256, \
({ asm ("v_cmp_ne_u32_e64 %0, %1, 0" \
: "=Sg" (__tmp) \
: "v" (__x)); }), \
({ asm ("v_cmp_ne_u64_e64 %0, %1, 0" \
: "=Sg" (__tmp) \
: "v" (__x)); })); \
__tmp; \
})
/* Open a vector "while" loop.  COND_VAR starts as PREV_COND_VAR and is
   AND-ed with COND at the top of every iteration, so a lane that fails
   COND once stays disabled.  The loop exits when no active lane remains
   set in COND_VAR.  The body must predicate its statements on COND_VAR.
   Close with VECTOR_ENDWHILE.  */
#define VECTOR_WHILE(cond, cond_var, prev_cond_var) \
{ \
__auto_type cond_var = prev_cond_var; \
for (;;) { \
cond_var &= (cond); \
if (ALL_ZEROES_P (cond_var)) \
break;
/* Close the loop and the scope opened by VECTOR_WHILE.  */
#define VECTOR_ENDWHILE \
} \
}
/* Emit the header of the 64-lane single-precision worker
   v64sf_<name>_aux (args, v64si __mask).  The trailing __mask parameter is
   the lane mask the control-flow macros in this header refer to.  */
#define DEF_VS_MATH_FUNC(rettype, name, args...) \
rettype v64sf##_##name##_aux (args, v64si __mask)
/* As above for the double-precision worker v64df_<name>_aux, whose mask is
   a v64di.  */
#define DEF_VD_MATH_FUNC(rettype, name, args...) \
rettype v64df##_##name##_aux (args, v64di __mask)
/* Use this for predicate functions that take a vector of doubles but
return a vector of ints. */
#define DEF_VD_MATH_PRED(rettype, name, args...) \
rettype v64df##_##name##_aux (args, v64si __mask)
/* Declare the result variable __ret that VECTOR_RETURN writes to.  It is
   not initialised here.  */
#define FUNCTION_INIT(rettype) \
rettype __ret
/* Return the accumulated result; the caller supplies the semicolon.  */
#define FUNCTION_RETURN \
return __ret
/* Define the COUNT-lane entry point v<COUNT><TARG>_<FUN> for a
   one-argument routine.  The argument is resized to 64 lanes, a mask
   enabling COUNT lanes is built from the return type, the 64-lane worker
   v64<TARG>_<FUN>_aux is called, and the result is resized back to COUNT
   lanes.  */
#define DEF_VARIANT(FUN, TRET, TARG, COUNT) \
v##COUNT##TRET \
v##COUNT##TARG##_##FUN (v##COUNT##TARG __arg) \
{ \
__auto_type __upsized_arg = RESIZE_VECTOR (v64##TARG, __arg); \
__auto_type __mask = VECTOR_INIT_MASK (v##COUNT##TRET); \
__auto_type __result = v64##TARG##_##FUN##_aux (__upsized_arg, __mask); \
return RESIZE_VECTOR (v##COUNT##TRET, __result); \
}
/* Two-argument form of DEF_VARIANT; both arguments have the same type.  */
#define DEF_VARIANT2(FUN, TRET, TARG, COUNT) \
v##COUNT##TRET \
v##COUNT##TARG##_##FUN (v##COUNT##TARG __arg1, v##COUNT##TARG __arg2) \
{ \
__auto_type __upsized_arg1 = RESIZE_VECTOR (v64##TARG, __arg1); \
__auto_type __upsized_arg2 = RESIZE_VECTOR (v64##TARG, __arg2); \
__auto_type __mask = VECTOR_INIT_MASK (v##COUNT##TRET); \
__auto_type __result = v64##TARG##_##FUN##_aux (__upsized_arg1, __upsized_arg2, __mask); \
return RESIZE_VECTOR (v##COUNT##TRET, __result); \
}
/* Define the 2-, 4-, 8-, 16-, 32- and 64-lane entry points of a
   one-argument routine.  RETTYPE and ARGTYPE are type suffixes such as
   df, sf or si.  */
#define DEF_VARIANTS(FUN, RETTYPE, ARGTYPE) \
DEF_VARIANT (FUN, RETTYPE, ARGTYPE, 2) \
DEF_VARIANT (FUN, RETTYPE, ARGTYPE, 4) \
DEF_VARIANT (FUN, RETTYPE, ARGTYPE, 8) \
DEF_VARIANT (FUN, RETTYPE, ARGTYPE, 16) \
DEF_VARIANT (FUN, RETTYPE, ARGTYPE, 32) \
DEF_VARIANT (FUN, RETTYPE, ARGTYPE, 64)
/* As DEF_VARIANTS for two-argument routines.  */
#define DEF_VARIANTS2(FUN, RETTYPE, ARGTYPE) \
DEF_VARIANT2 (FUN, RETTYPE, ARGTYPE, 2) \
DEF_VARIANT2 (FUN, RETTYPE, ARGTYPE, 4) \
DEF_VARIANT2 (FUN, RETTYPE, ARGTYPE, 8) \
DEF_VARIANT2 (FUN, RETTYPE, ARGTYPE, 16) \
DEF_VARIANT2 (FUN, RETTYPE, ARGTYPE, 32) \
DEF_VARIANT2 (FUN, RETTYPE, ARGTYPE, 64)

View File

@ -0,0 +1,242 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/* Common header file for AMD GCN vector math routines. */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Copyright (c) 2017-2018 Arm Ltd. All rights reserved.
SPDX-License-Identifier: BSD-3-Clause
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the company may not be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* This header is partially based on:
newlib/libm/common/fdlibm.h
newlib/libm/mathfp/zmath.h
newlib/libm/common/math_errf.c
newlib/libm/common/math_config.h */
#include <errno.h>
#include <sys/types.h>
#include <machine/ieeefp.h>
#include "amdgcn_veclib.h"
/* Vectorized versions of macros from newlib/libm/common/fdlibm.h */
/* Split every double lane of X into its upper 32 bits (HI) and lower
   32 bits (LO), each converted to the vector type of the destination.
   Unconditional: all lanes of HI and LO are written and __mask is not
   consulted.  */
#define EXTRACT_WORDS(hi, lo, x) \
do { \
vector_union __tmp; \
__tmp.t_v64df = (x); \
hi = __builtin_convertvector (__tmp.t_v64di >> 32, typeof (hi)); \
lo = __builtin_convertvector (__tmp.t_v64di & 0xffffffff, typeof (lo)); \
} while (0)
/* Assemble doubles from the 32-bit words HI and LO and store them into X
   for the lanes selected by COND and __mask.  LO is masked to 32 bits after
   widening, so a sign-extended low word cannot disturb the high word.  */
#define INSERT_WORDS(x, hi, lo, cond) \
do { \
vector_union __tmp; \
__tmp.t_v64di = __builtin_convertvector (hi, v64di) << 32 | \
__builtin_convertvector (lo, v64di) & 0xffffffff; \
VECTOR_COND_MOVE (x, __tmp.t_v64df, cond); \
} while (0)
/* Store the upper 32 bits of every double lane of Y into the v64si X, for
   the lanes selected by COND and __mask.  */
#define GET_HIGH_WORD(x, y, cond) \
do { \
vector_union __tmp; \
__tmp.t_v64df = (y); \
VECTOR_COND_MOVE (x, __builtin_convertvector (__tmp.t_v64di >> 32, v64si), \
(cond)); \
} while (0)
/* Store the lower 32 bits of every double lane of Y into the v64si X, for
   the lanes selected by COND and __mask.  */
#define GET_LOW_WORD(x, y, cond) \
do { \
vector_union __tmp; \
__tmp.t_v64df = (y); \
VECTOR_COND_MOVE (x, __builtin_convertvector (__tmp.t_v64di & 0xffffffff, \
v64si), (cond)); \
} while (0)
/* Replace the upper 32 bits of every double lane of X with Y, keeping the
   lower 32 bits, for the lanes selected by COND and __mask.  */
#define SET_HIGH_WORD(x, y, cond) \
do { \
vector_union __tmp; \
__tmp.t_v64df = x; \
__tmp.t_v64di &= 0xffffffff; \
__tmp.t_v64di |= __builtin_convertvector (y, v64di) << 32; \
VECTOR_COND_MOVE (x, __tmp.t_v64df, (cond)); \
} while (0)
/* Replace the lower 32 bits of every double lane of X with Y, keeping the
   upper 32 bits, for the lanes selected by COND and __mask.

   Y is masked to 32 bits after being widened to 64-bit lanes.  Without the
   mask, a signed 32-bit Y with bit 31 set would be sign-extended and
   overwrite the upper word with all ones.  */
#define SET_LOW_WORD(x, y, cond) \
do { \
  vector_union __tmp; \
  __tmp.t_v64df = x; \
  __tmp.t_v64di &= 0xffffffff00000000ULL; \
  __tmp.t_v64di |= __builtin_convertvector (y, v64di) & 0xffffffff; \
  VECTOR_COND_MOVE (x, __tmp.t_v64df, (cond)); \
} while (0)
/* Store the bit pattern of every float lane of Y into the v64si X, for the
   lanes selected by COND and __mask.  */
#define GET_FLOAT_WORD(x, y, cond) \
VECTOR_COND_MOVE(x, CAST_VECTOR(v64si, (y)), (cond))
/* Store the 32-bit patterns in Y, reinterpreted as floats, into the v64sf
   X, for the lanes selected by COND and __mask.  */
#define SET_FLOAT_WORD(x, y, cond) \
VECTOR_COND_MOVE(x, CAST_VECTOR(v64sf, (y)), (cond))
/* Definitions from newlib/libm/common/fdlibm.h */
/* The FLT_UWORD_* tests compare 32-bit float bit patterns against fixed
   thresholds.  Presumably X is the pattern with the sign bit already
   cleared - NOTE(review): confirm at the call sites.  */
#ifdef _FLT_LARGEST_EXPONENT_IS_NORMAL
/* In this configuration the finite test always holds and the NaN and
   infinity tests never do.  */
#define FLT_UWORD_IS_FINITE(x) ((x) == (x))
#define FLT_UWORD_IS_NAN(x) ((x) != (x))
#define FLT_UWORD_IS_INFINITE(x) ((x) != (x))
#define FLT_UWORD_MAX 0x7fffffff
#define FLT_UWORD_EXP_MAX 0x43010000
#define FLT_UWORD_LOG_MAX 0x42b2d4fc
#define FLT_UWORD_LOG_2MAX 0x42b437e0
#define HUGE ((float)0X1.FFFFFEP128)
#else
/* 0x7f800000 is the IEEE-754 binary32 pattern of +infinity.  */
#define FLT_UWORD_IS_FINITE(x) ((x)<0x7f800000)
#define FLT_UWORD_IS_NAN(x) ((x)>0x7f800000)
#define FLT_UWORD_IS_INFINITE(x) ((x)==0x7f800000)
#define FLT_UWORD_MAX 0x7f7fffffL
#define FLT_UWORD_EXP_MAX 0x43000000
#define FLT_UWORD_LOG_MAX 0x42b17217
#define FLT_UWORD_LOG_2MAX 0x42b2d4fc
#define HUGE ((float)3.40282346638528860e+38)
#endif
#define FLT_UWORD_HALF_MAX (FLT_UWORD_MAX-(1L<<23))
#define FLT_LARGEST_EXP (FLT_UWORD_MAX>>23)
#ifdef _FLT_NO_DENORMALS
/* Without denormals, every pattern below the smallest normal number
   (0x00800000) counts as zero and nothing is subnormal.  */
#define FLT_UWORD_IS_ZERO(x) ((x)<0x00800000)
#define FLT_UWORD_IS_SUBNORMAL(x) ((x) != (x))
#define FLT_UWORD_MIN 0x00800000
#define FLT_UWORD_EXP_MIN 0x42fc0000
#define FLT_UWORD_LOG_MIN 0x42aeac50
#define FLT_SMALLEST_EXP 1
#else
#define FLT_UWORD_IS_ZERO(x) ((x)==0)
#define FLT_UWORD_IS_SUBNORMAL(x) ((x)<0x00800000)
#define FLT_UWORD_MIN 0x00000001
#define FLT_UWORD_EXP_MIN 0x43160000
#define FLT_UWORD_LOG_MIN 0x42cff1b5
#define FLT_SMALLEST_EXP -22
#endif
/* Definitions from newlib/libm/mathfp/zmath.h */
/* NOTE(review): NUM, NAN and INF are small integer codes - presumably the
   classification results of the numtest routines; confirm there.  NAN is
   not the <math.h> macro of the same name, so this header must not be
   combined with <math.h>.  */
#define NUM 3
#define NAN 2
#define INF 1
#define __PI 3.14159265358979323846
#define __SQRT_HALF 0.70710678118654752440
#define __PI_OVER_TWO 1.57079632679489661923132
#define __INV_PI_OVER_TWO_2_24 10680707.430881743590348355907974
/* Read-only overlay of a double with two 32-bit words, used to define a
   double by its bit pattern.  Which element of L is the high word depends
   on the byte order.  */
typedef const union
{
unsigned int l[2];
double d;
} udouble;
/* Read-only overlay of a float with its 32-bit pattern.  */
typedef const union
{
unsigned int l;
float f;
} ufloat;
/* Constants shared by the vector math routines; only declared here.  */
extern double BIGX;
extern double SMALLX;
extern udouble z_infinity;
extern udouble z_notanum;
extern double z_rooteps;
extern ufloat z_infinity_f;
extern ufloat z_notanum_f;
extern float z_rooteps_f;
/* Vectorized versions of functions from newlib/libm/common/math_errf.c */
/* Overflow helper: sets errno to ERANGE and returns, per lane, the product
   of 0x1p97f with either -0x1p97f or +0x1p97f.  The negative factor is
   taken where SIGN is set; the selection is a bitwise merge, so SIGN lanes
   should be all-ones or all-zeroes.  __mask is not involved.  */
static v64sf v64sf_math_oflowf (v64si sign)
{
  v64sf negative_big = VECTOR_INIT (-0x1p97f);
  v64sf positive_big = VECTOR_INIT (0x1p97f);
  v64sf signed_big = VECTOR_MERGE (negative_big, positive_big, sign);

  errno = ERANGE;
  return signed_big * 0x1p97f;
}
/* Underflow helper: sets errno to ERANGE and returns, per lane, the product
   of 0x1p-95f with either -0x1p-95f or +0x1p-95f.  The negative factor is
   taken where SIGN is set; the selection is a bitwise merge, so SIGN lanes
   should be all-ones or all-zeroes.  __mask is not involved.  */
static v64sf v64sf_math_uflowf (v64si sign)
{
  v64sf negative_tiny = VECTOR_INIT (-0x1p-95f);
  v64sf positive_tiny = VECTOR_INIT (0x1p-95f);
  v64sf signed_tiny = VECTOR_MERGE (negative_tiny, positive_tiny, sign);

  errno = ERANGE;
  return signed_tiny * 0x1p-95f;
}
/* Vectorized versions of functions from newlib/libm/common/math_config.h */
/* Return a vector that is all-ones in every lane of X holding a signalling
   NaN and zero in every other lane.  All 64 lanes are examined; no lane
   mask is applied.

   Uses the IEEE-754 2008 encoding, where a signalling NaN has
     - all exponent bits set,
     - the most significant significand bit (the quiet bit) clear, and
     - at least one other significand bit set.
   The last condition is what distinguishes a signalling NaN from an
   infinity, which also has an all-ones exponent and a clear quiet bit.  */
static v64si v64sf_issignalingf_inline (v64sf x)
{
  v64si ix = CAST_VECTOR (v64si, x);

  v64si exponent_all_ones = (ix & 0x7f800000) == 0x7f800000;
  v64si quiet_bit_clear = (ix & 0x00400000) == 0;
  v64si payload_nonzero = (ix & 0x003fffff) != 0;

  return exponent_all_ones & quiet_bit_clear & payload_nonzero;
}
/* Vector extensions to sys/reent.h */
/* Per-context state of the vector math library.  At present it only holds
   the 64-lane counterpart of signgam.  */
struct v64_reent {
v64si _v64si_gamma_signgam;
};
/* Pointer to the context currently in use.  */
extern struct v64_reent *_v64_reent;
#define _V64_REENT _v64_reent
/* The signgam vector inside the context PTR (an lvalue).  */
#define _REENT_V64SI_SIGNGAM(ptr) ((ptr)->_v64si_gamma_signgam)
/* Vector extensions to math.h */
/* The signgam vector of the current context, as an lvalue.  */
#define v64si_signgam (*__v64si_signgam())
/* Returns the address of the signgam vector.  */
extern v64si* __v64si_signgam (void);
#define __v64si_signgam_r(ptr) _REENT_V64SI_SIGNGAM(ptr)

View File

@ -0,0 +1,52 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/* Based on newlib/libm/mathfp/s_mathcnst.c in Newlib. */
#include "amdgcnmach.h"
/* Constants shared by the vector math routines.  */
double BIGX = 7.09782712893383973096e+02;
double SMALLX = -7.45133219101941108420e+02;
double z_rooteps = 7.4505859692e-9;
float z_rooteps_f = 1.7263349182589107e-4;

/* Single-precision bit patterns: +infinity (twice) and a quiet NaN.  */
ufloat z_hugeval_f = { 0x7f800000 };
ufloat z_infinity_f = { 0x7f800000 };
ufloat z_notanum_f = { 0x7fd00000 };

/* Double-precision bit patterns, given as two 32-bit words in memory
   order: the high word comes first on big-endian targets and second on
   little-endian ones.  0x7ff00000 is the high word of +infinity and
   0x7ff80000 that of a quiet NaN.  Both byte orders must describe the same
   values; the big-endian NaN previously used 0xeff80000, which encodes a
   finite negative number.  */
#ifdef __IEEE_BIG_ENDIAN
udouble z_hugeval = { 0x7ff00000, 0 };
udouble z_infinity = { 0x7ff00000, 0 };
udouble z_notanum = { 0x7ff80000, 0 };
#else /* __IEEE_LITTLE_ENDIAN */
udouble z_hugeval = { 0, 0x7ff00000 };
udouble z_infinity = { 0, 0x7ff00000 };
udouble z_notanum = { 0, 0x7ff80000 };
#endif /* __IEEE_LITTLE_ENDIAN */
#endif /* __IEEE_LITTLE_ENDIAN */

View File

@ -0,0 +1,24 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
#include "amdgcnmach.h"
static struct v64_reent __reent;
struct v64_reent *_v64_reent = &__reent;
v64si*
__v64si_signgam (void)
{
return &_REENT_V64SI_SIGNGAM(_V64_REENT);
}

View File

@ -0,0 +1,42 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/* Based on newlib/libm/mathfp/s_acos.c in Newlib. */
#include "amdgcnmach.h"
v64df v64df_asine_aux (v64df x, int acosine, v64di);
DEF_VD_MATH_FUNC (v64df, acos, v64df x)
{
return v64df_asine_aux(x, 1, __mask);
}
DEF_VARIANTS (acos, df, df)

View File

@ -0,0 +1,81 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*
*/
/* Based on newlib/libm/mathfp/e_acosh.c in Newlib. */
#include "amdgcnmach.h"
v64df v64df_log_aux (v64df, v64di);
v64df v64df_log1p_aux (v64df, v64di);
v64df v64df_sqrt_aux (v64df, v64di);

/* 64-lane inverse hyperbolic cosine worker.

   Each lane is classified by the high word of x and handled by one case:
     x < 1            (x-x)/(x-x), which is NaN
     x is inf or NaN  x + x
     x >= 2**28       log (x) + ln2
     x == 1           0
     2 < x < 2**28    log (2x - 1 / (x + sqrt (x*x - 1)))
     1 < x <= 2       log1p (t + sqrt (2t + t*t)), with t = x - 1

   Every case finishes its lanes with VECTOR_RETURN, which removes them from
   __mask, so a lane is written by the first case that matches it.  The
   "else" of the nested inf/NaN test uses VECTOR_ELSE2 so that it is limited
   to the lanes of the enclosing x >= 2**28 test.  A plain VECTOR_ELSE would
   also select still-active lanes with smaller x and give them the
   huge-argument result.  */
DEF_VD_MATH_FUNC (v64df, acosh, v64df x)
{
  static const double one = 1.0;
  static const double ln2 = 6.93147180559945286227e-01; /* 0x3FE62E42, 0xFEFA39EF */

  FUNCTION_INIT (v64df);

  v64si hx, lx;
  EXTRACT_WORDS (hx, lx, x);

  /* hx is signed, so this also catches every negative x.  */
  VECTOR_IF (hx < 0x3ff00000, cond)		/* x < 1 */
    VECTOR_RETURN ((x-x) / (x-x), cond);
  VECTOR_ENDIF

  VECTOR_IF (hx >= 0x41b00000, cond)		/* x > 2**28 */
    VECTOR_IF2 (hx >= 0x7ff00000, cond2, cond)	/* x is inf or NaN */
      VECTOR_RETURN (x+x, cond2);
    VECTOR_ELSE2 (cond2, cond)
      /* acosh(huge)=log(2x) */
      VECTOR_RETURN (v64df_log_aux (x, __mask) + ln2, cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF

  VECTOR_IF (((hx - 0x3ff00000) | lx) == 0, cond)
    /* acosh(1) = 0 */
    VECTOR_RETURN (VECTOR_INIT (0.0), cond);
  VECTOR_ENDIF

  VECTOR_IF (hx > 0x40000000, cond)		/* 2**28 > x > 2 */
  {
    v64df t = x * x;
    VECTOR_RETURN (v64df_log_aux (2.0*x - one /
				  (x + v64df_sqrt_aux (t - one, __mask)),
				  __mask),
		   cond);
  }
  VECTOR_ELSE (cond)				/* 1<x<2 */
  {
    v64df t = x - one;
    VECTOR_RETURN (v64df_log1p_aux (t + v64df_sqrt_aux (2.0*t + t*t, __mask),
				    __mask),
		   cond);
  }
  VECTOR_ENDIF

  FUNCTION_RETURN;
}

/* Entry points v2df_acosh ... v64df_acosh.  */
DEF_VARIANTS (acosh, df, df)

View File

@ -0,0 +1,41 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/* Based on newlib/libm/mathfp/s_asin.c in Newlib. */
#include "amdgcnmach.h"
v64df v64df_asine_aux (v64df x, int acosine, v64di __mask);
DEF_VD_MATH_FUNC (v64df, asin, v64df x)
{
return v64df_asine_aux (x, 0, __mask);
}
DEF_VARIANTS (asin, df, df)

View File

@ -0,0 +1,135 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/******************************************************************
* The following routines are coded directly from the algorithms
* and coefficients given in "Software Manual for the Elementary
* Functions" by William J. Cody, Jr. and William Waite, Prentice
* Hall, 1980.
******************************************************************/
/* Based on newlib/libm/mathfp/s_asine.c in Newlib. */
#include "amdgcnmach.h"
/* Helpers declared here and defined elsewhere.  v64df_numtest yields a
   per-lane classification that asine compares against NAN and INF;
   v64df_sqrt_aux takes a value vector plus a v64di argument (asine passes
   __mask).  */
v64si v64df_numtest (v64df);
v64df v64df_sqrt_aux (v64df, v64di);
/* Numerator coefficients p[0]..p[4] of the rational approximation
   P(g) / Q(g) evaluated in asine.  */
static const double p[] = { -0.27368494524164255994e+2,
0.57208227877891731407e+2,
-0.39688862997404877339e+2,
0.10152522233806463645e+2,
-0.69674573447350646411 };
/* Denominator coefficients q[0]..q[4].  asine evaluates Q as a monic
   polynomial, so the leading coefficient of 1 is implicit.  */
static const double q[] = { -0.16421096714498560795e+3,
0.41714430248260412556e+3,
-0.38186303361750149284e+3,
0.15095270841030604719e+3,
-0.23823859153670238830e+2 };
/* Additive constants chosen per lane in asine: a[] = { 0, pi/4 }.  */
static const double a[] = { 0.0, 0.78539816339744830962 };
/* b[] = { pi/2, pi/4 }; read only on the acosine path for x < 0.  */
static const double b[] = { 1.57079632679489661923, 0.78539816339744830962 };
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsv)
/* Shared kernel behind the vector asin/acos entry points.

   X is the argument vector.  ACOSINE == 0 selects the arcsine combination
   at the end of the function; any other value selects the arccosine one.
   Lanes classified as NAN or INF set errno to EDOM; lanes with |x| > 1 set
   errno to ERANGE and return z_notanum.d.

   NOTE(review): the VECTOR_* / FUNCTION_* macros, __mask, errno handling and
   z_rooteps come from amdgcnmach.h, which is not visible here.  The per-lane
   masking described in the comments below is inferred from how the macros
   are used in this function -- confirm against the header.  */
DEF_VD_MATH_FUNC (v64df, asine, v64df x, int acosine)
{
FUNCTION_INIT (v64df);
/* Per-lane flag, set to -1 for lanes that go through the |x| > 0.5
   reduction below.  */
v64si branch = VECTOR_INIT (0);
/* Check for special values. */
v64si i = v64df_numtest (x);
VECTOR_IF ((i == NAN) | (i == INF), cond)
errno = EDOM;
/* Merge of x and z_infinity.d keyed on i == NAN; presumably NaN lanes get
   x back and infinite lanes get z_infinity.d -- confirm VECTOR_MERGE's
   operand order.  */
VECTOR_RETURN (VECTOR_MERGE (x, VECTOR_INIT (z_infinity.d),
i == NAN),
cond);
VECTOR_ENDIF
v64df y = __builtin_gcn_fabsv (x);
v64df g, res;
VECTOR_IF (y > 0.5, cond)
/* i is reused from here on as the index into a[] / b[].  */
VECTOR_COND_MOVE (i, VECTOR_INIT (1 - acosine), cond);
/* Check for range error. */
VECTOR_IF2 (y > 1.0, cond2, cond)
errno = ERANGE;
VECTOR_RETURN (VECTOR_INIT (z_notanum.d), cond2);
VECTOR_ENDIF
/* Argument reduction: g = (1 - |x|) / 2, y = -2 * sqrt (g).  */
VECTOR_COND_MOVE (g, (1.0 - y) / 2.0, cond);
VECTOR_COND_MOVE (y, -2.0 * v64df_sqrt_aux (g, __mask), cond);
VECTOR_COND_MOVE (branch, VECTOR_INIT (-1), cond);
VECTOR_ELSE (cond)
VECTOR_COND_MOVE (i, VECTOR_INIT (acosine), cond);
/* Below z_rooteps the result is y itself; otherwise g = y^2 feeds the
   rational approximation.  */
VECTOR_IF2 (y < z_rooteps, cond2, cond)
VECTOR_COND_MOVE (res, y, cond2);
VECTOR_ELSE2 (cond2, cond)
VECTOR_COND_MOVE (g, y * y, cond2);
VECTOR_ENDIF
VECTOR_ENDIF
/* Reduced lanes have a negative y at this point, so they are selected via
   the branch flag rather than the magnitude test.  */
VECTOR_IF ((y >= z_rooteps) | __builtin_convertvector(branch, v64di), cond)
{
/* Evaluate the rational approximation R(g) = P(g) / Q(g). */
v64df P = ((((p[4] * g + p[3]) * g + p[2]) * g + p[1]) * g + p[0]) * g;
v64df Q = ((((g + q[4]) * g + q[3]) * g + q[2]) * g + q[1]) * g + q[0];
v64df R = P / Q;
VECTOR_COND_MOVE (res, y + y * R, cond);
}
VECTOR_ENDIF
/* a[1] for lanes with i != 0, a[0] otherwise (assuming VECTOR_MERGE picks
   its first operand where the condition holds).  */
v64df a_i = VECTOR_MERGE (VECTOR_INIT (a[1]), VECTOR_INIT (a[0]), i != 0);
/* Calculate asine or acose. */
if (acosine == 0)
{
/* The constant is added in two halves: (a_i + res) + a_i.  */
VECTOR_COND_MOVE (res, (a_i + res) + a_i, NO_COND);
VECTOR_IF (x < 0.0, cond)
VECTOR_COND_MOVE (res, -res, cond);
VECTOR_ENDIF
}
else
{
v64df b_i = VECTOR_MERGE (VECTOR_INIT(b[1]), VECTOR_INIT(b[0]), i != 0);
/* Negative inputs combine with b[], non-negative inputs with a[].  */
VECTOR_IF (x < 0.0, cond)
VECTOR_COND_MOVE (res, (b_i + res) + b_i, cond);
VECTOR_ELSE (cond)
VECTOR_COND_MOVE (res, (a_i - res) + a_i, cond);
VECTOR_ENDIF
}
VECTOR_RETURN (res, NO_COND);
FUNCTION_RETURN;
}
#endif

View File

@ -0,0 +1,81 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/mathfp/s_asinh.c in Newlib. */
#include "amdgcnmach.h"
/* Helpers declared here and defined elsewhere; each takes a value vector
   and a v64di argument (this file always passes __mask).  */
v64df v64df_log_aux (v64df, v64di);
v64df v64df_log1p_aux (v64df, v64di);
v64df v64df_sqrt_aux (v64df, v64di);

#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsv)

/* Vector asinh.

   Per lane, selected by the high 32 bits of x:
     - inf or NaN:          x + x
     - |x| < 2**-28:        x
     - |x| > 2**28:         log (|x|) + ln2
     - 2 < |x| <= 2**28:    log (2|x| + 1 / (sqrt (x*x + 1) + |x|))
     - otherwise:           log1p (|x| + x*x / (1 + sqrt (1 + x*x)))
   The sign of x is applied to the result at the end.

   NOTE(review): the VECTOR_* / FUNCTION_* macros, GET_HIGH_WORD and __mask
   come from amdgcnmach.h, which is not visible here -- the lane-masking
   behaviour is inferred from their use.  */
DEF_VD_MATH_FUNC (v64df, asinh, v64df x)
{
  static const double one = 1.00000000000000000000e+00; /* 0x3FF00000, 0x00000000 */
  static const double ln2 = 6.93147180559945286227e-01; /* 0x3FE62E42, 0xFEFA39EF */
  static const double huge = 1.00000000000000000000e+300;

  FUNCTION_INIT (v64df);

  v64df w;
  v64si hx;
  GET_HIGH_WORD (hx, x, NO_COND);
  v64si ix = hx & 0x7fffffff;	/* High word with the sign bit cleared.  */

  VECTOR_IF (ix >=0x7ff00000, cond)	/* x is inf or NaN */
    VECTOR_RETURN (x + x, cond);
  VECTOR_ENDIF

  VECTOR_IF (ix < 0x3e300000, cond)	/* |x|<2**-28 */
    /* return x inexact except 0.  The return is keyed on the inner guard
       mask (cond2); previously the guard was computed and then ignored in
       favour of the outer mask.  */
    VECTOR_IF2 (__builtin_convertvector(huge+x > one, v64si), cond2, cond)
      VECTOR_RETURN (x, cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF

  VECTOR_IF (ix > 0x41b00000, cond)	/* x > 2**28 */
    VECTOR_COND_MOVE (w, v64df_log_aux (__builtin_gcn_fabsv (x), __mask) + ln2,
		      cond);
  VECTOR_ELSEIF (ix > 0x40000000, cond)	/* 2**28 > |x| > 2.0 */
    v64df t = __builtin_gcn_fabsv (x);
    VECTOR_COND_MOVE (w, v64df_log_aux (2.0 * t + one / (v64df_sqrt_aux (x*x + one, __mask) + t), __mask),
		      cond);
  VECTOR_ELSE (cond)			/* 2.0 > |x| > 2**-28 */
    v64df t = x * x;
    VECTOR_COND_MOVE (w, v64df_log1p_aux (__builtin_gcn_fabsv (x) + t / (one + v64df_sqrt_aux (one + t, __mask)), __mask),
		      cond);
  VECTOR_ENDIF

  /* w was computed from |x|; give it the sign of x.  */
  VECTOR_IF (hx > 0, cond)
    VECTOR_RETURN (w, cond);
  VECTOR_ELSE (cond)
    VECTOR_RETURN (-w, cond);
  VECTOR_ENDIF

  FUNCTION_RETURN;
}

DEF_VARIANTS (asinh, df, df)

#endif

View File

@ -0,0 +1,43 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/* Based on newlib/libm/mathfp/s_atan.c in Newlib. */
#include "amdgcnmach.h"
/* Shared arctangent kernel; only declared in this file.  */
v64df v64df_atangent_aux (v64df, v64df, v64df, int, v64di);

/* Vector atan: calls the shared kernel with X as its first operand, zero
   vectors for the second and third operands, and 0 for the integer
   selector.  */
DEF_VD_MATH_FUNC (v64df, atan, v64df x)
{
  v64df zero_v = VECTOR_INIT (0.0);
  v64df zero_u = VECTOR_INIT (0.0);
  v64df result = v64df_atangent_aux (x, zero_v, zero_u, 0, __mask);
  return result;
}

DEF_VARIANTS (atan, df, df)

View File

@ -0,0 +1,41 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/* Based on newlib/libm/mathfp/s_atan2.c in Newlib. */
#include "amdgcnmach.h"
/* Shared arctangent kernel; only declared in this file.  */
v64df v64df_atangent_aux (v64df, v64df, v64df, int, v64di);

/* Vector atan2 (v, u): calls the shared kernel with a zero vector as its
   first operand, V and U as the second and third, and 1 for the integer
   selector.  */
DEF_VD_MATH_FUNC (v64df, atan2, v64df v, v64df u)
{
  v64df unused_x = VECTOR_INIT (0.0);
  v64df result = v64df_atangent_aux (unused_x, v, u, 1, __mask);
  return result;
}

DEF_VARIANTS2 (atan2, df, df)

View File

@ -0,0 +1,161 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/******************************************************************
* The following routines are coded directly from the algorithms
* and coefficients given in "Software Manual for the Elementary
* Functions" by William J. Cody, Jr. and William Waite, Prentice
* Hall, 1980.
******************************************************************/
/* Based on newlib/libm/mathfp/s_atangent.c in Newlib. */
#include <float.h>
#include "amdgcnmach.h"
#if defined (__has_builtin) \
&& __has_builtin (__builtin_gcn_fabsv) \
&& __has_builtin (__builtin_gcn_frexpv_exp)
/* Shared kernel behind the vector atan/atan2 entry points.

   When ARCTAN2 is zero the result is computed from X alone; when it is
   non-zero the result is computed from the quotient V / U and X is not read.
   Lanes with U == 0 and V == 0 set errno to ERANGE and return 0.0.

   NOTE(review): the VECTOR_* / FUNCTION_* macros, __mask, __PI and
   __PI_OVER_TWO come from amdgcnmach.h, which is not visible here; the
   lane-masking behaviour described below is inferred from their use.  */
DEF_VD_MATH_FUNC (v64df, atangent, v64df x, v64df v, v64df u, int arctan2)
{
static const double ROOT3 = 1.73205080756887729353;
/* Offsets indexed by the reduction counter N: { 0, pi/6, pi/2, pi/3 }.  */
static const double a[] = { 0.0, 0.52359877559829887308, 1.57079632679489661923,
1.04719755119659774615 };
/* Denominator (q) and numerator (p) coefficients of the rational
   approximation evaluated below; Q is monic.  */
static const double q[] = { 0.41066306682575781263e+2,
0.86157349597130242515e+2,
0.59578436142597344465e+2,
0.15024001160028576121e+2 };
static const double p[] = { -0.13688768894191926929e+2,
-0.20505855195861651981e+2,
-0.84946240351320683534e+1,
-0.83758299368150059274 };
/* NOTE(review): declared float although every comparison below is against
   a double vector; a double constant looks intended -- confirm.  */
static const float z_rooteps = 7.4505859692e-9;
FUNCTION_INIT (v64df);
v64df zero = VECTOR_INIT (0.0);
v64df pi = VECTOR_INIT (__PI);
v64df pi_over_two = VECTOR_INIT (__PI_OVER_TWO);
v64df res;
/* Per-lane flag: -1 once res has been fixed by one of the shortcuts below,
   so that lane skips the main computation.  */
v64si branch = VECTOR_INIT (0);
/* Preparation for calculating arctan2. */
if (arctan2)
{
VECTOR_IF (u == 0.0, cond)
VECTOR_IF2 (v == 0.0, cond2, cond)
errno = ERANGE;
VECTOR_RETURN (VECTOR_INIT (0.0), cond2);
VECTOR_ELSE2 (cond2, cond)
/* u == 0, v != 0: magnitude is pi/2; signs are applied at the end.  */
VECTOR_COND_MOVE (branch, VECTOR_INIT (-1), cond2);
VECTOR_COND_MOVE (res, pi_over_two, cond2);
VECTOR_ENDIF
VECTOR_ENDIF
VECTOR_IF (~branch, cond)
/* Get the exponent values of the inputs. */
v64si expv = __builtin_gcn_frexpv_exp (v);
v64si expu = __builtin_gcn_frexpv_exp (u);
/* See if a divide will overflow. */
v64si e = expv - expu;
VECTOR_IF2 (e > DBL_MAX_EXP, cond2, cond)
VECTOR_COND_MOVE (branch, VECTOR_INIT (-1), cond2);
VECTOR_COND_MOVE (res, pi_over_two, cond2);
VECTOR_ENDIF
/* Also check for underflow. */
VECTOR_IF2 (e < DBL_MIN_EXP, cond2, cond)
VECTOR_COND_MOVE (branch, VECTOR_INIT (-1), cond2);
VECTOR_COND_MOVE (res, zero, cond2);
VECTOR_ENDIF
VECTOR_ENDIF
}
/* Main path for lanes that did not take a shortcut above.  */
VECTOR_IF (~branch, cond)
v64df f;
/* Reduction counter, 0..3; selects the offset from a[] added at the end.  */
v64si N = VECTOR_INIT (0);
if (arctan2)
f = __builtin_gcn_fabsv (v / u);
else
f = __builtin_gcn_fabsv (x);
/* First reduction: replace f > 1 by its reciprocal.  */
VECTOR_IF2 (__builtin_convertvector(f > 1.0, v64si), cond2, cond)
VECTOR_COND_MOVE (f, 1.0 / f, cond2);
VECTOR_COND_MOVE (N, VECTOR_INIT (2), cond2);
VECTOR_ENDIF
/* Second reduction for f > 2 - sqrt(3).  */
VECTOR_IF2 (__builtin_convertvector(f > (2.0 - ROOT3), v64si), cond2, cond)
double A = ROOT3 - 1.0;
VECTOR_COND_MOVE (f, (((A * f - 0.5) - 0.5) + f) / (ROOT3 + f),
cond2);
/* cond2 & 1 is 1 in lanes where cond2 is all-ones, so this is a per-lane
   increment.  */
N += cond2 & 1;
VECTOR_ENDIF
/* Check for values that are too small. */
VECTOR_IF2 (__builtin_convertvector((-z_rooteps < f) & (f < z_rooteps), v64si), cond2, cond)
VECTOR_COND_MOVE (res, f, cond2);
/* Otherwise evaluate the rational approximation f + f * P(g) / Q(g). */
VECTOR_ELSE2 (cond2, cond)
v64df g = f * f;
v64df P = (((p[3] * g + p[2]) * g + p[1]) * g + p[0]) * g;
v64df Q = (((g + q[3]) * g + q[2]) * g + q[1]) * g + q[0];
v64df R = P / Q;
VECTOR_COND_MOVE (res, f + f * R, cond2);
VECTOR_ENDIF
/* Undo the reductions: negate where the reciprocal was taken (N > 1), then
   add a[N] by merging each non-zero offset with zero.  */
VECTOR_COND_MOVE (res, -res, cond & (N > 1));
res += VECTOR_MERGE (VECTOR_INIT (a[1]), zero, cond & (N == 1));
res += VECTOR_MERGE (VECTOR_INIT (a[2]), zero, cond & (N == 2));
res += VECTOR_MERGE (VECTOR_INIT (a[3]), zero, cond & (N == 3));
VECTOR_ENDIF
/* Apply the signs of the original inputs.  */
if (arctan2)
{
/*if (u < 0.0)*/
VECTOR_COND_MOVE (res, pi - res, u < 0.0);
/*if (v < 0.0)*/
VECTOR_COND_MOVE (res, -res, v < 0.0);
}
/*else if (x < 0.0) */
else
VECTOR_COND_MOVE (res, -res, x < 0.0);
VECTOR_RETURN (res, NO_COND);
FUNCTION_RETURN;
}
#endif

View File

@ -0,0 +1,75 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*
*/
/* Based on newlib/libm/mathfp/e_atanh.c in Newlib. */
#include "amdgcnmach.h"
/* log1p helper, declared here and defined elsewhere.  */
v64df v64df_log1p_aux (v64df, v64di);

/* Vector atanh.

   Per lane:
     - |x| > 1:       (x - x) / (x - x), i.e. NaN
     - |x| == 1:      x / 0, i.e. signed infinity
     - |x| < 2**-28:  x
     - |x| < 0.5:     0.5 * log1p (2|x| + 2|x|*|x| / (1 - |x|))
     - otherwise:     0.5 * log1p (2|x| / (1 - |x|))
   with the sign of x applied to the last two cases.

   NOTE(review): the VECTOR_* / FUNCTION_* macros, EXTRACT_WORDS,
   SET_HIGH_WORD and __mask come from amdgcnmach.h, which is not visible
   here; the lane-masking behaviour is inferred from their use.  */
DEF_VD_MATH_FUNC (v64df, atanh, v64df x)
{
  static const double zero = 0.0;
  static const double one = 1.0, huge = 1e300;

  FUNCTION_INIT (v64df);

  v64df t;
  v64si hx, lx;
  EXTRACT_WORDS (hx, lx, x);
  v64si ix = hx & 0x7fffffff;	/* High word with the sign bit cleared.  */

  /* |x| > 1: either the high word is already above that of 1.0, or it
     equals it and the low word is non-zero.  This is spelled out with
     comparisons because lx is a signed vector: the scalar idiom
     (lx | -lx) >> 31 would shift arithmetically, produce -1 instead of 1,
     and make the combined test fail for every such lane.  */
  VECTOR_IF ((ix > 0x3ff00000) | ((ix == 0x3ff00000) & (lx != 0)), cond)
    VECTOR_RETURN ((x - x)/(x - x), cond);
  VECTOR_ENDIF

  /* |x| == 1 exactly (lanes with a non-zero low word were returned above,
     assuming VECTOR_RETURN retires lanes as elsewhere in this code).  */
  VECTOR_IF (ix == 0x3ff00000, cond)
    VECTOR_RETURN (x / zero, cond);
  VECTOR_ENDIF

  VECTOR_IF ((ix < 0x3e300000) & __builtin_convertvector((huge + x) > zero, v64si), cond)	/* x<2**-28 */
    VECTOR_RETURN (x, cond);
  VECTOR_ENDIF

  SET_HIGH_WORD (x, ix, NO_COND);	/* x <- |x| */

  VECTOR_IF (ix < 0x3fe00000, cond)	/* x < 0.5 */
    v64df t2 = x + x;
    VECTOR_COND_MOVE (t, 0.5 * v64df_log1p_aux (t2 + t2 * x / (one - x), __mask), cond);
  VECTOR_ELSE (cond)
    VECTOR_COND_MOVE (t, 0.5 * v64df_log1p_aux ((x + x) / (one - x), __mask), cond);
  VECTOR_ENDIF

  /* hx still holds the original sign bit.  */
  VECTOR_IF (hx >= 0, cond)
    VECTOR_RETURN (t, cond);
  VECTOR_ELSE (cond)
    VECTOR_RETURN (-t, cond);
  VECTOR_ENDIF

  FUNCTION_RETURN;
}

DEF_VARIANTS (atanh, df, df)

View File

@ -0,0 +1,43 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/common/s_copysign.c in Newlib. */
#include "amdgcnmach.h"
/* Vector copysign: builds a new high word for X from the low 31 bits of
   X's high word and bit 31 of Y's high word, then returns X.  */
DEF_VD_MATH_FUNC (v64df, copysign, v64df x, v64df y)
{
  FUNCTION_INIT (v64df);

  v64si x_high, y_high;
  GET_HIGH_WORD(x_high, x, NO_COND);
  GET_HIGH_WORD(y_high, y, NO_COND);

  /* Magnitude bits come from x, the sign bit from y.  */
  v64si magnitude_bits = x_high & 0x7fffffff;
  v64si sign_bit = y_high & 0x80000000;
  SET_HIGH_WORD(x, magnitude_bits | sign_bit, NO_COND);

  VECTOR_RETURN (x, NO_COND);
  FUNCTION_RETURN;
}

DEF_VARIANTS2 (copysign, df, df)

View File

@ -0,0 +1,41 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/* Based on newlib/libm/mathfp/s_cos.c in Newlib. */
#include "amdgcnmach.h"
/* Shared sine/cosine kernel; only declared in this file.  */
v64df v64df_sine_aux (v64df, int, v64di);

/* Vector cos: calls the shared kernel with its integer selector set to 1.  */
DEF_VD_MATH_FUNC (v64df, cos, v64df x)
{
  int selector = 1;
  v64df result = v64df_sine_aux (x, selector, __mask);
  return result;
}

DEF_VARIANTS (cos, df, df)

View File

@ -0,0 +1,41 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/* Based on newlib/libm/mathfp/s_cosh.c in Newlib. */
#include "amdgcnmach.h"
/* Shared hyperbolic sine/cosine kernel; only declared in this file.  */
v64df v64df_sineh_aux (v64df, int, v64di);

/* Vector cosh: calls the shared kernel with its integer selector set to 1.  */
DEF_VD_MATH_FUNC (v64df, cosh, v64df x)
{
  int selector = 1;
  v64df result = v64df_sineh_aux (x, selector, __mask);
  return result;
}

DEF_VARIANTS (cosh, df, df)

View File

@ -0,0 +1,185 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/mathfp/s_erf.c in Newlib. */
#include "amdgcnmach.h"
/* exp helper, declared here and defined elsewhere.  */
v64df v64df_exp_aux (v64df, v64di);
/* Constants for the piecewise rational approximations evaluated by erf in
   this file, grouped by the interval each set is used on.  The trailing
   hexadecimal pairs are, presumably, the high and low 32-bit words of each
   double.  */
static const double
tiny = 1e-300,
half= 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
two = 2.00000000000000000000e+00, /* 0x40000000, 0x00000000 */
/* c = (float)0.84506291151 */
erx = 8.45062911510467529297e-01, /* 0x3FEB0AC1, 0x60000000 */
/*
* Coefficients for approximation to erf on [0,0.84375]
*/
efx = 1.28379167095512586316e-01, /* 0x3FC06EBA, 0x8214DB69 */
efx8= 1.02703333676410069053e+00, /* 0x3FF06EBA, 0x8214DB69 */
pp0 = 1.28379167095512558561e-01, /* 0x3FC06EBA, 0x8214DB68 */
pp1 = -3.25042107247001499370e-01, /* 0xBFD4CD7D, 0x691CB913 */
pp2 = -2.84817495755985104766e-02, /* 0xBF9D2A51, 0xDBD7194F */
pp3 = -5.77027029648944159157e-03, /* 0xBF77A291, 0x236668E4 */
pp4 = -2.37630166566501626084e-05, /* 0xBEF8EAD6, 0x120016AC */
qq1 = 3.97917223959155352819e-01, /* 0x3FD97779, 0xCDDADC09 */
qq2 = 6.50222499887672944485e-02, /* 0x3FB0A54C, 0x5536CEBA */
qq3 = 5.08130628187576562776e-03, /* 0x3F74D022, 0xC4D36B0F */
qq4 = 1.32494738004321644526e-04, /* 0x3F215DC9, 0x221C1A10 */
qq5 = -3.96022827877536812320e-06, /* 0xBED09C43, 0x42A26120 */
/*
* Coefficients for approximation to erf in [0.84375,1.25]
*/
pa0 = -2.36211856075265944077e-03, /* 0xBF6359B8, 0xBEF77538 */
pa1 = 4.14856118683748331666e-01, /* 0x3FDA8D00, 0xAD92B34D */
pa2 = -3.72207876035701323847e-01, /* 0xBFD7D240, 0xFBB8C3F1 */
pa3 = 3.18346619901161753674e-01, /* 0x3FD45FCA, 0x805120E4 */
pa4 = -1.10894694282396677476e-01, /* 0xBFBC6398, 0x3D3E28EC */
pa5 = 3.54783043256182359371e-02, /* 0x3FA22A36, 0x599795EB */
pa6 = -2.16637559486879084300e-03, /* 0xBF61BF38, 0x0A96073F */
qa1 = 1.06420880400844228286e-01, /* 0x3FBB3E66, 0x18EEE323 */
qa2 = 5.40397917702171048937e-01, /* 0x3FE14AF0, 0x92EB6F33 */
qa3 = 7.18286544141962662868e-02, /* 0x3FB2635C, 0xD99FE9A7 */
qa4 = 1.26171219808761642112e-01, /* 0x3FC02660, 0xE763351F */
qa5 = 1.36370839120290507362e-02, /* 0x3F8BEDC2, 0x6B51DD1C */
qa6 = 1.19844998467991074170e-02, /* 0x3F888B54, 0x5735151D */
/*
* Coefficients for approximation to erfc in [1.25,1/0.35]
*/
ra0 = -9.86494403484714822705e-03, /* 0xBF843412, 0x600D6435 */
ra1 = -6.93858572707181764372e-01, /* 0xBFE63416, 0xE4BA7360 */
ra2 = -1.05586262253232909814e+01, /* 0xC0251E04, 0x41B0E726 */
ra3 = -6.23753324503260060396e+01, /* 0xC04F300A, 0xE4CBA38D */
ra4 = -1.62396669462573470355e+02, /* 0xC0644CB1, 0x84282266 */
ra5 = -1.84605092906711035994e+02, /* 0xC067135C, 0xEBCCABB2 */
ra6 = -8.12874355063065934246e+01, /* 0xC0545265, 0x57E4D2F2 */
ra7 = -9.81432934416914548592e+00, /* 0xC023A0EF, 0xC69AC25C */
sa1 = 1.96512716674392571292e+01, /* 0x4033A6B9, 0xBD707687 */
sa2 = 1.37657754143519042600e+02, /* 0x4061350C, 0x526AE721 */
sa3 = 4.34565877475229228821e+02, /* 0x407B290D, 0xD58A1A71 */
sa4 = 6.45387271733267880336e+02, /* 0x40842B19, 0x21EC2868 */
sa5 = 4.29008140027567833386e+02, /* 0x407AD021, 0x57700314 */
sa6 = 1.08635005541779435134e+02, /* 0x405B28A3, 0xEE48AE2C */
sa7 = 6.57024977031928170135e+00, /* 0x401A47EF, 0x8E484A93 */
sa8 = -6.04244152148580987438e-02, /* 0xBFAEEFF2, 0xEE749A62 */
/*
* Coefficients for approximation to erfc in [1/.35,28]
*/
rb0 = -9.86494292470009928597e-03, /* 0xBF843412, 0x39E86F4A */
rb1 = -7.99283237680523006574e-01, /* 0xBFE993BA, 0x70C285DE */
rb2 = -1.77579549177547519889e+01, /* 0xC031C209, 0x555F995A */
rb3 = -1.60636384855821916062e+02, /* 0xC064145D, 0x43C5ED98 */
rb4 = -6.37566443368389627722e+02, /* 0xC083EC88, 0x1375F228 */
rb5 = -1.02509513161107724954e+03, /* 0xC0900461, 0x6A2E5992 */
rb6 = -4.83519191608651397019e+02, /* 0xC07E384E, 0x9BDC383F */
sb1 = 3.03380607434824582924e+01, /* 0x403E568B, 0x261D5190 */
sb2 = 3.25792512996573918826e+02, /* 0x40745CAE, 0x221B9F0A */
sb3 = 1.53672958608443695994e+03, /* 0x409802EB, 0x189D5118 */
sb4 = 3.19985821950859553908e+03, /* 0x40A8FFB7, 0x688C246A */
sb5 = 2.55305040643316442583e+03, /* 0x40A3F219, 0xCEDF3BE6 */
sb6 = 4.74528541206955367215e+02, /* 0x407DA874, 0xE79FE763 */
sb7 = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsv)

/* Vector erf.

   Per lane, selected by the high 32 bits of x:
     - NaN / +-inf:            NaN / +-1
     - |x| < 2**-28:           x + efx*x (scaled form below 0x00800000 to
                               avoid underflow)
     - |x| < 0.84375:          x + x * r(x*x) / s(x*x)
     - 0.84375 <= |x| < 1.25:  +-(erx + P(|x|-1) / Q(|x|-1))
     - |x| >= 6:               +-(1 - tiny)
     - otherwise:              +-(1 - exp (-z*z - 0.5625)
                                      * exp ((z-|x|)*(z+|x|) + R/S) / |x|)
                               where z is |x| with its low word cleared.

   NOTE(review): the VECTOR_* / FUNCTION_* macros, GET_HIGH_WORD,
   SET_LOW_WORD and __mask come from amdgcnmach.h, which is not visible
   here; the lane-masking behaviour is inferred from their use.  */
DEF_VD_MATH_FUNC (v64df, erf, v64df x)
{
  FUNCTION_INIT (v64df);

  v64si hx;
  GET_HIGH_WORD (hx, x, NO_COND);
  v64si ix = hx & 0x7fffffff;	/* High word with the sign bit cleared.  */

  VECTOR_IF (ix >= 0x7ff00000, cond)	/* erf(nan)=nan */
    /* i must be 2 for negative lanes and 0 otherwise so that 1 - i is -1
       or +1.  hx is a signed vector, so the sign bit is isolated with a
       mask after the shift; a plain (hx >> 31) << 1 yields -2 and makes
       erf(-inf) come out as 3.  */
    v64si i = ((hx >> 31) & 1) << 1;
    /* erf(+-inf)=+-1 */
    VECTOR_RETURN (__builtin_convertvector (1 - i, v64df) + one / x, cond);
  VECTOR_ENDIF

  VECTOR_IF (ix < 0x3feb0000, cond)	/* |x|<0.84375 */
    VECTOR_IF2 (ix < 0x3e300000, cond2, cond)	/* |x|<2**-28 */
      VECTOR_IF2 (ix < 0x00800000, cond3, cond2)	/* avoid underflow */
	VECTOR_RETURN (0.125*(8.0*x + efx8*x), cond3);
      VECTOR_ENDIF
      VECTOR_RETURN (x + efx*x, cond2);
    VECTOR_ENDIF

    v64df z = x*x;
    v64df r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
    v64df s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
    v64df y = r/s;
    VECTOR_RETURN (x + x*y, cond);
  VECTOR_ENDIF

  VECTOR_IF (ix < 0x3ff40000, cond)	/* 0.84375 <= |x| < 1.25 */
    v64df s = __builtin_gcn_fabsv (x) - one;
    v64df P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
    v64df Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
    VECTOR_IF2 (hx >= 0, cond2, cond)
      VECTOR_RETURN (erx + P/Q, cond2);
    VECTOR_ELSE2 (cond2, cond)
      VECTOR_RETURN (-erx - P/Q, cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF

  VECTOR_IF (ix >= 0x40180000, cond)	/* inf>|x|>=6 */
    VECTOR_IF2 (hx >= 0, cond2, cond)
      VECTOR_RETURN (VECTOR_INIT (1.0 - tiny), cond2);
    VECTOR_ELSE2 (cond2, cond)
      VECTOR_RETURN (VECTOR_INIT (tiny - 1.0), cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF

  /* Remaining lanes: 1.25 <= |x| < 6.  The sign is reapplied from hx at
     the end.  */
  x = __builtin_gcn_fabsv(x);
  v64df s = 1.0 / (x*x);
  v64df R, S;
  VECTOR_IF (ix < 0x4006DB6E, cond)	/* |x| < 1/0.35 */
    VECTOR_COND_MOVE (R, ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(
			 ra5+s*(ra6+s*ra7)))))), cond);
    VECTOR_COND_MOVE (S, one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(
			 sa5+s*(sa6+s*(sa7+s*sa8))))))), cond);
  VECTOR_ELSE (cond)			/* |x| >= 1/0.35 */
    VECTOR_COND_MOVE (R, rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(
			 rb5+s*rb6))))), cond);
    VECTOR_COND_MOVE (S, one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(
			 sb5+s*(sb6+s*sb7)))))), cond);
  VECTOR_ENDIF

  /* z is |x| with its low 32 bits cleared.  It has to start out as a copy
     of x: SET_LOW_WORD only replaces the low word, so an uninitialised z
     would keep an indeterminate high word.  */
  v64df z = x;
  SET_LOW_WORD (z, VECTOR_INIT(0), NO_COND);
  v64df r = v64df_exp_aux (-z*z - 0.5625, __mask)
	    * v64df_exp_aux ((z-x)*(z+x) + R/S, __mask);

  VECTOR_RETURN (one - r/x, hx >= 0);
  VECTOR_RETURN (r/x - one, hx < 0);

  FUNCTION_RETURN;
}

DEF_VARIANTS (erf, df, df)

#endif

View File

@ -0,0 +1,103 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/******************************************************************
* The following routines are coded directly from the algorithms
* and coefficients given in "Software Manual for the Elementary
* Functions" by William J. Cody, Jr. and William Waite, Prentice
* Hall, 1980.
******************************************************************/
/* Based on newlib/libm/mathfp/s_exp.c in Newlib. */
#include "amdgcnmach.h"
/* Helpers declared here and defined elsewhere.  exp uses v64df_numtest to
   classify each lane (compared against NAN, INF and 0) and v64df_ispos as
   the selector when returning infinity or zero for infinite inputs.  */
v64si v64df_ispos (v64df);
v64si v64df_numtest (v64df);
/* 1 / ln(2) and ln(2), used for the range reduction x = N*ln(2) + g.  */
static const double INV_LN2 = 1.4426950408889634074;
static const double LN2 = 0.6931471805599453094172321;
/* Coefficients of the polynomials P and Q in z = g*g that exp combines as
   0.5 + P / (Q - P).  */
static const double p[] = { 0.25, 0.75753180159422776666e-2,
0.31555192765684646356e-4 };
static const double q[] = { 0.5, 0.56817302698551221787e-1,
0.63121894374398504557e-3,
0.75104028399870046114e-6 };
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_ldexpv)

/* Vector exp.

   Per lane:
     - NaN:                       sets errno to EDOM, returns x
     - +-inf:                     sets errno to ERANGE, returns z_infinity.d
                                  or 0.0 as selected by v64df_ispos (x)
     - 0:                         returns 1.0
     - x > BIGX or x < SMALLX:    sets errno to ERANGE, returns x
     - |x| < z_rooteps:           returns 1.0
     - otherwise:                 N = round (x / ln2), g = x - N*ln2,
                                  result = ldexp (0.5 + P/(Q-P), N + 1)

   NOTE(review): the VECTOR_* / FUNCTION_* macros, __mask, BIGX, SMALLX and
   the z_* constants come from amdgcnmach.h, which is not visible here; the
   lane-masking behaviour is inferred from their use.  */
DEF_VD_MATH_FUNC (v64df, exp, v64df x)
{
  FUNCTION_INIT (v64df);

  v64si num_type = v64df_numtest (x);
  VECTOR_IF (num_type == NAN, cond)
    errno = EDOM;
    VECTOR_RETURN (x, cond);
  VECTOR_ELSEIF (num_type == INF, cond)
    errno = ERANGE;
    VECTOR_RETURN (VECTOR_MERGE (VECTOR_INIT (z_infinity.d),
				 VECTOR_INIT (0.0),
				 v64df_ispos (x)),
		   cond);
  VECTOR_ELSEIF (num_type == 0, cond)
    VECTOR_RETURN (VECTOR_INIT (1.0), cond);
  VECTOR_ENDIF

  /* Check for out of bounds. */
  VECTOR_IF ((x > BIGX) | (x < SMALLX), cond)
    errno = ERANGE;
    VECTOR_RETURN (x, cond);
  VECTOR_ENDIF

  /* Check for a value too small to calculate.  This is the double-precision
     routine, so it compares against z_rooteps (the threshold the other
     double routines use) rather than z_rooteps_f, which is presumably the
     single-precision threshold and would round far more inputs to exactly
     1.0 -- confirm the two values in amdgcnmach.h.  */
  VECTOR_RETURN (VECTOR_INIT (1.0),
		 (-z_rooteps < x) & (x < z_rooteps));

  /* Calculate the exponent: round x / ln2 to the nearest integer by adding
     or subtracting 0.5 (by sign of x) before the integer conversion.  */
  v64si Nneg = __builtin_convertvector (x * INV_LN2 - 0.5, v64si);
  v64si Npos = __builtin_convertvector (x * INV_LN2 + 0.5, v64si);
  v64si N = VECTOR_MERGE (Nneg, Npos, x < 0.0);

  /* Construct the mantissa. */
  v64df g = x - __builtin_convertvector (N, v64df) * LN2;
  v64df z = g * g;
  v64df P = g * ((p[2] * z + p[1]) * z + p[0]);
  v64df Q = ((q[3] * z + q[2]) * z + q[1]) * z + q[0];
  v64df R = 0.5 + P / (Q - P);

  /* Return the floating point value.  The exponent is bumped by one before
     scaling R.  */
  N++;
  VECTOR_RETURN (__builtin_gcn_ldexpv (R, N), NO_COND);

  FUNCTION_RETURN;
}

DEF_VARIANTS (exp, df, df)

#endif

View File

@ -0,0 +1,32 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/* Copyright (C) 2002 by Red Hat, Incorporated. All rights reserved.
*
* Permission to use, copy, modify, and distribute this software
* is freely granted, provided that this notice is preserved.
*/
/* Based on newlib/libm/mathfp/s_exp2.c in Newlib. */
#include "amdgcnmach.h"
v64df v64df_pow_aux (v64df, v64df, v64di);
/* Vectorized exp2: evaluates 2**x for the lanes of X by handing a vector
   filled with 2.0 and X to the shared pow helper, forwarding the lane
   mask supplied to this function.  */
DEF_VD_MATH_FUNC (v64df, exp2, v64df x)
{
  v64df base = VECTOR_INIT (2.0);
  v64df result = v64df_pow_aux (base, x, __mask);

  return result;
}
DEF_VARIANTS (exp2, df, df)

View File

@ -0,0 +1,38 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/common/s_finite.c in Newlib. */
#include "amdgcnmach.h"
/* Vectorized finite predicate.  A result lane is non-zero when the
   magnitude of the high word of the matching lane of X is below
   0x7ff00000 (the exponent pattern shared by Inf and NaN).  */
DEF_VD_MATH_PRED (v64si, finite, v64df x)
{
  FUNCTION_INIT (v64si);

  v64si high;
  GET_HIGH_WORD (high, x, NO_COND);

  /* Strip the sign bit, then subtract the Inf/NaN threshold: the
     difference is negative only when the input is finite.  */
  v64si magnitude = high & 0x7fffffff;
  v64si delta = magnitude - 0x7ff00000;

  /* Shifting by 31 leaves a non-zero lane exactly when DELTA is
     negative.  */
  v64si sign_fill = delta >> 31;

  return sign_fill != 0;
}
DEF_VARIANTS (finite, si, df)

View File

@ -0,0 +1,199 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/mathfp/s_fmod.c in Newlib. */
#include "amdgcnmach.h"
/* Vectorized fmod: for every active lane computes the remainder of
   x / y with the sign of x, using the fdlibm fixed-point algorithm on
   the (high, low) 32-bit words of each double.

   Lanes finish at different points; VECTOR_RETURN records the result
   for the lanes named by its condition, so every early exit must be
   guarded by the innermost condition variable that describes exactly
   the lanes that are done.  */
DEF_VD_MATH_FUNC (v64df, fmod, v64df x, v64df y)
{
  FUNCTION_INIT(v64df);

  v64si hx, hy, hz;
  v64usi lx, ly, lz;
  EXTRACT_WORDS (hx, lx, x);
  EXTRACT_WORDS (hy, ly, y);
  v64si sx = hx & 0x80000000;	/* sign of x */
  hx ^=sx;			/* |x| */
  hy &= 0x7fffffff;		/* |y| */
  /* Signed zero carrying the sign of x, used for exact results.  */
  v64df zeroes = VECTOR_MERGE (VECTOR_INIT (-0.0),
                               VECTOR_INIT (0.0),
                               sx != 0);

  /* purge off exception values: y=0, or x not finite, or y is NaN */
  VECTOR_IF (((hy | ly) == 0) | (hx >= 0x7ff00000)
             | ((hy | ((ly | -ly) >> 31)) > 0x7ff00000), cond)
    VECTOR_RETURN ((x * y) / (x * y), cond);
  VECTOR_ENDIF

  VECTOR_IF (hx <= hy, cond)
    /* |x| < |y|: return x.  Only the lanes selected by cond2 may return
       here; returning under the outer cond would also retire lanes with
       hx == hy and lx >= ly, which still need the zero result below or
       the full reduction.  */
    VECTOR_IF2 ((hx < hy) | (lx < ly), cond2, cond)
      VECTOR_RETURN (x, cond2);
    VECTOR_ENDIF
    /* |x| == |y|: return sign(x)*0.  The high words are compared
       explicitly so that this test stands on its own.  */
    VECTOR_IF2 ((hx == hy) & (lx == ly), cond2, cond)
      VECTOR_RETURN (zeroes, cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF

  /* determine ix = ilogb(x).  The two subnormal arms can both execute
     when lanes diverge, so each arm only writes its own lanes.  */
  v64si ix = VECTOR_INIT (0);
  VECTOR_IF (hx < 0x00100000, cond)	/* subnormal x */
    VECTOR_IF2 (hx == 0, cond2, cond)
      VECTOR_COND_MOVE (ix, VECTOR_INIT (-1043), cond2);
      for (v64si i = __builtin_convertvector (lx, v64si);
           !ALL_ZEROES_P (cond2 & (i > 0));
           i <<= 1)
        VECTOR_COND_MOVE (ix, ix - 1, cond2 & (i > 0));
    VECTOR_ELSE2 (cond2, cond)
      VECTOR_COND_MOVE (ix, VECTOR_INIT (-1022), cond2);
      for (v64si i = __builtin_convertvector (hx << 11, v64si);
           !ALL_ZEROES_P (cond2 & (i > 0));
           i <<= 1)
        VECTOR_COND_MOVE (ix, ix - 1, cond2 & (i > 0));
    VECTOR_ENDIF
  VECTOR_ELSE (cond)
    VECTOR_COND_MOVE (ix, (hx >> 20) - 1023, cond);
  VECTOR_ENDIF

  /* determine iy = ilogb(y), same scheme as for ix */
  v64si iy = VECTOR_INIT (0);
  VECTOR_IF (hy < 0x00100000, cond)	/* subnormal y */
    VECTOR_IF2 (hy == 0, cond2, cond)
      VECTOR_COND_MOVE (iy, VECTOR_INIT (-1043), cond2);
      for (v64si i = __builtin_convertvector (ly, v64si);
           !ALL_ZEROES_P (cond2 & (i > 0));
           i <<= 1)
        VECTOR_COND_MOVE (iy, iy - 1, cond2 & (i > 0));
    VECTOR_ELSE2 (cond2, cond)
      VECTOR_COND_MOVE (iy, VECTOR_INIT (-1022), cond2);
      for (v64si i = __builtin_convertvector (hy << 11, v64si);
           !ALL_ZEROES_P (cond2 & (i > 0));
           i <<= 1)
        VECTOR_COND_MOVE (iy, iy - 1, cond2 & (i > 0));
    VECTOR_ENDIF
  VECTOR_ELSE (cond)
    VECTOR_COND_MOVE (iy, (hy >> 20) - 1023, cond);
  VECTOR_ENDIF

  /* set up {hx,lx}, {hy,ly} and align y to x */
  VECTOR_IF (ix >= -1022, cond)
    VECTOR_COND_MOVE (hx, 0x00100000 | (0x000fffff & hx), cond);
  VECTOR_ELSE (cond)		/* subnormal x, shift x to normal */
    {
      v64si n = -1022 - ix;
      VECTOR_IF2 (n <= 31, cond2, cond)
        VECTOR_COND_MOVE (hx, (hx << n) | (lx >> (32 - n)), cond2);
        VECTOR_COND_MOVE (lx, lx << n, cond2);
      VECTOR_ELSE2 (cond2, cond)
        VECTOR_COND_MOVE (hx, __builtin_convertvector (lx << (n - 32), v64si), cond2);
        VECTOR_COND_MOVE (lx, VECTOR_INIT (0U), cond2);
      VECTOR_ENDIF
    }
  VECTOR_ENDIF
  VECTOR_IF (iy >= -1022, cond)
    VECTOR_COND_MOVE (hy, 0x00100000 | (0x000fffff & hy), cond);
  VECTOR_ELSE (cond)		/* subnormal y, shift y to normal */
    {
      v64si n = -1022 - iy;
      VECTOR_IF2 (n <= 31, cond2, cond)
        VECTOR_COND_MOVE (hy, (hy << n) | (ly >> (32 - n)), cond2);
        VECTOR_COND_MOVE (ly, ly << n, cond2);
      VECTOR_ELSE2 (cond2, cond)
        VECTOR_COND_MOVE (hy, __builtin_convertvector (ly << (n - 32), v64si), cond2);
        VECTOR_COND_MOVE (ly, VECTOR_INIT (0U), cond2);
      VECTOR_ENDIF
    }
  VECTOR_ENDIF

  /* fix point fmod: one shift/subtract step per unit of exponent
     difference; cond tracks the lanes that still have steps left.  */
  v64si n = ix - iy;
  v64si cond = n != 0;

  while (!ALL_ZEROES_P (cond))
    {
      hz = hx - hy;
      lz = lx - ly;
      VECTOR_IF2 (lx < ly, cond2, cond)
        VECTOR_COND_MOVE (hz, hz - 1, cond2);
      VECTOR_ENDIF
      VECTOR_IF2 (hz < 0, cond2, cond)
        VECTOR_COND_MOVE (hx, hx + hx + (__builtin_convertvector(lx, v64usi) >> 31), cond2);
        VECTOR_COND_MOVE (lx, lx + lx, cond2);
      VECTOR_ELSE2 (cond2, cond)
        VECTOR_IF2 ((hz | lz) == 0, cond3, cond2)	/* return sign(x)*0 */
          VECTOR_RETURN (zeroes, cond3);
        VECTOR_ENDIF
        VECTOR_COND_MOVE (hx, hz + hz + (__builtin_convertvector(lz, v64usi) >> 31), cond2);
        VECTOR_COND_MOVE (lx, lz + lz, cond2);
      VECTOR_ENDIF

      n += cond;	/* Active lanes should be -1 */
      cond &= (n != 0);
    }

  hz = hx - hy;
  lz = lx - ly;
  VECTOR_COND_MOVE (hz, hz - 1, lx < ly);
  VECTOR_IF (hz >= 0, cond)
    VECTOR_COND_MOVE (hx, hz, cond);
    VECTOR_COND_MOVE (lx, lz, cond);
  VECTOR_ENDIF

  /* convert back to floating value and restore the sign */
  VECTOR_RETURN (zeroes, (hx | lx) == 0);	/* return sign(x)*0 */

  cond = hx < 0x00100000;
  while (!ALL_ZEROES_P (cond))		/* normalize x */
    {
      VECTOR_COND_MOVE (hx, hx + hx + (lx >> 31), cond);
      VECTOR_COND_MOVE (lx, lx + lx, cond);
      iy += cond;	/* Active lanes should be -1 */

      cond &= (hx < 0x00100000);
    }

  VECTOR_IF (iy >= -1022, cond)		/* normalize output */
    VECTOR_COND_MOVE (hx, (hx - 0x00100000) | ((iy + 1023) << 20), cond);
    INSERT_WORDS (x, hx | sx, lx, cond);
  VECTOR_ELSE (cond)			/* subnormal output */
    n = -1022 - iy;
    VECTOR_IF2 (n <= 20, cond2, cond)
      VECTOR_COND_MOVE (lx, (lx >> n) | (hx << (32 - n)), cond2);
      VECTOR_COND_MOVE (hx, hx >> n, cond2);
    VECTOR_ELSEIF2 (n <= 31, cond2, cond)
      VECTOR_COND_MOVE (lx, __builtin_convertvector ((hx << (32 - n)) | (lx >> n), v64usi), cond2);
      VECTOR_COND_MOVE (hx, sx, cond2);
    VECTOR_ELSE2 (cond2, cond)
      VECTOR_COND_MOVE (lx, __builtin_convertvector (hx >> (n - 32), v64usi), cond2);
      VECTOR_COND_MOVE (hx, sx, cond2);
    VECTOR_ENDIF
    INSERT_WORDS (x, hx | sx, lx, cond);
    x *= VECTOR_INIT (1.0);		/* create necessary signal */
  VECTOR_ENDIF

  VECTOR_RETURN (x, NO_COND);	/* exact output */
  FUNCTION_RETURN;
}
DEF_VARIANTS2 (fmod, df, df)

View File

@ -0,0 +1,38 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*
*/
/* Based on newlib/libm/math/w_gamma.c in Newlib. */
#include "amdgcnmach.h"
v64df v64df_lgamma_aux (v64df x, v64di __mask);
/* Vectorized gamma: forwards X and the caller's lane mask to the lgamma
   helper and returns its result unchanged.  */
DEF_VD_MATH_FUNC (v64df, gamma, v64df x)
{
  v64df result = v64df_lgamma_aux (x, __mask);

  return result;
}
DEF_VARIANTS (gamma, df, df)

View File

@ -0,0 +1,127 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/mathfp/e_hypot.c in Newlib. */
#include "amdgcnmach.h"
v64df v64df_sqrt_aux (v64df, v64di);
/* Vectorized hypot: sqrt(x*x + y*y) per lane, following the fdlibm
   algorithm.  a/b hold the larger/smaller magnitude, ha/hb their high
   words, and k the power-of-two scaling applied to keep the
   intermediate squares in range.  */
DEF_VD_MATH_FUNC (v64df, hypot, v64df x, v64df y)
{
  FUNCTION_INIT (v64df);

  v64df a = x;
  v64df b = y;

  v64si ha;
  GET_HIGH_WORD (ha, x, NO_COND);
  ha &= 0x7fffffffL;
  v64si hb;
  GET_HIGH_WORD (hb, y, NO_COND);
  hb &= 0x7fffffffL;

  /* Order the operands so that |a| >= |b| (by high word).  */
  VECTOR_IF (hb > ha, cond)
    VECTOR_COND_MOVE (a, y, cond);
    VECTOR_COND_MOVE (b, x, cond);
    v64si j = ha;
    VECTOR_COND_MOVE (ha, hb, cond);
    VECTOR_COND_MOVE (hb, j, cond);
  VECTOR_ENDIF
  SET_HIGH_WORD (a, ha, NO_COND);	/* a <- |a| */
  SET_HIGH_WORD (b, hb, NO_COND);	/* b <- |b| */
  VECTOR_IF((ha - hb) > 0x3c00000L, cond)	/* x/y > 2**60 */
    VECTOR_RETURN (a + b, cond);
  VECTOR_ENDIF

  v64si k = VECTOR_INIT (0);

  VECTOR_IF (ha > 0x5f300000L, cond)	/* a>2**500 */
    VECTOR_IF2 (ha >= 0x7ff00000L, cond2, cond)	/* Inf or NaN */
      v64df w = a + b;			/* for sNaN */
      v64si low;
      GET_LOW_WORD (low, a, cond2);
      /* a is +Inf: exponent all ones (guaranteed by cond2), mantissa 0.  */
      VECTOR_COND_MOVE (w, a, cond2 & (((ha & 0xfffff) | low) == 0));
      GET_LOW_WORD (low, b, cond2);
      /* b is +Inf.  hb is not known to have an all-ones exponent here,
         so the whole high word must be compared against 0x7ff00000;
         testing only the mantissa bits would also match large powers
         of two.  */
      VECTOR_COND_MOVE (w, b, cond2 & (((hb ^ 0x7ff00000) | low) == 0));
      /* Only the Inf/NaN lanes are finished; large finite lanes fall
         through to the scaling below.  */
      VECTOR_RETURN (w, cond2);
    VECTOR_ENDIF
    /* scale a and b by 2**-600 */
    VECTOR_COND_MOVE (ha, ha - 0x25800000, cond);
    VECTOR_COND_MOVE (hb, hb - 0x25800000, cond);
    VECTOR_COND_MOVE (k, k + 600, cond);
    SET_HIGH_WORD (a, ha, cond);
    SET_HIGH_WORD (b, hb, cond);
  VECTOR_ENDIF
  VECTOR_IF (hb < 0x20b00000, cond)	/* b < 2**-500 */
    VECTOR_IF2 (hb <= 0x000fffff, cond2, cond)	/* subnormal b or 0 */
      v64si low;
      GET_LOW_WORD (low, b, cond);
      VECTOR_RETURN (a, cond2 & ((hb | low) == 0));
      /* t1=2^1022 */
      v64df t1 = VECTOR_INIT (0.0);
      SET_HIGH_WORD (t1, VECTOR_INIT (0x7fd00000), cond2);
      VECTOR_COND_MOVE (b, b * t1, cond2);
      VECTOR_COND_MOVE (a, a * t1, cond2);
      VECTOR_COND_MOVE (k, k - 1022, cond2);
    VECTOR_ELSE2 (cond2, cond)		/* scale a and b by 2^600 */
      VECTOR_COND_MOVE (ha, ha + 0x25800000, cond2);	/* a *= 2^600 */
      VECTOR_COND_MOVE (hb, hb + 0x25800000, cond2);	/* b *= 2^600 */
      VECTOR_COND_MOVE (k, k - 600, cond2);
      SET_HIGH_WORD (a, ha, cond2);
      SET_HIGH_WORD (b, hb, cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF

  /* medium size a and b */
  v64df w = a - b;
  VECTOR_IF (w > b, cond)
    v64df t1 = VECTOR_INIT (0.0);
    SET_HIGH_WORD (t1, ha, cond);
    v64df t2 = a - t1;
    VECTOR_COND_MOVE (w, v64df_sqrt_aux (t1*t1 - (b*(-b) - t2 * (a + t1)), __mask), cond);
  VECTOR_ELSE (cond)
    VECTOR_COND_MOVE (a, a+a, cond);
    v64df y1 = VECTOR_INIT (0.0);
    SET_HIGH_WORD (y1, hb, cond);
    v64df y2 = b - y1;
    /* t1 must start from zero: only its high word is set below, and the
       low word has to be clear for the head/tail split to be exact.  */
    v64df t1 = VECTOR_INIT (0.0);
    SET_HIGH_WORD (t1, ha + 0x00100000, cond);
    v64df t2 = a - t1;
    VECTOR_COND_MOVE (w, v64df_sqrt_aux (t1*y1 - (w*(-w) - (t1*y2 + t2*b)), __mask), cond);
  VECTOR_ENDIF

  /* Undo the scaling by multiplying with 2**k where k != 0.  */
  VECTOR_IF (k != 0, cond)
    v64si high;
    v64df t1 = VECTOR_INIT (1.0);
    GET_HIGH_WORD (high, t1, cond);
    SET_HIGH_WORD (t1, high + (k << 20), cond);
    VECTOR_RETURN (t1 * w, cond);
  VECTOR_ELSE (cond)
    VECTOR_RETURN (w, cond);
  VECTOR_ENDIF

  FUNCTION_RETURN;
}
DEF_VARIANTS2 (hypot, df, df)

View File

@ -0,0 +1,59 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/common/s_ilogb.c in Newlib. */
#include "amdgcnmach.h"
/* Vectorized ilogb: returns the unbiased binary exponent of each lane
   of X as an integer.  Zero yields -__INT_MAX__ (FP_ILOGB0), Inf and
   NaN yield __INT_MAX__, and subnormals are handled by counting the
   leading zero bits of the mantissa.  */
DEF_VD_MATH_PRED (v64si, ilogb, v64df x)
{
  FUNCTION_INIT(v64si);

  v64si hx, lx;
  v64si ix = VECTOR_INIT (0);
  EXTRACT_WORDS (hx, lx, x);
  hx &= 0x7fffffff;
  VECTOR_IF (hx < 0x00100000, cond)	/* zero or subnormal */
    VECTOR_RETURN (VECTOR_INIT (-__INT_MAX__), cond & ((hx | lx) == 0));  /* FP_ILOGB0 */
    /* Both arms below may run when lanes diverge, so each arm writes
       only its own lanes of ix; a plain assignment in the second arm
       would discard the values computed by the first.  */
    VECTOR_IF2 (hx == 0, cond2, cond)
      VECTOR_COND_MOVE (ix, VECTOR_INIT (-1043), cond2);
      for (v64si i = lx;
           !ALL_ZEROES_P (cond2 & (i > 0));
           i <<= 1)
        VECTOR_COND_MOVE (ix, ix - 1, cond2 & (i > 0));
    VECTOR_ELSE2 (cond2, cond)
      VECTOR_COND_MOVE (ix, VECTOR_INIT (-1022), cond2);
      for (v64si i = (hx << 11);
           !ALL_ZEROES_P (cond2 & (i > 0));
           i <<= 1)
        VECTOR_COND_MOVE (ix, ix - 1, cond2 & (i > 0));
    VECTOR_ENDIF
    VECTOR_RETURN (ix, cond);
  VECTOR_ENDIF
  /* Normal numbers: exponent field minus the bias.  */
  VECTOR_RETURN ((hx >> 20) - 1023, hx < 0x7ff00000);
  /* Remaining lanes are Inf or NaN.  */
  VECTOR_RETURN (VECTOR_INIT (__INT_MAX__), NO_COND);

  FUNCTION_RETURN;
}
DEF_VARIANTS (ilogb, si, df)

View File

@ -0,0 +1,41 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/common/s_isnan.c in Newlib. */
#include "amdgcnmach.h"
/* Vectorized isnan predicate.  A result lane is non-zero when the
   matching lane of X is a NaN, i.e. its exponent field is all ones and
   its mantissa (high-word fraction bits or low word) is non-zero.  */
DEF_VD_MATH_PRED (v64si, isnan, v64df x)
{
  v64si hx, lx;
  EXTRACT_WORDS (hx, lx, x);
  hx &= 0x7fffffff;
  /* Fold "low word is non-zero" into bit 0 of hx.  (lx | -lx) has its
     top bit set exactly when lx != 0; the shift must be a logical one
     so that it yields 1 rather than -1.  An arithmetic shift of the
     signed lanes would set every bit of hx and make the subtraction
     below come out positive, hiding NaNs whose low word is non-zero.  */
  hx |= __builtin_convertvector
          (__builtin_convertvector (lx | (-lx), v64usi) >> 31, v64si);
  /* Negative only when hx (plus the folded low-word bit) exceeds the
     Inf pattern 0x7ff00000.  */
  hx = 0x7ff00000 - hx;

  return (hx >> 31) != 0;
}
DEF_VARIANTS (isnan, si, df)

View File

@ -0,0 +1,47 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/* Based on newlib/libm/mathfp/s_ispos.c in Newlib. */
#include "amdgcnmach.h"
/* Internal helper: a result lane is non-zero when the sign bit in the
   high word of the matching lane of X is clear.  */
v64si
v64df_ispos (v64df x)
{
  /* There is no caller-supplied lane mask here, so build one with
     every lane set.  */
  v64si __mask = VECTOR_INIT (-1);
  FUNCTION_INIT (v64si);

  v64si high_word;
  GET_HIGH_WORD (high_word, x, NO_COND);

  /* Isolate the sign bit of the double.  */
  v64si sign_bit = high_word & 0x80000000;
  VECTOR_RETURN (sign_bit == 0, NO_COND);

  FUNCTION_RETURN;
}

View File

@ -0,0 +1,44 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*
*/
/* Based on newlib/libm/math/w_lgamma.c in Newlib. */
#include "amdgcnmach.h"
v64si v64df_finite_aux (v64df x, v64di __mask);
v64df v64df_lgamma_r_aux (v64df x, v64si *signgamp, v64di __mask);
/* Vectorized lgamma: computes log(|gamma(x)|) through the reentrant
   helper, storing the per-lane sign of gamma(x) in the vector reent
   structure, and sets errno to ERANGE when a finite argument produced
   a non-finite result.  */
DEF_VD_MATH_FUNC (v64df, lgamma, v64df x)
{
  v64df y = v64df_lgamma_r_aux(x, &(_REENT_V64SI_SIGNGAM(_V64_REENT)), __mask);

  /* Test overflow lane by lane: a lane overflowed when its input was
     finite but its result is not.  Requiring every result lane to be
     non-finite would miss an overflow whenever another lane produced
     an ordinary value.  */
  v64si x_finite = v64df_finite_aux (x, __mask);
  v64si y_finite = v64df_finite_aux (y, __mask);
  if (!ALL_ZEROES_P ((x_finite != 0) & (y_finite == 0))) {
    /* lgamma(finite) overflow */
    errno = ERANGE;
  }
  return y;
}
DEF_VARIANTS (lgamma, df, df)

View File

@ -0,0 +1,300 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*
*/
/* Based on newlib/libm/math/er_lgamma.c in Newlib. */
#include "amdgcnmach.h"
static const double two52= 4.50359962737049600000e+15, /* 0x43300000, 0x00000000 */
half= 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */
a0 = 7.72156649015328655494e-02, /* 0x3FB3C467, 0xE37DB0C8 */
a1 = 3.22467033424113591611e-01, /* 0x3FD4A34C, 0xC4A60FAD */
a2 = 6.73523010531292681824e-02, /* 0x3FB13E00, 0x1A5562A7 */
a3 = 2.05808084325167332806e-02, /* 0x3F951322, 0xAC92547B */
a4 = 7.38555086081402883957e-03, /* 0x3F7E404F, 0xB68FEFE8 */
a5 = 2.89051383673415629091e-03, /* 0x3F67ADD8, 0xCCB7926B */
a6 = 1.19270763183362067845e-03, /* 0x3F538A94, 0x116F3F5D */
a7 = 5.10069792153511336608e-04, /* 0x3F40B6C6, 0x89B99C00 */
a8 = 2.20862790713908385557e-04, /* 0x3F2CF2EC, 0xED10E54D */
a9 = 1.08011567247583939954e-04, /* 0x3F1C5088, 0x987DFB07 */
a10 = 2.52144565451257326939e-05, /* 0x3EFA7074, 0x428CFA52 */
a11 = 4.48640949618915160150e-05, /* 0x3F07858E, 0x90A45837 */
tc = 1.46163214496836224576e+00, /* 0x3FF762D8, 0x6356BE3F */
tf = -1.21486290535849611461e-01, /* 0xBFBF19B9, 0xBCC38A42 */
/* tt = -(tail of tf) */
tt = -3.63867699703950536541e-18, /* 0xBC50C7CA, 0xA48A971F */
t0 = 4.83836122723810047042e-01, /* 0x3FDEF72B, 0xC8EE38A2 */
t1 = -1.47587722994593911752e-01, /* 0xBFC2E427, 0x8DC6C509 */
t2 = 6.46249402391333854778e-02, /* 0x3FB08B42, 0x94D5419B */
t3 = -3.27885410759859649565e-02, /* 0xBFA0C9A8, 0xDF35B713 */
t4 = 1.79706750811820387126e-02, /* 0x3F9266E7, 0x970AF9EC */
t5 = -1.03142241298341437450e-02, /* 0xBF851F9F, 0xBA91EC6A */
t6 = 6.10053870246291332635e-03, /* 0x3F78FCE0, 0xE370E344 */
t7 = -3.68452016781138256760e-03, /* 0xBF6E2EFF, 0xB3E914D7 */
t8 = 2.25964780900612472250e-03, /* 0x3F6282D3, 0x2E15C915 */
t9 = -1.40346469989232843813e-03, /* 0xBF56FE8E, 0xBF2D1AF1 */
t10 = 8.81081882437654011382e-04, /* 0x3F4CDF0C, 0xEF61A8E9 */
t11 = -5.38595305356740546715e-04, /* 0xBF41A610, 0x9C73E0EC */
t12 = 3.15632070903625950361e-04, /* 0x3F34AF6D, 0x6C0EBBF7 */
t13 = -3.12754168375120860518e-04, /* 0xBF347F24, 0xECC38C38 */
t14 = 3.35529192635519073543e-04, /* 0x3F35FD3E, 0xE8C2D3F4 */
u0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */
u1 = 6.32827064025093366517e-01, /* 0x3FE4401E, 0x8B005DFF */
u2 = 1.45492250137234768737e+00, /* 0x3FF7475C, 0xD119BD6F */
u3 = 9.77717527963372745603e-01, /* 0x3FEF4976, 0x44EA8450 */
u4 = 2.28963728064692451092e-01, /* 0x3FCD4EAE, 0xF6010924 */
u5 = 1.33810918536787660377e-02, /* 0x3F8B678B, 0xBF2BAB09 */
v1 = 2.45597793713041134822e+00, /* 0x4003A5D7, 0xC2BD619C */
v2 = 2.12848976379893395361e+00, /* 0x40010725, 0xA42B18F5 */
v3 = 7.69285150456672783825e-01, /* 0x3FE89DFB, 0xE45050AF */
v4 = 1.04222645593369134254e-01, /* 0x3FBAAE55, 0xD6537C88 */
v5 = 3.21709242282423911810e-03, /* 0x3F6A5ABB, 0x57D0CF61 */
s0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */
s1 = 2.14982415960608852501e-01, /* 0x3FCB848B, 0x36E20878 */
s2 = 3.25778796408930981787e-01, /* 0x3FD4D98F, 0x4F139F59 */
s3 = 1.46350472652464452805e-01, /* 0x3FC2BB9C, 0xBEE5F2F7 */
s4 = 2.66422703033638609560e-02, /* 0x3F9B481C, 0x7E939961 */
s5 = 1.84028451407337715652e-03, /* 0x3F5E26B6, 0x7368F239 */
s6 = 3.19475326584100867617e-05, /* 0x3F00BFEC, 0xDD17E945 */
r1 = 1.39200533467621045958e+00, /* 0x3FF645A7, 0x62C4AB74 */
r2 = 7.21935547567138069525e-01, /* 0x3FE71A18, 0x93D3DCDC */
r3 = 1.71933865632803078993e-01, /* 0x3FC601ED, 0xCCFBDF27 */
r4 = 1.86459191715652901344e-02, /* 0x3F9317EA, 0x742ED475 */
r5 = 7.77942496381893596434e-04, /* 0x3F497DDA, 0xCA41A95B */
r6 = 7.32668430744625636189e-06, /* 0x3EDEBAF7, 0xA5B38140 */
w0 = 4.18938533204672725052e-01, /* 0x3FDACFE3, 0x90C97D69 */
w1 = 8.33333333333329678849e-02, /* 0x3FB55555, 0x5555553B */
w2 = -2.77777777728775536470e-03, /* 0xBF66C16C, 0x16B02E5C */
w3 = 7.93650558643019558500e-04, /* 0x3F4A019F, 0x98CF38B6 */
w4 = -5.95187557450339963135e-04, /* 0xBF4380CB, 0x8C0FE741 */
w5 = 8.36339918996282139126e-04, /* 0x3F4B67BA, 0x4CDAD5D1 */
w6 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */
static const double zero= 0.00000000000000000000e+00;
v64df v64df_cos_aux (v64df x, v64di __mask);
v64df v64df_log_aux (v64df x, v64di __mask);
v64df v64df_sin_aux (v64df x, v64di __mask);
#if defined (__has_builtin) \
&& __has_builtin (__builtin_gcn_floorv) \
&& __has_builtin (__builtin_gcn_fabsv)
/* Helper for lgamma_r: evaluates sin(pi * x) for each lane of X.
   The argument is reduced to y = |x| mod 2 and an octant index n in
   0..7 (n = trunc (4 * y)), then the sin or cos helper is applied to
   the reduced value.  The code negates X up front, so X is expected to
   be negative (lgamma_r only calls this for lanes with hx < 0).  */
static v64df
v64df_sin_pi (v64df x)
{
// Explicitly create mask for internal function.
v64di __mask = VECTOR_INIT (-1L);
FUNCTION_INIT (v64df);
v64df y, z;
v64si n, ix;
GET_HIGH_WORD (ix, x, NO_COND);
ix &= 0x7fffffff;
/* High word below 0x3fd00000 (|x| < 0.25): no reduction needed.  */
VECTOR_IF (ix < 0x3fd00000, cond)
VECTOR_RETURN (v64df_sin_aux (pi * x, __mask), cond);
VECTOR_ENDIF
y = -x; /* x is assumed negative */
/*
* argument reduction, make sure inexact flag not raised if input
* is an integer
*/
z = __builtin_gcn_floorv (y);
VECTOR_IF (z != y, cond)
/* inexact anyway */
VECTOR_COND_MOVE(y, y * 0.5, cond);
VECTOR_COND_MOVE(y, 2.0 * (y - __builtin_gcn_floorv (y)), cond); /* y = |x| mod 2.0 */
VECTOR_COND_MOVE(n, __builtin_convertvector(y * 4.0, v64si), cond);
VECTOR_ELSE (cond)
/* y is an integer.  High word >= 0x43400000 (|x| >= 2**53): treated
   as even.  */
VECTOR_IF2 (__builtin_convertvector(ix >= 0x43400000, v64di), cond2, cond)
VECTOR_COND_MOVE(y, VECTOR_INIT(zero), cond2);
VECTOR_COND_MOVE(n, VECTOR_INIT(0), cond2); /* y must be even */
VECTOR_ELSE2 (cond2, cond)
/* Below 2**52, add two52 to z; the low bit of the low word of z is
   then read back as the parity of y.  */
VECTOR_COND_MOVE(z, y + two52 /* exact */, cond2 & __builtin_convertvector(ix < 0x43300000, v64di));
GET_LOW_WORD (n, z, cond2);
VECTOR_COND_MOVE(n, n & 1, cond2);
VECTOR_COND_MOVE(y, __builtin_convertvector(n, v64df), cond2);
VECTOR_COND_MOVE(n, n << 2, cond2);
VECTOR_ENDIF
VECTOR_ENDIF
/* Dispatch on the octant index n; the final branch covers every
   remaining value of n.  */
VECTOR_IF (n == 0, cond)
VECTOR_COND_MOVE(y, v64df_sin_aux (pi * y, __mask), cond);
VECTOR_ELSEIF (n == 1 | n == 2, cond)
VECTOR_COND_MOVE(y, v64df_cos_aux (pi * (0.5 - y), __mask), cond);
VECTOR_ELSEIF (n == 3 | n == 4, cond)
VECTOR_COND_MOVE(y, v64df_sin_aux (pi * (VECTOR_INIT(one) - y), __mask), cond);
VECTOR_ELSEIF (n == 5 | n == 6, cond)
VECTOR_COND_MOVE(y, -v64df_cos_aux (pi * (y - 1.5), __mask), cond);
VECTOR_ELSE (cond)
VECTOR_COND_MOVE(y, v64df_sin_aux (pi * (y - 2.0), __mask), cond);
VECTOR_ENDIF
/* y was computed from -x, so negate it for the returned value.  */
VECTOR_RETURN(-y, NO_COND);
FUNCTION_RETURN;
}
/* Vectorized reentrant lgamma: returns log(|gamma(x)|) per lane and
   stores the sign of gamma(x) (+1 or -1) for each lane in *SIGNGAMP.
   Follows the fdlibm interval scheme: special values first, then the
   reflection formula for negative x, then polynomial/rational
   approximations on [0,2), [2,8), [8,2**58) and an asymptotic form
   above that.

   Lanes take different paths, so every write inside a nested branch
   must be limited to the lanes of that branch's own condition
   variable.  */
DEF_VD_MATH_FUNC (v64df, lgamma_r, v64df x, v64si *signgamp)
{
  FUNCTION_INIT (v64df);

  v64df t,y,z,nadj = VECTOR_INIT(0.0),p,p1,p2,p3,q,r,w;
  v64si i,hx,lx,ix;

  EXTRACT_WORDS(hx,lx,x);

  /* purge off +-inf, NaN, +-0, and negative arguments */
  *signgamp = VECTOR_INIT(1);
  ix = hx&0x7fffffff;
  VECTOR_IF(ix>=0x7ff00000, cond)
    VECTOR_RETURN (x*x, cond);
  VECTOR_ENDIF
  VECTOR_IF((ix|lx)==0, cond)
    VECTOR_COND_MOVE(*signgamp, VECTOR_INIT(-1), cond & (hx<0));
    VECTOR_RETURN(one/(x-x), cond);
  VECTOR_ENDIF
  VECTOR_IF (ix < 0x3b900000, cond)	/* |x|<2**-70, return -log(|x|) */
    VECTOR_IF2(hx<0, cond2, cond)
      /* Only the negative lanes (cond2) get a negative sign; using the
         outer cond would also flip the sign for tiny positive x.  */
      VECTOR_COND_MOVE(*signgamp, VECTOR_INIT(-1), cond2);
      VECTOR_RETURN (-v64df_log_aux(-x, __mask), cond2);
    VECTOR_ELSE2(cond2, cond)
      VECTOR_RETURN (-v64df_log_aux(x, __mask), cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF
  VECTOR_IF (hx < 0, cond)
    VECTOR_IF2(ix>=0x43300000, cond2, cond)	/* |x|>=2**52, must be -integer */
      VECTOR_RETURN(one/(x-x), cond2);	/* -integer */
    VECTOR_ENDIF
    VECTOR_COND_MOVE (t, v64df_sin_pi (x), cond);
    VECTOR_IF2(__builtin_convertvector(t==zero, v64si), cond2, cond)
      VECTOR_RETURN(one/(x-x), cond2);	/* -integer */
    VECTOR_ENDIF
    /* Reflection: nadj = log(pi / |x * sin(pi*x)|), applied at the end.  */
    VECTOR_COND_MOVE(nadj, v64df_log_aux(VECTOR_INIT(pi)/__builtin_gcn_fabsv(t*x), __mask), cond);
    VECTOR_COND_MOVE(*signgamp, VECTOR_INIT(-1), cond & __builtin_convertvector(t < zero, v64si));
    VECTOR_COND_MOVE(x, -x, cond);
  VECTOR_ENDIF

  /* purge off 1 and 2 */
  VECTOR_IF((((ix-0x3ff00000)|lx)==0)|(((ix-0x40000000)|lx)==0), cond)
    VECTOR_COND_MOVE(r, VECTOR_INIT(0.0), cond);
  /* for x < 2.0 */
  VECTOR_ELSEIF(ix<0x40000000, cond)
    VECTOR_IF2(ix<=0x3feccccc, cond2, cond)
      /* lgamma(x) = lgamma(x+1)-log(x) */
      /* Masked write: a plain assignment here would overwrite r in
         every lane, including the x == 1 and x == 2 lanes that were
         just set to zero above.  */
      VECTOR_COND_MOVE(r, -v64df_log_aux(x, __mask), cond2);
      VECTOR_IF2(ix>=0x3FE76944, cond3, cond2)
        VECTOR_COND_MOVE(y, one-x, cond3);
        VECTOR_COND_MOVE(i, VECTOR_INIT(0), cond3);
      VECTOR_ELSEIF2(ix>=0x3FCDA661, cond3, cond2)
        VECTOR_COND_MOVE(y, x-(tc-one), cond3);
        VECTOR_COND_MOVE(i, VECTOR_INIT(1), cond3);
      VECTOR_ELSE2(cond3, cond2)
        VECTOR_COND_MOVE(y, x, cond3);
        VECTOR_COND_MOVE(i, VECTOR_INIT(2), cond3);
      VECTOR_ENDIF
    VECTOR_ELSE2(cond2, cond)
      VECTOR_COND_MOVE(r, VECTOR_INIT(zero), cond2);
      VECTOR_IF2(ix>=0x3FFBB4C3, cond3, cond2)	/* [1.7316,2] */
        VECTOR_COND_MOVE(y, VECTOR_INIT(2.0)-x, cond3);
        VECTOR_COND_MOVE(i, VECTOR_INIT(0), cond3);
      VECTOR_ELSEIF2(ix>=0x3FF3B4C4, cond3, cond2)	/* [1.23,1.73] */
        VECTOR_COND_MOVE(y, x-tc, cond3);
        VECTOR_COND_MOVE(i, VECTOR_INIT(1), cond3);
      VECTOR_ELSE2(cond3, cond2)
        VECTOR_COND_MOVE(y, x-one, cond3);
        VECTOR_COND_MOVE(i, VECTOR_INIT(2), cond3);
      VECTOR_ENDIF
    VECTOR_ENDIF

    /* i selects which of the three approximations to evaluate on y.  */
    VECTOR_IF2(i==0, cond2, cond)
      VECTOR_COND_MOVE(z, y*y, cond2);
      VECTOR_COND_MOVE(p1, a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10)))), cond2);
      VECTOR_COND_MOVE(p2, z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11))))), cond2);
      VECTOR_COND_MOVE(p, y*p1+p2, cond2);
      VECTOR_COND_MOVE(r, r + (p-0.5*y), cond2);
    VECTOR_ELSEIF2(i==1, cond2, cond)
      VECTOR_COND_MOVE(z, y*y, cond2);
      VECTOR_COND_MOVE(w, z*y, cond2);
      VECTOR_COND_MOVE(p1, t0+w*(t3+w*(t6+w*(t9 +w*t12))), cond2);	/* parallel comp */
      VECTOR_COND_MOVE(p2, t1+w*(t4+w*(t7+w*(t10+w*t13))), cond2);
      VECTOR_COND_MOVE(p3, t2+w*(t5+w*(t8+w*(t11+w*t14))), cond2);
      VECTOR_COND_MOVE(p, z*p1-(tt-w*(p2+y*p3)), cond2);
      VECTOR_COND_MOVE(r, r + (tf + p), cond2);
    VECTOR_ELSEIF2(i==2, cond2, cond)
      VECTOR_COND_MOVE(p1, y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5))))), cond2);
      VECTOR_COND_MOVE(p2, one+y*(v1+y*(v2+y*(v3+y*(v4+y*v5)))), cond2);
      VECTOR_COND_MOVE(r, r + (-0.5*y + p1/p2), cond2);
    VECTOR_ENDIF
  VECTOR_ELSEIF(ix<0x40200000, cond)
    /* x < 8.0 */
    VECTOR_COND_MOVE(i, __builtin_convertvector(x, v64si), cond);
    VECTOR_COND_MOVE(t, VECTOR_INIT(zero), cond);
    VECTOR_COND_MOVE(y, x-__builtin_convertvector(i, v64df), cond);
    VECTOR_COND_MOVE(p, y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6)))))), cond);
    VECTOR_COND_MOVE(q, one+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6))))), cond);
    VECTOR_COND_MOVE(r, half*y+p/q, cond);
    VECTOR_COND_MOVE(z, VECTOR_INIT(one), cond);	/* lgamma(1+s) = log(s) + lgamma(s) */
    /* Cumulative steps: each lane multiplies in every factor from its
       own integer part i down to (y+2.0).  */
    VECTOR_IF2(i==7, cond2, cond)
      VECTOR_COND_MOVE(z, z * (y+6.0), cond2);
    VECTOR_ENDIF
    VECTOR_IF2(i==7 | i==6, cond2, cond)
      VECTOR_COND_MOVE(z, z * (y+5.0), cond2);
    VECTOR_ENDIF
    VECTOR_IF2(i<=7 & i>=5, cond2, cond)
      VECTOR_COND_MOVE(z, z * (y+4.0), cond2);
    VECTOR_ENDIF
    VECTOR_IF2(i<=7 & i>=4, cond2, cond)
      VECTOR_COND_MOVE(z, z * (y+3.0), cond2);
    VECTOR_ENDIF
    VECTOR_IF2(i<=7 & i>=3, cond2, cond)
      VECTOR_COND_MOVE(z, z * (y+2.0), cond2);
      VECTOR_COND_MOVE(r, r + v64df_log_aux(z, __mask), cond2);
    VECTOR_ENDIF
  /* 8.0 <= x < 2**58 */
  VECTOR_ELSEIF(ix < 0x43900000, cond)
    VECTOR_COND_MOVE(t, v64df_log_aux(x, __mask), cond);
    VECTOR_COND_MOVE(z, one/x, cond);
    VECTOR_COND_MOVE(y, z*z, cond);
    VECTOR_COND_MOVE(w, w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*w6))))), cond);
    VECTOR_COND_MOVE(r, (x-half)*(t-one)+w, cond);
  VECTOR_ELSE(cond)
    /* 2**58 <= x <= inf */
    VECTOR_COND_MOVE(r, x*(v64df_log_aux(x, __mask)-one), cond);
  VECTOR_ENDIF
  /* Apply the reflection adjustment for originally negative x.  */
  VECTOR_IF(hx<0, cond)
    VECTOR_COND_MOVE(r, nadj - r, cond);
  VECTOR_ENDIF

  VECTOR_RETURN(r, NO_COND);

  FUNCTION_RETURN;
}
#endif

View File

@ -0,0 +1,111 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/******************************************************************
* The following routines are coded directly from the algorithms
* and coefficients given in "Software Manual for the Elementary
* Functions" by William J. Cody, Jr. and William Waite, Prentice
* Hall, 1980.
******************************************************************/
/* Based on newlib/libm/mathfp/s_logarithm.c in Newlib. */
#include "amdgcnmach.h"
v64si v64df_finite (v64df);
v64si v64df_isnan (v64df);
/* Numerator coefficients of the rational correction term used by the log
   routine in this file; it evaluates them as (a[2] * w + a[1]) * w + a[0].  */
static const double a[] = { -0.64124943423745581147e+02,
0.16383943563021534222e+02,
-0.78956112887481257267 };
/* Denominator coefficients of the same term; evaluated as
   ((w + b[2]) * w + b[1]) * w + b[0] (the leading coefficient is 1).  */
static const double b[] = { -0.76949932108494879777e+03,
0.31203222091924532844e+03,
-0.35667977739034646171e+02 };
/* C1 + C2 is approximately ln(2), split into a coarse part (C1 = 22713/32768)
   and a small remainder (C2).  The log routine multiplies each part by the
   binary exponent N separately and adds the two products at different points
   of the final sum.  */
static const double C1 = 22713.0 / 32768.0;
static const double C2 = 1.428606820309417232e-06;
#if defined (__has_builtin) \
        && __has_builtin (__builtin_gcn_frexpv_mant) \
        && __has_builtin (__builtin_gcn_frexpv_exp)

/* Lane-wise natural logarithm of X.

   Special cases handled before the main computation:
     x == 0      -> -z_infinity.d, errno = ERANGE
     x <  0      ->  z_notanum.d,  errno = EDOM
     x not finite-> z_notanum.d where v64df_isnan (x) holds, z_infinity.d
                    otherwise (assuming VECTOR_MERGE picks its first operand
                    where the selector is set, as its other uses suggest).

   Remaining lanes are split into mantissa f and exponent N with the GCN
   frexp builtins, f is reduced around sqrt(1/2), a rational correction in
   w = z*z is applied, and N * (C1 + C2) is added back for N != 0.

   The masking macros (FUNCTION_INIT, VECTOR_IF, VECTOR_RETURN, ...) come from
   amdgcnmach.h and are not visible here; their exact lane semantics should be
   confirmed against that header.  */
DEF_VD_MATH_FUNC (v64df, log, v64df x)
{
  FUNCTION_INIT (v64df);

  /* Check for domain/range errors here. */
  VECTOR_IF (x == 0.0, cond)
    errno = ERANGE;
    VECTOR_RETURN (VECTOR_INIT (-z_infinity.d), cond);
  VECTOR_ELSEIF (x < 0.0, cond)
    errno = EDOM;
    VECTOR_RETURN (VECTOR_INIT (z_notanum.d), cond);
  VECTOR_ELSEIF (__builtin_convertvector (~v64df_finite (x), v64di), cond)
    VECTOR_RETURN (VECTOR_MERGE (VECTOR_INIT (z_notanum.d),
                                 VECTOR_INIT (z_infinity.d),
                                 v64df_isnan (x)),
                   cond);
  VECTOR_ENDIF

  /* Get the exponent and mantissa where x = f * 2^N. */
  v64df f = __builtin_gcn_frexpv_mant (x);
  v64si N = __builtin_gcn_frexpv_exp (x);

  v64df z = f - 0.5;

  /* Reduce the argument: lanes with f above sqrt(1/2) keep N, the others
     use N - 1 and a different quotient.  */
  VECTOR_IF (f > __SQRT_HALF, cond)
    VECTOR_COND_MOVE (z, (z - 0.5) / (f * 0.5 + 0.5), cond);
  VECTOR_ELSE (cond)
    VECTOR_COND_MOVE (N, N - 1, cond);
    VECTOR_COND_MOVE (z, z / (z * 0.5 + 0.5), cond);
  VECTOR_ENDIF

  v64df w = z * z;

  /* Rational correction: z += z * w * A(w) / B(w), with A built from a[]
     and B from b[].  */
  z += z * w * ((a[2] * w + a[1]) * w + a[0]) / (((w + b[2]) * w + b[1]) * w + b[0]);

  /* Add N * ln(2) in two pieces (C2 first, then C1) for lanes with N != 0.  */
  v64df Nf = __builtin_convertvector (N, v64df);
  VECTOR_COND_MOVE (z, (Nf * C2 + z) + Nf * C1, N != 0);

  VECTOR_RETURN (z, NO_COND);

  FUNCTION_RETURN;
}

DEF_VARIANTS (log, df, df)
/* Lane-wise log1p, computed by adding one to X and forwarding the sum to
   v64df_log_aux together with the caller's lane mask (__mask is not declared
   in this block; presumably DEF_VD_MATH_FUNC provides it).  */
DEF_VD_MATH_FUNC (v64df, log1p, v64df x)
{
  /* TODO: Implement algorithm with better precision.  */
  const v64df shifted = 1 + x;

  return v64df_log_aux (shifted, __mask);
}

DEF_VARIANTS (log1p, df, df)
#endif

View File

@ -0,0 +1,50 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/******************************************************************
* The following routines are coded directly from the algorithms
* and coefficients given in "Software Manual for the Elementary
* Functions" by William J. Cody, Jr. and William Waite, Prentice
* Hall, 1980.
******************************************************************/
/* Based on newlib/libm/mathfp/s_logarithm.c in Newlib. */
#include "amdgcnmach.h"
v64df v64df_log_aux (v64df, v64di);
/* Scale factor applied to the result of v64df_log_aux; numerically this is
   1 / ln(10), i.e. log10(e).  */
static const double C3 = 0.43429448190325182765;

/* Lane-wise base-10 logarithm: the value returned by v64df_log_aux for X
   (called with the current lane mask) multiplied by C3.  */
DEF_VD_MATH_FUNC (v64df, log10, v64df x)
{
  const v64df base_log = v64df_log_aux (x, __mask);

  return base_log * C3;
}

DEF_VARIANTS (log10, df, df)

View File

@ -0,0 +1,26 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
#include "amdgcnmach.h"
v64df v64df_log_aux (v64df, v64di);
/* Scale factor applied to the result of v64df_log_aux; numerically this is
   1 / ln(2), i.e. log2(e).  */
static const double C3 = 1.4426950408889634073599246810019;

/* Lane-wise base-2 logarithm: the value returned by v64df_log_aux for X
   (called with the current lane mask) multiplied by C3.  */
DEF_VD_MATH_FUNC (v64df, log2, v64df x)
{
  const v64df base_log = v64df_log_aux (x, __mask);

  return base_log * C3;
}

DEF_VARIANTS (log2, df, df)

View File

@ -0,0 +1,80 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/common/s_modf.c in Newlib. */
#include "amdgcnmach.h"
v64si v64df_numtest (v64df);
/* Lane-wise modf: split X into an integral part, stored through IPTR, and a
   fractional part, which is the return value.

   The split is done on the raw words of the double: i0 is the high word
   (sign, exponent, top 20 mantissa bits), i1 the low word, and j0 the
   unbiased exponent.  Three ranges are handled:
     j0 < 20        the binary point lies in the high word;
     j0 > 51        there are no fraction bits (also covers Inf/NaN);
     otherwise      the binary point lies in the low word.
   "zero" is a zero carrying the sign of X.

   All of ret_i is stored to *IPTR at the end.
   NOTE(review): lanes that no branch writes leave ret_i unset; whether such
   lanes can exist depends on how the caller's mask interacts with the
   masking macros in amdgcnmach.h, which are not visible here.  */
DEF_VD_MATH_FUNC (v64df, modf, v64df x, v64df *iptr)
{
  FUNCTION_INIT (v64df);
  v64df ret_i;

  v64si i0, i1;
  EXTRACT_WORDS(i0, i1, x);
  v64si j0 = ((i0 >> 20) & 0x7ff) - 0x3ff;	/* exponent of x */
  v64df zero;
  v64si i;
  INSERT_WORDS (zero, i0 & 0x80000000, VECTOR_INIT (0), NO_COND);

  VECTOR_IF (j0 < 20, cond)			/* integer part in x*/
    VECTOR_IF2 (j0 < 0, cond2, cond)		/* |x|<1 */
      VECTOR_COND_MOVE (ret_i, zero, cond2);
      VECTOR_RETURN (x, cond2);
    VECTOR_ELSE2 (cond2, cond)
      /* Mask of the fraction bits held in the high word.  */
      i = (0x000fffff) >> j0;

      VECTOR_IF2 (((i0 & i) | i1) == 0, cond3, cond2)	/* x is integral */
        VECTOR_COND_MOVE (ret_i, x, cond3);
        VECTOR_RETURN (zero, cond3);
      VECTOR_ELSE2 (cond3, cond2)
        INSERT_WORDS (ret_i, i0 & ~i, VECTOR_INIT (0), cond3);
        VECTOR_RETURN (x - ret_i, cond3);
      VECTOR_ENDIF
    VECTOR_ENDIF
  VECTOR_ELSEIF (j0 > 51, cond)			/* no fraction part */
    VECTOR_COND_MOVE (ret_i, x, cond);
    VECTOR_IF2 (v64df_numtest (x) == NAN, cond2, cond)
      VECTOR_COND_MOVE (ret_i, x + x, cond2);
      VECTOR_RETURN (ret_i, cond2);		/* x is NaN, return NaN */
    VECTOR_ENDIF
    VECTOR_RETURN (zero, cond);			/* return +- 0 */
  VECTOR_ELSE (cond)
    /* Mask of the fraction bits held in the low word.  The shift has to be
       a logical one: shifting a signed all-ones vector right keeps every
       bit set, which would make the mask cover integer bits as well.  Use
       the same unsigned-shift-then-cast idiom as the rint routine.  */
    i = CAST_VECTOR (v64si, VECTOR_INIT (0xffffffff) >> (j0 - 20));

    VECTOR_IF2 ((i1 & i) == 0, cond2, cond)	/* x is integral */
      VECTOR_COND_MOVE (ret_i, x, cond2);
      INSERT_WORDS (x, i0 & 0x80000000, VECTOR_INIT (0), cond2);
      VECTOR_RETURN (x, cond2);
    VECTOR_ELSE2 (cond2, cond)
      INSERT_WORDS (ret_i, i0, i1 & ~i, cond2);
      VECTOR_RETURN (x - ret_i, cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF

  *iptr = ret_i;
  FUNCTION_RETURN;
}

View File

@ -0,0 +1,60 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/* Based on newlib/libm/mathfp/s_numtest.c in Newlib. */
#include "amdgcnmach.h"
/* Classify each lane of X.

   Returns, per lane:
     0     if x == 0.0,
     NAN   if the exponent field is all ones and the mantissa is non-zero,
     INF   if the exponent field is all ones and the mantissa is zero,
     NUM   otherwise.
   NAN, INF and NUM are integer classification codes defined outside this
   block.  */
v64si
v64df_numtest (v64df x)
{
  // Explicitly create mask for internal function.
  v64si __mask = VECTOR_INIT (-1);
  FUNCTION_INIT (v64si);

  v64si hx, lx;
  EXTRACT_WORDS (hx, lx, x);
  v64si exp = (hx & 0x7ff00000) >> 20;

  /* Check for a zero input. */
  VECTOR_RETURN (VECTOR_INIT (0), x == 0.0);

  /* Check for not a number or infinity. */
  VECTOR_IF (exp == 0x7ff, cond)
    /* The mantissa occupies the low 20 bits of the high word plus the whole
       low word; every one of those bits has to be inspected, otherwise a
       NaN whose payload lives only in the lower high-word bits would be
       reported as infinity.  */
    VECTOR_RETURN (VECTOR_MERGE (VECTOR_INIT (NAN),
                                 VECTOR_INIT (INF),
                                 ((hx & 0x000fffff) != 0) | (lx != 0)),
                   cond);
  /* Otherwise it's a finite value. */
  VECTOR_ELSE (cond)
    VECTOR_RETURN (VECTOR_INIT (NUM), cond);
  VECTOR_ENDIF

  FUNCTION_RETURN;
}

View File

@ -0,0 +1,336 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/math/e_pow.c in Newlib. */
#include "amdgcnmach.h"
/* Constants used by the pow routine in this file.  The hexadecimal pairs in
   the trailing comments are the high and low 32-bit words of each double.
   bp[], dp_h[] and dp_l[] are two-entry tables selected per lane by the
   interval index k (0 or 1) that pow computes.  Names ending in _h and _l
   hold the high and low parts of a value split in two.  */
static const double
bp[] = {1.0, 1.5,},
dp_h[] = { 0.0, 5.84962487220764160156e-01,}, /* 0x3FE2B803, 0x40000000 */
dp_l[] = { 0.0, 1.35003920212974897128e-08,}, /* 0x3E4CFDEB, 0x43CFD006 */
zero = 0.0,
one = 1.0,
two = 2.0,
two53 = 9007199254740992.0, /* 0x43400000, 0x00000000 */
/* poly coefs for (3/2)*(log(x)-2s-2/3*s**3) */
L1 = 5.99999999999994648725e-01, /* 0x3FE33333, 0x33333303 */
L2 = 4.28571428578550184252e-01, /* 0x3FDB6DB6, 0xDB6FABFF */
L3 = 3.33333329818377432918e-01, /* 0x3FD55555, 0x518F264D */
L4 = 2.72728123808534006489e-01, /* 0x3FD17460, 0xA91D4101 */
L5 = 2.30660745775561754067e-01, /* 0x3FCD864A, 0x93C9DB65 */
L6 = 2.06975017800338417784e-01, /* 0x3FCA7E28, 0x4A454EEF */
/* Polynomial coefficients used in the final 2**(p_h+p_l) evaluation.  */
P1 = 1.66666666666666019037e-01, /* 0x3FC55555, 0x5555553E */
P2 = -2.77777777770155933842e-03, /* 0xBF66C16C, 0x16BEBD93 */
P3 = 6.61375632143793436117e-05, /* 0x3F11566A, 0xAF25DE2C */
P4 = -1.65339022054652515390e-06, /* 0xBEBBBD41, 0xC5D26BF1 */
P5 = 4.13813679705723846039e-08, /* 0x3E663769, 0x72BEA4D0 */
lg2 = 6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */
lg2_h = 6.93147182464599609375e-01, /* 0x3FE62E43, 0x00000000 */
lg2_l = -1.90465429995776804525e-09, /* 0xBE205C61, 0x0CA86C39 */
ovt = 8.0085662595372944372e-0017, /* -(1024-log2(ovfl+.5ulp)) */
cp = 9.61796693925975554329e-01, /* 0x3FEEC709, 0xDC3A03FD =2/(3ln2) */
cp_h = 9.61796700954437255859e-01, /* 0x3FEEC709, 0xE0000000 =(float)cp */
cp_l = -7.02846165095275826516e-09, /* 0xBE3E2FE0, 0x145B01F5 =tail of cp_h*/
ivln2 = 1.44269504088896338700e+00, /* 0x3FF71547, 0x652B82FE =1/ln2 */
ivln2_h = 1.44269502162933349609e+00, /* 0x3FF71547, 0x60000000 =24b 1/ln2*/
ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
v64df v64df_sqrt_aux (v64df, v64di);
v64df v64df_scalbn_aux (v64df, v64si, v64di);
/* Overflow helper: sets errno to ERANGE and returns the product of a
   per-lane signed 0x1p769 with 0x1p769, which exceeds the double range.
   SIGN chooses between the negative and positive operand via VECTOR_MERGE
   (presumably negative where SIGN is set -- confirm against amdgcnmach.h).  */
static v64df v64df_math_oflow (v64di sign)
{
  errno = ERANGE;

  const v64df signed_big = VECTOR_MERGE (VECTOR_INIT (-0x1p769),
                                         VECTOR_INIT (0x1p769), sign);
  return signed_big * 0x1p769;
}
/* Underflow helper: sets errno to ERANGE and returns the product of a
   per-lane signed 0x1p-767 with 0x1p-767, which is below the double range.
   SIGN chooses between the negative and positive operand via VECTOR_MERGE
   (presumably negative where SIGN is set -- confirm against amdgcnmach.h).  */
static v64df v64df_math_uflow (v64di sign)
{
  errno = ERANGE;

  const v64df signed_small = VECTOR_MERGE (VECTOR_INIT (-0x1p-767),
                                           VECTOR_INIT (0x1p-767), sign);
  return signed_small * 0x1p-767;
}
/* Per-lane test for a signalling NaN, using the IEEE-754 2008 encoding:
   exponent bits all ones, most significant mantissa bit clear, and at least
   one other mantissa bit set.  The last condition is what separates a
   signalling NaN from an infinity, whose mantissa is entirely zero.
   Returns a lane mask (all ones where X is a signalling NaN, else zero).  */
static v64si v64df_issignaling_inline (v64df x)
{
  /* Explicit all-lanes mask for helper macros that expect one in scope.  */
  v64si __mask __attribute__ ((unused)) = VECTOR_INIT (-1);

  v64si hx, lx;
  EXTRACT_WORDS (hx, lx, x);

  v64si exp_all_ones = (hx & 0x7ff00000) == 0x7ff00000;
  v64si quiet_bit_clear = (hx & 0x00080000) == 0;
  v64si payload_nonzero = ((hx & 0x0007ffff) | lx) != 0;

  return exp_all_ones & quiet_bit_clear & payload_nonzero;
}
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsv)
/* Lane-wise x**y for double vectors.

   Outline, following the comments already in the body:
     1. Special cases on y (zero, NaN operands, +-inf, +-1, 2, 0.5) and on x
        (+-0, +-inf, +-1), plus NaN for negative x with non-integer y.
     2. Compute log2(|x|) as a high/low pair t1 + t2, by one of two routes
        depending on whether |y| exceeds 2**31.
     3. Split y into y1 + y2 and form z = y * log2(|x|) as p_h + p_l.
     4. Detect overflow and underflow of z, then compute 2**z and apply the
        sign s (-1 for negative x with odd integer y, else 1).

   hx/lx and hy/ly are the high/low 32-bit words of x and y; ix and iy are
   the high words with the sign bit cleared.

   The VECTOR_* / *_WORD(S) macros and __mask come from amdgcnmach.h and are
   not visible here.  From their use, VECTOR_RETURN appears to record the
   value for the lanes selected by its condition and exclude them from
   further work, and VECTOR_IF appears to open a scope, since locals such as
   t, u, v and k are redeclared after the matching VECTOR_ENDIF -- confirm
   against that header.  */
DEF_VD_MATH_FUNC (v64df, pow, v64df x, v64df y)
{
FUNCTION_INIT (v64df);
v64si hx, hy, lx, ly;
EXTRACT_WORDS(hx,lx,x);
EXTRACT_WORDS(hy,ly,y);
v64si ix = hx&0x7fffffff;
v64si iy = hy&0x7fffffff;
/* y==zero: x**0 = 1 unless x is snan */
VECTOR_IF ((iy|ly)==0, cond)
VECTOR_RETURN (x + y, cond & v64df_issignaling_inline(x));
VECTOR_RETURN (VECTOR_INIT (1.0), cond);
VECTOR_ENDIF
/* x|y==NaN return NaN unless x==1 then return 1 */
VECTOR_IF ((ix > 0x7ff00000) | ((ix==0x7ff00000)&(lx!=0))
| (iy > 0x7ff00000) | ((iy==0x7ff00000)&(ly!=0)), cond)
/* Note: == binds tighter than &, so the condition below is
   cond & (((hx-0x3ff00000)|lx)==0) & ~issignaling(y).  */
VECTOR_RETURN (VECTOR_INIT (1.0), cond & ((hx-0x3ff00000)|lx)==0
& ~v64df_issignaling_inline(y));
VECTOR_RETURN (x + y, cond);
VECTOR_ENDIF
/* determine if y is an odd int when x < 0
* yisint = 0 ... y is not an integer
* yisint = 1 ... y is an odd int
* yisint = 2 ... y is an even int
*/
v64si yisint = VECTOR_INIT (0);
VECTOR_IF (hx < 0, cond)
VECTOR_IF2(iy>=0x43400000, cond2, cond)
VECTOR_COND_MOVE (yisint, VECTOR_INIT (2), cond2); /* even integer y */
VECTOR_ELSEIF2 (iy>=0x3ff00000, cond2, cond)
v64si k = (iy>>20)-0x3ff; /* exponent */
VECTOR_IF2 (k>20, cond3, cond2)
/* Lowest integer bit of y lies in the low word: shift the fraction bits
   out and back in to test that they are all zero.  */
v64si j = ly>>(52-k);
VECTOR_COND_MOVE (yisint, 2-(j&1), cond3 & (j<<(52-k))==ly);
VECTOR_ELSEIF2 (ly==0, cond3, cond2)
/* Same test on the high word when the low word is entirely zero.  */
v64si j = iy>>(20-k);
VECTOR_COND_MOVE (yisint, 2-(j&1), cond3 & (j<<(20-k))==iy);
VECTOR_ENDIF
VECTOR_ENDIF
VECTOR_ENDIF
/* special value of y */
VECTOR_IF (ly==0, cond)
VECTOR_IF2 (iy==0x7ff00000, cond2, cond) /* y is +-inf */
VECTOR_IF2 (((ix-0x3ff00000)|lx)==0, cond3, cond2)
VECTOR_RETURN (VECTOR_INIT (1.0), cond3); /* +-1**+-inf = 1 */
VECTOR_ELSEIF2 (ix >= 0x3ff00000, cond3, cond2) /* (|x|>1)**+-inf = inf,0 */
VECTOR_RETURN (y, cond3 & hy>=0);
VECTOR_RETURN (VECTOR_INIT (0.0), cond3);
VECTOR_ELSE2 (cond3, cond2) /* (|x|<1)**-,+inf = inf,0 */
VECTOR_RETURN (-y, cond3 & hy<0);
VECTOR_RETURN (VECTOR_INIT (0.0), cond3);
VECTOR_ENDIF
VECTOR_ENDIF
VECTOR_IF2 (iy==0x3ff00000, cond2, cond) /* y is +-1 */
VECTOR_RETURN (VECTOR_INIT (1.0) / x, cond2 & hy<0);
VECTOR_RETURN (x, cond2);
VECTOR_ENDIF
VECTOR_RETURN (x*x, cond & hy==0x40000000); /* y is 2 */
/* y is 0.5 */
/* x >= +0 */
VECTOR_RETURN (v64df_sqrt_aux (x, __mask), cond & (hy==0x3fe00000) & (hx>=0));
VECTOR_ENDIF
v64df ax = __builtin_gcn_fabsv(x);
/* special value of x */
VECTOR_IF (lx==0, cond)
VECTOR_IF2 ((ix==0x7ff00000)|(ix==0)|(ix==0x3ff00000), cond2, cond)
v64df z = ax; /*x is +-0,+-inf,+-1*/
VECTOR_COND_MOVE (z, VECTOR_INIT (1.0) / z, cond2 & (hy<0)); /* z = (1/|x|) */
VECTOR_IF2 (hx<0, cond3, cond2)
VECTOR_IF2 (((ix-0x3ff00000)|yisint)==0, cond4, cond3)
VECTOR_COND_MOVE (z, (z-z)/(z-z), cond4); /* (-1)**non-int is NaN */
VECTOR_ELSEIF2 (yisint==1, cond4, cond3)
VECTOR_COND_MOVE (z, -z, cond4); /* (x<0)**odd = -(|x|**odd) */
VECTOR_ENDIF
VECTOR_ENDIF
VECTOR_RETURN (z, cond2);
VECTOR_ENDIF
VECTOR_ENDIF
/* (x<0)**(non-int) is NaN */
VECTOR_RETURN ((x-x)/(x-x), ((((hx >> 31) & 1) - 1)|yisint)==0);
/* t1 + t2 will hold log2(|x|) as a high part (low word cleared) and a
   low-order correction; both branches below fill them in.  */
v64df t1, t2;
/* |y| is huge */
VECTOR_IF(iy>0x41e00000, cond) /* if |y| > 2**31 */
VECTOR_IF2 (iy>0x43f00000, cond2, cond) /* if |y| > 2**64, must o/uflow */
VECTOR_IF2 (ix<=0x3fefffff, cond3, cond2)
VECTOR_RETURN (v64df_math_oflow (VECTOR_INIT (0L)), cond3 & (hy<0));
VECTOR_RETURN (v64df_math_uflow (VECTOR_INIT (0L)), cond3);
VECTOR_ENDIF
VECTOR_IF2 (ix>=0x3ff00000, cond3, cond2)
VECTOR_RETURN (v64df_math_oflow (VECTOR_INIT (0L)), cond3 & (hy>0));
VECTOR_RETURN (v64df_math_uflow (VECTOR_INIT (0L)), cond3);
VECTOR_ENDIF
VECTOR_ENDIF
/* over/underflow if x is not close to one */
VECTOR_IF2 (ix<0x3fefffff, cond2, cond)
VECTOR_RETURN (v64df_math_oflow (VECTOR_INIT (0L)), cond2 & (hy<0));
VECTOR_RETURN (v64df_math_uflow (VECTOR_INIT (0L)), cond2);
VECTOR_ENDIF
VECTOR_IF2 (ix>0x3ff00000, cond2, cond)
VECTOR_RETURN (v64df_math_oflow (VECTOR_INIT (0L)), cond2 & (hy>0));
VECTOR_RETURN (v64df_math_uflow (VECTOR_INIT (0L)), cond2);
VECTOR_ENDIF
/* now |1-x| is tiny <= 2**-20, suffice to compute
log(x) by x-x^2/2+x^3/3-x^4/4 */
v64df t = ax-1; /* t has 20 trailing zeros */
v64df w = (t*t)*(0.5-t*(0.3333333333333333333333-t*0.25));
v64df u = ivln2_h*t; /* ivln2_h has 21 sig. bits */
v64df v = t*ivln2_l-w*ivln2;
VECTOR_COND_MOVE (t1, u+v, cond);
SET_LOW_WORD (t1, VECTOR_INIT (0), cond);
VECTOR_COND_MOVE (t2, v-(t1-u), cond);
VECTOR_ELSE (cond)
/* General case: n accumulates the binary exponent of |x|.  */
v64si n = VECTOR_INIT (0);
/* take care subnormal number */
VECTOR_IF2 (ix<0x00100000, cond2, cond)
VECTOR_COND_MOVE (ax, ax * two53, cond2);
VECTOR_COND_MOVE (n, n - 53, cond2);
GET_HIGH_WORD (ix, ax, cond2);
VECTOR_ENDIF
n += ((ix)>>20)-0x3ff;
v64si j = ix&0x000fffff;
/* determine interval */
ix = j|0x3ff00000; /* normalize ix */
v64si k;
VECTOR_IF2 (j<=0x3988E, cond2, cond)
VECTOR_COND_MOVE (k, VECTOR_INIT (0), cond2); /* |x|<sqrt(3/2) */
VECTOR_ELSEIF2 (j<0xBB67A, cond2, cond)
VECTOR_COND_MOVE (k, VECTOR_INIT (1), cond2); /* |x|<sqrt(3) */
VECTOR_ELSE2 (cond2, cond)
VECTOR_COND_MOVE (k, VECTOR_INIT (0), cond2);
VECTOR_COND_MOVE (n, n + 1, cond2);
VECTOR_COND_MOVE (ix, ix - 0x00100000, cond2);
VECTOR_ENDIF
SET_HIGH_WORD (ax, ix, cond);
/* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
/* The scalar table lookups bp[k], dp_l[k], dp_h[k] are expressed as
   per-lane selects on k == 1.  */
v64df bp_k = VECTOR_MERGE (VECTOR_INIT (bp[1]), VECTOR_INIT (bp[0]), k == 1);
v64df u = ax-bp_k; /* bp[0]=1.0, bp[1]=1.5 */
v64df v = 1.0/(ax+bp_k);
v64df s = u*v;
v64df s_h = s;
SET_LOW_WORD (s_h, VECTOR_INIT (0), cond);
/* t_h=ax+bp[k] High */
v64df t_h = VECTOR_INIT (0.0);
SET_HIGH_WORD (t_h,((ix>>1)|0x20000000)+0x00080000+(k<<18), cond);
v64df t_l = ax - (t_h-bp_k);
v64df s_l = v*((u-s_h*t_h)-s_h*t_l);
/* compute log(ax) */
v64df s2 = s*s;
v64df r = s2*s2*(L1+s2*(L2+s2*(L3+s2*(L4+s2*(L5+s2*L6)))));
r += s_l*(s_h+s);
s2 = s_h*s_h;
t_h = 3.0+s2+r;
SET_LOW_WORD (t_h, VECTOR_INIT (0), cond);
t_l = r-((t_h-3.0)-s2);
/* u+v = s*(1+...) */
u = s_h*t_h;
v = s_l*t_h+t_l*s;
/* 2/(3log2)*(s+...) */
v64df p_h = u+v;
SET_LOW_WORD (p_h, VECTOR_INIT (0), cond);
v64df p_l = v-(p_h-u);
v64df z_h = cp_h*p_h; /* cp_h+cp_l = 2/(3*log2) */
v64df dp_l_k = VECTOR_MERGE (VECTOR_INIT (dp_l[1]), VECTOR_INIT (dp_l[0]), k == 1);
v64df z_l = cp_l*p_h+p_l*cp+dp_l_k;
/* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
v64df t = __builtin_convertvector (n, v64df);
v64df dp_h_k = VECTOR_MERGE (VECTOR_INIT (dp_h[1]), VECTOR_INIT (dp_h[0]), k == 1);
VECTOR_COND_MOVE (t1, ((z_h+z_l)+dp_h_k)+t, cond);
SET_LOW_WORD (t1, VECTOR_INIT (0), cond);
VECTOR_COND_MOVE (t2, z_l-(((t1-t)-dp_h_k)-z_h), cond);
VECTOR_ENDIF
v64df s = VECTOR_INIT (1.0); /* s (sign of result -ve**odd) = -1 else = 1 */
VECTOR_COND_MOVE (s, VECTOR_INIT (-1.0), /* (-ve)**(odd int) */
((hx>>31) != 0)&(yisint == 1));
/* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
v64df y1 = y;
SET_LOW_WORD (y1, VECTOR_INIT (0), NO_COND);
v64df p_l = (y-y1)*t1+y*t2;
v64df p_h = y1*t1;
v64df z = p_l+p_h;
v64si i, j;
/* j = high word of z, i = low word of z.  */
EXTRACT_WORDS(j, i, z);
VECTOR_IF (j>=0x40900000, cond) /* z >= 1024 */
/* if z > 1024 */
/* cond is converted to v64di for use with the double comparison below.  */
v64di cond_di = __builtin_convertvector (cond, v64di);
VECTOR_RETURN (v64df_math_oflow(s<0), cond & (((j-0x40900000)|i)!=0)); /* overflow */
VECTOR_RETURN (v64df_math_oflow(s<0), cond_di & (p_l+ovt>z-p_h)); /* overflow */
VECTOR_ELSEIF ((j&0x7fffffff)>=0x4090cc00, cond) /* z <= -1075 */
/* z < -1075 */
v64di cond_di = __builtin_convertvector (cond, v64di);
VECTOR_RETURN (v64df_math_uflow(s<0), cond & (((j-0xc090cc00)|i)!=0)); /* underflow */
VECTOR_RETURN (v64df_math_uflow(s<0), cond_di & (p_l<=z-p_h)); /* underflow */
VECTOR_ENDIF
/*
* compute 2**(p_h+p_l)
*/
i = j&0x7fffffff;
v64si k = (i>>20)-0x3ff;
v64si n = VECTOR_INIT (0);
VECTOR_IF (i>0x3fe00000, cond) /* if |z| > 0.5, set n = [z+0.5] */
VECTOR_COND_MOVE (n, j+(0x00100000>>(k+1)), cond);
k = ((n&0x7fffffff)>>20)-0x3ff; /* new k for n */
v64df t = VECTOR_INIT (0.0);
SET_HIGH_WORD(t, n&~(0x000fffff>>k), cond);
VECTOR_COND_MOVE (n, ((n&0x000fffff)|0x00100000)>>(20-k), cond);
VECTOR_COND_MOVE (n, -n, cond & (j<0));
VECTOR_COND_MOVE (p_h, p_h - t, cond);
VECTOR_ENDIF
v64df t = p_l+p_h;
SET_LOW_WORD(t, VECTOR_INIT (0), NO_COND);
v64df u = t*lg2_h;
v64df v = (p_l-(t-p_h))*lg2+t*lg2_l;
z = u+v;
v64df w = v-(z-u);
t = z*z;
t1 = z - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
v64df r = (z*t1)/(t1-two)-(w+z*w);
z = VECTOR_INIT (1.0)-(r-z);
/* Scale by 2**n: add n to the exponent field directly when the result
   stays normal, otherwise go through v64df_scalbn_aux.  */
GET_HIGH_WORD(j,z, NO_COND);
j += (n<<20);
VECTOR_IF ((j>>20)<=0, cond)
VECTOR_COND_MOVE (z, v64df_scalbn_aux (z, n, __mask), cond); /* subnormal output */
VECTOR_ELSE (cond)
SET_HIGH_WORD(z,j, cond);
VECTOR_ENDIF
VECTOR_RETURN (s*z, NO_COND);
FUNCTION_RETURN;
}
DEF_VARIANTS2 (pow, df, df)
#endif

View File

@ -0,0 +1,82 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/mathfp/e_remainder.c in Newlib. */
#include "amdgcnmach.h"
v64df v64df_fmod_aux (v64df, v64df, v64di);
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsv)
/* Lane-wise remainder of X with respect to P.

   hx/lx and hp/lp are the high/low words of x and p; sx keeps the sign bit
   of x, and hx and hp then have their sign bits cleared.  The steps are:
     - return (x*p)/(x*p) when p is zero, x is not finite, or p is NaN;
     - reduce x with v64df_fmod_aux (x, p+p) where p+p cannot overflow;
     - return 0.0 * x where the words of x and p (taken before the fmod
       step) are equal;
     - on |x| and |p|, subtract p up to twice, comparing x + x with p for
       small p (hp < 0x00200000) and x with p/2 otherwise;
     - flip the sign bit of the result by sx.

   The VECTOR_* macros and __mask come from amdgcnmach.h and are not visible
   here; their exact lane semantics should be confirmed against that
   header.  */
DEF_VD_MATH_FUNC (v64df, remainder, v64df x, v64df p)
{
FUNCTION_INIT (v64df);
v64si hx, lx;
EXTRACT_WORDS (hx, lx, x);
v64si hp, lp;
EXTRACT_WORDS (hp, lp, p);
v64si sx = hx & 0x80000000;
hp &= 0x7fffffff;
hx &= 0x7fffffff;
/* purge off exception values */
VECTOR_RETURN ((x * p) / (x * p), ((hp | lp) == 0) | ((hx >= 0x7ff00000)
| /* x not finite */
((hp >= 0x7ff00000) & /* p is NaN */
(((hp - 0x7ff00000) | lp) != 0))));
VECTOR_COND_MOVE (x, v64df_fmod_aux (x, p+p, __mask), hp <= 0x7fdfffff); // now x < 2p
/* hx and lx still describe x as it was before the fmod reduction.  */
VECTOR_RETURN (0.0 * x, ((hx-hp)|(lx-lp))==0);
x = __builtin_gcn_fabsv (x);
p = __builtin_gcn_fabsv (p);
VECTOR_IF (hp < 0x00200000, cond)
/* cond comes from an integer comparison; it is converted to v64di so it
   can be combined with the double comparison.  */
VECTOR_IF2 (x + x > p, cond2, __builtin_convertvector(cond, v64di))
VECTOR_COND_MOVE (x, x - p, cond2);
/* The second test uses x after the first subtraction.  */
VECTOR_COND_MOVE (x, x - p, cond2 & (x + x >= p));
VECTOR_ENDIF
VECTOR_ELSE (cond)
v64df p_half = 0.5 * p;
VECTOR_IF2 (x > p_half, cond2, __builtin_convertvector(cond, v64di))
VECTOR_COND_MOVE (x, x - p, cond2);
VECTOR_COND_MOVE (x, x - p, cond2 & (x >= p_half));
VECTOR_ENDIF
VECTOR_ENDIF
/* Apply the saved sign of the original x to the result.  */
GET_HIGH_WORD (hx, x, NO_COND);
SET_HIGH_WORD (x, hx ^ sx, NO_COND);
VECTOR_RETURN (x, NO_COND);
FUNCTION_RETURN;
}
DEF_VARIANTS2 (remainder, df, df)
#endif

View File

@ -0,0 +1,83 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/common/s_rint.c in Newlib. */
#include "amdgcnmach.h"
/* +2^52 and -2^52.  The rint routine in this file selects one of the two
   per lane according to the sign bit of its argument, then adds and
   subtracts it.  */
static const double TWO52[2] = {
4.50359962737049600000e+15, /* 0x43300000, 0x00000000 */
-4.50359962737049600000e+15, /* 0xC3300000, 0x00000000 */
};
/* Lane-wise rint: round X to an integral value by adding and subtracting
   a 2^52 constant carrying the sign of X.

   i0/i1 are the high/low words of x, sx its sign bit and j0 its unbiased
   exponent.  Before the add/subtract step, any fraction bits below the 0.5
   position are folded into a single marker bit one place below 0.5, so the
   final rounding sees only "0.5 bit" and "something below it".  Three
   ranges are handled:
     j0 < 20        binary point in the high word (|x| < 1 is a sub-case);
     j0 > 51        already integral, or Inf/NaN (j0 == 0x400 -> x + x);
     otherwise      binary point in the low word.

   The VECTOR_* macros and __mask come from amdgcnmach.h and are not visible
   here; their exact lane semantics should be confirmed against that
   header.  */
DEF_VD_MATH_FUNC (v64df, rint, v64df x)
{
  FUNCTION_INIT (v64df);

  v64si i0, i1;
  EXTRACT_WORDS (i0, i1, x);
  v64si sx = (i0 >> 31) & 1;
  v64df two52 = VECTOR_MERGE (VECTOR_INIT (TWO52[1]), VECTOR_INIT (TWO52[0]), sx != 0);
  v64si j0 = ((i0 >> 20) & 0x7ff) - 0x3ff;
  v64si i;

  VECTOR_IF (j0 < 20, cond)
    VECTOR_IF2 (j0 < 0, cond2, cond)
      /* |x| < 1: +-0 is returned unchanged.  */
      VECTOR_RETURN (x, cond2 & (((i0 & 0x7fffffff) | i1) == 0));
      VECTOR_COND_MOVE (i1, i1 | (i0 & 0x0fffff), cond2);
      VECTOR_COND_MOVE (i0, i0 & 0xfffe0000, cond2);
      VECTOR_COND_MOVE (i0, i0 | (((i1 | -i1) >> 12) & 0x80000), cond2);
      SET_HIGH_WORD (x, i0, cond2);
      v64df w = two52 + x;
      v64df t = w - two52;
      /* Restore the sign of x on the result (keeps -0).  */
      GET_HIGH_WORD (i0, t, cond2);
      SET_HIGH_WORD (t, (i0&0x7fffffff)|(sx<<31), cond2);
      VECTOR_RETURN (t, cond2);
    VECTOR_ELSE2 (cond2, cond)
      i = (0x000fffff) >> j0;
      VECTOR_RETURN (x, cond2 & (((i0 & i) | i1) == 0)); /* x is integral */
      i >>= 1;
      VECTOR_IF2 (((i0 & i) | i1) != 0, cond3, cond2)
        VECTOR_COND_MOVE (i1, CAST_VECTOR(v64si, VECTOR_INIT (0x80000000)), cond3 & (j0 == 19));
        VECTOR_COND_MOVE (i1, VECTOR_INIT (0), cond3 & (j0 != 19));
        VECTOR_COND_MOVE (i0, (i0 & (~i)) | ((0x40000) >> j0), cond3);
      VECTOR_ENDIF
    VECTOR_ENDIF
  VECTOR_ELSEIF (j0 > 51, cond)
    VECTOR_RETURN (x + x, cond & (j0 == 0x400));
    VECTOR_RETURN (x, cond);
  VECTOR_ELSE (cond)
    i = CAST_VECTOR (v64si, VECTOR_INIT (0xffffffff) >> (j0 - 20));
    VECTOR_RETURN (x, cond & ((i1 & i) == 0));
    /* Halve the mask with a logical shift.  i is a signed vector, so a
       plain >> would replicate the top bit; for j0 == 20 the mask is
       all-ones and would stay all-ones, making the code below clear the
       0.5 bit as well.  */
    i = (i >> 1) & 0x7fffffff;
    VECTOR_COND_MOVE (i1, (i1 & (~i)) | (0x40000000 >> (j0 - 20)), cond & ((i1 & i) != 0));
  VECTOR_ENDIF

  INSERT_WORDS (x, i0, i1, NO_COND);
  v64df w = two52 + x;
  VECTOR_RETURN (w - two52, NO_COND);

  FUNCTION_RETURN;
}

DEF_VARIANTS (rint, df, df)

View File

@ -0,0 +1,64 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/mathfp/e_scalb.c in Newlib. */
#include "amdgcnmach.h"
v64si v64df_isnan (v64df);
v64si v64df_finite (v64df);
v64df v64df_rint_aux (v64df, v64di);
v64df v64df_scalbn_aux (v64df, v64si, v64di);
/* Lane-wise scalb: scale X by 2 raised to FN, where FN is a double holding
   an integral value.  Cases, in the order tested:
     - either operand NaN      -> x * fn;
     - fn infinite             -> x * fn for fn > 0, x / (-fn) otherwise;
     - fn not integral         -> (fn-fn)/(fn-fn);
     - |fn| > 65000            -> v64df_scalbn_aux with the exponent clamped
                                  to +-65000;
     - otherwise               -> v64df_scalbn_aux with fn converted to int.

   The VECTOR_* macros and __mask come from amdgcnmach.h and are not visible
   here; from their use, VECTOR_RETURN appears to settle the selected lanes
   so that later statements only affect the remaining ones -- confirm against
   that header.  */
DEF_VD_MATH_FUNC (v64df, scalb, v64df x, v64df fn)
{
FUNCTION_INIT (v64df);
VECTOR_IF (v64df_isnan(x) | v64df_isnan(fn), cond)
VECTOR_RETURN (x * fn, cond);
VECTOR_ENDIF
VECTOR_IF (~v64df_finite (fn), cond)
VECTOR_IF2 (fn > 0.0, cond2, cond)
VECTOR_RETURN (x * fn, cond2);
VECTOR_ELSE2 (cond2, cond)
VECTOR_RETURN (x / (-fn), cond2);
VECTOR_ENDIF
VECTOR_ENDIF
/* fn must be integral: compare it with its rounded value.  */
VECTOR_IF (v64df_rint_aux (fn, __mask) != fn, cond)
VECTOR_RETURN ((fn-fn)/(fn-fn), cond);
VECTOR_ENDIF
/* Clamp very large magnitudes before the conversion to int below.  */
VECTOR_IF (fn > 65000.0, cond)
VECTOR_RETURN (v64df_scalbn_aux (x, VECTOR_INIT (65000), __mask), cond);
VECTOR_ENDIF
VECTOR_IF (-fn > 65000.0, cond)
VECTOR_RETURN (v64df_scalbn_aux (x, VECTOR_INIT (-65000), __mask), cond);
VECTOR_ENDIF
VECTOR_RETURN (v64df_scalbn_aux (x, __builtin_convertvector (fn, v64si), __mask),
NO_COND);
FUNCTION_RETURN;
}
DEF_VARIANTS2 (scalb, df, df)

View File

@ -0,0 +1,72 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/common/s_scalbn.c in Newlib. */
#include "amdgcnmach.h"
#include <limits.h>
#include <float.h>
/* Constants for the scalbn routine in this file.  two54 (2^54) scales a
   subnormal input up into the normal range and twom54 (2^-54) scales a
   result back down into the subnormal range.  huge and tiny are multiplied
   together with a signed copy of themselves to produce the overflow and
   underflow results.  */
static const double
two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
twom54 = 5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */
huge = 1.0e+300,
tiny = 1.0e-300;
v64df v64df_copysign_aux (v64df, v64df, v64di);
/* Lane-wise scalbn: multiply X by 2**N by editing the exponent field of X
   directly.

   hx/lx are the high/low words of x and k its biased exponent.  Order of
   the checks:
     - k == 0: return +-0 unchanged; otherwise pre-scale the subnormal by
       2^54 and adjust k, returning tiny*x when n < -50000;
     - k == 0x7ff (NaN or Inf): return x + x;
     - n > 50000, or k + n > 0x7fe: overflow, huge * copysign (huge, x);
     - k + n > 0: normal result, write the new exponent;
     - k + n <= -54: underflow, tiny * copysign (tiny, x);
     - otherwise: subnormal result, write exponent k + 54 and multiply by
       2^-54.

   The VECTOR_* macros and __mask come from amdgcnmach.h and are not visible
   here; from their use, VECTOR_RETURN appears to settle the selected lanes
   so that later statements only affect the remaining ones -- confirm against
   that header.  */
DEF_VD_MATH_FUNC (v64df, scalbn, v64df x, v64si n)
{
FUNCTION_INIT (v64df);
const v64df huge_v = VECTOR_INIT ((double) huge);
const v64df tiny_v = VECTOR_INIT ((double) tiny);
v64si hx, lx;
EXTRACT_WORDS (hx, lx, x);
v64si k =(hx&0x7ff00000)>>20; /* extract exponent */
VECTOR_IF (k == 0, cond) /* 0 or subnormal x */
VECTOR_RETURN (x, cond & ((lx|(hx&0x7fffffff))==0)); /* +- 0 */
VECTOR_COND_MOVE (x, x * two54, cond);
/* Re-read the high word: hx must describe the pre-scaled x from here on.  */
GET_HIGH_WORD (hx, x, cond);
VECTOR_COND_MOVE (k, ((hx&0x7ff00000)>>20) - 54, cond);
VECTOR_RETURN (tiny*x, cond & (n < -50000)); /*underflow*/
VECTOR_ENDIF
VECTOR_RETURN (x+x, k == 0x7ff); /* NaN or Inf */
/* in case integer overflow in n+k */
VECTOR_RETURN (huge_v * v64df_copysign_aux (huge_v, x, __mask), n > 50000);
k = k + n;
VECTOR_RETURN (huge_v * v64df_copysign_aux (huge_v, x, __mask), k > 0x7fe);
VECTOR_IF (k > 0, cond) /* normal result */
/* Keep sign and mantissa bits of hx, replace the exponent field with k.  */
SET_HIGH_WORD (x, (hx&0x800fffff)|(k<<20), cond);
VECTOR_RETURN (x, cond);
VECTOR_ENDIF
VECTOR_RETURN (tiny_v * v64df_copysign_aux (tiny_v, x, __mask), k <= -54); /*underflow*/
k += 54; /* subnormal result */
SET_HIGH_WORD (x, (hx&0x800fffff)|(k<<20), NO_COND);
VECTOR_RETURN (x * twom54, NO_COND);
FUNCTION_RETURN;
}

View File

@ -0,0 +1,39 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/mathfp/s_signif.c in Newlib. */
#include "amdgcnmach.h"
v64df v64df_scalb_aux (v64df x, v64df fn, v64di);
v64si v64df_ilogb_aux (v64df x, v64si);

/* Vector significand: passes each lane of X to v64df_scalb_aux together
   with the negated result of v64df_ilogb_aux for that lane.  */
DEF_VD_MATH_FUNC (v64df, significand, v64df x)
{
  /* v64df_ilogb_aux takes a v64si mask, so convert the incoming mask.  */
  v64si ilogb_mask = __builtin_convertvector (__mask, v64si);
  v64si exponent = v64df_ilogb_aux (x, ilogb_mask);

  /* v64df_scalb_aux takes its scale argument as a vector of doubles.  */
  v64df neg_exponent = -__builtin_convertvector (exponent, v64df);

  return v64df_scalb_aux (x, neg_exponent, __mask);
}
DEF_VARIANTS (significand, df, df)

View File

@ -0,0 +1,41 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/* Based on newlib/libm/mathfp/s_sin.c in Newlib. */
#include "amdgcnmach.h"
v64df v64df_sine_aux (v64df, int, v64di);

/* Vector sin: forwards to the shared sine/cosine helper with its integer
   selector argument set to 0.  */
DEF_VD_MATH_FUNC (v64df, sin, v64df x)
{
  const int selector = 0;
  v64df result = v64df_sine_aux (x, selector, __mask);

  return result;
}
DEF_VARIANTS (sin, df, df)

View File

@ -0,0 +1,126 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/******************************************************************
* The following routines are coded directly from the algorithms
* and coefficients given in "Software Manual for the Elementary
* Functions" by William J. Cody, Jr. and William Waite, Prentice
* Hall, 1980.
******************************************************************/
/* Based on newlib/libm/mathfp/s_sine.c in Newlib. */
#include "amdgcnmach.h"
v64si v64df_numtest (v64df x);

static const double HALF_PI = 1.57079632679489661923;
static const double ONE_OVER_PI = 0.31830988618379067154;

/* Coefficients of the polynomial R (g), g = y * y, used to approximate
   sin (y) as y + y * R (g).  r[i] multiplies g**(i + 1); all eight
   entries take part in the evaluation below.  */
static const double r[] = { -0.16666666666666665052,
                             0.83333333333331650314e-02,
                            -0.19841269841201840457e-03,
                             0.27557319210152756119e-05,
                            -0.25052106798274584544e-07,
                             0.16058936490371589114e-09,
                            -0.76429178068910467734e-12,
                             0.27204790957888846175e-14 };

#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsv)

/* Shared vector kernel for sin and cos.

   x       vector of arguments.
   cosine  zero selects the sine computation, non-zero the cosine one.
   __mask  lane mask (referenced here but declared elsewhere, presumably
           by DEF_VD_MATH_FUNC -- TODO confirm).

   NaN lanes return x and infinite lanes return z_notanum.d, both with
   errno = EDOM.  Lanes whose reduced magnitude exceeds YMAX return x with
   errno = ERANGE.  All other lanes are reduced by the nearest multiple of
   pi and evaluated with the odd polynomial y + y * R (y * y).

   NOTE(review): the VECTOR_* macros, NAN/INF, z_notanum, z_rooteps and
   __PI come from headers not visible here; the comments describe their
   apparent meaning.  */
DEF_VD_MATH_FUNC(v64df, sine, v64df x, int cosine)
{
  const double YMAX = 210828714.0;

  FUNCTION_INIT (v64df);

  v64si num_type = v64df_numtest (x);
  VECTOR_IF (num_type == NAN, cond)
    errno = EDOM;
    VECTOR_RETURN (x, cond);
  VECTOR_ELSEIF (num_type == INF, cond)
    errno = EDOM;
    VECTOR_RETURN (VECTOR_INIT (z_notanum.d), cond);
  VECTOR_ENDIF

  /* Use sin and cos properties to ease computations: work on |x| (shifted
     by pi/2 for cosine) and remember in sgn which lanes need negating.  */
  v64di sgn;
  v64df y;

  if (cosine)
    {
      sgn = VECTOR_INIT (0L);
      y = __builtin_gcn_fabsv (x) + HALF_PI;
    }
  else
    {
      sgn = x < 0.0;
      y = VECTOR_MERGE (-x, x, x < 0.0);
    }

  /* Check for values of y that will overflow here. */
  VECTOR_IF (y > YMAX, cond)
    errno = ERANGE;
    VECTOR_RETURN (x, cond);
  VECTOR_ENDIF

  /* N is y / pi rounded to the nearest integer (the conversion truncates,
     hence the +/- 0.5).  */
  v64si Nneg = __builtin_convertvector (y * ONE_OVER_PI - 0.5, v64si);
  v64si Npos = __builtin_convertvector (y * ONE_OVER_PI + 0.5, v64si);
  v64si N = VECTOR_MERGE (Nneg, Npos, y < 0.0);
  v64df XN = __builtin_convertvector (N, v64df);

  /* An odd multiple of pi flips the sign of the result.  */
  VECTOR_COND_MOVE (sgn, ~sgn, (N & 1) != 0);

  if (cosine)
    XN -= 0.5;

  /* Reduced argument.  */
  y = __builtin_gcn_fabsv (x) - XN * __PI;

  v64df res;

  VECTOR_IF ((-z_rooteps < y) & (y < z_rooteps), cond)
    /* Tiny reduced argument: sin (y) is y to working precision.  */
    VECTOR_COND_MOVE (res, y, cond);
  VECTOR_ELSE (cond)
    v64df g = y * y;

    /* Evaluate the polynomial by Horner's rule, starting from the
       highest-order coefficient r[7].  The previous code started at r[6]
       and silently dropped the g**8 term, which is worth several 1e-12
       when |y| is close to pi/2.  */
    v64df R = ((((((((r[7] * g + r[6]) * g + r[5]) * g + r[4]) * g + r[3]) * g + r[2]) * g + r[1]) * g + r[0]) * g);

    /* Finally, compute the result. */
    VECTOR_COND_MOVE (res, y + y * R, cond);
  VECTOR_ENDIF

  VECTOR_COND_MOVE (res, -res, sgn);

  VECTOR_RETURN (res, NO_COND);

  FUNCTION_RETURN;
}
#endif

View File

@ -0,0 +1,130 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/******************************************************************
* The following routines are coded directly from the algorithms
* and coefficients given in "Software Manual for the Elementary
* Functions" by William J. Cody, Jr. and William Waite, Prentice
* Hall, 1980.
******************************************************************/
/* Based on newlib/libm/mathfp/s_sineh.c in Newlib. */
#include "amdgcnmach.h"
v64df v64df_exp_aux (v64df, v64di);
v64si v64df_numtest (v64df);
v64si v64df_ispos (v64df);

/* Denominator (q) and numerator (p) coefficients of the rational
   approximation used for small arguments in the sinh path.  */
static const double q[] = { -0.21108770058106271242e+7,
                             0.36162723109421836460e+5,
                            -0.27773523119650701667e+3 };
static const double p[] = { -0.35181283430177117881e+6,
                            -0.11563521196851768270e+5,
                            -0.16375798202630751372e+3,
                            -0.78966127417357099479 };
static const double LNV = 0.6931610107421875000;
static const double INV_V2 = 0.24999308500451499336;
static const double V_OVER2_MINUS1 = 0.13830277879601902638e-4;

#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsv)

/* Shared vector kernel for sinh and cosh.

   x        vector of arguments.
   cosineh  zero selects sinh, non-zero selects cosh.
   __mask   lane mask (referenced here but declared elsewhere, presumably
            by DEF_VD_MATH_FUNC -- TODO confirm).

   NaN lanes return x with errno = EDOM.  Infinite lanes set
   errno = ERANGE and return an infinity: signed like x for sinh, always
   positive for cosh.  Other lanes use exp-based formulas, or for the sinh
   path with |x| <= 1 a rational approximation.

   NOTE(review): the VECTOR_* macros, NAN/INF, BIGX, z_infinity and
   z_rooteps come from headers not visible here.  */
DEF_VD_MATH_FUNC (v64df, sineh, v64df x, int cosineh)
{
  const double WBAR = 18.55;

  FUNCTION_INIT (v64df);

  v64si sgn = VECTOR_INIT (0);
  /* All-ones in every lane when computing cosh, all-zeros for sinh, so
     the scalar flag can be combined with vector comparisons.  */
  v64di v_cosineh = VECTOR_INIT (cosineh ? -1L : 0L);

  /* Check for special values. */
  v64si num_type = v64df_numtest (x);
  VECTOR_IF (num_type == NAN, cond)
    errno = EDOM;
    VECTOR_RETURN (x, cond);
  VECTOR_ELSEIF (num_type == INF, cond)
    errno = ERANGE;
    if (cosineh)
      {
        /* cosh is even, so cosh (-Inf) is +Inf; the sign of x must not
           be copied into the result.  */
        VECTOR_RETURN (VECTOR_INIT (z_infinity.d), cond);
      }
    else
      {
        VECTOR_RETURN (VECTOR_MERGE (VECTOR_INIT (z_infinity.d),
                                     VECTOR_INIT (-z_infinity.d),
                                     v64df_ispos (x)),
                       cond);
      }
  VECTOR_ENDIF

  v64df y = __builtin_gcn_fabsv (x);

  /* sinh is odd: remember which lanes need the result negated.  */
  if (!cosineh)
    VECTOR_COND_MOVE (sgn, VECTOR_INIT (-1), x < 0.0);

  v64df res;

  /* cosh always takes the exp-based path; sinh only when |x| > 1.  */
  VECTOR_IF (((y > 1.0) & ~v_cosineh) | v_cosineh, cond)
    VECTOR_IF2 (y > BIGX, cond2, cond)
      v64df w = y - LNV;

      /* Check for w > maximum here. */
      VECTOR_IF2 (w > BIGX, cond3, cond2)
        errno = ERANGE;
        VECTOR_RETURN (x, cond3);
      VECTOR_ENDIF

      v64df z = v64df_exp_aux (w, __mask);

      VECTOR_COND_MOVE (res, z * (V_OVER2_MINUS1 + 1.0),
                        cond2 & (w > WBAR));
    VECTOR_ELSE2 (cond2, cond)
      v64df z = v64df_exp_aux (y, __mask);
      if (cosineh)
        VECTOR_COND_MOVE (res, (z + 1 / z) * 0.5, cond2);
      else
        VECTOR_COND_MOVE (res, (z - 1 / z) * 0.5, cond2);
    VECTOR_ENDIF

    VECTOR_COND_MOVE (res, -res, sgn);
  VECTOR_ELSE (cond)
    /* Check for y being too small. */
    VECTOR_IF2 (y < z_rooteps, cond2, cond)
      VECTOR_COND_MOVE (res, x, cond2);
    VECTOR_ELSE2 (cond2, cond)
      /* Calculate the rational approximation x + x * f * P(f) / Q(f),
         with f = x * x.  */
      v64df f = x * x;
      v64df Q = ((f + q[2]) * f + q[1]) * f + q[0];
      v64df P = ((p[3] * f + p[2]) * f + p[1]) * f + p[0];
      v64df R = f * (P / Q);

      VECTOR_COND_MOVE (res, x + x * R, cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF

  VECTOR_RETURN (res, NO_COND);

  FUNCTION_RETURN;
}
#endif

View File

@ -0,0 +1,41 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/* Based on newlib/libm/mathfp/s_sinh.c. */
#include "amdgcnmach.h"
v64df v64df_sineh_aux (v64df, int, v64di);

/* Vector sinh: forwards to the shared sinh/cosh helper with its integer
   selector argument set to 0.  */
DEF_VD_MATH_FUNC (v64df, sinh, v64df x)
{
  const int selector = 0;
  v64df result = v64df_sineh_aux (x, selector, __mask);

  return result;
}
DEF_VARIANTS (sinh, df, df)

View File

@ -0,0 +1,104 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/*****************************************************************
* The following routines are coded directly from the algorithms
* and coefficients given in "Software Manual for the Elementary
* Functions" by William J. Cody, Jr. and William Waite, Prentice
* Hall, 1980.
*****************************************************************/
/* Based on newlib/libm/mathfp/s_sqrt.c in Newlib. */
#include "amdgcnmach.h"
v64si v64df_numtest (v64df);
v64si v64df_ispos (v64df);

#if defined (__has_builtin) \
    && __has_builtin (__builtin_gcn_frexpv_mant) \
    && __has_builtin (__builtin_gcn_frexpv_exp) \
    && __has_builtin (__builtin_gcn_ldexpv)

/* Vector square root.

   x       vector of arguments.
   __mask  lane mask (referenced here but declared elsewhere, presumably
           by DEF_VD_MATH_FUNC -- TODO confirm).

   Per lane: NaN returns x (errno = EDOM); +Inf returns +Inf; -Inf and
   negative finite values return z_notanum.d (errno = EDOM); zero returns
   0.0.  Every other value is split into mantissa and exponent, the
   mantissa root is refined from a linear first guess with three
   iterations of y = y/2 + (f/2)/y, and the result is rebuilt with half
   the exponent.

   NOTE(review): the VECTOR_* macros, NAN/INF, z_notanum, z_infinity and
   __SQRT_HALF come from headers not visible here.  */
DEF_VD_MATH_FUNC (v64df, sqrt, v64df x)
{
  FUNCTION_INIT (v64df);

  /* Check for special values. */
  v64si num_type = v64df_numtest (x);
  VECTOR_IF (num_type == NAN, cond)
    errno = EDOM;
    VECTOR_RETURN (x, cond);
  VECTOR_ELSEIF (num_type == INF, cond)
    VECTOR_IF2 (v64df_ispos (x), cond2, cond)
      /* sqrt (+Inf) is +Inf and is not an error.  The two infinity
         branches used to be swapped, giving NaN here.  */
      VECTOR_RETURN (VECTOR_INIT (z_infinity.d), cond2);
    VECTOR_ELSE2 (cond2, cond)
      /* sqrt (-Inf) is a domain error, like any other negative input.  */
      errno = EDOM;
      VECTOR_RETURN (VECTOR_INIT (z_notanum.d), cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF

  /* Initial checks are performed here. */
  VECTOR_IF (x == 0.0, cond)
    VECTOR_RETURN (VECTOR_INIT (0.0), cond);
  VECTOR_ENDIF
  VECTOR_IF (x < 0.0, cond)
    errno = EDOM;
    VECTOR_RETURN (VECTOR_INIT (z_notanum.d), cond);
  VECTOR_ENDIF

  /* Find the exponent and mantissa for the form x = f * 2^exp. */
  v64df f = __builtin_gcn_frexpv_mant (x);
  v64si exp = __builtin_gcn_frexpv_exp (x);
  v64si odd = (exp & 1) != 0;

  /* Get the initial approximation. */
  v64df y = 0.41731 + 0.59016 * f;

  /* f is halved once so that each step below is y/2 + (f/2)/y.  */
  f *= 0.5f;
  /* Calculate the remaining iterations. */
  y = y * 0.5f + f / y;
  y = y * 0.5f + f / y;
  y = y * 0.5f + f / y;

  /* Calculate the final value.  For an odd exponent, scale the mantissa
     root by __SQRT_HALF and round the exponent up so it halves exactly.  */
  VECTOR_COND_MOVE (y, y * __SQRT_HALF, odd);
  VECTOR_COND_MOVE (exp, exp + 1, odd);
  exp >>= 1;
  y = __builtin_gcn_ldexpv (y, exp);

  VECTOR_RETURN (y, NO_COND);

  FUNCTION_RETURN;
}
DEF_VARIANTS (sqrt, df, df)
#endif

View File

@ -0,0 +1,156 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/math/kf_tan.c in Newlib. */
#include "amdgcnmach.h"
/* Constants for v64df_kernel_tan.
   pio4   - pi/4 rounded to double.
   pio4lo - presumably the low-order remainder of pi/4 not captured by
            pio4 (name-based; it is subtracted alongside pio4 in the
            kernel) -- TODO confirm.
   T[]    - the thirteen coefficients T[0]..T[12] of the polynomial that
            v64df_kernel_tan evaluates; the trailing comments give the
            high and low 32-bit words of each value.  */
static const double
pio4 = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
pio4lo= 3.06161699786838301793e-17, /* 0x3C81A626, 0x33145C07 */
T[] = {
3.33333333333334091986e-01, /* 0x3FD55555, 0x55555563 */
1.33333333333201242699e-01, /* 0x3FC11111, 0x1110FE7A */
5.39682539762260521377e-02, /* 0x3FABA1BA, 0x1BB341FE */
2.18694882948595424599e-02, /* 0x3F9664F4, 0x8406D637 */
8.86323982359930005737e-03, /* 0x3F8226E3, 0xE96E8493 */
3.59207910759131235356e-03, /* 0x3F6D6D22, 0xC9560328 */
1.45620945432529025516e-03, /* 0x3F57DBC8, 0xFEE08315 */
5.88041240820264096874e-04, /* 0x3F4344D8, 0xF2F26501 */
2.46463134818469906812e-04, /* 0x3F3026F7, 0x1A8D1068 */
7.81794442939557092300e-05, /* 0x3F147E88, 0xA03792A6 */
7.14072491382608190305e-05, /* 0x3F12B80F, 0x32F0A7E9 */
-1.85586374855275456654e-05, /* 0xBEF375CB, 0xDB605373 */
2.59073051863633712884e-05, /* 0x3EFB2A70, 0x74BF7AD4 */
};
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsv)
/* Kernel tangent on a reduced argument.

   x, y    head and tail of the reduced argument (the callers in this file
           pass either (x, 0.0) or the two values produced by
           v64df_rem_pio2).
   iy      per lane, 1 to return tan (x + y) and -1 to return
           -1 / tan (x + y).
   __mask  lane mask used by the VECTOR_* macros.

   NOTE(review): the VECTOR_*, *_WORD and FUNCTION_* macros come from
   amdgcnmach.h, which is not visible here.  The order of the
   VECTOR_RETURN statements matters: each later one assumes the earlier
   cases have already been dealt with.  */
static v64df
v64df_kernel_tan (v64df x, v64df y, v64si iy, v64di __mask)
{
  FUNCTION_INIT (v64df);

  v64si hx;
  GET_HIGH_WORD(hx, x, NO_COND);
  v64si ix = hx & 0x7fffffff;   /* high word of |x| */

  VECTOR_IF (ix < 0x3e300000, cond)             /* x < 2**-28 */
    VECTOR_IF2(__builtin_convertvector (x, v64si)==0, cond2, cond)      /* generate inexact */
      /* Start from a defined value: GET_LOW_WORD only fills the cond2
         lanes, but the test below is evaluated across the whole vector.  */
      v64si low = VECTOR_INIT (0);
      GET_LOW_WORD (low, x, cond2);

      /* x == 0 with iy == -1: return 1 / |x|.  Gated by cond2 so the
         test only applies to the lanes handled by this branch.  */
      VECTOR_RETURN (1.0 / __builtin_gcn_fabsv (x),
                     cond2 & (((ix|low)|(iy+1))==0));
      VECTOR_RETURN (x, cond2 & (iy == 1));

      /* Otherwise compute -1 / (x + y) carefully.  */
      v64df z, w;
      z = w = x + y;
      /* SET_LOW_WORD writes an integer word, so pass an integer zero as
         the later uses in this function do.  */
      SET_LOW_WORD (z, VECTOR_INIT (0), cond2);
      v64df v = y - (z - x);
      v64df t, a;
      t = a = -1.0 / w;
      SET_LOW_WORD(t, VECTOR_INIT (0), cond2);
      v64df s = 1.0 + t * z;
      VECTOR_RETURN ( t + a * (s + t * v), cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF

  VECTOR_IF(ix>=0x3FE59428, cond)               /* |x|>=0.6744 */
    /* Work on |x| and replace it by pi/4 - |x|; the final result is
       reconstructed from that in the matching branch further down.  */
    VECTOR_COND_MOVE (x, -x, cond & (hx < 0));
    VECTOR_COND_MOVE (y, -y, cond & (hx < 0));
    v64df z = pio4-x;
    v64df w = pio4lo-y;
    VECTOR_COND_MOVE (x, z+w, cond);
    VECTOR_COND_MOVE (y, VECTOR_INIT (0.0), cond);
  VECTOR_ENDIF

  v64df z = x*x;
  v64df w = z*z;
  /* Break x^5*(T[1]+x^2*T[2]+...) into
   *    x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
   *    x^5(x^2*(T[2]+x^4*T[4]+...+x^22*T[12]))
   */
  v64df r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11]))));
  v64df v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12])))));
  v64df s = z*x;
  r = y + z*(s*(r+v)+y);
  r += T[0]*s;
  w = x+r;

  VECTOR_IF(ix>=0x3FE59428, cond)
    v = __builtin_convertvector (iy, v64df);
    /* 1-((hx>>30)&2) is +1 for a non-negative original x and -1 for a
       negative one.  */
    VECTOR_RETURN (__builtin_convertvector (1-((hx>>30)&2), v64df)
                   * (v-2.0*(x-(w*w/(w+v)-r))), cond);
  VECTOR_ENDIF
  VECTOR_RETURN (w, iy == 1);

  /* if allow error up to 2 ulp,
     simply return -1.0/(x+r) here */
  /* compute -1.0/(x+r) accurately */
  z = w;
  SET_LOW_WORD (z, VECTOR_INIT (0), NO_COND);
  v = r - (z - x);      /* z+v = r+x */
  v64df a, t;
  t = a = -1.0/w;       /* a = -1.0/w */
  SET_LOW_WORD(t, VECTOR_INIT (0), NO_COND);
  s = 1.0+t*z;
  VECTOR_RETURN (t+a*(s+t*v), NO_COND);

  FUNCTION_RETURN;
}
/* Argument reduction for tan.  Scales X by __INV_PI_OVER_TWO_2_24,
   converts to 32-bit integers and rounds away the low 24 bits to get the
   per-lane multiple N, then subtracts N * __PI_OVER_TWO from X.

   y[0] receives the reduced value; y[1] receives that value minus y[0].
   Returns N.

   NOTE(review): the scaled value goes through a 32-bit integer
   conversion, so the usable input range is limited -- confirm with
   callers.  */
static v64si
v64df_rem_pio2 (v64df x, v64df *y)
{
  v64df scaled = x * __INV_PI_OVER_TWO_2_24;
  v64si multiple = __builtin_convertvector (scaled, v64si);

  /* Add half of 2**24 before shifting so the shift rounds to nearest.  */
  multiple = (multiple + 0x800000) >> 24;

  v64df reduced = x - __builtin_convertvector (multiple, v64df) * __PI_OVER_TWO;

  y[0] = reduced;
  y[1] = reduced - y[0];

  return multiple;
}
/* Vector tan.  Lanes whose |x| high word is at most 0x3fe921fb go
   straight to v64df_kernel_tan; Inf and NaN lanes return x - x; the
   remaining lanes are first reduced with v64df_rem_pio2.  */
DEF_VD_MATH_FUNC (v64df, tan, v64df x)
{
  FUNCTION_INIT (v64df);

  v64si abs_hi;
  GET_HIGH_WORD (abs_hi, x, NO_COND);
  abs_hi &= 0x7fffffff;         /* high word of |x| */

  /* |x| ~< pi/4: no reduction needed */
  VECTOR_RETURN (v64df_kernel_tan (x, VECTOR_INIT (0.0), VECTOR_INIT (1), __mask),
                 abs_hi <= 0x3fe921fb);

  /* tan(Inf or NaN) is NaN */
  VECTOR_RETURN (x-x, abs_hi >= 0x7ff00000);

  /* argument reduction needed */
  v64df reduced[2];
  v64si quadrant = v64df_rem_pio2 (x, reduced);

  /* Selector for the kernel: 1 when quadrant is even, -1 when odd.  */
  v64si odd_bit = quadrant & 1;
  v64si selector = 1 - (odd_bit << 1);

  VECTOR_RETURN (v64df_kernel_tan (reduced[0], reduced[1], selector, __mask),
                 NO_COND);

  FUNCTION_RETURN;
}
DEF_VARIANTS (tan, df, df)
#endif

View File

@ -0,0 +1,92 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/*****************************************************************
* The following routines are coded directly from the algorithms
* and coefficients given in "Software Manual for the Elementary
* Functions" by William J. Cody, Jr. and William Waite, Prentice
* Hall, 1980.
*****************************************************************/
/* Based on newlib/libm/mathfp/s_tanh.c in Newlib. */
#include "amdgcnmach.h"
v64df v64df_exp_aux (v64df, v64di);

static const double LN3_OVER2 = 0.54930614433405484570;

/* Numerator (p) and denominator (q) coefficients of the rational
   approximation used for mid-sized arguments.  */
static const double p[] = { -0.16134119023996228053e+4,
                            -0.99225929672236083313e+2,
                            -0.96437492777225469787 };
static const double q[] = { 0.48402357071988688686e+4,
                            0.22337720718962312926e+4,
                            0.11274474380534949335e+3 };

#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsv)

/* Vector tanh.  Works on |x| and restores the sign at the end.  Per lane:
   1.0 when |x| > BIGX; an exp-based formula when |x| > LN3_OVER2; |x|
   itself when |x| < z_rooteps; otherwise a rational approximation.  */
DEF_VD_MATH_FUNC (v64df, tanh, v64df x)
{
  FUNCTION_INIT (v64df);

  v64df mag = __builtin_gcn_fabsv (x);
  v64df out;

  /* Input too big: the result saturates.  */
  VECTOR_IF (mag > BIGX, cond)
    VECTOR_COND_MOVE (out, VECTOR_INIT (1.0), cond);
  VECTOR_ELSEIF (mag > LN3_OVER2, cond)
    VECTOR_COND_MOVE (out, 1.0 - 2.0 / (v64df_exp_aux (2 * mag, __mask) + 1.0),
                      cond);
  /* Input too small: the result is the input magnitude.  */
  VECTOR_ELSEIF (mag < z_rooteps, cond)
    VECTOR_COND_MOVE (out, mag, cond);
  /* Rational approximation mag + mag * g * P(g) / Q(g), g = mag * mag.  */
  VECTOR_ELSE (cond)
    v64df sq = mag * mag;

    v64df numer = (p[2] * sq + p[1]) * sq + p[0];
    v64df denom = ((sq + q[2]) * sq + q[1]) * sq + q[0];
    v64df ratio = sq * (numer / denom);

    VECTOR_COND_MOVE (out, mag + mag * ratio, cond);
  VECTOR_ENDIF

  /* Negate the result for negative inputs.  */
  VECTOR_COND_MOVE (out, -out, x < 0.0);

  VECTOR_RETURN (out, NO_COND);

  FUNCTION_RETURN;
}
DEF_VARIANTS (tanh, df, df)
#endif

View File

@ -0,0 +1,42 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*
*/
/* Based on newlib/libm/math/e_tgamma.c in Newlib. */
#include "amdgcnmach.h"
v64df v64df_exp_aux (v64df x, v64di __mask);
v64df v64df_lgamma_r_aux (v64df x, v64si *signgamp, v64di __mask);

/* Vector tgamma: exponentiates the result of v64df_lgamma_r_aux and
   negates the lanes for which that helper reported a negative sign.  */
DEF_VD_MATH_FUNC (v64df, tgamma, v64df x)
{
  v64si gamma_sign;
  v64df log_gamma = v64df_lgamma_r_aux (x, &gamma_sign, __mask);
  v64df result = v64df_exp_aux (log_gamma, __mask);

  VECTOR_COND_MOVE (result, -result, gamma_sign < 0);

  return result;
}
DEF_VARIANTS (tgamma, df, df)

View File

@ -0,0 +1,41 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/* Based on newlib/libm/mathfp/sf_acos.c in Newlib. */
#include "amdgcnmach.h"
v64sf v64sf_asinef_aux (v64sf, int, v64si);

/* Vector acosf: forwards to v64sf_asinef_aux with its integer selector
   argument set to 1 (presumably choosing arccosine -- the helper is
   defined elsewhere).  */
DEF_VS_MATH_FUNC (v64sf, acosf, v64sf x)
{
  const int selector = 1;
  v64sf result = v64sf_asinef_aux (x, selector, __mask);

  return result;
}
DEF_VARIANTS (acosf, sf, sf)

View File

@ -0,0 +1,80 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*
*/
/* Based on newlib/libm/mathfp/ef_acosh.c in Newlib. */
#include "amdgcnmach.h"
v64sf v64sf_logf_aux (v64sf, v64si);
v64sf v64sf_log1pf_aux (v64sf, v64si);
v64sf v64sf_sqrtf_aux (v64sf, v64si);

/* Vector acoshf.

   x       vector of arguments.
   __mask  lane mask (referenced here but declared elsewhere, presumably
           by DEF_VS_MATH_FUNC -- TODO confirm).

   Per lane:
     x < 1            (x-x)/(x-x), i.e. NaN
     x is Inf or NaN  x + x
     x >= 2**28       log (x) + ln2
     x == 1           0
     2 < x < 2**28    log (2x - 1 / (x + sqrt (x*x - 1)))
     1 < x <= 2       log1p (t + sqrt (2t + t*t)), t = x - 1

   NOTE(review): the VECTOR_*, GET_FLOAT_WORD and FUNCTION_* macros come
   from amdgcnmach.h, which is not visible here.  */
DEF_VS_MATH_FUNC (v64sf, acoshf, v64sf x)
{
  static const float ln2 = 6.9314718246e-01;  /* 0x3f317218 */

  FUNCTION_INIT (v64sf);

  v64si hx;
  GET_FLOAT_WORD (hx, x, NO_COND);

  /* hx is compared as a signed integer, so negative x also lands here.  */
  VECTOR_IF (hx < 0x3f800000, cond)             /* x < 1 */
    VECTOR_RETURN ((x-x) / (x-x), cond);
  VECTOR_ENDIF
  VECTOR_IF (hx >= 0x4d800000, cond)            /* x >= 2**28 */
    VECTOR_IF2 (hx >= 0x7f800000, cond2, cond)  /* x is inf or NaN */
      VECTOR_RETURN (x+x, cond2);
    /* The nested else must use the two-argument form, like every other
       VECTOR_IF2 in these files, so that the log (2x) shortcut stays
       confined to the x >= 2**28 lanes and does not spill into smaller
       arguments.  */
    VECTOR_ELSE2 (cond2, cond)
      /* acosh(huge)=log(2x) */
      VECTOR_RETURN (v64sf_logf_aux (x, __mask) + ln2, cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF
  VECTOR_IF (hx == 0x3f800000, cond)
    /* acosh(1) = 0 */
    VECTOR_RETURN (VECTOR_INIT (0.0f), cond);
  VECTOR_ENDIF
  VECTOR_IF (hx > 0x40000000, cond)             /* 2**28 > x > 2 */
  {
    v64sf t = x * x;
    VECTOR_RETURN (v64sf_logf_aux (2.0f*x - 1.0f /
                                   (x + v64sf_sqrtf_aux (t - 1.0f, __mask)),
                                   __mask),
                   cond);
  }
  VECTOR_ELSE (cond)                            /* 1<x<=2 */
  {
    v64sf t = x - 1.0f;
    VECTOR_RETURN (v64sf_log1pf_aux (t + v64sf_sqrtf_aux (2.0f*t + t*t, __mask),
                                     __mask),
                   cond);
  }
  VECTOR_ENDIF

  FUNCTION_RETURN;
}
DEF_VARIANTS (acoshf, sf, sf)

View File

@ -0,0 +1,41 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/* Based on newlib/libm/mathfp/sf_asin.c in Newlib. */
#include "amdgcnmach.h"
v64sf v64sf_asinef_aux (v64sf, int, v64si);

/* Vector asinf: forwards to v64sf_asinef_aux with its integer selector
   argument set to 0 (presumably choosing arcsine -- the helper is defined
   elsewhere).  */
DEF_VS_MATH_FUNC (v64sf, asinf, v64sf x)
{
  const int selector = 0;
  v64sf result = v64sf_asinef_aux (x, selector, __mask);

  return result;
}
DEF_VARIANTS (asinf, sf, sf)

View File

@ -0,0 +1,127 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/******************************************************************
* The following routines are coded directly from the algorithms
* and coefficients given in "Software Manual for the Elementary
* Functions" by William J. Cody, Jr. and William Waite, Prentice
* Hall, 1980.
******************************************************************/
/* Based on newlib/libm/mathfp/sf_asine.c in Newlib. */
#include "amdgcnmach.h"
v64si v64sf_numtestf (v64sf);
v64sf v64sf_sqrtf (v64sf);
/* Coefficients of the rational approximation R(g) = P(g) / Q(g) evaluated
   in asinef: P = (p[1]*g + p[0])*g and Q = (g + q[1])*g + q[0].  */
static const float p[] = { 0.933935835, -0.504400557 };
static const float q[] = { 0.560363004e+1, -0.554846723e+1 };
/* Additive constants picked per lane when the final result is formed.
   Numerically a[] is { 0, ~pi/4 } and b[] is { ~pi/2, ~pi/4 }.  */
static const float a[] = { 0.0, 0.785398163 };
static const float b[] = { 1.570796326, 0.785398163 };
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsvf)
/* Shared vector implementation of arcsine and arccosine.
   x is the argument vector.  acosine == 0 produces the arcsine result;
   any other value takes the arccosine branch of the final if/else.
   Lanes that v64sf_numtestf classifies as NAN or INF, and lanes with
   |x| > 1, are returned early and errno is written (EDOM and ERANGE
   respectively).  The per-lane control flow relies on the VECTOR_* macros
   from amdgcnmach.h, whose definitions are not visible here.  */
DEF_VS_MATH_FUNC (v64sf, asinef, v64sf x, int acosine)
{
FUNCTION_INIT (v64sf);
/* Set to -1 in lanes that took the |x| > 0.5 reduction below; those lanes
   must always go through the rational approximation.  */
v64si branch = VECTOR_INIT (0);
/* Check for special values. */
v64si i = v64sf_numtestf (x);
VECTOR_IF ((i == NAN) | (i == INF), cond)
errno = EDOM;
/* NaN lanes hand back x itself, infinite lanes hand back z_infinity_f.f.  */
VECTOR_RETURN (VECTOR_MERGE (x, VECTOR_INIT (z_infinity_f.f),
i == NAN),
cond);
VECTOR_ENDIF
v64sf y = __builtin_gcn_fabsvf (x);
v64sf g, res;
VECTOR_IF (y > 0.5f, cond)
/* From here on i is reused as the selector into the a[] / b[] tables.  */
VECTOR_COND_MOVE (i, VECTOR_INIT (1 - acosine), cond);
/* Check for range error. */
VECTOR_IF2 (y > 1.0f, cond2, cond)
errno = ERANGE;
VECTOR_RETURN (VECTOR_INIT (z_notanum_f.f), cond2);
VECTOR_ENDIF
/* Reduce the argument: g = (1 - |x|) / 2 and y = -2 * sqrt (g).  */
VECTOR_COND_MOVE (g, (1.0f - y) / 2.0f, cond);
VECTOR_COND_MOVE (y, -2.0f * v64sf_sqrtf (g), cond);
VECTOR_COND_MOVE (branch, VECTOR_INIT (-1), cond);
VECTOR_ELSE (cond)
VECTOR_COND_MOVE (i, VECTOR_INIT (acosine), cond);
/* Very small arguments use y directly; otherwise g = y * y feeds the
   approximation below.  */
VECTOR_IF2 (y < z_rooteps_f, cond2, cond)
VECTOR_COND_MOVE (res, y, cond2);
VECTOR_ELSE2 (cond2, cond)
VECTOR_COND_MOVE (g, y * y, cond2);
VECTOR_ENDIF
VECTOR_ENDIF
/* Reduced lanes have a negative y, so they are selected through branch
   rather than through the y >= z_rooteps_f comparison.  */
VECTOR_IF ((y >= z_rooteps_f) | branch, cond)
{
/* Calculate the Taylor series. */
v64sf P = (p[1] * g + p[0]) * g;
v64sf Q = (g + q[1]) * g + q[0];
v64sf R = P / Q;
VECTOR_COND_MOVE (res, y + y * R, cond);
}
VECTOR_ENDIF
/* a[1] where the selector i is non-zero, a[0] elsewhere (assuming
   VECTOR_MERGE takes its first operand where the condition holds).  */
v64sf a_i = VECTOR_MERGE (VECTOR_INIT (a[1]), VECTOR_INIT (a[0]), i != 0);
/* Calculate asine or acose. */
if (acosine == 0)
{
VECTOR_COND_MOVE (res, (a_i + res) + a_i, NO_COND);
/* The arcsine result takes the sign of the original argument.  */
VECTOR_IF (x < 0.0f, cond)
VECTOR_COND_MOVE (res, -res, cond);
VECTOR_ENDIF
}
else
{
v64sf b_i = VECTOR_MERGE (VECTOR_INIT(b[1]), VECTOR_INIT(b[0]), i != 0);
/* Negative arguments combine with the b[] table, the rest with a[].  */
VECTOR_IF (x < 0.0f, cond)
VECTOR_COND_MOVE (res, (b_i + res) + b_i, cond);
VECTOR_ELSE (cond)
VECTOR_COND_MOVE (res, (a_i - res) + a_i, cond);
VECTOR_ENDIF
}
VECTOR_RETURN (res, NO_COND);
FUNCTION_RETURN;
}
#endif

View File

@ -0,0 +1,82 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/mathfp/sf_asinh.c in Newlib. */
#include "amdgcnmach.h"
v64sf v64sf_logf_aux (v64sf, v64si);
v64sf v64sf_log1pf_aux (v64sf, v64si);
v64sf v64sf_sqrtf_aux (v64sf, v64si);
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsvf)
/* Vectorized single-precision inverse hyperbolic sine.

   The magnitude of each lane selects one of the following:
     - inf or NaN:          x + x
     - |x| < 2**-28:        x itself
     - |x| > 2**28:         log (|x|) + ln2
     - 2 < |x| <= 2**28:    log (2|x| + 1 / (sqrt (x*x + 1) + |x|))
     - otherwise:           log1p (|x| + x*x / (1 + sqrt (1 + x*x)))
   The last three compute a magnitude w; lanes whose raw word hx is not
   greater than zero return -w.  Lane selection and early returns go
   through the VECTOR_* macros from amdgcnmach.h.  */
DEF_VS_MATH_FUNC (v64sf, asinhf, v64sf x)
{
  static const float one = 1.0000000000e+00; /* 0x3F800000 */
  static const float ln2 = 6.9314718246e-01; /* 0x3f317218 */
  static const float huge = 1.0000000000e+30;

  FUNCTION_INIT (v64sf);

  v64sf w;
  v64si hx;
  GET_FLOAT_WORD (hx, x, NO_COND);
  v64si ix = hx & 0x7fffffff;		/* bit pattern of |x| */

  VECTOR_IF (ix >=0x7f800000, cond)	/* x is inf or NaN */
    VECTOR_RETURN (x + x, cond);
  VECTOR_ENDIF

  VECTOR_IF (ix < 0x31800000, cond)	/* |x|<2**-28 */
    VECTOR_IF2 (huge+x > one, cond2, cond)	/* return x inexact except 0 */
      /* Return only the lanes that passed the inner guard (cond2), not
	 every lane of the enclosing condition.  */
      VECTOR_RETURN (x, cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF

  VECTOR_IF (ix > 0x4d800000, cond)	/* |x| > 2**28 */
    VECTOR_COND_MOVE (w, v64sf_logf_aux (__builtin_gcn_fabsvf (x), __mask) +
			 ln2,
		      cond);
  VECTOR_ELSEIF (ix > 0x40000000, cond)	/* 2**28 > |x| > 2.0 */
    v64sf t = __builtin_gcn_fabsvf (x);
    VECTOR_COND_MOVE (w, v64sf_logf_aux (2.0f * t + 1.0f / (v64sf_sqrtf_aux (x*x + 1.0f, __mask) + t), __mask),
		      cond);
  VECTOR_ELSE (cond)			/* 2.0 > |x| > 2**-28 */
    v64sf t = x * x;
    VECTOR_COND_MOVE (w, v64sf_log1pf_aux (__builtin_gcn_fabsvf (x) + t / (1.0f + v64sf_sqrtf_aux (1.0f + t, __mask)), __mask),
		      cond);
  VECTOR_ENDIF

  /* Restore the sign of the argument.  */
  VECTOR_IF (hx > 0, cond)
    VECTOR_RETURN (w, cond);
  VECTOR_ELSE (cond)
    VECTOR_RETURN (-w, cond);
  VECTOR_ENDIF

  FUNCTION_RETURN;
}

DEF_VARIANTS (asinhf, sf, sf)
#endif

View File

@ -0,0 +1,42 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/* Based on newlib/libm/mathfp/sf_atan.c in Newlib. */
#include "amdgcnmach.h"
v64sf v64sf_atangentf_aux (v64sf, v64sf, v64sf, int, v64si);
/* Vectorized single-precision arctangent of one argument.  Delegates to
   the shared arctangent helper with its arctan2 flag cleared, so only x
   is consulted; the v and u operands are passed as zero vectors.  */
DEF_VS_MATH_FUNC (v64sf, atanf, v64sf x)
{
  const int arctan2 = 0;
  v64sf unused_v = VECTOR_INIT (0.0f);
  v64sf unused_u = VECTOR_INIT (0.0f);

  return v64sf_atangentf_aux (x, unused_v, unused_u, arctan2, __mask);
}

DEF_VARIANTS (atanf, sf, sf)

View File

@ -0,0 +1,41 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/* Based on newlib/libm/mathfp/sf_atan2.c in Newlib. */
#include "amdgcnmach.h"
v64sf v64sf_atangentf_aux (v64sf, v64sf, v64sf, int, v64si);
/* Vectorized single-precision two-argument arctangent of v / u.
   Delegates to the shared arctangent helper with its arctan2 flag set;
   the helper's single-argument operand is passed as a zero vector.  */
DEF_VS_MATH_FUNC (v64sf, atan2f, v64sf v, v64sf u)
{
  const int arctan2 = 1;
  v64sf unused_x = VECTOR_INIT (0.0f);

  return v64sf_atangentf_aux (unused_x, v, u, arctan2, __mask);
}

DEF_VARIANTS2 (atan2f, sf, sf)

View File

@ -0,0 +1,152 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/******************************************************************
* The following routines are coded directly from the algorithms
* and coefficients given in "Software Manual for the Elementary
* Functions" by William J. Cody, Jr. and William Waite, Prentice
* Hall, 1980.
******************************************************************/
/* Based on newlib/libm/mathfp/sf_atangent.c in Newlib. */
#include <float.h>
#include "amdgcnmach.h"
/* Approximately sqrt (3); used in the second argument-reduction step.  */
static const float ROOT3 = 1.732050807;
/* Offsets added to the reduced result, indexed by the reduction counter N.
   Numerically { 0, ~pi/6, ~pi/2, ~pi/3 }.  */
static const float a[] = { 0.0, 0.523598775, 1.570796326,
1.047197551 };
/* Coefficients of the rational approximation P(g) / Q(g), where
   P = (p[1]*g + p[0])*g and Q = g + q[0].  */
static const float q[] = { 0.1412500740e+1 };
static const float p[] = { -0.4708325141, -0.5090958253e-1 };
#if defined (__has_builtin) \
&& __has_builtin (__builtin_gcn_frexpvf_exp) \
&& __has_builtin (__builtin_gcn_fabsvf)
/* Shared vector implementation of atan and atan2.
   When arctan2 is zero the result is the arctangent of x and v, u are
   not read.  When arctan2 is non-zero the result is the angle of v / u
   and x is not read.  In atan2 mode, lanes with u == 0 and v == 0 set
   errno to ERANGE and return 0.  Per-lane control flow relies on the
   VECTOR_* macros from amdgcnmach.h, whose definitions are not visible
   here.  */
DEF_VS_MATH_FUNC (v64sf, atangentf, v64sf x, v64sf v, v64sf u, int arctan2)
{
FUNCTION_INIT (v64sf);
v64sf zero = VECTOR_INIT (0.0f);
v64sf res;
/* -1 in lanes whose result was fixed up front and that must bypass the
   approximation; 0 elsewhere.  */
v64si branch = VECTOR_INIT (0);
/* Preparation for calculating arctan2. */
if (arctan2)
{
VECTOR_IF (u == 0.0f, cond)
VECTOR_IF2 (v == 0.0f, cond2, cond)
errno = ERANGE;
VECTOR_RETURN (VECTOR_INIT (0.0f), cond2);
VECTOR_ELSE2 (cond2, cond)
/* u == 0 with v != 0: the magnitude of the angle is pi/2.  */
VECTOR_COND_MOVE (branch, VECTOR_INIT (-1), cond2);
VECTOR_COND_MOVE (res, VECTOR_INIT ((float) __PI_OVER_TWO), cond2);
VECTOR_ENDIF
VECTOR_ENDIF
VECTOR_IF (~branch, cond)
/* Get the exponent values of the inputs. */
v64si expv = __builtin_gcn_frexpvf_exp (v);
v64si expu = __builtin_gcn_frexpvf_exp (u);
/* See if a divide will overflow. */
v64si e = expv - expu;
VECTOR_IF2 (e > FLT_MAX_EXP, cond2, cond)
VECTOR_COND_MOVE (branch, VECTOR_INIT (-1), cond2);
VECTOR_COND_MOVE (res, VECTOR_INIT ((float) __PI_OVER_TWO), cond2);
VECTOR_ENDIF
/* Also check for underflow. */
VECTOR_IF2 (e < FLT_MIN_EXP, cond2, cond)
VECTOR_COND_MOVE (branch, VECTOR_INIT (-1), cond2);
VECTOR_COND_MOVE (res, zero, cond2);
VECTOR_ENDIF
VECTOR_ENDIF
}
/* General case: reduce |argument| and apply the rational approximation.  */
VECTOR_IF (~branch, cond)
v64sf f;
/* Reduction counter; it later selects the offset a[N] and the sign.  */
v64si N = VECTOR_INIT (0);
if (arctan2)
f = __builtin_gcn_fabsvf (v / u);
else
f = __builtin_gcn_fabsvf (x);
/* First reduction: replace f > 1 by its reciprocal.  */
VECTOR_IF2 (f > 1.0f, cond2, cond)
VECTOR_COND_MOVE (f, 1.0f / f, cond2);
VECTOR_COND_MOVE (N, VECTOR_INIT (2), cond2);
VECTOR_ENDIF
/* Second reduction for f > 2 - sqrt (3).  */
VECTOR_IF2 (f > (2.0f - ROOT3), cond2, cond)
float A = ROOT3 - 1.0f;
VECTOR_COND_MOVE (f, (((A * f - 0.5f) - 0.5f) + f) / (ROOT3 + f),
cond2);
/* Adds 1 to N in the lanes selected by cond2.  */
N += cond2 & 1;
VECTOR_ENDIF
/* Check for values that are too small. */
VECTOR_IF2 ((-z_rooteps_f < f) & (f < z_rooteps_f), cond2, cond)
VECTOR_COND_MOVE (res, f, cond2);
/* Calculate the Taylor series. */
VECTOR_ELSE2 (cond2, cond)
v64sf g = f * f;
v64sf P = (p[1] * g + p[0]) * g;
v64sf Q = g + q[0];
v64sf R = P / Q;
VECTOR_COND_MOVE (res, f + f * R, cond2);
VECTOR_ENDIF
/* Undo the reductions: negate when N > 1, then add the offset a[N].  */
VECTOR_COND_MOVE (res, -res, cond & (N > 1));
res += VECTOR_MERGE (VECTOR_INIT (a[1]), zero, cond & (N == 1));
res += VECTOR_MERGE (VECTOR_INIT (a[2]), zero, cond & (N == 2));
res += VECTOR_MERGE (VECTOR_INIT (a[3]), zero, cond & (N == 3));
VECTOR_ENDIF
/* Fix up the result from the signs of the original inputs.  */
if (arctan2)
{
/*if (u < 0.0)*/
VECTOR_COND_MOVE (res, (float) __PI - res, u < 0.0f);
/*if (v < 0.0)*/
VECTOR_COND_MOVE (res, -res, v < 0.0f);
}
/*else if (x < 0.0) */
else
VECTOR_COND_MOVE (res, -res, x < 0.0f);
VECTOR_RETURN (res, NO_COND);
FUNCTION_RETURN;
}
#endif

View File

@ -0,0 +1,75 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*
*/
/* Based on newlib/libm/mathfp/ef_atanh.c in Newlib. */
#include "amdgcnmach.h"
v64sf v64sf_log1pf_aux (v64sf, v64si);
/* Vectorized single-precision inverse hyperbolic tangent.
   Lanes whose magnitude word exceeds that of 1.0 return (x-x)/(x-x),
   lanes with |x| == 1 return x / 0, and very small lanes return x
   unchanged.  The remaining lanes compute 0.5 * log1p (...) on |x| and
   are negated when the raw word hx is negative.  */
DEF_VS_MATH_FUNC (v64sf, atanhf, v64sf x)
{
static const float zero = 0.0;
static const float one = 1.0, huge = 1e30;
FUNCTION_INIT (v64sf);
v64sf t;
v64si hx;
GET_FLOAT_WORD (hx, x, NO_COND);
/* Bit pattern of |x|.  */
v64si ix = hx & 0x7fffffff;
VECTOR_IF (ix > 0x3f800000, cond) // |x|>1
VECTOR_RETURN ((x - x)/(x - x), cond);
VECTOR_ENDIF
VECTOR_IF (ix == 0x3f800000, cond) // |x|==1
VECTOR_RETURN (x / zero, cond);
VECTOR_ENDIF
VECTOR_IF ((ix < 0x31800000) & ((huge + x) > zero), cond) // |x|<2**-28
VECTOR_RETURN (x, cond);
VECTOR_ENDIF
/* From here on x holds |x|; the sign is reapplied from hx at the end.  */
SET_FLOAT_WORD (x, ix, NO_COND);
VECTOR_IF (ix < 0x3f000000, cond) // x < 0.5
v64sf t2 = x + x;
VECTOR_COND_MOVE (t, 0.5f * v64sf_log1pf_aux (t2 + t2 * x / (one - x), __mask), cond);
VECTOR_ELSE (cond)
VECTOR_COND_MOVE (t, 0.5f * v64sf_log1pf_aux ((x + x) / (one - x), __mask), cond);
VECTOR_ENDIF
VECTOR_IF (hx >= 0, cond)
VECTOR_RETURN (t, cond);
VECTOR_ELSE (cond)
VECTOR_RETURN (-t, cond);
VECTOR_ENDIF
FUNCTION_RETURN;
}
DEF_VARIANTS (atanhf, sf, sf)

View File

@ -0,0 +1,43 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/common/sf_copysign.c in Newlib. */
#include "amdgcnmach.h"
/* Vectorized single-precision copysign: each lane of the result has the
   low 31 bits of x's word and bit 31 of y's word.  */
DEF_VS_MATH_FUNC (v64sf, copysignf, v64sf x, v64sf y)
{
  FUNCTION_INIT (v64sf);

  v64si x_bits, y_bits;
  GET_FLOAT_WORD (x_bits, x, NO_COND);
  GET_FLOAT_WORD (y_bits, y, NO_COND);

  /* Everything but the top bit comes from x, the top bit comes from y.  */
  v64si magnitude = x_bits & 0x7fffffff;
  v64si sign = y_bits & 0x80000000;
  SET_FLOAT_WORD (x, magnitude | sign, NO_COND);

  VECTOR_RETURN (x, NO_COND);
  FUNCTION_RETURN;
}

DEF_VARIANTS2 (copysignf, sf, sf)

View File

@ -0,0 +1,41 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/* Based on newlib/libm/mathfp/sf_cos.c in Newlib. */
#include "amdgcnmach.h"
v64sf v64sf_sinef_aux (v64sf, int, v64si);
/* Vectorized single-precision cosine.  Forwards to v64sf_sinef_aux with
   its integer selector set to 1 (presumably the cosine setting of a
   shared sine/cosine helper; the helper is defined elsewhere).  */
DEF_VS_MATH_FUNC (v64sf, cosf, v64sf x)
{
  const int selector = 1;

  return v64sf_sinef_aux (x, selector, __mask);
}

DEF_VARIANTS (cosf, sf, sf)

View File

@ -0,0 +1,41 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/* Based on newlib/libm/mathfp/sf_cosh.c in Newlib. */
#include "amdgcnmach.h"
v64sf v64sf_sinehf_aux (v64sf, int, v64si);
/* Vectorized single-precision hyperbolic cosine.  Forwards to
   v64sf_sinehf_aux with its integer selector set to 1 (presumably the
   cosh setting of a shared sinh/cosh helper; the helper is defined
   elsewhere).  */
DEF_VS_MATH_FUNC (v64sf, coshf, v64sf x)
{
  const int selector = 1;

  return v64sf_sinehf_aux (x, selector, __mask);
}

DEF_VARIANTS (coshf, sf, sf)

View File

@ -0,0 +1,186 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/mathfp/sf_erf.c in Newlib. */
#include "amdgcnmach.h"
v64sf v64sf_expf_aux (v64sf, v64si);
/* Constants for the vectorized erff routine.  Each group of pX/qX,
   paX/qaX, raX/saX and rbX/sbX values holds the numerator and denominator
   polynomial coefficients for one argument interval, as labelled below.
   The trailing hex comments give the IEEE single-precision bit patterns.
   Note: half and two are not referenced by erff.  */
static const float
tiny = 1e-30,
half= 5.0000000000e-01, /* 0x3F000000 */
one = 1.0000000000e+00, /* 0x3F800000 */
two = 2.0000000000e+00, /* 0x40000000 */
/* c = (subfloat)0.84506291151 */
erx = 8.4506291151e-01, /* 0x3f58560b */
/*
* Coefficients for approximation to erf on [0,0.84375]
*/
efx = 1.2837916613e-01, /* 0x3e0375d4 */
efx8= 1.0270333290e+00, /* 0x3f8375d4 */
pp0 = 1.2837916613e-01, /* 0x3e0375d4 */
pp1 = -3.2504209876e-01, /* 0xbea66beb */
pp2 = -2.8481749818e-02, /* 0xbce9528f */
pp3 = -5.7702702470e-03, /* 0xbbbd1489 */
pp4 = -2.3763017452e-05, /* 0xb7c756b1 */
qq1 = 3.9791721106e-01, /* 0x3ecbbbce */
qq2 = 6.5022252500e-02, /* 0x3d852a63 */
qq3 = 5.0813062117e-03, /* 0x3ba68116 */
qq4 = 1.3249473704e-04, /* 0x390aee49 */
qq5 = -3.9602282413e-06, /* 0xb684e21a */
/*
* Coefficients for approximation to erf in [0.84375,1.25]
*/
pa0 = -2.3621185683e-03, /* 0xbb1acdc6 */
pa1 = 4.1485610604e-01, /* 0x3ed46805 */
pa2 = -3.7220788002e-01, /* 0xbebe9208 */
pa3 = 3.1834661961e-01, /* 0x3ea2fe54 */
pa4 = -1.1089469492e-01, /* 0xbde31cc2 */
pa5 = 3.5478305072e-02, /* 0x3d1151b3 */
pa6 = -2.1663755178e-03, /* 0xbb0df9c0 */
qa1 = 1.0642088205e-01, /* 0x3dd9f331 */
qa2 = 5.4039794207e-01, /* 0x3f0a5785 */
qa3 = 7.1828655899e-02, /* 0x3d931ae7 */
qa4 = 1.2617121637e-01, /* 0x3e013307 */
qa5 = 1.3637083583e-02, /* 0x3c5f6e13 */
qa6 = 1.1984500103e-02, /* 0x3c445aa3 */
/*
* Coefficients for approximation to erfc in [1.25,1/0.35]
*/
ra0 = -9.8649440333e-03, /* 0xbc21a093 */
ra1 = -6.9385856390e-01, /* 0xbf31a0b7 */
ra2 = -1.0558626175e+01, /* 0xc128f022 */
ra3 = -6.2375331879e+01, /* 0xc2798057 */
ra4 = -1.6239666748e+02, /* 0xc322658c */
ra5 = -1.8460508728e+02, /* 0xc3389ae7 */
ra6 = -8.1287437439e+01, /* 0xc2a2932b */
ra7 = -9.8143291473e+00, /* 0xc11d077e */
sa1 = 1.9651271820e+01, /* 0x419d35ce */
sa2 = 1.3765776062e+02, /* 0x4309a863 */
sa3 = 4.3456588745e+02, /* 0x43d9486f */
sa4 = 6.4538726807e+02, /* 0x442158c9 */
sa5 = 4.2900814819e+02, /* 0x43d6810b */
sa6 = 1.0863500214e+02, /* 0x42d9451f */
sa7 = 6.5702495575e+00, /* 0x40d23f7c */
sa8 = -6.0424413532e-02, /* 0xbd777f97 */
/*
* Coefficients for approximation to erfc in [1/.35,28]
*/
rb0 = -9.8649431020e-03, /* 0xbc21a092 */
rb1 = -7.9928326607e-01, /* 0xbf4c9dd4 */
rb2 = -1.7757955551e+01, /* 0xc18e104b */
rb3 = -1.6063638306e+02, /* 0xc320a2ea */
rb4 = -6.3756646729e+02, /* 0xc41f6441 */
rb5 = -1.0250950928e+03, /* 0xc480230b */
rb6 = -4.8351919556e+02, /* 0xc3f1c275 */
sb1 = 3.0338060379e+01, /* 0x41f2b459 */
sb2 = 3.2579251099e+02, /* 0x43a2e571 */
sb3 = 1.5367296143e+03, /* 0x44c01759 */
sb4 = 3.1998581543e+03, /* 0x4547fdbb */
sb5 = 2.5530502930e+03, /* 0x451f90ce */
sb6 = 4.7452853394e+02, /* 0x43ed43a7 */
sb7 = -2.2440952301e+01; /* 0xc1b38712 */
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsvf)
/* Vectorized single-precision error function.

   Each lane is dispatched on the magnitude of x:
     - inf or NaN:            +-1 for +-inf, NaN for NaN
     - |x| < 0.84375:         x + x * P(x*x)/Q(x*x), with shortcuts for
			      very small arguments
     - 0.84375 <= |x| < 1.25: +-(erx + P(|x|-1)/Q(|x|-1))
     - |x| >= 6:              +-(1 - tiny)
     - otherwise:             +-(1 - exp (...) * exp (...) / |x|)
   The sign of the result always follows the sign of x.  Lane selection
   and early returns go through the VECTOR_* macros from amdgcnmach.h;
   later stages assume that lanes already returned are no longer updated.  */
DEF_VS_MATH_FUNC (v64sf, erff, v64sf x)
{
  FUNCTION_INIT (v64sf);

  v64si hx;
  GET_FLOAT_WORD (hx, x, NO_COND);
  v64si ix = hx & 0x7fffffff;		/* bit pattern of |x| */

  VECTOR_IF (ix >= 0x7f800000, cond)		/* erf(nan)=nan */
    /* i must be 0 for non-negative and 2 for negative lanes so that 1 - i
       is +1 or -1.  hx is a signed vector, so hx >> 31 is an arithmetic
       shift giving 0 or -1; reduce it to the 0/1 sign bit before doubling
       it.  Without the mask, negative lanes produced i == -2 and
       erf(-inf) came out as 3.  */
    v64si i = ((hx >> 31) & 1) << 1;
    /* erf(+-inf)=+-1 */
    VECTOR_RETURN (__builtin_convertvector (1 - i, v64sf) + 1.0f / x, cond);
  VECTOR_ENDIF

  VECTOR_IF (ix < 0x3f580000, cond)		/* |x|<0.84375 */
    VECTOR_IF2 (ix < 0x31800000, cond2, cond)	/* |x|<2**-28 */
      VECTOR_IF2 (ix < 0x04000000, cond3, cond2)	/* avoid underflow */
	VECTOR_RETURN (0.125f*(8.0f*x + efx8*x), cond3);
      VECTOR_ENDIF
      VECTOR_RETURN (x + efx*x, cond2);
    VECTOR_ENDIF

    v64sf z = x*x;
    v64sf r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
    v64sf s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
    v64sf y = r/s;

    VECTOR_RETURN (x + x*y, cond);
  VECTOR_ENDIF

  VECTOR_IF (ix < 0x3fa00000, cond)	/* 0.84375 <= |x| < 1.25 */
    v64sf s = __builtin_gcn_fabsvf (x) - 1.0f;
    v64sf P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
    v64sf Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
    VECTOR_IF2 (hx >= 0, cond2, cond)
      VECTOR_RETURN (erx + P/Q, cond2);
    VECTOR_ELSE2 (cond2, cond)
      VECTOR_RETURN (-erx - P/Q, cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF

  VECTOR_IF (ix >= 0x40c00000, cond)	/* inf>|x|>=6 */
    VECTOR_IF2 (hx >= 0, cond2, cond)
      VECTOR_RETURN (VECTOR_INIT (1.0f - tiny), cond2);
    VECTOR_ELSE2 (cond2, cond)
      VECTOR_RETURN (VECTOR_INIT (tiny - 1.0f), cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF

  /* Remaining lanes: 1.25 <= |x| < 6.  Work on |x| and reapply the sign
     from hx at the end.  */
  x = __builtin_gcn_fabsvf(x);
  v64sf s = 1.0f / (x*x);
  v64sf R, S;
  VECTOR_IF (ix < 0x4036DB6E, cond)	/* |x| < 1/0.35 */
    VECTOR_COND_MOVE (R, ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(
			 ra5+s*(ra6+s*ra7)))))), cond);
    VECTOR_COND_MOVE (S, one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(
			 sa5+s*(sa6+s*(sa7+s*sa8))))))), cond);
  VECTOR_ELSE (cond)			/* |x| >= 1/0.35 */
    VECTOR_COND_MOVE (R, rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(
			 rb5+s*rb6))))), cond);
    VECTOR_COND_MOVE (S, one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(
			 sb5+s*(sb6+s*sb7)))))), cond);
  VECTOR_ENDIF

  /* z is |x| with the low 12 bits of its word cleared.  */
  GET_FLOAT_WORD (ix, x, NO_COND);
  v64sf z;
  SET_FLOAT_WORD (z, ix & 0xfffff000, NO_COND);
  v64sf r = v64sf_expf_aux (-z*z - 0.5625f, __mask)
	    * v64sf_expf_aux ((z-x)*(z+x) + R/S, __mask);

  VECTOR_RETURN (one - r/x, hx >= 0);
  VECTOR_RETURN (r/x - one, hx < 0);

  FUNCTION_RETURN;
}

DEF_VARIANTS (erff, sf, sf)
#endif

View File

@ -0,0 +1,102 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/math/ef_exp.c in Newlib. */
#include "amdgcnmach.h"
/* Constants for the vectorized expf routine.  ln2HI and ln2LO are a
   two-part split of ln 2; index 0 holds the positive value and index 1
   its negation, so the sign bit of x can select the entry.  P1..P5 are
   the polynomial coefficients used once x is in the primary range.  */
static const float
huge = 1.0e+30,
twom100 = 7.8886090522e-31, /* 2**-100=0x0d800000 */
ln2HI[2] ={ 6.9313812256e-01, /* 0x3f317180 */
-6.9313812256e-01,}, /* 0xbf317180 */
ln2LO[2] ={ 9.0580006145e-06, /* 0x3717f7d1 */
-9.0580006145e-06,}, /* 0xb717f7d1 */
invln2 = 1.4426950216e+00, /* 0x3fb8aa3b */
P1 = 1.6666667163e-01, /* 0x3e2aaaab */
P2 = -2.7777778450e-03, /* 0xbb360b61 */
P3 = 6.6137559770e-05, /* 0x388ab355 */
P4 = -1.6533901999e-06, /* 0xb5ddea0e */
P5 = 4.1381369442e-08; /* 0x3331bb4c */
/* Vectorized single-precision exponential.
   Non-finite, overflowing and underflowing lanes are returned first.
   The remaining lanes are reduced to x = hi - lo with an integer
   multiplier k (k stays 0 when no reduction is applied), evaluated with
   a polynomial in x*x, and finally scaled by adding k to the exponent
   field of the result.  The early VECTOR_RETURNs are order-dependent:
   later stages assume lanes already returned are no longer updated (the
   macros are defined in amdgcnmach.h, not shown here).  */
DEF_VS_MATH_FUNC (v64sf, expf, v64sf x)
{
FUNCTION_INIT (v64sf);
v64si k = VECTOR_INIT (0);
v64si sx;
GET_FLOAT_WORD(sx, x, NO_COND);
v64si xsb = (sx>>31)&1; /* sign bit of x */
v64si hx = sx & 0x7fffffff; /* high word of |x| */
/* filter out non-finite argument */
VECTOR_RETURN (x+x, FLT_UWORD_IS_NAN(hx)); /* NaN */
VECTOR_RETURN (x, FLT_UWORD_IS_INFINITE(hx) & (xsb == 0));
VECTOR_RETURN (VECTOR_INIT (0.0f), FLT_UWORD_IS_INFINITE (hx)); /* exp(+-inf)={inf,0} */
VECTOR_RETURN (v64sf_math_oflowf (VECTOR_INIT (0)), sx > FLT_UWORD_LOG_MAX); /* overflow */
VECTOR_RETURN (v64sf_math_uflowf (VECTOR_INIT (0)), (sx < 0) & (hx > FLT_UWORD_LOG_MIN)); /* underflow */
/* argument reduction */
v64sf hi, lo;
VECTOR_IF (hx > 0x3eb17218, cond) /* if |x| > 0.5 ln2 */
VECTOR_IF2 (hx < 0x3F851592, cond2, cond) /* and |x| < 1.5 ln2 */
/* k is +1 or -1 here: subtract ln2 with the sign of x.  */
VECTOR_COND_MOVE (hi, x-ln2HI[0], cond2 & (xsb == 0));
VECTOR_COND_MOVE (hi, x-ln2HI[1], cond2 & (xsb == 1));
VECTOR_COND_MOVE (lo, VECTOR_INIT (ln2LO[0]), cond2 & (xsb == 0));
VECTOR_COND_MOVE (lo, VECTOR_INIT (ln2LO[1]), cond2 & (xsb == 1));
VECTOR_COND_MOVE (k, 1-xsb-xsb, cond2);
VECTOR_ELSE2 (cond2, cond)
/* k = x/ln2 plus or minus 0.5 according to the sign of x, converted to
   an integer vector.  */
VECTOR_COND_MOVE (k, __builtin_convertvector (invln2*x + 0.5f, v64si), cond2 & (xsb == 0));
VECTOR_COND_MOVE (k, __builtin_convertvector (invln2*x - 0.5f, v64si), cond2 & (xsb == 1));
v64sf t = __builtin_convertvector (k, v64sf);
VECTOR_COND_MOVE (hi, x - t*ln2HI[0], cond2); /* t*ln2HI is exact here */
VECTOR_COND_MOVE (lo, t*ln2LO[0], cond2);
VECTOR_ENDIF
VECTOR_COND_MOVE (x, hi - lo, cond);
VECTOR_ELSEIF (hx < 0x34000000, cond) /* when |x|<2**-23 */
VECTOR_RETURN (1.0f+x, cond & (huge+x > 1.0f)); /* trigger inexact */
VECTOR_ENDIF
/* x is now in primary range */
v64sf t = x*x;
v64sf c = x - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
/* Lanes that needed no reduction (k == 0) are finished here.  */
VECTOR_RETURN (1.0f - ((x*c)/(c-2.0f)-x), k==0);
v64sf y = 1.0f - ((lo-(x*c)/(2.0f-c))-hi);
VECTOR_IF (k >= -125, cond)
v64si hy;
GET_FLOAT_WORD(hy, y, cond);
SET_FLOAT_WORD(y,hy+(k<<23), cond); /* add k to y's exponent */
VECTOR_RETURN (y, cond);
VECTOR_ELSE (cond)
/* For k < -125 the exponent is adjusted by k+100 and the result is then
   multiplied by 2**-100.  */
v64si hy;
GET_FLOAT_WORD(hy, y, cond);
SET_FLOAT_WORD(y, hy+((k+100)<<23), cond); /* add k to y's exponent */
VECTOR_RETURN (y*twom100, cond);
VECTOR_ENDIF
FUNCTION_RETURN;
}
DEF_VARIANTS (expf, sf, sf)

View File

@ -0,0 +1,32 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/* Copyright (C) 2002 by Red Hat, Incorporated. All rights reserved.
*
* Permission to use, copy, modify, and distribute this software
* is freely granted, provided that this notice is preserved.
*/
/* Based on newlib/libm/mathfp/sf_exp2.c in Newlib. */
#include "amdgcnmach.h"
v64sf v64sf_powf_aux (v64sf, v64sf, v64si);

/* Vectorized exp2f: raises 2 to the power of each lane of X.
   The work is delegated to the powf helper, with the base 2.0f
   broadcast to every lane and the caller's lane mask passed through.  */
DEF_VS_MATH_FUNC (v64sf, exp2f, v64sf x)
{
  v64sf base = VECTOR_INIT (2.0f);

  return v64sf_powf_aux (base, x, __mask);
}
DEF_VARIANTS (exp2f, sf, sf)

View File

@ -0,0 +1,37 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/common/sf_finite.c in Newlib. */
#include "amdgcnmach.h"
/* Vectorized finitef: per-lane test of whether X is neither infinite
   nor NaN.  The sign bit is stripped from the raw bit pattern and the
   remaining magnitude is classified by FLT_UWORD_IS_FINITE.  */
DEF_VS_MATH_FUNC (v64si, finitef, v64sf x)
{
  v64si bits = CAST_VECTOR (v64si, x);
  v64si magnitude = bits & 0x7fffffff;	/* drop the sign bit */

  return FLT_UWORD_IS_FINITE (magnitude);
}
DEF_VARIANTS (finitef, si, sf)

View File

@ -0,0 +1,147 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/mathfp/sf_fmod.c in Newlib. */
#include "amdgcnmach.h"
/* Vectorized fmodf: per-lane floating-point remainder of x / y, carrying
   the sign of x.  The algorithm works on the raw bit patterns: both
   operands are reduced to |x| and |y|, aligned as fixed-point mantissas,
   and |y| is repeatedly subtracted (shift-and-subtract) before the result
   is renormalized and the sign of x is restored.

   NOTE(review): FUNCTION_INIT, VECTOR_IF/ELSE/ENDIF, VECTOR_RETURN,
   VECTOR_COND_MOVE and ALL_ZEROES_P are defined in amdgcnmach.h, which is
   not visible here.  The comments below assume VECTOR_RETURN records the
   result for the selected lanes and that those lanes are ignored by later
   masked operations and loop-termination tests -- confirm against the
   header.  */
DEF_VS_MATH_FUNC (v64sf, fmodf, v64sf x, v64sf y)
{
FUNCTION_INIT(v64sf);
v64si hx, hy, hz;
GET_FLOAT_WORD (hx, x, NO_COND);
GET_FLOAT_WORD (hy, y, NO_COND);
v64si sx = hx & 0x80000000; /* sign of x */
hx ^=sx; /* |x| */
hy &= 0x7fffffff; /* |y| */
/* Per-lane signed zero matching the sign of x, used for exact-zero results.  */
v64sf zeroes = VECTOR_MERGE (VECTOR_INIT (-0.0f),
VECTOR_INIT (0.0f),
sx != 0);
/* purge off exception values */
VECTOR_IF ((hy == 0) | (hx >= 0x7f800000)
| (hy > 0x7f800000), cond) // y=0, or x not finite or y is NaN
VECTOR_RETURN ((x * y) / (x * y), cond);
VECTOR_ENDIF
VECTOR_IF (hx < hy, cond) // |x|<|y| return x
VECTOR_RETURN (x, cond);
VECTOR_ENDIF
VECTOR_IF (hx == hy, cond)
VECTOR_RETURN (zeroes, hx == hy); // |x|=|y| return x*0
VECTOR_ENDIF
/* determine ix = ilogb(x) */
v64si ix;
VECTOR_IF (hx < 0x00800000, cond) // subnormal x
ix = VECTOR_INIT (-126);
/* Shift the mantissa left until its top bit is set, decrementing the
   exponent once per step; loops while any selected lane still needs it.  */
for (v64si i = (hx << 8);
!ALL_ZEROES_P (cond & (i > 0));
i <<= 1)
VECTOR_COND_MOVE (ix, ix - 1, cond & (i > 0));
VECTOR_ELSE (cond)
VECTOR_COND_MOVE (ix, (hx >> 23) - 127, cond);
VECTOR_ENDIF
/* determine iy = ilogb(y) */
v64si iy;
VECTOR_IF (hy < 0x00800000, cond) // subnormal y
iy = VECTOR_INIT (-126);
/* Same normalization as for x, but tested with i >= 0 rather than i > 0.
   NOTE(review): a lane with hy == 0 would never leave this test on its
   own; presumably such lanes were already retired by the y == 0 return
   above -- confirm.  */
for (v64si i = (hy << 8); !ALL_ZEROES_P (cond & (i >= 0)); i <<= 1)
VECTOR_COND_MOVE (iy, iy - 1, cond & (i >= 0));
VECTOR_ELSE (cond)
VECTOR_COND_MOVE (iy, (hy >> 23) - 127, cond);
VECTOR_ENDIF
/* set up {hx,lx}, {hy,ly} and align y to x */
VECTOR_IF (ix >= -126, cond)
/* Normal x: keep the 23 mantissa bits and make the implicit 1 explicit.  */
VECTOR_COND_MOVE (hx, 0x00800000 | (0x007fffff & hx), cond);
VECTOR_ELSE (cond) // subnormal x, shift x to normal
{
v64si n = -126 - ix;
VECTOR_COND_MOVE (hx, hx << n, cond);
}
VECTOR_ENDIF
VECTOR_IF (iy >= -126, cond)
/* Normal y: same treatment as for x.  */
VECTOR_COND_MOVE (hy, 0x00800000 | (0x007fffff & hy), cond);
VECTOR_ELSE (cond) // subnormal y, shift y to normal
{
v64si n = -126 - iy;
VECTOR_COND_MOVE (hy, hy << n, cond);
}
VECTOR_ENDIF
/* fix point fmod */
/* n is the per-lane number of shift-and-subtract steps (exponent gap).  */
v64si n = ix - iy;
v64si cond = n != 0;
while (!ALL_ZEROES_P (cond))
{
hz = hx - hy;
VECTOR_IF2 (hz < 0, cond2, cond)
/* hx < hy: cannot subtract, just double hx.  */
VECTOR_COND_MOVE (hx, hx + hx, cond2);
VECTOR_ELSE2 (cond2, cond)
VECTOR_IF2 (hz == 0, cond3, cond2) // return sign(x)*0
VECTOR_RETURN (zeroes, cond3);
VECTOR_ELSE2 (cond3, cond2)
/* Subtract and double.  The move is masked with cond2, which still
   includes the hz == 0 lanes returned just above.  */
VECTOR_COND_MOVE (hx, hz + hz, cond2);
VECTOR_ENDIF
VECTOR_ENDIF
n += cond; // Active lanes should be -1
cond &= (n != 0);
}
/* Final conditional subtraction once the exponents are aligned.  */
hz = hx - hy;
VECTOR_COND_MOVE (hx, hz, hz >= 0);
/* convert back to floating value and restore the sign */
VECTOR_RETURN (zeroes, hx == 0); // return sign(x)*0
cond = hx < 0x00800000;
while (!ALL_ZEROES_P (cond)) // normalize x
{
VECTOR_COND_MOVE (hx, hx + hx, cond);
iy += cond; // Active lanes should be -1
cond &= (hx < 0x00800000);
}
VECTOR_IF (iy >= -126, cond) // normalize output
/* Replace the explicit leading 1 with the biased exponent field.  */
VECTOR_COND_MOVE (hx, (hx - 0x00800000) | ((iy + 127) << 23), cond);
SET_FLOAT_WORD (x, hx | sx, cond);
VECTOR_ELSE (cond) // subnormal output
/* These two statements are not lane-masked: they also run on lanes
   handled by the normal branch above.  Only the masked SET_FLOAT_WORD
   below writes x from the shifted hx.  */
n = -126 - iy;
hx >>= n;
SET_FLOAT_WORD (x, hx | sx, cond);
x *= VECTOR_INIT (1.0f); /* create necessary signal */
VECTOR_ENDIF
VECTOR_RETURN (x, NO_COND); /* exact output */
FUNCTION_RETURN;
}
DEF_VARIANTS2 (fmodf, sf, sf)

View File

@ -0,0 +1,38 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*
*/
/* Based on newlib/libm/math/wf_gamma.c in Newlib. */
#include "amdgcnmach.h"
v64sf v64sf_lgammaf_aux (v64sf x, v64si __mask);

/* Vectorized gammaf: a thin wrapper that forwards X and the caller's
   lane mask to the lgammaf helper and returns its result unchanged.  */
DEF_VS_MATH_FUNC (v64sf, gammaf, v64sf x)
{
  v64sf log_gamma = v64sf_lgammaf_aux (x, __mask);

  return log_gamma;
}
DEF_VARIANTS (gammaf, sf, sf)

View File

@ -0,0 +1,118 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/mathfp/ef_hypot.c in Newlib. */
#include "amdgcnmach.h"
v64sf v64sf_sqrtf_aux (v64sf, v64si);

/* Vectorized hypotf: per-lane sqrt (x*x + y*y) computed without undue
   overflow or underflow.

   Outline:
     - a/ha hold the larger magnitude and b/hb the smaller one (both with
       the sign bit cleared);
     - if the exponents differ by more than 2**30 the result is a + b;
     - very large or very small operands are rescaled by a power of two,
       with K recording the exponent to apply to the final result;
     - the square root is taken of a rearranged sum that splits the
       operands into high and low parts.

   Fixes relative to the previous revision:
     - the Inf/NaN early return is restricted to the Inf/NaN lanes
       (cond2); it used to return a + b for every lane with a > 2**50;
     - the subnormal scale factor really is 2^126 (bit pattern
       0x7e800000); the old pattern 0x3f000000 encodes 0.5f, which is
       inconsistent with the "k - 126" exponent bookkeeping.  */
DEF_VS_MATH_FUNC (v64sf, hypotf, v64sf x, v64sf y)
{
  FUNCTION_INIT (v64sf);

  v64sf a = x;
  v64sf b = y;

  v64si ha;
  GET_FLOAT_WORD (ha, x, NO_COND);
  ha &= 0x7fffffffL;
  v64si hb;
  GET_FLOAT_WORD (hb, y, NO_COND);
  hb &= 0x7fffffffL;

  /* Order the magnitudes so that ha >= hb in every lane.  */
  VECTOR_IF (hb > ha, cond)
    v64si j = ha;
    VECTOR_COND_MOVE (ha, hb, cond);
    VECTOR_COND_MOVE (hb, j, cond);
  VECTOR_ENDIF
  SET_FLOAT_WORD (a, ha, NO_COND);	/* a <- |a| */
  SET_FLOAT_WORD (b, hb, NO_COND);	/* b <- |b| */

  VECTOR_IF((ha - hb) > 0xf000000L, cond)	/* x/y > 2**30 */
    VECTOR_RETURN (a + b, cond);
  VECTOR_ENDIF

  /* k accumulates the power-of-two scaling to undo at the end.  */
  v64si k = VECTOR_INIT (0);

  VECTOR_IF (ha > 0x58800000L, cond)		/* a>2**50 */
    VECTOR_IF2 (ha >= 0x7f800000L, cond2, cond)	/* Inf or NaN */
      v64sf w = a + b;				/* for sNaN */
      VECTOR_COND_MOVE (w, a, cond2 & (ha == 0x7f800000));
      VECTOR_COND_MOVE (w, b, cond2 & (hb == 0x7f800000));
      /* Only the Inf/NaN lanes return here; large finite lanes fall
	 through to the scaling below.  */
      VECTOR_RETURN (w, cond2);
    VECTOR_ENDIF
    /* scale a and b by 2**-60 */
    VECTOR_COND_MOVE (ha, ha - 0x5d800000, cond);
    VECTOR_COND_MOVE (hb, hb - 0x5d800000, cond);
    VECTOR_COND_MOVE (k, k + 60, cond);
    SET_FLOAT_WORD (a, ha, cond);
    SET_FLOAT_WORD (b, hb, cond);
  VECTOR_ENDIF

  VECTOR_IF (hb < 0x26800000, cond)		/* b < 2**-50 */
    VECTOR_IF2 (hb <= 0x007fffff, cond2, cond)	/* subnormal b or 0 */
      VECTOR_RETURN (a, cond2 & (hb == 0));
      /* t1 = 2^126: biased exponent 253, i.e. bit pattern 0x7e800000.  */
      v64sf t1;
      SET_FLOAT_WORD (t1, VECTOR_INIT (0x7e800000), cond2);
      VECTOR_COND_MOVE (b, b * t1, cond2);
      VECTOR_COND_MOVE (a, a * t1, cond2);
      VECTOR_COND_MOVE (k, k - 126, cond2);
      /* NOTE(review): ha/hb are not refreshed after this scaling, same as
	 before this change; they only seed the t1/y1 splits below.  */
    VECTOR_ELSE2 (cond2, cond)			/* scale a and b by 2^60 */
      VECTOR_COND_MOVE (ha, ha + 0x5d800000, cond2);	/* a *= 2^60 */
      VECTOR_COND_MOVE (hb, hb + 0x5d800000, cond2);	/* b *= 2^60 */
      VECTOR_COND_MOVE (k, k - 60, cond2);
      SET_FLOAT_WORD (a, ha, cond2);
      SET_FLOAT_WORD (b, hb, cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF

  /* medium size a and b */
  v64sf w = a - b;
  VECTOR_IF (w > b, cond)
    /* a > 2b: split a into a truncated high part t1 and remainder t2.  */
    v64sf t1;
    SET_FLOAT_WORD (t1, ha & 0xfffff000, cond);
    v64sf t2 = a - t1;
    VECTOR_COND_MOVE (w, v64sf_sqrtf_aux (t1*t1 - (b*(-b) - t2 * (a + t1)), __mask), cond);
  VECTOR_ELSE (cond)
    /* a <= 2b: double a, split b into y1 + y2 and 2a into t1 + t2.  */
    VECTOR_COND_MOVE (a, a+a, cond);
    v64sf y1;
    SET_FLOAT_WORD (y1, hb & 0xfffff000, cond);
    v64sf y2 = b - y1;
    v64sf t1;
    SET_FLOAT_WORD (t1, ha + 0x00800000, cond);
    v64sf t2 = a - t1;
    VECTOR_COND_MOVE (w, v64sf_sqrtf_aux (t1*y1 - (w*(-w) - (t1*y2 + t2*b)), __mask), cond);
  VECTOR_ENDIF

  VECTOR_IF (k != 0, cond)
    /* Undo the scaling: multiply by 2**k built directly as a bit pattern.  */
    v64sf t1;
    SET_FLOAT_WORD (t1, 0x3f800000 + (k << 23), cond);
    VECTOR_RETURN (t1 * w, cond);
  VECTOR_ELSE (cond)
    VECTOR_RETURN (w, cond);
  VECTOR_ENDIF

  FUNCTION_RETURN;
}
DEF_VARIANTS2 (hypotf, sf, sf)

View File

@ -0,0 +1,56 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/common/sf_ilogb.c in Newlib. */
#include "amdgcnmach.h"
/* Vectorized ilogbf: per-lane unbiased binary exponent of X as an
   integer.

   Results by class of input:
     zero        -> -__INT_MAX__ (FP_ILOGB0)
     subnormal   -> -126 minus the number of shifts needed to normalize
     Inf or NaN  -> __INT_MAX__
     otherwise   -> biased exponent field minus 127.  */
DEF_VS_MATH_FUNC (v64si, ilogbf, v64sf x)
{
  FUNCTION_INIT(v64si);

  v64si bits, expo;
  GET_FLOAT_WORD (bits, x, NO_COND);
  bits &= 0x7fffffff;			/* discard the sign bit */

  VECTOR_IF (FLT_UWORD_IS_ZERO (bits), zero_lanes)
    VECTOR_RETURN (VECTOR_INIT (-__INT_MAX__), zero_lanes);	// FP_ILOGB0
  VECTOR_ENDIF

  VECTOR_IF (FLT_UWORD_IS_SUBNORMAL (bits), lanes)
    expo = VECTOR_INIT (-126);
    /* Walk the mantissa towards the top bit, one exponent step per shift,
       for as long as any selected lane still has a positive probe.  */
    v64si probe = bits << 8;
    while (!ALL_ZEROES_P (lanes & (probe > 0)))
      {
	VECTOR_COND_MOVE (expo, expo - 1, lanes & (probe > 0));
	probe <<= 1;
      }
    VECTOR_RETURN (expo, lanes);
  VECTOR_ELSEIF (~FLT_UWORD_IS_FINITE (bits), lanes)
    VECTOR_RETURN (VECTOR_INIT (__INT_MAX__), lanes);
  VECTOR_ENDIF

  /* Remaining lanes are normal numbers.  */
  VECTOR_RETURN ((bits >> 23) - 127, NO_COND);

  FUNCTION_RETURN;
}
DEF_VARIANTS (ilogbf, si, sf)

View File

@ -0,0 +1,37 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/common/sf_isnan.c in Newlib. */
#include "amdgcnmach.h"
/* Vectorized isnanf: per-lane test of whether X is a NaN.  The sign bit
   is stripped from the raw bit pattern and the remaining magnitude is
   classified by FLT_UWORD_IS_NAN.  */
DEF_VS_MATH_FUNC (v64si, isnanf, v64sf x)
{
  v64si bits = CAST_VECTOR (v64si, x);
  v64si magnitude = bits & 0x7fffffff;	/* drop the sign bit */

  return FLT_UWORD_IS_NAN (magnitude);
}
DEF_VARIANTS (isnanf, si, sf)

View File

@ -0,0 +1,40 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/* Based on newlib/libm/mathfp/sf_ispos.c in Newlib. */
#include "amdgcnmach.h"
/* Per-lane sign test: the result lane is true when the sign bit of the
   corresponding lane of X is clear.  Only the sign bit is inspected, so
   the answer does not depend on the magnitude bits.  */
v64si
v64sf_isposf (v64sf x)
{
  v64si bits = CAST_VECTOR (v64si, x);
  v64si sign = bits & 0x80000000;

  return sign == 0;
}

View File

@ -0,0 +1,44 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*
*/
/* Based on newlib/libm/math/wf_lgamma.c in Newlib. */
#include "amdgcnmach.h"
v64si v64sf_finitef_aux (v64sf x, v64si __mask);
v64sf v64sf_lgammaf_r_aux (v64sf x, v64si *signgamp, v64si __mask);

/* Vectorized lgammaf: per-lane log of |gamma (x)|.  The sign of gamma is
   stored through the _REENT_V64SI_SIGNGAM slot by the reentrant helper.

   errno is set to ERANGE when at least one lane overflowed, i.e. produced
   a non-finite result from a finite argument.  The previous test required
   every lane of the result to be non-finite, so an overflow in one lane
   was missed whenever any other lane returned a finite value.  */
DEF_VS_MATH_FUNC (v64sf, lgammaf, v64sf x)
{
  v64sf y = v64sf_lgammaf_r_aux(x, &(_REENT_V64SI_SIGNGAM(_V64_REENT)), __mask);

  /* Lanes where the result is not finite although the input was.  */
  v64si overflowed = ~v64sf_finitef_aux (y, __mask)
		     & v64sf_finitef_aux (x, __mask);

  if (!ALL_ZEROES_P (overflowed))
    {
      /* lgamma(finite) overflow */
      errno = ERANGE;
    }

  return y;
}
DEF_VARIANTS (lgammaf, sf, sf)

View File

@ -0,0 +1,300 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*
*/
/* Based on newlib/libm/math/erf_lgamma.c in Newlib. */
#include "amdgcnmach.h"
/* Coefficient table for the lgammaf_r implementation in this file.  The
   hexadecimal value in each trailing comment is the IEEE single-precision
   bit pattern of the constant.

   Usage in this file:
     two23, pi     - argument reduction and reflection in v64sf_sin_pif and
		     lgammaf_r
     a0..a11       - polynomial for the i == 0 case of x < 2
     tc, tf, tt,
     t0..t14       - shift point and polynomial for the i == 1 case
     u0..u5, v1..v5 - numerator/denominator for the i == 2 case
     s0..s6, r1..r6 - numerator/denominator for 2 <= x < 8
     w0..w6        - series in 1/x for 8 <= x < 2**58  */
static const float two23= 8.3886080000e+06, /* 0x4b000000 */
half= 5.0000000000e-01, /* 0x3f000000 */
one = 1.0000000000e+00, /* 0x3f800000 */
pi = 3.1415927410e+00, /* 0x40490fdb */
a0 = 7.7215664089e-02, /* 0x3d9e233f */
a1 = 3.2246702909e-01, /* 0x3ea51a66 */
a2 = 6.7352302372e-02, /* 0x3d89f001 */
a3 = 2.0580807701e-02, /* 0x3ca89915 */
a4 = 7.3855509982e-03, /* 0x3bf2027e */
a5 = 2.8905137442e-03, /* 0x3b3d6ec6 */
a6 = 1.1927076848e-03, /* 0x3a9c54a1 */
a7 = 5.1006977446e-04, /* 0x3a05b634 */
a8 = 2.2086278477e-04, /* 0x39679767 */
a9 = 1.0801156895e-04, /* 0x38e28445 */
a10 = 2.5214456400e-05, /* 0x37d383a2 */
a11 = 4.4864096708e-05, /* 0x383c2c75 */
tc = 1.4616321325e+00, /* 0x3fbb16c3 */
tf = -1.2148628384e-01, /* 0xbdf8cdcd */
/* tt = -(tail of tf) */
tt = 6.6971006518e-09, /* 0x31e61c52 */
t0 = 4.8383611441e-01, /* 0x3ef7b95e */
t1 = -1.4758771658e-01, /* 0xbe17213c */
t2 = 6.4624942839e-02, /* 0x3d845a15 */
t3 = -3.2788541168e-02, /* 0xbd064d47 */
t4 = 1.7970675603e-02, /* 0x3c93373d */
t5 = -1.0314224288e-02, /* 0xbc28fcfe */
t6 = 6.1005386524e-03, /* 0x3bc7e707 */
t7 = -3.6845202558e-03, /* 0xbb7177fe */
t8 = 2.2596477065e-03, /* 0x3b141699 */
t9 = -1.4034647029e-03, /* 0xbab7f476 */
t10 = 8.8108185446e-04, /* 0x3a66f867 */
t11 = -5.3859531181e-04, /* 0xba0d3085 */
t12 = 3.1563205994e-04, /* 0x39a57b6b */
t13 = -3.1275415677e-04, /* 0xb9a3f927 */
t14 = 3.3552918467e-04, /* 0x39afe9f7 */
u0 = -7.7215664089e-02, /* 0xbd9e233f */
u1 = 6.3282704353e-01, /* 0x3f2200f4 */
u2 = 1.4549225569e+00, /* 0x3fba3ae7 */
u3 = 9.7771751881e-01, /* 0x3f7a4bb2 */
u4 = 2.2896373272e-01, /* 0x3e6a7578 */
u5 = 1.3381091878e-02, /* 0x3c5b3c5e */
v1 = 2.4559779167e+00, /* 0x401d2ebe */
v2 = 2.1284897327e+00, /* 0x4008392d */
v3 = 7.6928514242e-01, /* 0x3f44efdf */
v4 = 1.0422264785e-01, /* 0x3dd572af */
v5 = 3.2170924824e-03, /* 0x3b52d5db */
s0 = -7.7215664089e-02, /* 0xbd9e233f */
s1 = 2.1498242021e-01, /* 0x3e5c245a */
s2 = 3.2577878237e-01, /* 0x3ea6cc7a */
s3 = 1.4635047317e-01, /* 0x3e15dce6 */
s4 = 2.6642270386e-02, /* 0x3cda40e4 */
s5 = 1.8402845599e-03, /* 0x3af135b4 */
s6 = 3.1947532989e-05, /* 0x3805ff67 */
r1 = 1.3920053244e+00, /* 0x3fb22d3b */
r2 = 7.2193557024e-01, /* 0x3f38d0c5 */
r3 = 1.7193385959e-01, /* 0x3e300f6e */
r4 = 1.8645919859e-02, /* 0x3c98bf54 */
r5 = 7.7794247773e-04, /* 0x3a4beed6 */
r6 = 7.3266842264e-06, /* 0x36f5d7bd */
w0 = 4.1893854737e-01, /* 0x3ed67f1d */
w1 = 8.3333335817e-02, /* 0x3daaaaab */
w2 = -2.7777778450e-03, /* 0xbb360b61 */
w3 = 7.9365057172e-04, /* 0x3a500cfd */
w4 = -5.9518753551e-04, /* 0xba1c065c */
w5 = 8.3633989561e-04, /* 0x3a5b3dd2 */
w6 = -1.6309292987e-03; /* 0xbad5c4e8 */
static const float zero= 0.0000000000e+00;
/* Vector helpers defined in other files; each takes the active-lane mask
   as its last argument.  */
v64sf v64sf_cosf_aux (v64sf x, v64si __mask);
v64sf v64sf_logf_aux (v64sf x, v64si __mask);
v64sf v64sf_sinf_aux (v64sf x, v64si __mask);
#if defined (__has_builtin) \
&& __has_builtin (__builtin_gcn_floorvf) \
&& __has_builtin (__builtin_gcn_fabsvf)
/* Helper for lgammaf_r: per-lane sin (pi * x) for negative x, used by
   the reflection step for negative arguments.

   For |x| < 0.25 (ix < 0x3e800000) sin (pi * x) is evaluated directly.
   Otherwise y = -x is reduced modulo 2, n selects one of the cases
   below, and the matching sin/cos helper is applied to the reduced
   argument.  The function returns -y.

   NOTE(review): the VECTOR_* macros come from amdgcnmach.h, which is not
   visible here; lane-masking behavior is assumed from their use in this
   file.  */
static v64sf
v64sf_sin_pif (v64sf x)
{
// Explicitly create mask for internal function.
v64si __mask = VECTOR_INIT (-1);
FUNCTION_INIT (v64sf);
v64sf y, z;
v64si n, ix;
GET_FLOAT_WORD (ix, x, NO_COND);
ix &= 0x7fffffff;
VECTOR_IF (ix < 0x3e800000, cond)
VECTOR_RETURN (v64sf_sinf_aux (pi * x, __mask), cond);
VECTOR_ENDIF
y = -x; /* x is assumed negative */
/*
* argument reduction, make sure inexact flag not raised if input
* is an integer
*/
z = __builtin_gcn_floorvf (y);
VECTOR_IF (z != y, cond)
/* inexact anyway */
VECTOR_COND_MOVE(y, y * 0.5F, cond);
VECTOR_COND_MOVE(y, 2.0F * (y - __builtin_gcn_floorvf (y)), cond); /* y = |x| mod 2.0 */
/* n is the truncated value of 4 * y.  */
VECTOR_COND_MOVE(n, __builtin_convertvector(y * 4.0F, v64si), cond);
VECTOR_ELSE (cond)
/* y is an integer in these lanes.  */
VECTOR_IF2 (ix >= 0x4b800000, cond2, cond)
VECTOR_COND_MOVE(y, VECTOR_INIT(zero), cond2);
VECTOR_COND_MOVE(n, VECTOR_INIT(0), cond2); /* y must be even */
VECTOR_ELSE2 (cond2, cond)
/* Adding 2**23 places the integer's parity in the lowest bit of the
   float word, which is read back below to recover y mod 2.  */
VECTOR_COND_MOVE(z, y + two23 /* exact */, cond2 & (ix < 0x4b000000));
GET_FLOAT_WORD (n, z, cond2);
VECTOR_COND_MOVE(n, n & 1, cond2);
VECTOR_COND_MOVE(y, __builtin_convertvector(n, v64sf), cond2);
VECTOR_COND_MOVE(n, n << 2, cond2);
VECTOR_ENDIF
VECTOR_ENDIF
/* Dispatch on n, pairing adjacent values as the scalar switch does.  */
VECTOR_IF (n == 0, cond)
VECTOR_COND_MOVE(y, v64sf_sinf_aux (pi * y, __mask), cond);
VECTOR_ELSEIF (n == 1 | n == 2, cond)
VECTOR_COND_MOVE(y, v64sf_cosf_aux (pi * (0.5F - y), __mask), cond);
VECTOR_ELSEIF (n == 3 | n == 4, cond)
VECTOR_COND_MOVE(y, v64sf_sinf_aux (pi * (VECTOR_INIT(one) - y), __mask), cond);
VECTOR_ELSEIF (n == 5 | n == 6, cond)
VECTOR_COND_MOVE(y, -v64sf_cosf_aux (pi * (y - 1.5F), __mask), cond);
VECTOR_ELSE (cond)
VECTOR_COND_MOVE(y, v64sf_sinf_aux (pi * (y - 2.0F), __mask), cond);
VECTOR_ENDIF
VECTOR_RETURN(-y, NO_COND);
FUNCTION_RETURN;
}
/* Vectorized reentrant lgammaf: per-lane log (|gamma (x)|).  The sign of
   gamma (x) is written per lane to *SIGNGAMP (1 or -1).

   Case analysis, per lane:
     - Inf/NaN                 -> x*x
     - +-0                     -> one/(x-x), sign taken from x
     - |x| < 2**-30            -> -log (|x|)
     - x < 0                   -> reflection via sin (pi*x); negative
				  integers return one/(x-x)
     - x == 1 or x == 2        -> 0
     - x < 2                   -> one of three polynomial/rational fits
     - 2 <= x < 8              -> rational fit plus log of a product
     - 8 <= x < 2**58          -> series in 1/x
     - otherwise               -> x * (log (x) - 1)

   Fixes relative to the previous revision:
     - for tiny arguments, *SIGNGAMP is set to -1 only in the negative
       lanes (cond2); it used to be set in every tiny lane, including
       positive ones;
     - in the x <= 0.9 branch, r = -log (x) is now a lane-masked move; the
       unmasked assignment overwrote r in lanes already handled by the
       "x == 1 or x == 2" branch, giving -log (2) instead of 0 for x == 2
       when another lane took this branch.  */
DEF_VS_MATH_FUNC (v64sf, lgammaf_r, v64sf x, v64si *signgamp)
{
  FUNCTION_INIT (v64sf);

  v64sf t,y,z,nadj = VECTOR_INIT(0.0F),p,p1,p2,p3,q,r,w;
  v64si i,hx,ix;

  GET_FLOAT_WORD(hx,x,NO_COND);

  /* purge off +-inf, NaN, +-0, and negative arguments */
  *signgamp = VECTOR_INIT(1);
  ix = hx&0x7fffffff;
  VECTOR_IF(ix>=0x7f800000, cond)
    VECTOR_RETURN (x*x, cond);
  VECTOR_ENDIF
  VECTOR_IF(ix==0, cond)
    VECTOR_COND_MOVE(*signgamp, VECTOR_INIT(-1), cond & (hx<0));
    VECTOR_RETURN(one/(x-x), cond);
  VECTOR_ENDIF
  VECTOR_IF (ix < 0x30800000, cond)	/* |x|<2**-30, return -log(|x|) */
    VECTOR_IF2(hx<0, cond2, cond)
      /* Only the negative tiny lanes have gamma (x) < 0.  */
      VECTOR_COND_MOVE(*signgamp, VECTOR_INIT(-1), cond2);
      VECTOR_RETURN (-v64sf_logf_aux(-x, __mask), cond2);
    VECTOR_ELSE2(cond2, cond)
      VECTOR_RETURN (-v64sf_logf_aux(x, __mask), cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF
  VECTOR_IF (hx < 0, cond)
    VECTOR_IF2(ix>=0x4b000000, cond2, cond)	/* |x|>=2**23, must be -integer */
      VECTOR_RETURN(one/(x-x), cond2);
    VECTOR_ENDIF
    VECTOR_COND_MOVE (t, v64sf_sin_pif (x), cond);
    VECTOR_IF2(t==zero, cond2, cond)
      /* tgamma wants NaN instead of INFINITY */
      VECTOR_RETURN(one/(x-x), cond2);	/* -integer */
    VECTOR_ENDIF
    /* nadj is combined with r at the end as nadj - r; the sign of
       gamma follows the sign of sin (pi*x).  */
    VECTOR_COND_MOVE(nadj, v64sf_logf_aux(VECTOR_INIT(pi)/__builtin_gcn_fabsvf(t*x), __mask), cond);
    VECTOR_COND_MOVE(*signgamp, VECTOR_INIT(-1), cond & (t < zero));
    VECTOR_COND_MOVE(x, -x, cond);
  VECTOR_ENDIF

  /* purge off 1 and 2 */
  VECTOR_IF(ix==0x3f800000|ix==0x40000000, cond)
    VECTOR_COND_MOVE(r, VECTOR_INIT(0.0F), cond);
  /* for x < 2.0 */
  VECTOR_ELSEIF(ix<0x40000000, cond)
    VECTOR_IF2(ix<=0x3f666666, cond2, cond)
      /* lgamma(x) = lgamma(x+1)-log(x) */
      /* Masked so that lanes settled by other branches keep their r.  */
      VECTOR_COND_MOVE(r, -v64sf_logf_aux(x, __mask), cond2);
      VECTOR_IF2(ix>=0x3f3b4a20, cond3, cond2)
	VECTOR_COND_MOVE(y, one-x, cond3);
	VECTOR_COND_MOVE(i, VECTOR_INIT(0), cond3);
      VECTOR_ELSEIF2(ix>=0x3e6d3308, cond3, cond2)
	VECTOR_COND_MOVE(y, x-(tc-one), cond3);
	VECTOR_COND_MOVE(i, VECTOR_INIT(1), cond3);
      VECTOR_ELSE2(cond3, cond2)
	VECTOR_COND_MOVE(y, x, cond3);
	VECTOR_COND_MOVE(i, VECTOR_INIT(2), cond3);
      VECTOR_ENDIF
    VECTOR_ELSE2(cond2, cond)
      VECTOR_COND_MOVE(r, VECTOR_INIT(zero), cond2);
      VECTOR_IF2(ix>=0x3fdda618, cond3, cond2)	/* [1.7316,2] */
	VECTOR_COND_MOVE(y, VECTOR_INIT(2.0F)-x, cond3);
	VECTOR_COND_MOVE(i, VECTOR_INIT(0), cond3);
      VECTOR_ELSEIF2(ix>=0x3F9da620, cond3, cond2)	/* [1.23,1.73] */
	VECTOR_COND_MOVE(y, x-tc, cond3);
	VECTOR_COND_MOVE(i, VECTOR_INIT(1), cond3);
      VECTOR_ELSE2(cond3, cond2)
	VECTOR_COND_MOVE(y, x-one, cond3);
	VECTOR_COND_MOVE(i, VECTOR_INIT(2), cond3);
      VECTOR_ENDIF
    VECTOR_ENDIF

    /* i selects which of the three approximations applies to the lane.  */
    VECTOR_IF2(i==0, cond2, cond)
      VECTOR_COND_MOVE(z, y*y, cond2);
      VECTOR_COND_MOVE(p1, a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10)))), cond2);
      VECTOR_COND_MOVE(p2, z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11))))), cond2);
      VECTOR_COND_MOVE(p, y*p1+p2, cond2);
      VECTOR_COND_MOVE(r, r + (p-(float)0.5*y), cond2);
    VECTOR_ELSEIF2(i==1, cond2, cond)
      VECTOR_COND_MOVE(z, y*y, cond2);
      VECTOR_COND_MOVE(w, z*y, cond2);
      VECTOR_COND_MOVE(p1, t0+w*(t3+w*(t6+w*(t9 +w*t12))), cond2);	/* parallel comp */
      VECTOR_COND_MOVE(p2, t1+w*(t4+w*(t7+w*(t10+w*t13))), cond2);
      VECTOR_COND_MOVE(p3, t2+w*(t5+w*(t8+w*(t11+w*t14))), cond2);
      VECTOR_COND_MOVE(p, z*p1-(tt-w*(p2+y*p3)), cond2);
      VECTOR_COND_MOVE(r, r + (tf + p), cond2);
    VECTOR_ELSEIF2(i==2, cond2, cond)
      VECTOR_COND_MOVE(p1, y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5))))), cond2);
      VECTOR_COND_MOVE(p2, one+y*(v1+y*(v2+y*(v3+y*(v4+y*v5)))), cond2);
      VECTOR_COND_MOVE(r, r + (-(float)0.5*y + p1/p2), cond2);
    VECTOR_ENDIF
  VECTOR_ELSEIF(ix<0x41000000, cond)
    /* x < 8.0 */
    VECTOR_COND_MOVE(i, __builtin_convertvector(x, v64si), cond);
    VECTOR_COND_MOVE(t, VECTOR_INIT(zero), cond);
    VECTOR_COND_MOVE(y, x-__builtin_convertvector(i, v64sf), cond);
    VECTOR_COND_MOVE(p, y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6)))))), cond);
    VECTOR_COND_MOVE(q, one+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6))))), cond);
    VECTOR_COND_MOVE(r, half*y+p/q, cond);
    VECTOR_COND_MOVE(z, VECTOR_INIT(one), cond);	/* lgamma(1+s) = log(s) + lgamma(s) */
    /* The following conditionals emulate the fall-through switch of the
       scalar code: a lane with integer part i multiplies z by every
       factor from (y + i - 1) down to (y + 2).  */
    VECTOR_IF2(i==7, cond2, cond)
      VECTOR_COND_MOVE(z, z * (y+(float)6.0), cond2);
    VECTOR_ENDIF
    VECTOR_IF2(i==7 | i==6, cond2, cond)
      VECTOR_COND_MOVE(z, z * (y+(float)5.0), cond2);
    VECTOR_ENDIF
    VECTOR_IF2(i<=7 & i>=5, cond2, cond)
      VECTOR_COND_MOVE(z, z * (y+(float)4.0), cond2);
    VECTOR_ENDIF
    VECTOR_IF2(i<=7 & i>=4, cond2, cond)
      VECTOR_COND_MOVE(z, z * (y+(float)3.0), cond2);
    VECTOR_ENDIF
    VECTOR_IF2(i<=7 & i>=3, cond2, cond)
      VECTOR_COND_MOVE(z, z * (y+(float)2.0), cond2);
      VECTOR_COND_MOVE(r, r + v64sf_logf_aux(z, __mask), cond2);
    VECTOR_ENDIF
  /* 8.0 <= x < 2**58 */
  VECTOR_ELSEIF(ix < 0x5c800000, cond)
    VECTOR_COND_MOVE(t, v64sf_logf_aux(x, __mask), cond);
    VECTOR_COND_MOVE(z, one/x, cond);
    VECTOR_COND_MOVE(y, z*z, cond);
    VECTOR_COND_MOVE(w, w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*w6))))), cond);
    VECTOR_COND_MOVE(r, (x-half)*(t-one)+w, cond);
  VECTOR_ELSE(cond)
    /* 2**58 <= x <= inf */
    VECTOR_COND_MOVE(r, x*(v64sf_logf_aux(x, __mask)-one), cond);
  VECTOR_ENDIF

  /* Apply the reflection adjustment to lanes whose input was negative.  */
  VECTOR_IF(hx<0, cond)
    VECTOR_COND_MOVE(r, nadj - r, cond);
  VECTOR_ENDIF

  VECTOR_RETURN(r, NO_COND);

  FUNCTION_RETURN;
}
#endif

View File

@ -0,0 +1,111 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/******************************************************************
* The following routines are coded directly from the algorithms
* and coefficients given in "Software Manual for the Elementary
* Functions" by William J. Cody, Jr. and William Waite, Prentice
* Hall, 1980.
******************************************************************/
/* Based on newlib/libm/mathfp/sf_logarithm.c in Newlib. */
#include "amdgcnmach.h"
/* Vector classification helpers defined in other files; the second
   argument is the active-lane mask.  */
v64si v64sf_finitef_aux (v64sf, v64si);
v64si v64sf_isnanf_aux (v64sf, v64si);
/* Numerator coefficients of the rational approximation used by logf:
   (a[2]*w + a[1])*w + a[0].  */
static const float a[] = { -0.64124943423745581147e+02,
0.16383943563021534222e+02,
-0.78956112887481257267 };
/* Denominator coefficients of the same approximation:
   ((w + b[2])*w + b[1])*w + b[0].  */
static const float b[] = { -0.76949932108494879777e+03,
0.31203222091924532844e+03,
-0.35667977739034646171e+02 };
/* C1 + C2 is used as a two-part multiplier for the exponent term in
   logf: the result adds N*C1 and N*C2 separately.
   NOTE(review): presumably a high/low split of ln(2) -- confirm.  */
static const float C1 = 0.693145752;
static const float C2 = 1.428606820e-06;
#if defined (__has_builtin) \
&& __has_builtin (__builtin_gcn_frexpvf_mant) \
&& __has_builtin (__builtin_gcn_frexpvf_exp)
/* Vectorized logf: per-lane natural logarithm.

   Special cases, per lane:
     x == 0      -> -infinity, errno = ERANGE
     x <  0      -> NaN, errno = EDOM
     NaN         -> NaN
     +infinity   -> +infinity
   errno is a single scalar, so it is written whenever at least one lane
   takes the corresponding branch.

   Otherwise x is split as mant * 2^expo, a rational approximation in
   z*z is evaluated on the reduced argument, and expo * (C1 + C2) is
   added back in two steps.  */
DEF_VS_MATH_FUNC (v64sf, logf, v64sf x)
{
  FUNCTION_INIT (v64sf);

  /* Check for domain/range errors here. */
  VECTOR_IF (x == 0.0f, special)
    errno = ERANGE;
    VECTOR_RETURN (VECTOR_INIT (-z_infinity_f.f), special);
  VECTOR_ELSEIF (x < 0.0f, special)
    errno = EDOM;
    VECTOR_RETURN (VECTOR_INIT (z_notanum_f.f), special);
  VECTOR_ELSEIF (~v64sf_finitef_aux (x, __mask), special)
    VECTOR_RETURN (VECTOR_MERGE (VECTOR_INIT (z_notanum_f.f),
				 VECTOR_INIT (z_infinity_f.f),
				 v64sf_isnanf_aux (x, __mask)),
		   special);
  VECTOR_ENDIF

  /* Get the exponent and mantissa where x = mant * 2^expo. */
  v64sf mant = __builtin_gcn_frexpvf_mant (x);
  v64si expo = __builtin_gcn_frexpvf_exp (x);

  /* Reduced argument z; the formula depends on which side of
     __SQRT_HALF the mantissa lies.  */
  v64sf z = mant - 0.5f;
  VECTOR_IF (mant > (float) __SQRT_HALF, upper)
    VECTOR_COND_MOVE (z, (z - 0.5f) / (mant * 0.5f + 0.5f), upper);
  VECTOR_ELSE (upper)
    VECTOR_COND_MOVE (expo, expo - 1, upper);
    VECTOR_COND_MOVE (z, z / (z * 0.5f + 0.5f), upper);
  VECTOR_ENDIF

  v64sf w = z * z;

  /* Use Newton's method with 4 terms. */
  v64sf numer = (a[2] * w + a[1]) * w + a[0];
  v64sf denom = ((w + b[2]) * w + b[1]) * w + b[0];
  z += z * w * numer / denom;

  /* Add expo * (C1 + C2) for lanes with a non-zero exponent.  */
  v64sf expo_f = __builtin_convertvector(expo, v64sf);
  VECTOR_COND_MOVE (z, (expo_f * C2 + z) + expo_f * C1, expo != 0);

  VECTOR_RETURN (z, NO_COND);

  FUNCTION_RETURN;
}
DEF_VARIANTS (logf, sf, sf)
/* Vectorized log1pf: per-lane log (1 + x), computed by forming 1 + x
   and calling the logf helper.  */
DEF_VS_MATH_FUNC (v64sf, log1pf, v64sf x)
{
  /* TODO: Implement algorithm with better precision. */
  v64sf shifted = 1 + x;

  return v64sf_logf_aux (shifted, __mask);
}
DEF_VARIANTS (log1pf, sf, sf)
#endif

View File

@ -0,0 +1,50 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/******************************************************************
* The following routines are coded directly from the algorithms
* and coefficients given in "Software Manual for the Elementary
* Functions" by William J. Cody, Jr. and William Waite, Prentice
* Hall, 1980.
******************************************************************/
/* Based on newlib/libm/mathfp/sf_logarithm.c in Newlib. */
#include "amdgcnmach.h"
v64sf v64sf_logf_aux (v64sf, v64si);
static const float C3 = 0.4342944819;
DEF_VS_MATH_FUNC (v64sf, log10f, v64sf x)
{
  /* Take the natural logarithm first, then rescale it by the file-level
     constant C3 (0.4342944819) to change the base to 10.  */
  v64sf natural_log = v64sf_logf_aux (x, __mask);
  return natural_log * C3;
}
DEF_VARIANTS (log10f, sf, sf)

View File

@ -0,0 +1,26 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
#include "amdgcnmach.h"
v64sf v64sf_logf_aux (v64sf, v64si);
static const float C3 = 1.4426950408889634073599246810019;
DEF_VS_MATH_FUNC (v64sf, log2f, v64sf x)
{
  /* Take the natural logarithm first, then rescale it by the file-level
     constant C3 (1.44269504...) to change the base to 2.  */
  v64sf natural_log = v64sf_logf_aux (x, __mask);
  return natural_log * C3;
}
DEF_VARIANTS (log2f, sf, sf)

View File

@ -0,0 +1,69 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/common/sf_modf.c in Newlib. */
#include "amdgcnmach.h"
v64si v64sf_numtestf (v64sf);
/* Vector modff: for each lane, split x into an integral part (stored
through iptr) and a fractional part (the return value).  Both parts carry
the sign of x; "zero" below is a zero that keeps only x's sign bit.
The integral part is accumulated in ret_i and written to *iptr once, as a
whole vector, after all branches have run.  */
DEF_VS_MATH_FUNC (v64sf, modff, v64sf x, v64sf *iptr)
{
FUNCTION_INIT (v64sf);
/* Integral part, assembled per lane by the masked moves below.  */
v64sf ret_i;
v64si i0;
GET_FLOAT_WORD (i0, x, NO_COND);
v64si j0 = ((i0 >> 23) & 0xff) - 0x7f; /* exponent of x */
/* Signed zero: built from the sign bit of x only.  */
v64sf zero;
SET_FLOAT_WORD (zero, i0 & 0x80000000,NO_COND);
VECTOR_IF (j0 < 23, cond) /* integer part in x*/
VECTOR_IF2 (j0 < 0, cond2, cond) /* |x|<1 */
/* Integral part is +-0, the fraction is x itself.  */
VECTOR_COND_MOVE (ret_i, zero, cond2);
VECTOR_RETURN (x, cond2);
VECTOR_ELSE2 (cond2, cond)
/* Mask selecting the mantissa bits that lie below the binary point.  */
v64si i = (0x007fffff) >> j0;
VECTOR_IF2 ((i0 & i) == 0, cond3, cond2) /* x is integral */
VECTOR_COND_MOVE (ret_i, x, cond3);
VECTOR_RETURN (zero, cond3);
VECTOR_ELSE2 (cond3, cond2)
/* Clear the fraction bits to truncate toward zero, then subtract.  */
SET_FLOAT_WORD (ret_i, i0 & ~i, cond3);
VECTOR_RETURN (x - ret_i, cond3);
VECTOR_ENDIF
VECTOR_ENDIF
VECTOR_ELSE (cond) /* no fraction part */
VECTOR_COND_MOVE (ret_i, x, cond);
VECTOR_IF2 (v64sf_numtestf (x) == NAN, cond2, cond)
/* For NaN lanes both the stored and the returned value are x + x.  */
VECTOR_COND_MOVE (ret_i, x + x, cond2);
VECTOR_RETURN (ret_i, cond2); /* x is NaN, return NaN */
VECTOR_ENDIF
VECTOR_RETURN (zero, cond); /* return +- 0 */
VECTOR_ENDIF
*iptr = ret_i;
FUNCTION_RETURN;
}

View File

@ -0,0 +1,59 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/* Based on newlib/libm/mathfp/sf_numtest.c in Newlib. */
#include "amdgcnmach.h"
/* Classify every lane of X.  The per-lane result is:
     0    if the lane compares equal to zero,
     NAN  if the exponent field is all ones and the mantissa is non-zero,
     INF  if the exponent field is all ones and the mantissa is zero,
     NUM  for every other (finite, non-zero) value.
   This is an internal helper, so it has no caller-supplied lane mask and
   builds an all-lanes mask itself.  */
v64si
v64sf_numtestf (v64sf x)
{
  // Explicitly create mask for internal function.
  v64si __mask = VECTOR_INIT (-1);
  FUNCTION_INIT (v64si);

  v64si wx;
  GET_FLOAT_WORD (wx, x, NO_COND);
  v64si exp = (wx & 0x7f800000) >> 23;

  /* Check for a zero input. */
  VECTOR_RETURN (VECTOR_INIT (0), x == 0.0);

  /* Check for not a number or infinity. */
  VECTOR_IF (exp == 0xff, cond)
    /* Select with a canonical 0/-1 lane mask rather than the raw mantissa
       bits, like every other VECTOR_MERGE selector in these routines;
       the choice must not depend on which mantissa bits happen to be
       set.  */
    VECTOR_RETURN (VECTOR_MERGE (VECTOR_INIT (NAN), VECTOR_INIT (INF),
                                 (wx & 0x7fffff) != 0),
                   cond);
  /* Otherwise it's a finite value. */
  VECTOR_ELSE (cond)
    VECTOR_RETURN (VECTOR_INIT (NUM), cond);
  VECTOR_ENDIF

  FUNCTION_RETURN;
}

View File

@ -0,0 +1,306 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/math/ef_pow.c in Newlib. */
#include "amdgcnmach.h"
/* Constants for the vector powf below.  Names and values follow the
   scalar ef_pow.c this file is based on; the hexadecimal value in each
   trailing comment is the IEEE single-precision bit pattern.
   NOTE(review): zero, one and two are not referenced by the powf body
   visible in this file -- confirm before removing.  */
static const float
bp[] = {1.0, 1.5,},
dp_h[] = { 0.0, 5.84960938e-01,}, /* 0x3f15c000 */
dp_l[] = { 0.0, 1.56322085e-06,}, /* 0x35d1cfdc */
zero = 0.0,
one = 1.0,
two = 2.0,
two24 = 16777216.0, /* 0x4b800000 */
/* poly coefs for (3/2)*(log(x)-2s-2/3*s**3) */
L1 = 6.0000002384e-01, /* 0x3f19999a */
L2 = 4.2857143283e-01, /* 0x3edb6db7 */
L3 = 3.3333334327e-01, /* 0x3eaaaaab */
L4 = 2.7272811532e-01, /* 0x3e8ba305 */
L5 = 2.3066075146e-01, /* 0x3e6c3255 */
L6 = 2.0697501302e-01, /* 0x3e53f142 */
P1 = 1.6666667163e-01, /* 0x3e2aaaab */
P2 = -2.7777778450e-03, /* 0xbb360b61 */
P3 = 6.6137559770e-05, /* 0x388ab355 */
P4 = -1.6533901999e-06, /* 0xb5ddea0e */
P5 = 4.1381369442e-08, /* 0x3331bb4c */
lg2 = 6.9314718246e-01, /* 0x3f317218 */
lg2_h = 6.93145752e-01, /* 0x3f317200 */
lg2_l = 1.42860654e-06, /* 0x35bfbe8c */
ovt = 4.2995665694e-08, /* -(128-log2(ovfl+.5ulp)) */
cp = 9.6179670095e-01, /* 0x3f76384f =2/(3ln2) */
cp_h = 9.6179199219e-01, /* 0x3f763800 =head of cp */
cp_l = 4.7017383622e-06, /* 0x369dc3a0 =tail of cp_h */
ivln2 = 1.4426950216e+00, /* 0x3fb8aa3b =1/ln2 */
ivln2_h = 1.4426879883e+00, /* 0x3fb8aa00 =16b 1/ln2*/
ivln2_l = 7.0526075433e-06; /* 0x36eca570 =1/ln2 tail*/
v64sf v64sf_sqrtf_aux (v64sf, v64si);
v64sf v64sf_scalbnf_aux (v64sf, v64si, v64si);
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsvf)
/* Vector powf: computes x**y for every lane.

   The structure follows the scalar ef_pow.c this file is based on:
     1. special cases for y (0, NaN, +-inf, +-1, 2, 0.5) and for
        x (NaN, +-0, +-inf, +-1, negative with non-integer y);
     2. log2(|x|) as a head/tail pair t1 + t2, using a short series when
        |y| is huge and x is close to 1, and the full reduction otherwise;
     3. y * log2(|x|) as p_h + p_l, with overflow/underflow detection;
     4. 2**(p_h + p_l), scaled by the integer part n and given the sign s.

   Lanes leave through VECTOR_RETURN as soon as their result is known; the
   final unconditional VECTOR_RETURN supplies the remaining lanes.  */
DEF_VS_MATH_FUNC (v64sf, powf, v64sf x, v64sf y)
{
  FUNCTION_INIT (v64sf);

  /* hx/hy: raw bit patterns; ix/iy: the same with the sign bit cleared.  */
  v64si hx, hy;
  GET_FLOAT_WORD (hx,x, NO_COND);
  GET_FLOAT_WORD (hy,y, NO_COND);
  v64si ix = hx&0x7fffffff;
  v64si iy = hy&0x7fffffff;

  /* y==zero: x**0 = 1 */
  VECTOR_IF (FLT_UWORD_IS_ZERO(iy), cond)
    VECTOR_RETURN (x + y, cond & v64sf_issignalingf_inline(x));
    VECTOR_RETURN (VECTOR_INIT (1.0f), cond);
  VECTOR_ENDIF

  /* x|y==NaN return NaN unless x==1 then return 1 */
  VECTOR_IF (FLT_UWORD_IS_NAN(ix) | FLT_UWORD_IS_NAN(iy), cond)
    VECTOR_IF2 (hx==0x3f800000 & ~v64sf_issignalingf_inline(y), cond2, cond)
      VECTOR_RETURN (VECTOR_INIT (1.0f), cond2);
    VECTOR_ELSE2 (cond2, cond)
      VECTOR_RETURN (x + y, cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF

  /* determine if y is an odd int when x < 0
   * yisint = 0	... y is not an integer
   * yisint = 1	... y is an odd int
   * yisint = 2	... y is an even int
   */
  v64si yisint = VECTOR_INIT (0);

  VECTOR_IF (hx < 0, cond)
    VECTOR_IF2 (iy >= 0x4b800000, cond2, cond)
      VECTOR_COND_MOVE (yisint, VECTOR_INIT (2), cond2); /* even integer y */
    VECTOR_ELSEIF2 (iy >= 0x3f800000, cond2, cond)
      v64si k = (iy>>23)-0x7f;	/* exponent */
      v64si j = iy>>(23-k);
      VECTOR_COND_MOVE (yisint, 2-(j&1), cond2 & (j<<(23-k))==iy);
    VECTOR_ENDIF
  VECTOR_ENDIF

  /* special value of y */
  VECTOR_IF (FLT_UWORD_IS_INFINITE(iy), cond)	/* y is +-inf */
    VECTOR_IF2 (ix==0x3f800000, cond2, cond)
      VECTOR_RETURN (VECTOR_INIT (1.0f), cond2);	/* +-1**+-inf = 1 */
    VECTOR_ELSEIF2 (ix > 0x3f800000, cond2, cond) /* (|x|>1)**+-inf = inf,0 */
      VECTOR_RETURN (y, cond2 & (hy >= 0));
      VECTOR_RETURN (VECTOR_INIT (0.0f), cond2);
    VECTOR_ELSE2 (cond2, cond)	/* (|x|<1)**-,+inf = inf,0 */
      VECTOR_RETURN (-y, cond2 & (hy<0));
      VECTOR_RETURN (VECTOR_INIT (0.0f), cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF

  VECTOR_IF (iy==0x3f800000, cond)	/* y is +-1 */
    VECTOR_RETURN (VECTOR_INIT (1.0f) / x, cond & (hy<0));
    VECTOR_RETURN (x, cond);
  VECTOR_ENDIF

  VECTOR_RETURN (x*x, hy==0x40000000);	/* y is 2 */

  /* y is 0.5 */
  /* x >= +0 */
  VECTOR_RETURN (v64sf_sqrtf_aux (x, __mask), (hy==0x3f000000) & (hx >= 0));

  v64sf ax = __builtin_gcn_fabsvf(x);

  /* special value of x */
  VECTOR_IF (FLT_UWORD_IS_INFINITE(ix)|FLT_UWORD_IS_ZERO(ix)|ix==0x3f800000, cond)
    v64sf z = ax;	/*x is +-0,+-inf,+-1*/
    VECTOR_COND_MOVE (z, VECTOR_INIT (1.0f) / z, cond & (hy < 0)); /* z = (1/|x|) */
    VECTOR_IF2 (hx<0, cond2, cond)
      VECTOR_IF2 (((ix-0x3f800000)|yisint)==0, cond3, cond2)
        /* (-1)**non-int is NaN */
        VECTOR_COND_MOVE (z, (z-z)/(z-z), cond3);
      VECTOR_ELSEIF2 (yisint==1, cond3, cond2)
        /* (x<0)**odd = -(|x|**odd) */
        VECTOR_COND_MOVE (z, -z, cond3);
      VECTOR_ENDIF
    VECTOR_ENDIF
    VECTOR_RETURN (z, cond);
  VECTOR_ENDIF

  /* (x<0)**(non-int) is NaN */
  VECTOR_RETURN ((x-x)/(x-x), ((((hx >> 31) & 1) - 1)|yisint)==0);

  /* t1 + t2 ~= log2(|x|), t1 holding the head with its low 12 bits
     cleared.  Both branches below must leave t1/t2 set for their lanes.  */
  v64sf t1, t2;

  /* |y| is huge */
  VECTOR_IF (iy>0x4d000000, cond) /* if |y| > 2**27 */
    /* over/underflow if x is not close to one */
    VECTOR_IF2(ix<0x3f7ffff4, cond2, cond)
      VECTOR_RETURN (v64sf_math_oflowf(VECTOR_INIT (0)), cond2 & (hy < 0));
      VECTOR_RETURN (v64sf_math_uflowf(VECTOR_INIT (0)), cond2);
    VECTOR_ENDIF
    VECTOR_IF2(ix>0x3f800007, cond2, cond)
      VECTOR_RETURN (v64sf_math_oflowf(VECTOR_INIT (0)), cond2 & (hy > 0));
      VECTOR_RETURN (v64sf_math_uflowf(VECTOR_INIT (0)), cond2);
    VECTOR_ENDIF
    /* now |1-x| is tiny <= 2**-20, suffice to compute
       log(x) by x-x^2/2+x^3/3-x^4/4 */
    v64sf t = ax-1;		/* t has 20 trailing zeros */
    v64sf w = (t*t)*(0.5f-t*(0.333333333333f-t*0.25f));
    v64sf u = ivln2_h*t;	/* ivln2_h has 16 sig. bits */
    v64sf v = t*ivln2_l-w*ivln2;
    VECTOR_COND_MOVE (t1, u+v, cond);
    v64si is;
    GET_FLOAT_WORD(is,t1, cond);
    SET_FLOAT_WORD(t1,is&0xfffff000, cond);
    VECTOR_COND_MOVE (t2, v-(t1-u), cond);
    /* These lanes do not return here: as in the scalar algorithm they
       fall through to the shared y*(t1+t2) evaluation below.  (A leftover
       placeholder return of 0.123456f used to sit at this point and
       short-circuited every such lane.)  */
  VECTOR_ELSE (cond)
    v64si n = VECTOR_INIT (0);
    /* take care subnormal number */
    VECTOR_IF2 (FLT_UWORD_IS_SUBNORMAL(ix), cond2, cond)
      /* Scale only the subnormal lanes (cond2); ix is refreshed from the
         scaled value for exactly the same lanes.  */
      VECTOR_COND_MOVE (ax, ax * two24, cond2);
      VECTOR_COND_MOVE (n, n - 24, cond2);
      GET_FLOAT_WORD(ix,ax, cond2);
    VECTOR_ENDIF
    n  += (ix>>23)-0x7f;
    v64si j  = ix&0x007fffff;
    /* determine interval */
    /* Note: this declaration deliberately shadows the outer ix for the
       rest of this branch.  */
    v64si ix = j|0x3f800000;	/* normalize ix */
    v64si k;
    VECTOR_IF2 (j<=0x1cc471, cond2, cond)
      VECTOR_COND_MOVE (k, VECTOR_INIT (0), cond2);	/* |x|<sqrt(3/2) */
    VECTOR_ELSEIF2 (j<0x5db3d7, cond2, cond)
      VECTOR_COND_MOVE (k, VECTOR_INIT (1), cond2);	/* |x|<sqrt(3)   */
    VECTOR_ELSE2 (cond2, cond)
      VECTOR_COND_MOVE (k, VECTOR_INIT (0), cond2);
      VECTOR_COND_MOVE (n, n + 1, cond2);
      VECTOR_COND_MOVE (ix, ix - 0x00800000, cond2);
    VECTOR_ENDIF
    SET_FLOAT_WORD(ax,ix, cond);

    /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
    /* bp[0]=1.0, bp[1]=1.5 */
    v64sf bp_k = VECTOR_MERGE (VECTOR_INIT (bp[1]), VECTOR_INIT (bp[0]), k == 1);
    v64sf u = ax-bp_k;
    v64sf v = 1.0f/(ax+bp_k);
    v64sf s = u*v;
    v64sf s_h = s;
    v64si is;
    GET_FLOAT_WORD(is,s_h, cond);
    SET_FLOAT_WORD(s_h,is&0xfffff000, cond);
    /* t_h=ax+bp[k] High */
    v64sf t_h;
    SET_FLOAT_WORD(t_h,((ix>>1)|0x20000000)+0x0040000+(k<<21), cond);
    v64sf t_l = ax - (t_h-bp_k);
    v64sf s_l = v*((u-s_h*t_h)-s_h*t_l);
    /* compute log(ax) */
    v64sf s2 = s*s;
    v64sf r = s2*s2*(L1+s2*(L2+s2*(L3+s2*(L4+s2*(L5+s2*L6)))));
    r += s_l*(s_h+s);
    s2  = s_h*s_h;
    t_h = __builtin_convertvector(3.0f+s2+r, v64sf);
    GET_FLOAT_WORD(is,t_h, cond);
    SET_FLOAT_WORD(t_h,is&0xfffff000, cond);
    t_l = r-((t_h-3.0f)-s2);
    /* u+v = s*(1+...) */
    u = s_h*t_h;
    v = s_l*t_h+t_l*s;
    /* 2/(3log2)*(s+...) */
    v64sf p_h = u+v;
    GET_FLOAT_WORD(is,p_h, cond);
    SET_FLOAT_WORD(p_h,is&0xfffff000, cond);
    v64sf p_l = v-(p_h-u);
    v64sf z_h = cp_h*p_h;		/* cp_h+cp_l = 2/(3*log2) */
    v64sf dp_l_k = VECTOR_MERGE (VECTOR_INIT (dp_l[1]), VECTOR_INIT (dp_l[0]), k == 1);
    v64sf z_l = cp_l*p_h+p_l*cp+dp_l_k;
    /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
    v64sf t = __builtin_convertvector (n, v64sf);
    v64sf dp_h_k = VECTOR_MERGE (VECTOR_INIT (dp_h[1]), VECTOR_INIT (dp_h[0]), k == 1);
    VECTOR_COND_MOVE (t1, (((z_h+z_l)+dp_h_k)+t), cond);
    GET_FLOAT_WORD(is,t1, cond);
    SET_FLOAT_WORD(t1,is&0xfffff000, cond);
    VECTOR_COND_MOVE (t2, z_l-(((t1-t)-dp_h_k)-z_h), cond);
  VECTOR_ENDIF

  v64sf s = VECTOR_INIT (1.0f); /* s (sign of result -ve**odd) = -1 else = 1 */
  VECTOR_COND_MOVE (s, VECTOR_INIT (-1.0f), /* (-ve)**(odd int) */
                    ((hx>>31) != 0)&(yisint == 1));

  /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
  v64si is;
  GET_FLOAT_WORD(is,y, NO_COND);
  v64sf y1;
  SET_FLOAT_WORD(y1,is&0xfffff000, NO_COND);
  v64sf p_l = (y-y1)*t1+y*t2;
  v64sf p_h = y1*t1;
  v64sf z = p_l+p_h;
  v64si j;
  GET_FLOAT_WORD(j,z, NO_COND);
  v64si i = j&0x7fffffff;
  VECTOR_IF (j>0, cond)
    VECTOR_RETURN (v64sf_math_oflowf(s<0), cond & i>FLT_UWORD_EXP_MAX); /* overflow */
    VECTOR_RETURN (v64sf_math_oflowf(s<0), cond & (i==FLT_UWORD_EXP_MAX)
                                           & (p_l+ovt>z-p_h)); /* overflow */
  VECTOR_ELSE (cond)
    VECTOR_RETURN (v64sf_math_uflowf(s<0), cond & (i>FLT_UWORD_EXP_MIN)); /* underflow */
    VECTOR_RETURN (v64sf_math_uflowf(s<0), cond & (i==FLT_UWORD_EXP_MIN)
                                           & (p_l<=z-p_h)); /* underflow */
  VECTOR_ENDIF
  /*
   * compute 2**(p_h+p_l)
   */
  v64si k = (i>>23)-0x7f;
  v64si n = VECTOR_INIT (0);
  VECTOR_IF (i>0x3f000000, cond) /* if |z| > 0.5, set n = [z+0.5] */
    VECTOR_COND_MOVE (n, j+(0x00800000>>(k+1)), cond);
    k = ((n&0x7fffffff)>>23)-0x7f;	/* new k for n */
    v64sf t;
    SET_FLOAT_WORD(t,n&~(0x007fffff>>k), cond);
    VECTOR_COND_MOVE (n, ((n&0x007fffff)|0x00800000)>>(23-k), cond);
    VECTOR_COND_MOVE (n, -n, cond & (j<0));
    VECTOR_COND_MOVE (p_h, p_h - t, cond);
  VECTOR_ENDIF
  v64sf t = p_l+p_h;
  GET_FLOAT_WORD(is,t, NO_COND);
  SET_FLOAT_WORD(t,is&0xfffff000, NO_COND);
  v64sf u = t*lg2_h;
  v64sf v = (p_l-(t-p_h))*lg2+t*lg2_l;
  z = u+v;
  v64sf w = v-(z-u);
  t  = z*z;
  t1  = z - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
  v64sf r  = (z*t1)/(t1-2.0f)-(w+z*w);
  z  = VECTOR_INIT (1.0f)-(r-z);
  GET_FLOAT_WORD(j,z, NO_COND);
  /* Add n to the exponent field directly; if that would leave the normal
     range, let scalbnf do the scaling instead.  */
  j += (n<<23);
  VECTOR_IF ((j>>23)<=0, cond)
    VECTOR_COND_MOVE (z, v64sf_scalbnf_aux(z, n, __mask), cond); /* subnormal output */
  VECTOR_ELSE (cond)
    SET_FLOAT_WORD(z, j, cond);
  VECTOR_ENDIF

  VECTOR_RETURN (s*z, NO_COND);

  FUNCTION_RETURN;
}
DEF_VARIANTS2 (powf, sf, sf)
#endif

View File

@ -0,0 +1,85 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/mathfp/ef_remainder.c in Newlib. */
#include "amdgcnmach.h"
v64sf v64sf_fmodf_aux (v64sf, v64sf, v64si);
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsvf)
/* Vector remainderf: per-lane remainder of x with respect to p.
Lanes with p == 0, x not finite, or p NaN return (x*p)/(x*p).  Otherwise
x is first reduced with fmodf against 2p (when 2p cannot overflow), then
|x| has |p| subtracted at most twice, and finally the sign bit of the
original x is XORed back into the result.  */
DEF_VS_MATH_FUNC (v64sf, remainderf, v64sf x, v64sf p)
{
FUNCTION_INIT (v64sf);
v64si hx;
GET_FLOAT_WORD (hx, x, NO_COND);
v64si hp;
GET_FLOAT_WORD (hp, p, NO_COND);
/* Remember the sign of x; hx and hp hold magnitudes from here on.  */
v64si sx = hx & 0x80000000;
hp &= 0x7fffffff;
hx &= 0x7fffffff;
/* purge off exception values */
/*if(hp==0) // p = 0 */
/*if((hx>=0x7f800000)|| // x not finite
((hp>0x7f800000))) // p is NaN */
VECTOR_RETURN ((x*p) / (x*p),
(hp == 0) | (hx >= 0x7f800000) | (hp > 0x7f800000));
/* if (hp<=0x7effffff) // now x < 2p */
VECTOR_COND_MOVE (x, v64sf_fmodf_aux (x, p+p, __mask), hp <= 0x7effffff);
/*if ((hx-hp)==0) */
/* hx still holds the magnitude bits of the original x here; it is not
refreshed after the fmodf reduction above.  */
VECTOR_RETURN (0.0f * x, (hx-hp) == 0);
x = __builtin_gcn_fabsvf (x);
p = __builtin_gcn_fabsvf (p);
/* For very small p compare 2x against p instead of x against p/2.  */
VECTOR_IF (hp < 0x01000000, cond)
VECTOR_IF2 (x + x > p, cond2, cond)
VECTOR_COND_MOVE (x, x - p, cond2);
/* The second test is evaluated on the already-updated x.  */
VECTOR_COND_MOVE (x, x - p, cond2 & (x + x >= p));
VECTOR_ENDIF
VECTOR_ELSE (cond)
v64sf p_half = 0.5f * p;
VECTOR_IF2 (x > p_half, cond2, cond)
VECTOR_COND_MOVE (x, x - p, cond2);
/* The second test is evaluated on the already-updated x.  */
VECTOR_COND_MOVE (x, x - p, cond2 & (x >= p_half));
VECTOR_ENDIF
VECTOR_ENDIF
/* Flip the result's sign bit wherever the original x was negative.  */
GET_FLOAT_WORD (hx, x, NO_COND);
SET_FLOAT_WORD (x, hx ^ sx, NO_COND);
VECTOR_RETURN (x, NO_COND);
FUNCTION_RETURN;
}
DEF_VARIANTS2 (remainderf, sf, sf)
#endif

View File

@ -0,0 +1,76 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/common/sf_rint.c in Newlib. */
#include "amdgcnmach.h"
static const float TWO23[2] = {
8.3886080000e+06, /* 0x4b000000 */
-8.3886080000e+06, /* 0xcb000000 */
};
/* Vector rintf: round every lane of x to an integral value.

   The rounding itself is done by the floating-point unit: adding and then
   subtracting +-2**23 (TWO23[sign]) pushes the fraction bits out of the
   mantissa.  Before that, the low fraction bits of x are collapsed into a
   single sticky bit so the intermediate sum does not round twice.
   Zero, already-integral, infinite and NaN lanes return early.  */
DEF_VS_MATH_FUNC (v64sf, rintf, v64sf x)
{
  FUNCTION_INIT (v64sf);

  v64si i0;
  GET_FLOAT_WORD (i0, x, NO_COND);
  v64si sx = (i0 >> 31) & 1;
  /* Per-lane rounding bias: -2**23 for negative lanes, +2**23 otherwise.
     This must be the bare constant (TWO23[sx] in the scalar algorithm);
     it must not have x folded into it.  */
  v64sf two23 = VECTOR_MERGE (VECTOR_INIT (TWO23[1]), VECTOR_INIT (TWO23[0]),
                              sx != 0);
  v64si ix = (i0 & 0x7fffffff);
  v64si j0 = (ix >> 23) - 0x7f;		/* unbiased exponent of x */

  VECTOR_IF (j0 < 23, cond)
    VECTOR_RETURN (x, cond & FLT_UWORD_IS_ZERO (ix));
    VECTOR_IF2 (j0 < 0, cond2, cond)
      /* |x| < 1: keep the top mantissa bits and fold the remaining ones
         into one sticky bit.  */
      v64si i1 = (i0 & 0x07fffff);
      VECTOR_COND_MOVE (i0, i0 & 0xfff00000, cond2);
      VECTOR_COND_MOVE (i0, i0 | (((i1 | -i1) >> 9) & 0x400000), cond2);
      SET_FLOAT_WORD (x, i0, cond2);
      v64sf w = two23 + x;
      v64sf t = w - two23;
      /* Restore the sign of x so that a zero result keeps it.  */
      GET_FLOAT_WORD (i0, t, cond2);
      SET_FLOAT_WORD (t, (i0&0x7fffffff)|(sx<<31), cond2);
      VECTOR_RETURN (t, cond2);
    VECTOR_ELSE2 (cond2, cond)
      /* i masks the fraction bits for this exponent.  */
      v64si i = (0x007fffff) >> j0;
      VECTOR_RETURN (x, cond2 & ((i0 & i) == 0)); /* x is integral */
      i >>= 1;
      VECTOR_COND_MOVE (i0, (i0 & (~i)) | (0x200000 >> j0),
                        cond2 & ((i0 & i) != 0));
    VECTOR_ENDIF
  VECTOR_ELSE (cond)
    VECTOR_RETURN (x + x, cond & ~FLT_UWORD_IS_FINITE (ix)); /* inf or NaN */
    VECTOR_RETURN (x, cond); /* x is integral */
  VECTOR_ENDIF

  SET_FLOAT_WORD (x, i0, NO_COND);
  v64sf w = two23 + x;
  VECTOR_RETURN (w - two23, NO_COND);

  FUNCTION_RETURN;
}
DEF_VARIANTS (rintf, sf, sf)

View File

@ -0,0 +1,73 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/mathfp/ef_scalb.c in Newlib. */
#include "amdgcnmach.h"
v64si v64sf_isnanf (v64sf);
v64si v64sf_finitef (v64sf);
v64sf v64sf_rintf_aux (v64sf, v64si);
v64sf v64sf_scalbnf_aux (v64sf, v64si, v64si);
/* Vector scalbf: scale x by a power of two given as the float vector fn.
Special lanes are handled first, in this order:
- x or fn NaN: return x * fn;
- fn infinite: return x * fn for positive fn, x / (-fn) otherwise;
- fn not integral (rintf (fn) != fn): return (fn-fn)/(fn-fn).
Remaining lanes go through scalbnf, with fn clamped to a fixed bound
before it is converted to an integer vector.  */
DEF_VS_MATH_FUNC (v64sf, scalbf, v64sf x, v64sf fn)
{
FUNCTION_INIT (v64sf);
VECTOR_IF (v64sf_isnanf(x) | v64sf_isnanf(fn), cond)
VECTOR_RETURN (x * fn, cond);
VECTOR_ENDIF
/* NOTE(review): the bitwise ~ assumes v64sf_finitef yields 0/-1 lane
masks rather than 0/1 -- confirm against its definition.  */
VECTOR_IF (~v64sf_finitef (fn), cond)
VECTOR_IF2 (fn > 0.0f, cond2, cond)
VECTOR_RETURN (x * fn, cond2);
VECTOR_ELSE2 (cond2, cond)
VECTOR_RETURN (x / (-fn), cond2);
VECTOR_ENDIF
VECTOR_ENDIF
VECTOR_IF (v64sf_rintf_aux (fn, __mask) != fn, cond)
VECTOR_RETURN ((fn-fn)/(fn-fn), cond);
VECTOR_ENDIF
/* Clamp |fn| before the float-to-int conversion at the end.
NOTE(review): this file includes only amdgcnmach.h; if INT_MAX is not
defined through it, the preprocessor evaluates it as 0 and the 32000
branch is used -- confirm.  */
#if INT_MAX > 65000
VECTOR_IF (fn > 65000.0f, cond)
VECTOR_RETURN (v64sf_scalbnf_aux (x, VECTOR_INIT (65000), __mask), cond);
VECTOR_ENDIF
VECTOR_IF (-fn > 65000.0f, cond)
VECTOR_RETURN (v64sf_scalbnf_aux (x, VECTOR_INIT (-65000), __mask), cond);
VECTOR_ENDIF
#else
VECTOR_IF (fn > 32000.0f, cond)
VECTOR_RETURN (v64sf_scalbnf_aux (x, VECTOR_INIT (32000), __mask), cond);
VECTOR_ENDIF
VECTOR_IF (-fn > 32000.0f, cond)
VECTOR_RETURN (v64sf_scalbnf_aux (x, VECTOR_INIT (-32000), __mask), cond);
VECTOR_ENDIF
#endif
/* All remaining lanes: fn is integral and within the clamp bounds.  */
VECTOR_RETURN (v64sf_scalbnf_aux (x, __builtin_convertvector (fn, v64si), __mask),
NO_COND);
FUNCTION_RETURN;
}
DEF_VARIANTS2 (scalbf, sf, sf)

View File

@ -0,0 +1,92 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/common/sf_scalbn.c in Newlib. */
#include "amdgcnmach.h"
#include <limits.h>
#include <float.h>
#if INT_MAX > 50000
#define OVERFLOW_INT 50000
#else
#define OVERFLOW_INT 30000
#endif
static const float
two25 = 3.355443200e+07, /* 0x4c000000 */
twom25 = 2.9802322388e-08, /* 0x33000000 */
huge = 1.0e+30,
tiny = 1.0e-30;
v64sf v64sf_copysignf_aux (v64sf, v64sf, v64si);
/* Vector scalbnf: multiply each lane of x by 2**n by adjusting the
exponent field directly.
- Zero lanes return x; infinite and NaN lanes return x + x.
- Subnormal inputs are pre-scaled by 2**25 so they have a normal exponent.
- Overflow returns huge * copysign (huge, x); underflow returns
tiny * copysign (tiny, x).
- Results that end up subnormal are built with the exponent raised by 25
and then multiplied by 2**-25.  */
DEF_VS_MATH_FUNC (v64sf, scalbnf, v64sf x, v64si n)
{
FUNCTION_INIT (v64sf);
const v64sf huge_v = VECTOR_INIT ((float) huge);
const v64sf tiny_v = VECTOR_INIT ((float) tiny);
v64si ix;
GET_FLOAT_WORD (ix, x, NO_COND);
v64si hx = ix & 0x7fffffff;
v64si k = hx >> 23; /* extract exponent */
VECTOR_IF (FLT_UWORD_IS_ZERO(hx), cond)
VECTOR_RETURN (x, cond);
VECTOR_ENDIF
VECTOR_IF (~FLT_UWORD_IS_FINITE(hx), cond) /* NaN or Inf */
VECTOR_RETURN (x + x, cond);
VECTOR_ENDIF
/* NOTE(review): the ';' after VECTOR_IF below is unlike every other use
of the macro in this file; it looks like a stray empty statement --
confirm against the macro definition.  */
VECTOR_IF (FLT_UWORD_IS_SUBNORMAL (hx), cond);
VECTOR_COND_MOVE (x, x * two25, cond);
GET_FLOAT_WORD (ix, x, cond);
VECTOR_COND_MOVE (k, ((ix & 0x7f800000) >> 23) - 25, cond);
VECTOR_IF2 (n < -50000, cond2, cond) /*underflow*/
VECTOR_RETURN (tiny * x, cond2);
VECTOR_ENDIF
VECTOR_ENDIF
VECTOR_IF (n > OVERFLOW_INT, cond) // in case integer overflow in n+k
VECTOR_RETURN (huge_v * v64sf_copysignf_aux (huge_v, x, __mask), cond);
VECTOR_ENDIF
/* k is now the candidate exponent field of the result.  */
k = k + n;
VECTOR_IF (k > FLT_LARGEST_EXP, cond) /* overflow */
VECTOR_RETURN (huge_v * v64sf_copysignf_aux (huge_v, x, __mask), cond);
VECTOR_ENDIF
VECTOR_IF (k > 0, cond) /* normal result */
SET_FLOAT_WORD (x, (ix & 0x807fffff) | (k << 23), cond);
VECTOR_RETURN (x, cond);
VECTOR_ENDIF
VECTOR_IF (k < FLT_SMALLEST_EXP, cond) /*underflow*/
/* NOTE(review): the mask passed here repeats the VECTOR_IF condition
instead of using cond, unlike the blocks above.  */
VECTOR_RETURN (tiny_v * v64sf_copysignf_aux (tiny_v, x, __mask),
k < FLT_SMALLEST_EXP);
VECTOR_ENDIF
k += 25; /* subnormal result */
SET_FLOAT_WORD (x, (ix & 0x807fffff) | (k << 23), NO_COND);
VECTOR_RETURN (x * twom25, NO_COND);
FUNCTION_RETURN;
}

View File

@ -0,0 +1,38 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/mathfp/sf_signif.c in Newlib. */
#include "amdgcnmach.h"
v64sf v64sf_scalbf_aux (v64sf x, v64sf fn, v64si);
v64si v64sf_ilogbf_aux (v64sf x, v64si);
DEF_VS_MATH_FUNC (v64sf, significandf, v64sf x)
{
  /* Fetch the integer exponent of x, negate it as a float vector, and
     pass it to scalbf as the scaling argument.  */
  v64si exponent = v64sf_ilogbf_aux (x, __mask);
  v64sf neg_exponent = -__builtin_convertvector (exponent, v64sf);
  return v64sf_scalbf_aux (x, neg_exponent, __mask);
}
DEF_VARIANTS (significandf, sf, sf)

View File

@ -0,0 +1,41 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/* Based on newlib/libm/mathfp/sf_sin.c in Newlib. */
#include "amdgcnmach.h"
v64sf v64sf_sinef_aux (v64sf, int, v64si);
DEF_VS_MATH_FUNC (v64sf, sinf, v64sf x)
{
  /* Delegate to the sinef helper with its int selector set to 0.  */
  const int selector = 0;
  return v64sf_sinef_aux (x, selector, __mask);
}
DEF_VARIANTS (sinf, sf, sf)

View File

@ -0,0 +1,122 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/******************************************************************
* The following routines are coded directly from the algorithms
* and coefficients given in "Software Manual for the Elementary
* Functions" by William J. Cody, Jr. and William Waite, Prentice
* Hall, 1980.
******************************************************************/
/* Based on newlib/libm/mathfp/sf_sine.c in Newlib. */
#include "amdgcnmach.h"
v64si v64sf_numtestf (v64sf);
static const float HALF_PI = 1.570796326;
static const float ONE_OVER_PI = 0.318309886;
static const float r[] = { -0.1666665668,
0.8333025139e-02,
-0.1980741872e-03,
0.2601903036e-5 };
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsvf)
/* Shared kernel for vector sine and cosine.
x is the argument vector; a non-zero cosine selects the cosine (the
argument is shifted by HALF_PI), zero selects the sine.
NaN lanes return x, infinite lanes return z_notanum_f.f, and lanes whose
reduced magnitude exceeds YMAX return x; errno is set to EDOM or ERANGE
on those paths.  errno is a single scalar, so it is shared by all lanes.
Other lanes are reduced by the nearest multiple of pi and evaluated with
the polynomial whose coefficients are in r[].  */
DEF_VS_MATH_FUNC (v64sf, sinef, v64sf x, int cosine)
{
const float YMAX = 210828714.0;
FUNCTION_INIT (v64sf);
v64si num_type = v64sf_numtestf (x);
VECTOR_IF (num_type == NAN, cond)
errno = EDOM;
VECTOR_RETURN (x, cond);
VECTOR_ELSEIF (num_type == INF, cond)
errno = EDOM;
VECTOR_RETURN (VECTOR_INIT (z_notanum_f.f), cond);
VECTOR_ENDIF
/* Use sin and cos properties to ease computations. */
/* sgn is a lane mask: lanes that are set have their result negated at
the end.  y is the non-negative argument used for range reduction.  */
v64si sgn;
v64sf y;
if (cosine)
{
sgn = VECTOR_INIT (0);
y = __builtin_gcn_fabsvf (x) + HALF_PI;
}
else
{
sgn = x < 0.0f;
y = VECTOR_MERGE (-x, x, x < 0.0f);
}
/* Check for values of y that will overflow here. */
VECTOR_IF (y > YMAX, cond)
errno = ERANGE;
VECTOR_RETURN (x, cond);
VECTOR_ENDIF
/* Calculate the exponent. */
/* N is y / pi rounded to the nearest integer: the conversion truncates,
so 0.5 is added or subtracted first depending on the sign of y.  */
v64si Nneg = __builtin_convertvector (y * ONE_OVER_PI - 0.5f, v64si);
v64si Npos = __builtin_convertvector (y * ONE_OVER_PI + 0.5f, v64si);
v64si N = VECTOR_MERGE (Nneg, Npos, y < 0.0f);
v64sf XN = __builtin_convertvector (N, v64sf);
/* An odd multiple of pi flips the sign of the result.  */
VECTOR_COND_MOVE (sgn, ~sgn, (N & 1) != 0);
if (cosine)
XN -= 0.5;
/* Reduced argument.  */
y = __builtin_gcn_fabsvf (x) - XN * (float) __PI;
v64sf res;
/* For |y| below z_rooteps_f the result is y itself.  */
VECTOR_IF ((-z_rooteps_f < y) & (y < z_rooteps_f), cond)
VECTOR_COND_MOVE (res, y, cond);
VECTOR_ELSE (cond)
v64sf g = y * y;
/* Calculate the Taylor series. */
v64sf R = (((r[3] * g + r[2]) * g + r[1]) * g + r[0]) * g;
/* Finally, compute the result. */
VECTOR_COND_MOVE (res, y + y * R, cond);
VECTOR_ENDIF
VECTOR_COND_MOVE (res, -res, sgn);
VECTOR_RETURN (res, NO_COND);
FUNCTION_RETURN;
}
#endif

View File

@ -0,0 +1,127 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/******************************************************************
* The following routines are coded directly from the algorithms
* and coefficients given in "Software Manual for the Elementary
* Functions" by William J. Cody, Jr. and William Waite, Prentice
* Hall, 1980.
******************************************************************/
/* Based on newlib/libm/mathfp/sf_sineh.c in Newlib. */
#include "amdgcnmach.h"
/* Helpers defined elsewhere.  The kernel below calls v64sf_expf_aux with a
   value vector and the active-lane mask, compares the result of
   v64sf_numtestf against NAN and INF, and uses v64sf_isposf as a per-lane
   sign selector.  */
v64sf v64sf_expf_aux (v64sf, v64si);
v64si v64sf_numtestf (v64sf);
v64si v64sf_isposf (v64sf);
/* Denominator (q) and numerator (p) coefficients of the rational
   approximation in f = x*x used by the kernel for small arguments.  */
static const float q[] = { -0.428277109e+2 };
static const float p[] = { -0.713793159e+1,
-0.190333399 };
/* Offset subtracted from |x| before exponentiation on the large-argument
   path.  */
static const float LNV = 0.6931610107;
/* NOTE(review): INV_V2 is not referenced by the kernel below -- confirm
   whether it is still needed.  */
static const float INV_V2 = 0.2499930850;
/* Correction term: the large-argument result is z * (V_OVER2_MINUS1 + 1).  */
static const float V_OVER2_MINUS1 = 0.1383027787e-4;
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsvf)
/* Shared single-precision sinh/cosh kernel operating on 64 lanes.

   x        argument vector.
   cosineh  non-zero to evaluate cosh (x), zero to evaluate sinh (x).

   Special cases, per lane:
     - NAN lanes return x and errno is set to EDOM.
     - INF lanes set errno to ERANGE.  sinh returns an infinity with the
       sign of x; cosh is an even function and returns +Inf for both
       +Inf and -Inf.
     - On the large-argument path, lanes where |x| - LNV exceeds BIGX
       return x and errno is set to ERANGE.  */
DEF_VS_MATH_FUNC (v64sf, sinehf, v64sf x, int cosineh)
{
  const float WBAR = 18.55;

  FUNCTION_INIT (v64sf);

  /* Lanes whose result must be negated at the end (sinh of a negative x).  */
  v64si sgn = VECTOR_INIT (0);
  /* cosineh broadcast to every lane as a mask: -1 if set, 0 otherwise.  */
  v64si v_cosineh = VECTOR_INIT (cosineh ? -1 : 0);

  /* Check for special values. */
  v64si num_type = v64sf_numtestf (x);
  VECTOR_IF (num_type == NAN, cond)
    errno = EDOM;
    VECTOR_RETURN (x, cond);
  VECTOR_ELSEIF (num_type == INF, cond)
    errno = ERANGE;
    /* OR-ing in v_cosineh forces the +Inf choice on every lane for cosh,
       so that cosh (-Inf) is +Inf rather than -Inf.  */
    VECTOR_RETURN (VECTOR_MERGE (VECTOR_INIT (z_infinity_f.f),
                                 VECTOR_INIT (-z_infinity_f.f),
                                 v64sf_isposf (x) | v_cosineh),
                   cond);
  VECTOR_ENDIF

  v64sf y = __builtin_gcn_fabsvf (x);

  /* Only sinh is odd; cosh never has its result negated.  */
  if (!cosineh)
    VECTOR_COND_MOVE (sgn, VECTOR_INIT (-1), x < 0.0f);

  v64sf res;

  /* cosh always takes the exponential path; sinh only when |x| > 1.  */
  VECTOR_IF (((y > 1.0f) & ~v_cosineh) | v_cosineh, cond)
    VECTOR_IF2 (y > (float) BIGX, cond2, cond)
      v64sf w = y - LNV;

      /* Check for w > maximum here. */
      VECTOR_IF2 (w > (float) BIGX, cond3, cond2)
        errno = ERANGE;
        VECTOR_RETURN (x, cond3);
      VECTOR_ENDIF

      v64sf z = v64sf_expf_aux (w, __mask);

      VECTOR_COND_MOVE (res, z * (V_OVER2_MINUS1 + 1.0f),
                        cond2 & (w > WBAR));
    VECTOR_ELSE2 (cond2, cond)
      v64sf z = v64sf_expf_aux (y, __mask);
      if (cosineh) {
        VECTOR_COND_MOVE (res, (z + 1 / z) * 0.5f, cond2);
      } else {
        VECTOR_COND_MOVE (res, (z - 1 / z) * 0.5f, cond2);
      }
    VECTOR_ENDIF

    VECTOR_COND_MOVE (res, -res, sgn);
  VECTOR_ELSE (cond)
    /* Check for y being too small. */
    VECTOR_IF2 (y < z_rooteps_f, cond2, cond)
      VECTOR_COND_MOVE (res, x, cond2);
    VECTOR_ELSE2 (cond2, cond)
      /* Rational approximation in f = x*x; x carries the sign, so no
         negation is needed on this path.  */
      v64sf f = x * x;
      v64sf Q = f + q[0];
      v64sf P = p[1] * f + p[0];
      v64sf R = f * (P / Q);

      VECTOR_COND_MOVE (res, x + x * R, cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF

  VECTOR_RETURN (res, NO_COND);

  FUNCTION_RETURN;
}
#endif

View File

@ -0,0 +1,41 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/* Based on newlib/libm/mathfp/sf_sinh.c in Newlib. */
#include "amdgcnmach.h"

/* Shared sinh/cosh kernel, defined elsewhere.  Arguments: value vector,
   function selector, active-lane mask.  */
v64sf v64sf_sinehf_aux (v64sf, int, v64si);

/* Vector sinhf: forwards to the shared kernel with selector 0 and the
   caller's lane mask.  */
DEF_VS_MATH_FUNC (v64sf, sinhf, v64sf x)
{
  const int selector = 0;
  v64sf result = v64sf_sinehf_aux (x, selector, __mask);

  return result;
}

DEF_VARIANTS (sinhf, sf, sf)

View File

@ -0,0 +1,103 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/*****************************************************************
* The following routines are coded directly from the algorithms
* and coefficients given in "Software Manual for the Elementary
* Functions" by William J. Cody, Jr. and William Waite, Prentice
* Hall, 1980.
*****************************************************************/
/* Based on newlib/libm/mathfp/sf_sqrt.c in Newlib. */
#include "amdgcnmach.h"

/* Helpers defined elsewhere: a per-lane classifier whose result is compared
   against NAN and INF, and a per-lane "is positive" predicate.  */
v64si v64sf_numtestf (v64sf);
v64si v64sf_isposf (v64sf);

#if defined (__has_builtin) \
    && __has_builtin (__builtin_gcn_frexpvf_mant) \
    && __has_builtin (__builtin_gcn_frexpvf_exp) \
    && __has_builtin (__builtin_gcn_ldexpvf)
/* Vector single-precision square root.

   Special cases, per lane:
     - NAN lanes return x and errno is set to EDOM.
     - +Inf returns +Inf and errno is set to ERANGE.
     - -Inf and every other negative value return z_notanum_f.f and errno
       is set to EDOM.
     - Zero returns the argument itself, so the sign of zero is kept.

   All other lanes are split into mantissa and exponent, the mantissa root
   is refined with two Newton iterations, and the result is rescaled by
   half the exponent.  */
DEF_VS_MATH_FUNC (v64sf, sqrtf, v64sf x)
{
  FUNCTION_INIT (v64sf);

  /* Check for special values. */
  v64si num_type = v64sf_numtestf (x);
  VECTOR_IF (num_type == NAN, cond)
    errno = EDOM;
    VECTOR_RETURN (x, cond);
  VECTOR_ELSEIF (num_type == INF, cond)
    VECTOR_IF2 (v64sf_isposf (x), cond2, cond)
      /* sqrt (+Inf) is +Inf.  */
      errno = ERANGE;
      VECTOR_RETURN (VECTOR_INIT (z_infinity_f.f), cond2);
    VECTOR_ELSE2 (cond2, cond)
      /* sqrt (-Inf) is a domain error.  */
      errno = EDOM;
      VECTOR_RETURN (VECTOR_INIT (z_notanum_f.f), cond2);
    VECTOR_ENDIF
  VECTOR_ENDIF

  /* Initial checks are performed here. */
  VECTOR_IF (x == 0.0f, cond)
    /* Return the argument so that sqrt (-0.0) stays -0.0.  */
    VECTOR_RETURN (x, cond);
  VECTOR_ENDIF
  VECTOR_IF (x < 0.0f, cond)
    errno = EDOM;
    VECTOR_RETURN (VECTOR_INIT (z_notanum_f.f), cond);
  VECTOR_ENDIF

  /* Find the exponent and mantissa for the form x = f * 2^exp. */
  v64sf f = __builtin_gcn_frexpvf_mant (x);
  v64si exp = __builtin_gcn_frexpvf_exp (x);
  v64si odd = (exp & 1) != 0;

  /* Get the initial approximation. */
  v64sf y = 0.41731f + 0.59016f * f;

  f *= 0.5f;
  /* Calculate the remaining iterations: with f halved above, each line is
     one Newton step y = (y + f_orig / y) / 2.  */
  y = y * 0.5f + f / y;
  y = y * 0.5f + f / y;

  /* Calculate the final value.  For an odd exponent, scale by sqrt (1/2)
     and round the exponent up so that it can be halved exactly.  */
  VECTOR_COND_MOVE (y, y * (float) __SQRT_HALF, odd);
  VECTOR_COND_MOVE (exp, exp + 1, odd);
  exp >>= 1;
  y = __builtin_gcn_ldexpvf (y, exp);

  VECTOR_RETURN (y, NO_COND);

  FUNCTION_RETURN;
}
DEF_VARIANTS (sqrtf, sf, sf)
#endif

View File

@ -0,0 +1,152 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Based on newlib/libm/math/kf_tan.c in Newlib. */
#include "amdgcnmach.h"
/* Constants for the tangent kernel below.  The hex value after each
   constant is the bit pattern given by the original source.
     one     NOTE(review): not referenced by the vector code below, which
             uses 1.0f literals -- confirm whether it is still needed.
     pio4    pi/4; pio4lo is added to it as a low-order correction.
     T[]     polynomial coefficients T[0]..T[12] used by the kernel.  */
static const float
one = 1.0000000000e+00, /* 0x3f800000 */
pio4 = 7.8539812565e-01, /* 0x3f490fda */
pio4lo= 3.7748947079e-08, /* 0x33222168 */
T[] = {
3.3333334327e-01, /* 0x3eaaaaab */
1.3333334029e-01, /* 0x3e088889 */
5.3968254477e-02, /* 0x3d5d0dd1 */
2.1869488060e-02, /* 0x3cb327a4 */
8.8632395491e-03, /* 0x3c11371f */
3.5920790397e-03, /* 0x3b6b6916 */
1.4562094584e-03, /* 0x3abede48 */
5.8804126456e-04, /* 0x3a1a26c8 */
2.4646313977e-04, /* 0x398137b9 */
7.8179444245e-05, /* 0x38a3f445 */
7.1407252108e-05, /* 0x3895c07a */
-1.8558637748e-05, /* 0xb79bae5f */
2.5907305826e-05, /* 0x37d95384 */
};
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsvf)
/* Tangent kernel for a reduced argument.

   x       reduced argument (high part).
   y       low-order part (tail) of the reduced argument.
   iy      per-lane selector: lanes with iy == 1 return the tangent value w;
           the remaining lanes return an accurately computed -1/w.
   __mask  active-lane mask, passed explicitly because this is a plain
           static function rather than a DEF_VS_MATH_FUNC entry point.

   Each VECTOR_RETURN below is given the condition for the lanes it
   finishes, so later statements are meant for the lanes still pending.  */
static v64sf
v64sf_kernel_tanf (v64sf x, v64sf y, v64si iy, v64si __mask)
{
FUNCTION_INIT (v64sf);
v64si hx;
GET_FLOAT_WORD(hx, x, NO_COND);
v64si ix = hx & 0x7fffffff; /* high word of |x| */
/* Tiny arguments: return x for the tangent lanes and -1/x for the others;
   the (ix|(iy+1))==0 lanes (x is zero and iy is -1) return 1/|x|.  */
VECTOR_IF(ix<0x31800000, cond) /* x < 2**-28 */
VECTOR_IF2(__builtin_convertvector (x, v64si)==0, cond2, cond) /* generate inexact */
VECTOR_RETURN (1.0f / __builtin_gcn_fabsvf (x), (ix|(iy+1))==0);
VECTOR_RETURN (x, cond2 & (iy == 1));
VECTOR_RETURN (-1.0f / x, cond2);
VECTOR_ENDIF
VECTOR_ENDIF
/* Large arguments: make x and y non-negative, then replace x by
   (pio4 - x) + (pio4lo - y) and clear y.  */
VECTOR_IF(ix>=0x3f2ca140, cond) /* |x|>=0.6744 */
VECTOR_COND_MOVE (x, -x, cond & (hx < 0));
VECTOR_COND_MOVE (y, -y, cond & (hx < 0));
v64sf z = pio4-x;
v64sf w = pio4lo-y;
VECTOR_COND_MOVE (x, z+w, cond);
VECTOR_COND_MOVE (y, VECTOR_INIT (0.0f), cond);
VECTOR_ENDIF
v64sf z = x*x;
v64sf w = z*z;
/* Break x^5*(T[1]+x^2*T[2]+...) into
* x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
* x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
*/
v64sf r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11]))));
v64sf v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12])))));
v64sf s = z*x;
r = y + z*(s*(r+v)+y);
r += T[0]*s;
w = x+r;
/* Large-argument lanes: build the result from w, r and the selector iy.
   The leading factor 1-((hx>>30)&2) is 1 or -1 according to the sign bit
   of the original x.  */
VECTOR_IF(ix>=0x3f2ca140, cond)
v = __builtin_convertvector (iy, v64sf);
VECTOR_RETURN (__builtin_convertvector (1-((hx>>30)&2), v64sf)
* (v-2.0f*(x-(w*w/(w+v)-r))), cond);
VECTOR_ENDIF
VECTOR_RETURN (w, iy == 1);
/* if allow error up to 2 ulp,
simply return -1.0/(x+r) here */
/* compute -1.0/(x+r) accurately */
z = w;
v64si i;
/* Clear the low 12 bits of the float word so that z is a short head of w.  */
GET_FLOAT_WORD(i,z, NO_COND);
SET_FLOAT_WORD(z,i&0xfffff000, NO_COND);
v = r - (z - x); /* z+v = r+x */
v64sf a, t;
t = a = -1.0f/w; /* a = -1.0/w */
/* Same truncation for t, the head of a.  */
GET_FLOAT_WORD(i,t, NO_COND);
SET_FLOAT_WORD(t,i&0xfffff000, NO_COND);
s = 1.0f+t*z;
VECTOR_RETURN (t+a*(s+t*v), NO_COND);
FUNCTION_RETURN;
}
/* Reduce x by a multiple of pi/2.

   x   argument vector.
   y   output array of two vectors: y[0] receives the reduced argument
       rounded to single precision and y[1] the remainder lost by that
       rounding, so that y[0] + y[1] approximates the double-precision
       reduced value.

   Returns n, the per-lane multiple of pi/2 that was subtracted.

   NOTE(review): r is converted to a 32-bit integer before the shift, which
   presumably limits the range of |x| for which n is meaningful -- confirm
   against the definition of __INV_PI_OVER_TWO_2_24.  */
static v64si
v64sf_rem_pio2f (v64sf x, v64sf *y)
{
  /* Work in double-precision for better accuracy. */
  v64df dx = __builtin_convertvector (x, v64df);
  v64df r = dx * __INV_PI_OVER_TWO_2_24;
  /* Adding 0x800000 before the arithmetic shift by 24 rounds to nearest.  */
  v64si n = (__builtin_convertvector (r, v64si) + 0x800000) >> 24;
  dx = dx - __builtin_convertvector (n, v64df) * __PI_OVER_TWO;

  y[0] = __builtin_convertvector (dx, v64sf);
  /* Tail: what was lost when dx was rounded to float.  The subtraction must
     be done in double precision; subtracting y[0] from the already-rounded
     value would always give zero.  */
  y[1] = __builtin_convertvector (dx - __builtin_convertvector (y[0], v64df),
                                  v64sf);
  return n;
}
/* Vector single-precision tangent.

   Lanes with |x| no larger than about pi/4 go straight to the kernel with a
   zero tail and selector 1.  Lanes that are not finite return x - x.  The
   remaining lanes are reduced by v64sf_rem_pio2f and passed to the kernel
   with a selector derived from the parity of the reduction count.  */
DEF_VS_MATH_FUNC (v64sf, tanf, v64sf x)
{
FUNCTION_INIT (v64sf);
v64si ix;
GET_FLOAT_WORD (ix, x, NO_COND);
/* |x| ~< pi/4 */
/* Mask off the sign bit; 0x3f490fda below is the bit pattern listed for
   pio4 in the constant table.  */
ix &= 0x7fffffff;
VECTOR_RETURN (v64sf_kernel_tanf (x, VECTOR_INIT (0.0f), VECTOR_INIT (1), __mask),
ix <= 0x3f490fda);
/* tan(Inf or NaN) is NaN */
VECTOR_RETURN (x-x, ~FLT_UWORD_IS_FINITE(ix)); /* NaN */
/* argument reduction needed */
v64sf y[2];
v64si n = v64sf_rem_pio2f (x,y);
VECTOR_RETURN (v64sf_kernel_tanf (y[0], y[1], 1-((n&1)<<1), __mask), // 1 -- n even
NO_COND); // -1 -- n odd
FUNCTION_RETURN;
}
DEF_VARIANTS (tanf, sf, sf)
#endif

View File

@ -0,0 +1,92 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the BSD License. This program is distributed in the hope that
* it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
* including the implied warranties of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. A copy of this license is available at
* http://www.opensource.org/licenses. Any Red Hat trademarks that are
* incorporated in the source code or documentation are not subject to
* the BSD License and may only be used or replicated with the express
* permission of Red Hat, Inc.
*/
/*****************************************************************
* The following routines are coded directly from the algorithms
* and coefficients given in "Software Manual for the Elementary
* Functions" by William J. Cody, Jr. and William Waite, Prentice
* Hall, 1980.
*****************************************************************/
/* Based on newlib/libm/mathfp/sf_tanh.c in Newlib. */
#include "amdgcnmach.h"
/* Exponential helper defined elsewhere; called with a value vector and the
   active-lane mask.  */
v64sf v64sf_expf_aux (v64sf, v64si);
/* ln(3)/2: above this magnitude tanhf switches to the exponential formula.  */
static const float LN3_OVER2 = 0.54930614433405484570;
/* Numerator (p) and denominator (q) coefficients of the rational
   approximation in g = f*f used for small magnitudes.  The denominator has
   an implicit leading coefficient of 1.  */
static const float p[] = { -0.16134119023996228053e+4,
-0.99225929672236083313e+2,
-0.96437492777225469787 };
static const float q[] = { 0.48402357071988688686e+4,
0.22337720718962312926e+4,
0.11274474380534949335e+3 };
#if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsvf)
/* Vector single-precision hyperbolic tangent.

   The result is computed for |x| by one of four methods chosen per lane,
   then negated on the lanes where x is negative:
     |x| > BIGX         1
     |x| > LN3_OVER2    1 - 2 / (exp (2|x|) + 1)
     |x| < z_rooteps_f  |x|
     otherwise          |x| + |x| * R, R a rational function of x*x.  */
DEF_VS_MATH_FUNC (v64sf, tanhf, v64sf x)
{
  FUNCTION_INIT (v64sf);

  v64sf mag = __builtin_gcn_fabsvf (x);
  v64sf result;

  /* Very large magnitude.  */
  VECTOR_IF (mag > (float) BIGX, lanes)
    VECTOR_COND_MOVE (result, VECTOR_INIT (1.0f), lanes);

  /* Exponential formula.  */
  VECTOR_ELSEIF (mag > LN3_OVER2, lanes)
    VECTOR_COND_MOVE (result,
                      1.0f - 2.0f / (v64sf_expf_aux (2.0f * mag, __mask) + 1.0f),
                      lanes);

  /* Very small magnitude: the argument is the result.  */
  VECTOR_ELSEIF (mag < z_rooteps_f, lanes)
    VECTOR_COND_MOVE (result, mag, lanes);

  /* Rational approximation in sq = mag * mag.  */
  VECTOR_ELSE (lanes)
    v64sf sq = mag * mag;

    v64sf num = (p[2] * sq + p[1]) * sq + p[0];
    v64sf den = ((sq + q[2]) * sq + q[1]) * sq + q[0];
    v64sf ratio = sq * (num / den);

    VECTOR_COND_MOVE (result, mag + mag * ratio, lanes);
  VECTOR_ENDIF

  /* tanh is odd: restore the sign of the argument.  */
  VECTOR_COND_MOVE (result, -result, x < 0.0f);

  VECTOR_RETURN (result, NO_COND);

  FUNCTION_RETURN;
}
DEF_VARIANTS (tanhf, sf, sf)
#endif

View File

@ -0,0 +1,42 @@
/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*
*/
/* Based on newlib/libm/math/ef_tgamma.c in Newlib. */
#include "amdgcnmach.h"

/* Helpers defined elsewhere: the exponential, and a log-gamma routine that
   also stores a per-lane sign through its second argument.  */
v64sf v64sf_expf_aux (v64sf x, v64si __mask);
v64sf v64sf_lgammaf_r_aux (v64sf x, v64si *signgamp, v64si __mask);

/* Vector single-precision gamma function: exponentiates the log-gamma
   value and negates the lanes whose reported sign is negative.  */
DEF_VS_MATH_FUNC (v64sf, tgammaf, v64sf x)
{
  v64si sign;
  v64sf log_value = v64sf_lgammaf_r_aux (x, &sign, __mask);
  v64sf value = v64sf_expf_aux (log_value, __mask);

  VECTOR_COND_MOVE (value, -value, sign < 0);

  return value;
}

DEF_VARIANTS (tgammaf, sf, sf)