4
0
mirror of git://sourceware.org/git/newlib-cygwin.git synced 2025-01-27 01:27:21 +08:00
Kwok Cheung Yeung e18743072b amdgcn: Add vectorized math routines
This implements a set of vectorized math routines to be used by the
compiler auto-vectorizer.  Versions for vectors with 2 lanes up to
64 lanes (in powers of 2) are provided.

These routines are based on the scalar versions of the math routines in
libm/common, libm/math and libm/mathfp.  They make extensive use of the GCC
C vector extensions and GCN-specific builtins in GCC.
2023-01-18 13:22:58 -05:00

243 lines
7.6 KiB
C

/*
* Copyright 2023 Siemens
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
/* Common header file for AMD GCN vector math routines. */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* Copyright (c) 2017-2018 Arm Ltd. All rights reserved.
SPDX-License-Identifier: BSD-3-Clause
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the company may not be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* This header is partially based on:
newlib/libm/common/fdlibm.h
newlib/libm/mathfp/zmath.h
newlib/libm/common/math_errf.c
newlib/libm/common/math_config.h */
#include <errno.h>
#include <sys/types.h>
#include <machine/ieeefp.h>
#include "amdgcn_veclib.h"
/* Vectorized versions of macros from newlib/libm/common/fdlibm.h */
/* Split X (stored through the t_v64df view of vector_union) into the
   upper and lower 32 bits of every 64-bit lane.  HI and LO must be
   assignable vectors; each result is converted to its destination's own
   type.  Unlike the other word macros this one is unconditional.  */
#define EXTRACT_WORDS(hi, lo, x)                                          \
  do {                                                                    \
    vector_union __xw_pun;                                                \
    __xw_pun.t_v64df = (x);                                               \
    hi = __builtin_convertvector (__xw_pun.t_v64di >> 32, typeof (hi));   \
    lo = __builtin_convertvector (__xw_pun.t_v64di & 0xffffffff,          \
                                  typeof (lo));                           \
  } while (0)
/* Build a 64-bit pattern per lane with HI in bits 63..32 and the low 32
   bits of LO in bits 31..0, then hand its t_v64df view to
   VECTOR_COND_MOVE together with X and COND.  */
#define INSERT_WORDS(x, hi, lo, cond)                                     \
  do {                                                                    \
    vector_union __iw_pun;                                                \
    __iw_pun.t_v64di                                                      \
      = (__builtin_convertvector (hi, v64di) << 32)                       \
        | (__builtin_convertvector (lo, v64di) & 0xffffffff);             \
    VECTOR_COND_MOVE (x, __iw_pun.t_v64df, cond);                         \
  } while (0)
/* Take bits 63..32 of every lane of Y, convert them to v64si and pass the
   result to VECTOR_COND_MOVE with destination X and condition COND.  */
#define GET_HIGH_WORD(x, y, cond)                                         \
  do {                                                                    \
    vector_union __ghw_pun;                                               \
    __ghw_pun.t_v64df = (y);                                              \
    VECTOR_COND_MOVE (x,                                                  \
                      __builtin_convertvector (__ghw_pun.t_v64di >> 32,   \
                                               v64si),                    \
                      (cond));                                            \
  } while (0)
/* Take bits 31..0 of every lane of Y, convert them to v64si and pass the
   result to VECTOR_COND_MOVE with destination X and condition COND.  */
#define GET_LOW_WORD(x, y, cond)                                          \
  do {                                                                    \
    vector_union __glw_pun;                                               \
    __glw_pun.t_v64df = (y);                                              \
    VECTOR_COND_MOVE (x,                                                  \
                      __builtin_convertvector (__glw_pun.t_v64di          \
                                                 & 0xffffffff,            \
                                               v64si),                    \
                      (cond));                                            \
  } while (0)
/* Replace bits 63..32 of every lane of X with Y while keeping bits 31..0,
   then pass the new value to VECTOR_COND_MOVE with destination X and
   condition COND.  */
#define SET_HIGH_WORD(x, y, cond)                                         \
  do {                                                                    \
    vector_union __shw_pun;                                               \
    __shw_pun.t_v64df = x;                                                \
    __shw_pun.t_v64di = (__shw_pun.t_v64di & 0xffffffff)                  \
                        | (__builtin_convertvector (y, v64di) << 32);     \
    VECTOR_COND_MOVE (x, __shw_pun.t_v64df, (cond));                      \
  } while (0)
/* Replace bits 31..0 of every lane of X with Y while keeping bits 63..32,
   then pass the new value to VECTOR_COND_MOVE with destination X and
   condition COND.

   Y is widened to 64 bits per lane before being merged.  When Y has
   signed elements, a lane with bit 31 set is sign-extended by the
   conversion, which would OR ones into the upper word that is supposed to
   be preserved.  The widened value is therefore masked to its low 32 bits
   (as INSERT_WORDS does for its low word); for unsigned Y the mask is a
   no-op.  */
#define SET_LOW_WORD(x, y, cond)                                          \
  do {                                                                    \
    vector_union __tmp;                                                   \
    __tmp.t_v64df = (x);                                                  \
    __tmp.t_v64di &= 0xffffffff00000000ULL;                               \
    __tmp.t_v64di |= __builtin_convertvector (y, v64di) & 0xffffffff;     \
    VECTOR_COND_MOVE (x, __tmp.t_v64df, (cond));                          \
  } while (0)
/* Pass CAST_VECTOR (v64si, Y) to VECTOR_COND_MOVE with destination X and
   condition COND.  NOTE(review): CAST_VECTOR is defined elsewhere;
   presumably it reinterprets the float lanes as 32-bit integers rather
   than converting their values -- confirm in amdgcn_veclib.h.  */
#define GET_FLOAT_WORD(x, y, cond)                                        \
  VECTOR_COND_MOVE (x, CAST_VECTOR (v64si, (y)), (cond))
/* Pass CAST_VECTOR (v64sf, Y) to VECTOR_COND_MOVE with destination X and
   condition COND.  NOTE(review): CAST_VECTOR is defined elsewhere;
   presumably it reinterprets the 32-bit integer lanes as floats rather
   than converting their values -- confirm in amdgcn_veclib.h.  */
#define SET_FLOAT_WORD(x, y, cond)                                        \
  VECTOR_COND_MOVE (x, CAST_VECTOR (v64sf, (y)), (cond))
/* Definitions from newlib/libm/common/fdlibm.h */
/* Predicates and limits expressed on the 32-bit integer pattern of a
   float.  NOTE(review): the "UWORD" naming suggests the argument is the
   word with the sign bit already cleared -- confirm against callers.  */
#ifdef _FLT_LARGEST_EXPONENT_IS_NORMAL
/* In this configuration the three predicates do not depend on the value:
   for integer operands (x) == (x) is always true and (x) != (x) is always
   false.  Presumably they are spelled as comparisons on x (instead of the
   constants 1 and 0) so the result has the same type as the comparisons in
   the #else branch -- TODO confirm.  */
#define FLT_UWORD_IS_FINITE(x) ((x) == (x))
#define FLT_UWORD_IS_NAN(x) ((x) != (x))
#define FLT_UWORD_IS_INFINITE(x) ((x) != (x))
#define FLT_UWORD_MAX 0x7fffffff
/* 0x43010000 is the binary32 pattern of 129.0f.  */
#define FLT_UWORD_EXP_MAX 0x43010000
#define FLT_UWORD_LOG_MAX 0x42b2d4fc
#define FLT_UWORD_LOG_2MAX 0x42b437e0
#define HUGE ((float)0X1.FFFFFEP128)
#else
/* 0x7f800000 is the IEEE-754 binary32 pattern of +infinity: smaller words
   are finite, larger ones are NaNs.  */
#define FLT_UWORD_IS_FINITE(x) ((x)<0x7f800000)
#define FLT_UWORD_IS_NAN(x) ((x)>0x7f800000)
#define FLT_UWORD_IS_INFINITE(x) ((x)==0x7f800000)
/* 0x7f7fffff is the binary32 pattern of the largest finite float.  */
#define FLT_UWORD_MAX 0x7f7fffffL
/* 0x43000000 is the binary32 pattern of 128.0f.  */
#define FLT_UWORD_EXP_MAX 0x43000000
#define FLT_UWORD_LOG_MAX 0x42b17217
#define FLT_UWORD_LOG_2MAX 0x42b2d4fc
#define HUGE ((float)3.40282346638528860e+38)
#endif
/* FLT_UWORD_MAX with 1 subtracted at bit 23, the lowest exponent bit of
   binary32.  */
#define FLT_UWORD_HALF_MAX (FLT_UWORD_MAX-(1L<<23))
/* FLT_UWORD_MAX shifted right past the 23 mantissa bits.  */
#define FLT_LARGEST_EXP (FLT_UWORD_MAX>>23)
/* Lower-range predicates and limits on the 32-bit integer pattern of a
   float, selected by whether subnormal numbers are supported.  */
#ifdef _FLT_NO_DENORMALS
/* 0x00800000 is the binary32 pattern of the smallest normal float; every
   word below it is treated as zero and nothing is subnormal ((x) != (x)
   is always false for integer operands).  */
#define FLT_UWORD_IS_ZERO(x) ((x)<0x00800000)
#define FLT_UWORD_IS_SUBNORMAL(x) ((x) != (x))
#define FLT_UWORD_MIN 0x00800000
/* 0x42fc0000 is the binary32 pattern of 126.0f.  */
#define FLT_UWORD_EXP_MIN 0x42fc0000
#define FLT_UWORD_LOG_MIN 0x42aeac50
#define FLT_SMALLEST_EXP 1
#else
/* Only the all-zero word is zero; non-zero words below 0x00800000 (the
   smallest normal) also satisfy the subnormal test, as does zero itself,
   so callers presumably test for zero first -- TODO confirm.  */
#define FLT_UWORD_IS_ZERO(x) ((x)==0)
#define FLT_UWORD_IS_SUBNORMAL(x) ((x)<0x00800000)
#define FLT_UWORD_MIN 0x00000001
/* 0x43160000 is the binary32 pattern of 150.0f.  */
#define FLT_UWORD_EXP_MIN 0x43160000
#define FLT_UWORD_LOG_MIN 0x42cff1b5
#define FLT_SMALLEST_EXP -22
#endif
/* Definitions from newlib/libm/mathfp/zmath.h */
/* Small integer codes.  NOTE(review): presumably the number-classification
   results used by the mathfp routines (ordinary number / NaN / infinity)
   -- confirm against users.  NAN here is the plain integer 2; the standard
   <math.h> also defines a macro named NAN, so the two must not be mixed in
   one translation unit without care.  */
#define NUM 3
#define NAN 2
#define INF 1
/* Double-precision literals: pi, sqrt(1/2), pi/2.  */
#define __PI 3.14159265358979323846
#define __SQRT_HALF 0.70710678118654752440
#define __PI_OVER_TWO 1.57079632679489661923132
/* Numerically (2/pi) * 2^24.  */
#define __INV_PI_OVER_TWO_2_24 10680707.430881743590348355907974
/* Read-only overlay of a double with two unsigned int words, so a constant
   can be defined by its words and read as a double (or the reverse).
   Which of l[0] / l[1] is the upper word depends on the target's byte
   order.  */
typedef const union
{
  unsigned int l[2];   /* The same storage viewed as two words.  */
  double d;            /* The same storage viewed as a double.  */
} udouble;
/* Read-only overlay of a float with one unsigned int word, so a constant
   can be defined by its word and read as a float (or the reverse).  */
typedef const union
{
  unsigned int l;   /* The same storage viewed as a word.  */
  float f;          /* The same storage viewed as a float.  */
} ufloat;
/* Constants defined in another translation unit; only their types are
   visible here.  NOTE(review): by name these look like the zmath.h range
   limits, infinity / NaN patterns and sqrt(epsilon) values for double and
   float -- confirm against their definitions.  The udouble / ufloat objects
   are const unions, so they are read through their .d / .f (or word)
   members.  */
extern double BIGX;
extern double SMALLX;
extern udouble z_infinity;
extern udouble z_notanum;
extern double z_rooteps;
extern ufloat z_infinity_f;
extern ufloat z_notanum_f;
extern float z_rooteps_f;
/* Vectorized versions of functions from newlib/libm/common/math_errf.c */
/* Report a range error and produce an overflowing product.

   SIGN selects, per lane, between -0x1p97f and 0x1p97f through
   VECTOR_MERGE (presumably the first operand is taken where SIGN is set --
   confirm in amdgcn_veclib.h).  The selected value is multiplied by
   0x1p97f, which exceeds the float range.  errno is set to ERANGE once for
   the whole vector.  */
static v64sf
v64sf_math_oflowf (v64si sign)
{
  v64sf neg_big = VECTOR_INIT (-0x1p97f);
  v64sf pos_big = VECTOR_INIT (0x1p97f);
  v64sf signed_big = VECTOR_MERGE (neg_big, pos_big, sign);

  errno = ERANGE;
  return signed_big * 0x1p97f;
}
/* Report a range error and produce an underflowing product.

   SIGN selects, per lane, between -0x1p-95f and 0x1p-95f through
   VECTOR_MERGE (presumably the first operand is taken where SIGN is set --
   confirm in amdgcn_veclib.h).  The selected value is multiplied by
   0x1p-95f, giving a magnitude below the float range.  errno is set to
   ERANGE once for the whole vector.  */
static v64sf
v64sf_math_uflowf (v64si sign)
{
  v64sf neg_tiny = VECTOR_INIT (-0x1p-95f);
  v64sf pos_tiny = VECTOR_INIT (0x1p-95f);
  v64sf signed_tiny = VECTOR_MERGE (neg_tiny, pos_tiny, sign);

  errno = ERANGE;
  return signed_tiny * 0x1p-95f;
}
/* Vectorized versions of functions from newlib/libm/common/math_config.h */
/* Per-lane test for a signalling NaN using the IEEE-754 2008 encoding.

   Returns an integer vector that is non-zero (the result of lane-wise
   comparisons) in lanes where X is a signalling NaN and zero elsewhere.

   A binary32 signalling NaN has all exponent bits set, the most
   significant mantissa bit (the quiet bit, 0x00400000) clear, and at least
   one of the remaining mantissa bits set.  The last condition is required:
   with an all-zero mantissa the pattern is +/-infinity, which must not be
   reported as a NaN.  */
static v64si v64sf_issignalingf_inline (v64sf x)
{
  /* NO_COND is used below; presumably it refers to this all-lanes-enabled
     mask by name -- confirm in amdgcn_veclib.h.  */
  v64si __mask = VECTOR_INIT (-1);
  v64si ix;
  GET_FLOAT_WORD (ix, x, NO_COND);

  v64si exp_all_ones = (ix & 0x7f800000) == 0x7f800000;
  v64si quiet_bit_clear = (ix & 0x00400000) == 0;
  /* Excludes infinities, whose mantissa is entirely zero.  */
  v64si payload_nonzero = (ix & 0x003fffff) != 0;

  return exp_all_ones & quiet_bit_clear & payload_nonzero;
}
/* Vector extensions to sys/reent.h */
/* State block for the vector routines, accessed through _V64_REENT and
   _REENT_V64SI_SIGNGAM.  It currently holds a single integer vector.
   NOTE(review): by name this is the per-lane counterpart of signgam for
   the gamma functions -- confirm against the routines that write it.  */
struct v64_reent
{
  v64si _v64si_gamma_signgam;
};
/* Pointer to the vector state block; defined in another translation
   unit.  */
extern struct v64_reent *_v64_reent;
/* Current state block pointer.  */
#define _V64_REENT _v64_reent
/* The signgam vector member of the state block PTR points to.  */
#define _REENT_V64SI_SIGNGAM(ptr) ((ptr)->_v64si_gamma_signgam)
/* Vector extensions to math.h */
/* v64si_signgam expands to a dereference of the pointer returned by
   __v64si_signgam (), so it can be used like an ordinary variable.  The
   function is declared just below and defined elsewhere.  */
#define v64si_signgam (*__v64si_signgam())
extern v64si* __v64si_signgam (void);
/* Variant that takes the state block pointer explicitly.  */
#define __v64si_signgam_r(ptr) _REENT_V64SI_SIGNGAM(ptr)