mirror of
git://sourceware.org/git/newlib-cygwin.git
synced 2025-01-14 17:59:28 +08:00
d4bcecb3e9
The single-precision trigonometric functions show rather high errors in specific ranges starting at about 30000 radians. For example the sinf procedure produces an error of 7626.55 ULP with the input 5.195880078125e+04 (0x474AF6CD) (compared with MPFR in 128bit precision). For the test we used 100k values evenly spaced in the range of [30k, 70k]. The issues are periodic at higher ranges. This error was introduced when the double precision range reduction was first converted to float. The shift by 8 bits always returns 0 as iq is never higher than 255. The fix reduces the error of the example above to 0.45 ULP, highest error within the test set fell to 1.31 ULP, which is not perfect, but still a significant improvement. Testing other previously erroneous ranges no longer show particularly large accuracy errors.
210 lines
5.1 KiB
C
210 lines
5.1 KiB
C
/* kf_rem_pio2.c -- float version of k_rem_pio2.c
|
|
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
|
|
*/
|
|
|
|
/*
|
|
* ====================================================
|
|
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
*
|
|
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
* Permission to use, copy, modify, and distribute this
|
|
* software is freely granted, provided that this notice
|
|
* is preserved.
|
|
* ====================================================
|
|
*/
|
|
|
|
#include "fdlibm.h"
|
|
|
|
/* In the float version, the input parameter x contains 8 bit
|
|
integers, not 24 bit integers. 113 bit precision is not supported. */
|
|
|
|
#ifdef __STDC__
|
|
static const int init_jk[] = {4,7,9}; /* initial value for jk */
|
|
#else
|
|
static int init_jk[] = {4,7,9};
|
|
#endif
|
|
|
|
#ifdef __STDC__
|
|
static const float PIo2[] = {
|
|
#else
|
|
static float PIo2[] = {
|
|
#endif
|
|
1.5703125000e+00, /* 0x3fc90000 */
|
|
4.5776367188e-04, /* 0x39f00000 */
|
|
2.5987625122e-05, /* 0x37da0000 */
|
|
7.5437128544e-08, /* 0x33a20000 */
|
|
6.0026650317e-11, /* 0x2e840000 */
|
|
7.3896444519e-13, /* 0x2b500000 */
|
|
5.3845816694e-15, /* 0x27c20000 */
|
|
5.6378512969e-18, /* 0x22d00000 */
|
|
8.3009228831e-20, /* 0x1fc40000 */
|
|
3.2756352257e-22, /* 0x1bc60000 */
|
|
6.3331015649e-25, /* 0x17440000 */
|
|
};
|
|
|
|
#ifdef __STDC__
|
|
static const float
|
|
#else
|
|
static float
|
|
#endif
|
|
zero = 0.0,
|
|
one = 1.0,
|
|
two8 = 2.5600000000e+02, /* 0x43800000 */
|
|
twon8 = 3.9062500000e-03; /* 0x3b800000 */
|
|
|
|
#ifdef __STDC__
|
|
int __kernel_rem_pio2f(float *x, float *y, int e0, int nx, int prec, const __int32_t *ipio2)
|
|
#else
|
|
int __kernel_rem_pio2f(x,y,e0,nx,prec,ipio2)
|
|
float x[], y[]; int e0,nx,prec; __int32_t ipio2[];
|
|
#endif
|
|
{
|
|
__int32_t jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
|
|
float z,fw,f[20],fq[20],q[20];
|
|
|
|
/* initialize jk*/
|
|
jk = init_jk[prec];
|
|
jp = jk;
|
|
|
|
/* determine jx,jv,q0, note that 3>q0 */
|
|
jx = nx-1;
|
|
jv = (e0-3)/8; if(jv<0) jv=0;
|
|
q0 = e0-8*(jv+1);
|
|
|
|
/* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
|
|
j = jv-jx; m = jx+jk;
|
|
for(i=0;i<=m;i++,j++) f[i] = (j<0)? zero : (float) ipio2[j];
|
|
|
|
/* compute q[0],q[1],...q[jk] */
|
|
for (i=0;i<=jk;i++) {
|
|
for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
|
|
q[i] = fw;
|
|
}
|
|
|
|
jz = jk;
|
|
recompute:
|
|
/* distill q[] into iq[] reversingly */
|
|
for(i=0,j=jz,z=q[jz];j>0;i++,j--) {
|
|
fw = (float)((__int32_t)(twon8* z));
|
|
iq[i] = (__int32_t)(z-two8*fw);
|
|
z = q[j-1]+fw;
|
|
}
|
|
|
|
/* compute n */
|
|
z = scalbnf(z,(int)q0); /* actual value of z */
|
|
z -= (float)8.0*floorf(z*(float)0.125); /* trim off integer >= 8 */
|
|
n = (__int32_t) z;
|
|
z -= (float)n;
|
|
ih = 0;
|
|
if(q0>0) { /* need iq[jz-1] to determine n */
|
|
i = (iq[jz-1]>>(8-q0)); n += i;
|
|
iq[jz-1] -= i<<(8-q0);
|
|
ih = iq[jz-1]>>(7-q0);
|
|
}
|
|
else if(q0==0) ih = iq[jz-1]>>7;
|
|
else if(z>=(float)0.5) ih=2;
|
|
|
|
if(ih>0) { /* q > 0.5 */
|
|
n += 1; carry = 0;
|
|
for(i=0;i<jz ;i++) { /* compute 1-q */
|
|
j = iq[i];
|
|
if(carry==0) {
|
|
if(j!=0) {
|
|
carry = 1; iq[i] = 0x100- j;
|
|
}
|
|
} else iq[i] = 0xff - j;
|
|
}
|
|
if(q0>0) { /* rare case: chance is 1 in 12 */
|
|
switch(q0) {
|
|
case 1:
|
|
iq[jz-1] &= 0x7f; break;
|
|
case 2:
|
|
iq[jz-1] &= 0x3f; break;
|
|
}
|
|
}
|
|
if(ih==2) {
|
|
z = one - z;
|
|
if(carry!=0) z -= scalbnf(one,(int)q0);
|
|
}
|
|
}
|
|
|
|
/* check if recomputation is needed */
|
|
if(z==zero) {
|
|
j = 0;
|
|
for (i=jz-1;i>=jk;i--) j |= iq[i];
|
|
if(j==0) { /* need recomputation */
|
|
for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed */
|
|
|
|
for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */
|
|
f[jx+i] = (float) ipio2[jv+i];
|
|
for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
|
|
q[i] = fw;
|
|
}
|
|
jz += k;
|
|
goto recompute;
|
|
}
|
|
}
|
|
|
|
/* chop off zero terms */
|
|
if(z==(float)0.0) {
|
|
jz -= 1; q0 -= 8;
|
|
while(iq[jz]==0) { jz--; q0-=8;}
|
|
} else { /* break z into 8-bit if necessary */
|
|
z = scalbnf(z,-(int)q0);
|
|
if(z>=two8) {
|
|
fw = (float)((__int32_t)(twon8*z));
|
|
iq[jz] = (__int32_t)(z-two8*fw);
|
|
jz += 1; q0 += 8;
|
|
iq[jz] = (__int32_t) fw;
|
|
} else iq[jz] = (__int32_t) z ;
|
|
}
|
|
|
|
/* convert integer "bit" chunk to floating-point value */
|
|
fw = scalbnf(one,(int)q0);
|
|
for(i=jz;i>=0;i--) {
|
|
q[i] = fw*(float)iq[i]; fw*=twon8;
|
|
}
|
|
|
|
/* compute PIo2[0,...,jp]*q[jz,...,0] */
|
|
for(i=jz;i>=0;i--) {
|
|
for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
|
|
fq[jz-i] = fw;
|
|
}
|
|
|
|
/* compress fq[] into y[] */
|
|
switch(prec) {
|
|
case 0:
|
|
fw = 0.0;
|
|
for (i=jz;i>=0;i--) fw += fq[i];
|
|
y[0] = (ih==0)? fw: -fw;
|
|
break;
|
|
case 1:
|
|
case 2:
|
|
fw = 0.0;
|
|
for (i=jz;i>=0;i--) fw += fq[i];
|
|
y[0] = (ih==0)? fw: -fw;
|
|
fw = fq[0]-fw;
|
|
for (i=1;i<=jz;i++) fw += fq[i];
|
|
y[1] = (ih==0)? fw: -fw;
|
|
break;
|
|
case 3: /* painful */
|
|
for (i=jz;i>0;i--) {
|
|
fw = fq[i-1]+fq[i];
|
|
fq[i] += fq[i-1]-fw;
|
|
fq[i-1] = fw;
|
|
}
|
|
for (i=jz;i>1;i--) {
|
|
fw = fq[i-1]+fq[i];
|
|
fq[i] += fq[i-1]-fw;
|
|
fq[i-1] = fw;
|
|
}
|
|
for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];
|
|
if(ih==0) {
|
|
y[0] = fq[0]; y[1] = fq[1]; y[2] = fw;
|
|
} else {
|
|
y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
|
|
}
|
|
}
|
|
return n&7;
|
|
}
|