Document the log table generation method
Add comments with enough detail so the log lookup tables can be recreated.
This commit is contained in:
parent
7283d2513c
commit
f92a4c5d2d
|
@ -66,6 +66,32 @@ const struct log2_data __log2_data = {
|
||||||
0x1.a6225e117f92ep-3,
|
0x1.a6225e117f92ep-3,
|
||||||
#endif
|
#endif
|
||||||
},
|
},
|
||||||
|
/* Algorithm:
|
||||||
|
|
||||||
|
x = 2^k z
|
||||||
|
log2(x) = k + log2(c) + log2(z/c)
|
||||||
|
log2(z/c) = poly(z/c - 1)
|
||||||
|
|
||||||
|
where z is in [1.6p-1; 1.6p0] which is split into N subintervals and z falls
|
||||||
|
into the ith one, then table entries are computed as
|
||||||
|
|
||||||
|
tab[i].invc = 1/c
|
||||||
|
tab[i].logc = (double)log2(c)
|
||||||
|
tab2[i].chi = (double)c
|
||||||
|
tab2[i].clo = (double)(c - (double)c)
|
||||||
|
|
||||||
|
where c is near the center of the subinterval and is chosen by trying +-2^29
|
||||||
|
floating point invc candidates around 1/center and selecting one for which
|
||||||
|
|
||||||
|
1) the rounding error in 0x1.8p10 + logc is 0,
|
||||||
|
2) the rounding error in z - chi - clo is < 0x1p-64 and
|
||||||
|
3) the rounding error in (double)log2(c) is minimized (< 0x1p-68).
|
||||||
|
|
||||||
|
Note: 1) ensures that k + logc can be computed without rounding error, 2)
|
||||||
|
ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to a
|
||||||
|
single rounding error when there is no fast fma for z*invc - 1, 3) ensures
|
||||||
|
that logc + poly(z/c - 1) has small error, however near x == 1 when
|
||||||
|
|log2(x)| < 0x1p-4, this is not enough so that is special cased. */
|
||||||
.tab = {
|
.tab = {
|
||||||
#if N == 64
|
#if N == 64
|
||||||
{0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1},
|
{0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1},
|
||||||
|
|
|
@ -110,6 +110,32 @@ const struct log_data __log_data = {
|
||||||
0x1.2493c29331a5cp-3,
|
0x1.2493c29331a5cp-3,
|
||||||
#endif
|
#endif
|
||||||
},
|
},
|
||||||
|
/* Algorithm:
|
||||||
|
|
||||||
|
x = 2^k z
|
||||||
|
log(x) = k ln2 + log(c) + log(z/c)
|
||||||
|
log(z/c) = poly(z/c - 1)
|
||||||
|
|
||||||
|
where z is in [1.6p-1; 1.6p0] which is split into N subintervals and z falls
|
||||||
|
into the ith one, then table entries are computed as
|
||||||
|
|
||||||
|
tab[i].invc = 1/c
|
||||||
|
tab[i].logc = (double)log(c)
|
||||||
|
tab2[i].chi = (double)c
|
||||||
|
tab2[i].clo = (double)(c - (double)c)
|
||||||
|
|
||||||
|
where c is near the center of the subinterval and is chosen by trying +-2^29
|
||||||
|
floating point invc candidates around 1/center and selecting one for which
|
||||||
|
|
||||||
|
1) the rounding error in 0x1.8p9 + logc is 0,
|
||||||
|
2) the rounding error in z - chi - clo is < 0x1p-66 and
|
||||||
|
3) the rounding error in (double)log(c) is minimized (< 0x1p-66).
|
||||||
|
|
||||||
|
Note: 1) ensures that k*ln2hi + logc can be computed without rounding error,
|
||||||
|
2) ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to
|
||||||
|
a single rounding error when there is no fast fma for z*invc - 1, 3) ensures
|
||||||
|
that logc + poly(z/c - 1) has small error, however near x == 1 when
|
||||||
|
|log(x)| < 0x1p-4, this is not enough so that is special cased. */
|
||||||
.tab = {
|
.tab = {
|
||||||
#if N == 64
|
#if N == 64
|
||||||
{0x1.7242886495cd8p+0, -0x1.79e267bdfe000p-2},
|
{0x1.7242886495cd8p+0, -0x1.79e267bdfe000p-2},
|
||||||
|
|
|
@ -50,6 +50,28 @@ const struct pow_log_data __pow_log_data = {
|
||||||
-0x1.0002b8b263fc3p-3 * -8,
|
-0x1.0002b8b263fc3p-3 * -8,
|
||||||
#endif
|
#endif
|
||||||
},
|
},
|
||||||
|
/* Algorithm:
|
||||||
|
|
||||||
|
x = 2^k z
|
||||||
|
log(x) = k ln2 + log(c) + log(z/c)
|
||||||
|
log(z/c) = poly(z/c - 1)
|
||||||
|
|
||||||
|
where z is in [0x1.69555p-1; 0x1.69555p0] which is split into N subintervals
|
||||||
|
and z falls into the ith one, then table entries are computed as
|
||||||
|
|
||||||
|
tab[i].invc = 1/c
|
||||||
|
tab[i].logc = round(0x1p43*log(c))/0x1p43
|
||||||
|
tab[i].logctail = (double)(log(c) - logc)
|
||||||
|
|
||||||
|
where c is chosen near the center of the subinterval such that 1/c has only a
|
||||||
|
few precision bits so z/c - 1 is exactly representible as double:
|
||||||
|
|
||||||
|
1/c = center < 1 ? round(N/center)/N : round(2*N/center)/N/2
|
||||||
|
|
||||||
|
Note: |z/c - 1| < 1/N for the chosen c, |log(c) - logc - logctail| < 0x1p-97,
|
||||||
|
the last few bits of logc are rounded away so k*ln2hi + logc has no rounding
|
||||||
|
error and the interval for z is selected such that near x == 1, where log(x)
|
||||||
|
is tiny, large cancellation error is avoided in logc + poly(z/c - 1). */
|
||||||
.tab = {
|
.tab = {
|
||||||
#if N == 128
|
#if N == 128
|
||||||
#define A(a,b,c) {a,0,b,c},
|
#define A(a,b,c) {a,0,b,c},
|
||||||
|
|
Loading…
Reference in New Issue