From 2805b07fa1f54ef001c2ce78618e8efc87155232 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Tue, 3 Jul 2018 13:05:31 +0100
Subject: [PATCH] Fix large ulp error in pow without fma very near 1.0

The !HAVE_FAST_FMA code path split r = z/c - 1 into r = rhi + rlo such
that when z = 1-tiny and c = 1 then rlo and rhi could have much larger
magnitude than r which later caused large rounding errors.

So do a nearest rounding instead of truncation at the split.

In newlib with default settings this was observable on some arm targets
that enable the new math code but has no fma.
---
 newlib/libm/common/pow.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/newlib/libm/common/pow.c b/newlib/libm/common/pow.c
index 11964e343..a5504e5e9 100644
--- a/newlib/libm/common/pow.c
+++ b/newlib/libm/common/pow.c
@@ -79,11 +79,13 @@ log_inline (uint64_t ix, double_t *tail)
   logc = T[i].logc;
   logctail = T[i].logctail;
 
-  /* r = z/c - 1, arranged to be exact.  */
+  /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
+     |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible.  */
 #if HAVE_FAST_FMA
   r = fma (z, invc, -1.0);
 #else
-  double_t zhi = asdouble (iz & (-1ULL << 32));
+  /* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|.  */
+  double_t zhi = asdouble ((iz + (1ULL << 31)) & (-1ULL << 32));
   double_t zlo = z - zhi;
   double_t rhi = zhi * invc - 1.0;
   double_t rlo = zlo * invc;