Support SSE float environment in fenv.h functions.

* cpu_features.c: New file.
	* cpu_features.h: New file.
	* crt1.c: Include "cpu_features.h".
	(__mingw_CRTStartup): Call __cpu_features_init ().
	* Makefile.in (MINGW_OBJS): Add cpu_features.o.
	(SRCDIST_FILES): Add cpu_features.c, cpu_features.h.
	* include/fenv.h (fenv_t): Append __mxcsr field.
	(__MXCSR_EXCEPT_FLAG_SHIFT): New define.
	(__MXCSR_EXCEPT_MASK_SHIFT): New define.
	(__MXCSR_ROUND_FLAG_SHIFT): New define.
	* mingwex/feclearexcept.c: Include "cpu_features.h".
	Handle SSE environment.
	* mingwex/fegetenv.c: Likewise.
	* mingwex/feholdexcept.c: Likewise.
	* mingwex/fesetenv.c: Likewise.
	* mingwex/fesetexceptflag.c: Likewise.
	* mingwex/fesetround.c: Likewise.
	* mingwex/fetestexcept.c: Likewise.
	* mingwex/feupdateenv.c: Likewise.
	* mingwex/fegetround.c: Add comment.
This commit is contained in:
Danny Smith 2006-07-03 10:32:58 +00:00
parent 69d5f3329f
commit f34428eb35
15 changed files with 252 additions and 17 deletions

View File

@ -1,3 +1,6 @@
2006-07-03 Danny Smith <dannysmith@users.sourceforge.net>
2006-06-25 Chris Sutcliffe <ir0nh34d@users.sourceforge.net> 2006-06-25 Chris Sutcliffe <ir0nh34d@users.sourceforge.net>
* Include/_mingw.h: Increment version to 3.10. * Include/_mingw.h: Increment version to 3.10.

View File

@ -156,7 +156,7 @@ CRT0S = crt1.o dllcrt1.o crt2.o dllcrt2.o CRT_noglob.o crtmt.o crtst.o \
CRT_fp8.o CRT_fp10.o txtmode.o binmode.o CRT_fp8.o CRT_fp10.o txtmode.o binmode.o
MINGW_OBJS = CRTglob.o CRTfmode.o CRTinit.o dllmain.o gccmain.o \ MINGW_OBJS = CRTglob.o CRTfmode.o CRTinit.o dllmain.o gccmain.o \
main.o crtst.o mthr_stub.o CRT_fp10.o txtmode.o \ main.o crtst.o mthr_stub.o CRT_fp10.o txtmode.o \
pseudo-reloc.o pseudo-reloc-list.o pseudo-reloc.o pseudo-reloc-list.o cpu_features.o
MOLD_OBJS = isascii.o iscsym.o iscsymf.o toascii.o \ MOLD_OBJS = isascii.o iscsym.o iscsymf.o toascii.o \
strcasecmp.o strncasecmp.o wcscmpi.o strcasecmp.o strncasecmp.o wcscmpi.o
@ -187,7 +187,7 @@ mthr.c mthr_init.c mthr_stub.c readme.txt \
isascii.c iscsym.c iscsymf.c toascii.c \ isascii.c iscsym.c iscsymf.c toascii.c \
strcasecmp.c strncasecmp.c wcscmpi.c \ strcasecmp.c strncasecmp.c wcscmpi.c \
CRT_fp8.c CRT_fp10.c test_headers.c txtmode.c binmode.c pseudo-reloc.c \ CRT_fp8.c CRT_fp10.c test_headers.c txtmode.c binmode.c pseudo-reloc.c \
pseudo-reloc-list.c \ pseudo-reloc-list.c cpu_features.c cpu_features.h\
DISCLAIMER CONTRIBUTORS DISCLAIMER CONTRIBUTORS

105
winsup/mingw/cpu_features.c Executable file
View File

@ -0,0 +1,105 @@
#include <stdbool.h>
#include "cpu_features.h"
/* Bit masks for the registers returned by CPUID.  All masks are
   unsigned so that the bit-31 mask is a well-defined positive constant
   (1 << 31 would shift into the sign bit of a signed int).  */

/* level 1 edx bits */
#define EDX_CX8    (1u << 8)   /* CMPXCHG8B */
#define EDX_CMOV   (1u << 15)
#define EDX_MMX    (1u << 23)
#define EDX_FXSR   (1u << 24)  /* FXSAVE and FXRSTOR */
#define EDX_SSE    (1u << 25)
#define EDX_SSE2   (1u << 26)
/* level 1 ecx bits */
#define ECX_SSE3   (1u << 0)
#define ECX_CX16   (1u << 13)  /* CMPXCHG16B */
/* extended level 0x80000001 edx bits */
#define EDX_3DNOW  (1u << 31)
#define EDX_3DNOWP (1u << 30)
#define EDX_LM     (1u << 29)  /* LONG MODE */
/* Execute the CPUID instruction for leaf LEVEL (passed in eax via the
   "0" matching constraint) and store the resulting eax, ebx, ecx and
   edx register values in the lvalues A, B, C and D respectively.  */
#define __cpuid(level,a,b,c,d) \
__asm__ __volatile__ ("cpuid;" \
: "=a" (a), "=b" (b), "=c" (c), "=d" (d)\
: "0" (level))
/* Combine the different cpuid flags into a single bitmap of _CRT_*
   bits.  Starts out as zero and is only filled in by
   __cpu_features_init (); it stays zero when CPUID is unavailable.  */
unsigned int __cpu_features = 0;
/* Probe the processor with CPUID and record every recognised
   instruction-set extension as a _CRT_* bit in __cpu_features.

   The function returns early, keeping whatever bits have been recorded
   so far, when
     - the CPUID instruction itself is not available,
     - CPUID reports no basic leaf beyond leaf 0, or
     - the extended leaf 0x80000001 is not available.  */
void __cpu_features_init (void)
{
  unsigned int eax, ebx, ecx, edx;

  /* Try to change the value of the CPUID bit (bit 21) in EFLAGS.
     If the bit can be toggled, CPUID is supported.
     The sequence saves EFLAGS on the stack, copies it to %1, writes
     back a copy with bit 21 flipped, reads the resulting EFLAGS into
     %0 and finally restores the saved EFLAGS.  */
  __asm__ __volatile__ ("pushfl; pushfl; popl %0;"
			"movl %0,%1; xorl %2,%0;"
			"pushl %0; popfl; pushfl; popl %0; popfl"
			: "=&r" (eax), "=&r" (ebx)
			: "i" (0x00200000));

  if (((eax ^ ebx) & 0x00200000) == 0)
    return;

  /* Leaf 0: eax holds the highest basic leaf; 0 means leaf 1 (the
     feature flags) cannot be queried.  */
  __cpuid (0, eax, ebx, ecx, edx);
  if (eax == 0)
    return;

  /* Leaf 1: standard feature flags in edx and ecx.  */
  __cpuid (1, eax, ebx, ecx, edx);

  if (edx & EDX_CX8)
    __cpu_features |= _CRT_CMPXCHG8B;
  if (edx & EDX_CMOV)
    __cpu_features |= _CRT_CMOV;
  if (edx & EDX_MMX)
    __cpu_features |= _CRT_MMX;
  if (edx & EDX_FXSR)
    __cpu_features |= _CRT_FXSR;
  if (edx & EDX_SSE)
    __cpu_features |= _CRT_SSE;
  if (edx & EDX_SSE2)
    __cpu_features |= _CRT_SSE2;

  if (ecx & ECX_SSE3)
    __cpu_features |= _CRT_SSE3;
  if (ecx & ECX_CX16)
    __cpu_features |= _CRT_CMPXCHG16B;

  /* Leaf 0x80000000: eax holds the highest extended leaf.  */
  __cpuid (0x80000000, eax, ebx, ecx, edx);
  if (eax < 0x80000001)
    return;

  /* Leaf 0x80000001: extended feature flags in edx.  */
  __cpuid (0x80000001, eax, ebx, ecx, edx);

  /* No semicolon after the condition: a stray one here would set
     _CRT_3DNOW regardless of what the CPU reports.  */
  if (edx & EDX_3DNOW)
    __cpu_features |= _CRT_3DNOW;
  if (edx & EDX_3DNOWP)
    __cpu_features |= _CRT_3DNOWP;
}
#ifdef TEST
/* Stand-alone self test: build with -DTEST to print every feature bit
   that __cpu_features_init () detected on the current CPU.  */
#include <stdio.h>

/* Print "<feature> found" when FEATURE is set in __cpu_features.
   Wrapped in do/while (0) so the macro behaves as one statement and
   cannot capture a following `else'.  */
#define report(feature) \
  do \
    { \
      if ((feature) & __cpu_features) \
	printf (#feature " found\n"); \
    } \
  while (0)

int main (void)
{
  __cpu_features_init ();

  report (_CRT_CMPXCHG8B);
  report (_CRT_CMOV);
  report (_CRT_MMX);
  report (_CRT_FXSR);
  report (_CRT_SSE);
  report (_CRT_SSE2);
  report (_CRT_SSE3);
  report (_CRT_CMPXCHG16B);
  report (_CRT_3DNOW);
  report (_CRT_3DNOWP);

  return 0;
}
#endif

23
winsup/mingw/cpu_features.h Executable file
View File

@ -0,0 +1,23 @@
#ifndef _CPU_FEATURES_H
#define _CPU_FEATURES_H

#include <stdbool.h>

/* Feature bits that may be set in __cpu_features.  */
#define _CRT_CMPXCHG8B  0x0001
#define _CRT_CMOV       0x0002
#define _CRT_MMX        0x0004
#define _CRT_FXSR       0x0008
#define _CRT_SSE        0x0010
#define _CRT_SSE2       0x0020
#define _CRT_SSE3       0x0040
#define _CRT_CMPXCHG16B 0x0080
#define _CRT_3DNOW      0x0100
#define _CRT_3DNOWP     0x0200

/* Bitmap of the _CRT_* bits above; zero until __cpu_features_init ()
   has filled it in.  */
extern unsigned int __cpu_features;

/* Detect the CPU features and record them in __cpu_features.  Called
   once from the CRT startup code before the FPU is initialised.  */
void __cpu_features_init (void);

/* Currently we use this in fpenv functions.  Non-zero when the CPU
   supports SSE.  The expansion is parenthesised so that it can be
   combined with other operators (e.g. !__HAS_SSE) safely.  */
#define __HAS_SSE (__cpu_features & _CRT_SSE)

#endif

View File

@ -27,6 +27,7 @@
* be manually synchronized, but it does lead to this not-generally- * be manually synchronized, but it does lead to this not-generally-
* a-good-idea use of include. */ * a-good-idea use of include. */
#include "init.c" #include "init.c"
#include "cpu_features.h"
extern void _pei386_runtime_relocator (void); extern void _pei386_runtime_relocator (void);
@ -195,6 +196,7 @@ __mingw_CRTStartup (void)
/* /*
* Initialize floating point unit. * Initialize floating point unit.
*/ */
__cpu_features_init (); /* Do we have SSE, etc.*/
_fpreset (); /* Supplied by the runtime library. */ _fpreset (); /* Supplied by the runtime library. */
/* /*

View File

@ -1,7 +1,6 @@
#ifndef _FENV_H_ #ifndef _FENV_H_
#define _FENV_H_ #define _FENV_H_
/* FPU status word exception flags */ /* FPU status word exception flags */
#define FE_INVALID 0x01 #define FE_INVALID 0x01
#define FE_DENORMAL 0x02 #define FE_DENORMAL 0x02
@ -18,6 +17,18 @@
#define FE_UPWARD 0x0800 #define FE_UPWARD 0x0800
#define FE_TOWARDZERO 0x0c00 #define FE_TOWARDZERO 0x0c00
/* The MXCSR exception flags are the same as the
FE flags, i.e. they sit at the same bit positions, so no shift
is needed.  */
#define __MXCSR_EXCEPT_FLAG_SHIFT 0
/* How much to shift FE status word exception flags
to get the MXCSR exception masks
(FE_ALL_EXCEPT << 7 gives the mask field 0x1f80).  */
#define __MXCSR_EXCEPT_MASK_SHIFT 7
/* How much to shift FE control word rounding flags
to get MXCSR rounding flags
(the 0x0c00 rounding field << 3 gives 0x6000).  */
#define __MXCSR_ROUND_FLAG_SHIFT 3
#ifndef RC_INVOKED #ifndef RC_INVOKED
/* /*
For now, support only for the basic abstraction of flags that are For now, support only for the basic abstraction of flags that are
@ -26,8 +37,10 @@
*/ */
typedef unsigned short fexcept_t; typedef unsigned short fexcept_t;
/* This 28-byte struct represents the entire floating point /* This 32-byte struct represents the entire floating point
environment as stored by fnstenv or fstenv */ environment as stored by fnstenv or fstenv, augmented by
the contents of the MXCSR register, as stored by stmxcsr
(if CPU supports it). */
typedef struct typedef struct
{ {
unsigned short __control_word; unsigned short __control_word;
@ -41,7 +54,8 @@ typedef struct
unsigned short __opcode; unsigned short __opcode;
unsigned int __data_offset; unsigned int __data_offset;
unsigned short __data_selector; unsigned short __data_selector;
unsigned short __unused3; unsigned short __unused3;
unsigned int __mxcsr; /* contents of the MXCSR register */
} fenv_t; } fenv_t;

View File

@ -1,4 +1,5 @@
#include <fenv.h> #include <fenv.h>
#include "cpu_features.h"
/* 7.6.2.1 /* 7.6.2.1
The feclearexcept function clears the supported exceptions The feclearexcept function clears the supported exceptions
@ -7,9 +8,17 @@
int feclearexcept (int excepts) int feclearexcept (int excepts)
{ {
fenv_t _env; fenv_t _env;
excepts &= FE_ALL_EXCEPT;
__asm__ volatile ("fnstenv %0;" : "=m" (_env)); /* get the env */ __asm__ volatile ("fnstenv %0;" : "=m" (_env)); /* get the env */
_env.__status_word &= ~(excepts & FE_ALL_EXCEPT); /* clear the except */ _env.__status_word &= ~excepts; /* clear the except */
__asm__ volatile ("fldenv %0;" :: "m" (_env)); /*set the env */ __asm__ volatile ("fldenv %0;" :: "m" (_env)); /*set the env */
if (__HAS_SSE)
{
unsigned _csr;
__asm__ volatile("stmxcsr %0" : "=m" (_csr)); /* get the register */
_csr &= ~excepts; /* clear the except */
__asm__ volatile("ldmxcsr %0" : : "m" (_csr)); /* set the register */
}
return 0; return 0;
} }

View File

@ -1,4 +1,5 @@
#include <fenv.h> #include <fenv.h>
#include "cpu_features.h"
/* 7.6.4.1 /* 7.6.4.1
The fegetenv function stores the current floating-point environment The fegetenv function stores the current floating-point environment
@ -10,5 +11,10 @@ int fegetenv (fenv_t * envp)
/* fnstenv sets control word to non-stop for all exceptions, so we /* fnstenv sets control word to non-stop for all exceptions, so we
need to reload our env to restore the original mask. */ need to reload our env to restore the original mask. */
__asm__ ("fldenv %0" : : "m" (*envp)); __asm__ ("fldenv %0" : : "m" (*envp));
/* And the SSE environment. */
if (__HAS_SSE)
__asm__ ("stmxcsr %0" : "=m" (envp->__mxcsr));
return 0; return 0;
} }

View File

@ -1,4 +1,5 @@
#include <fenv.h> #include <fenv.h>
#include "cpu_features.h"
/* 7.6.3.1 /* 7.6.3.1
The fegetround function returns the value of the rounding direction The fegetround function returns the value of the rounding direction
@ -9,6 +10,10 @@ fegetround (void)
{ {
unsigned short _cw; unsigned short _cw;
__asm__ ("fnstcw %0;" : "=m" (_cw)); __asm__ ("fnstcw %0;" : "=m" (_cw));
/* If the MXCSR flag is different, there is no way to indicate, so just
report the FPU flag. */
return _cw return _cw
& (FE_TONEAREST | FE_DOWNWARD | FE_UPWARD | FE_TOWARDZERO); & (FE_TONEAREST | FE_DOWNWARD | FE_UPWARD | FE_TOWARDZERO);
} }

View File

@ -1,4 +1,5 @@
#include <fenv.h> #include <fenv.h>
#include "cpu_features.h"
/* 7.6.4.2 /* 7.6.4.2
The feholdexcept function saves the current floating-point The feholdexcept function saves the current floating-point
@ -12,5 +13,18 @@ int feholdexcept (fenv_t * envp)
/* fnstenv sets control word to non-stop for all exceptions, so all we /* fnstenv sets control word to non-stop for all exceptions, so all we
need to do is clear the exception flags. */ need to do is clear the exception flags. */
__asm__ ("fnclex"); __asm__ ("fnclex");
if (__HAS_SSE)
{
unsigned int _csr;
/* Save the SSE MXCSR register. */
__asm__ ("stmxcsr %0" : "=m" (envp->__mxcsr));
/* Clear the exception flags. */
_csr = envp->__mxcsr & ~FE_ALL_EXCEPT;
/* Set exception mask to non-stop */
_csr |= (FE_ALL_EXCEPT << __MXCSR_EXCEPT_MASK_SHIFT) /*= 0x1f80 */;
__asm__ volatile ("ldmxcsr %0" : : "m" (_csr));
}
return 0; return 0;
} }

View File

@ -1,5 +1,6 @@
#include <fenv.h> #include <fenv.h>
#include <float.h> #include <float.h>
#include "cpu_features.h"
/* 7.6.4.3 /* 7.6.4.3
The fesetenv function establishes the floating-point environment The fesetenv function establishes the floating-point environment
@ -15,6 +16,11 @@ extern void (*_imp___fpreset)( void ) ;
int fesetenv (const fenv_t * envp) int fesetenv (const fenv_t * envp)
{ {
/* Default mxcsr status is to mask all exceptions. All other bits
are zero. */
unsigned int _csr = FE_ALL_EXCEPT << __MXCSR_EXCEPT_MASK_SHIFT /*= 0x1f80 */;
if (envp == FE_PC64_ENV) if (envp == FE_PC64_ENV)
/* /*
* fninit initializes the control register to 0x37f, * fninit initializes the control register to 0x37f,
@ -37,7 +43,15 @@ int fesetenv (const fenv_t * envp)
_fpreset(); _fpreset();
else else
__asm__ ("fldenv %0;" : : "m" (*envp)); {
__asm__ ("fldenv %0;" : : "m" (*envp));
/* Setting the reserved high order bits of MXCSR causes a segfault */
_csr = envp ->__mxcsr & 0xffff;
}
/* Set MXCSR */
if (__HAS_SSE)
__asm__ volatile ("ldmxcsr %0" : : "m" (_csr));
return 0; return 0;
} }

View File

@ -1,4 +1,5 @@
#include <fenv.h> #include <fenv.h>
#include "cpu_features.h"
/* 7.6.2.4 /* 7.6.2.4
The fesetexceptflag function sets the complete status for those The fesetexceptflag function sets the complete status for those
@ -18,5 +19,15 @@ int fesetexceptflag (const fexcept_t * flagp, int excepts)
_env.__status_word &= ~excepts; _env.__status_word &= ~excepts;
_env.__status_word |= (*flagp & excepts); _env.__status_word |= (*flagp & excepts);
__asm__ volatile ("fldenv %0;" : : "m" (_env)); __asm__ volatile ("fldenv %0;" : : "m" (_env));
if (__HAS_SSE)
{
unsigned int _csr;
__asm__ __volatile__("stmxcsr %0" : "=m" (_csr));
_csr &= ~excepts;
_csr |= *flagp & excepts;
__asm__ volatile ("ldmxcsr %0" : : "m" (_csr));
}
return 0; return 0;
} }

View File

@ -1,4 +1,6 @@
#include <fenv.h> #include <fenv.h>
#include "cpu_features.h"
/* 7.6.3.2 /* 7.6.3.2
The fesetround function establishes the rounding direction The fesetround function establishes the rounding direction
represented by its argument round. If the argument is not equal represented by its argument round. If the argument is not equal
@ -15,5 +17,14 @@ int fesetround (int mode)
_cw &= ~(FE_TONEAREST | FE_DOWNWARD | FE_UPWARD | FE_TOWARDZERO); _cw &= ~(FE_TONEAREST | FE_DOWNWARD | FE_UPWARD | FE_TOWARDZERO);
_cw |= mode; _cw |= mode;
__asm__ volatile ("fldcw %0;" : : "m" (_cw)); __asm__ volatile ("fldcw %0;" : : "m" (_cw));
if (__HAS_SSE)
  {
    /* Mirror the rounding mode into the SSE control/status register.
       stmxcsr and ldmxcsr transfer a full 32-bit value, so use a
       dedicated unsigned int instead of reusing _cw, which holds the
       x87 control word loaded by fldcw above.
       NOTE(review): _cw is declared outside this hunk and is presumably
       16 bits wide, in which case reusing it would overrun the variable
       and could load garbage into the reserved upper MXCSR bits.  */
    unsigned int _mxcsr;
    __asm__ volatile ("stmxcsr %0" : "=m" (_mxcsr));
    _mxcsr &= ~0x6000u;  /* clear the MXCSR rounding-control field */
    _mxcsr |= ((unsigned int) mode << __MXCSR_ROUND_FLAG_SHIFT);
    __asm__ volatile ("ldmxcsr %0" : : "m" (_mxcsr));
  }
return 0; return 0;
} }

View File

@ -1,4 +1,5 @@
#include <fenv.h> #include <fenv.h>
#include "cpu_features.h"
/* 7.6.2.5 /* 7.6.2.5
The fetestexcept function determines which of a specified subset of The fetestexcept function determines which of a specified subset of
the exception flags are currently set. The excepts argument the exception flags are currently set. The excepts argument
@ -9,7 +10,18 @@
int fetestexcept (int excepts) int fetestexcept (int excepts)
{ {
unsigned short _sw;
__asm__ ("fnstsw %%ax" : "=a" (_sw)); unsigned int _res;
return _sw & excepts & FE_ALL_EXCEPT; __asm__ ("fnstsw %%ax" : "=a" (_res));
/* If SSE supported, return the union of the FPU and SSE flags. */
if (__HAS_SSE)
{
unsigned int _csr;
__asm__ volatile("stmxcsr %0" : "=m" (_csr));
_res |= _csr;
}
return (_res & excepts & FE_ALL_EXCEPT);
} }

View File

@ -1,4 +1,5 @@
#include <fenv.h> #include <fenv.h>
#include "cpu_features.h"
/* 7.6.4.4 /* 7.6.4.4
The feupdateenv function saves the currently raised exceptions in The feupdateenv function saves the currently raised exceptions in
@ -8,13 +9,18 @@
set by a call to feholdexcept or fegetenv, or equal the macro set by a call to feholdexcept or fegetenv, or equal the macro
FE_DFL_ENV or an implementation-defined environment macro. */ FE_DFL_ENV or an implementation-defined environment macro. */
/* FIXME: this works but surely there must be a better way. */
int feupdateenv (const fenv_t * envp) int feupdateenv (const fenv_t * envp)
{ {
unsigned int _fexcept = fetestexcept (FE_ALL_EXCEPT); /*save excepts */ unsigned int _fexcept;
__asm__ ("fnstsw %%ax" : "=a" (_fexcept)); /*save excepts */
if (__HAS_SSE)
{
unsigned int _csr;
__asm__ ("stmxcsr %0" : "=m" (_csr));
_fexcept |= _csr;
}
fesetenv (envp); /* install the env */ fesetenv (envp); /* install the env */
feraiseexcept (_fexcept); /* raise the execept */ feraiseexcept (_fexcept & FE_ALL_EXCEPT); /* raise the execeptions */
return 0; return 0;
} }