121 lines
2.8 KiB
ArmAsm
121 lines
2.8 KiB
ArmAsm
|
/**
|
||
|
* This file has no copyright assigned and is placed in the Public Domain.
|
||
|
* This file is part of the mingw-w64 runtime package.
|
||
|
* No warranty is given; refer to the file DISCLAIMER.PD within this package.
|
||
|
*/
|
||
|
#include <_mingw_mac.h>
|
||
|
|
||
|
/*
 * float ceilf(float x)
 *
 * Symbol set-up shared by every architecture variant in this file:
 * the function is placed in .text, exported, and described to the
 * COFF symbol table before the entry label is emitted.
 */
	.file	"ceilf.S"
	.text
	.align 4
	.globl __MINGW_USYMBOL(ceilf)
	/* COFF symbol record: storage class 2 (external), type 32 (function). */
	.def	__MINGW_USYMBOL(ceilf);	.scl	2;	.type	32;	.endef
/*
 * Open the SEH procedure under exactly the same condition that selects
 * the x86-64 body, because that body issues .seh_stackalloc,
 * .seh_endprologue and .seh_endproc and they must pair with .seh_proc.
 */
#if defined(_AMD64_) || defined(__x86_64__)
	.seh_proc	__MINGW_USYMBOL(ceilf)
#endif

__MINGW_USYMBOL(ceilf):
|
||
|
#if defined(_AMD64_) || defined(__x86_64__)
	/*
	 * x86-64 variant: the result is built by editing the single
	 * precision bit pattern of x with integer instructions.
	 *
	 * In:   %xmm0 = x
	 * Out:  %xmm0 = result
	 * Uses: %eax  raw bits of x
	 *       %ecx  exponent field minus 127
	 *       %edx  working copy of the bits
	 *       %r8d  mask of the bits below the units place
	 *       24 bytes of stack scratch, flags
	 */
	subq	$24, %rsp
	.seh_stackalloc 24
	.seh_endprologue
	movd	%xmm0, 12(%rsp)
	movl	12(%rsp), %eax		/* eax = raw bits of x */
	movl	%eax, %ecx
	movl	%eax, %edx
	sarl	$23, %ecx
	andl	$255, %ecx		/* isolate the 8-bit exponent field */
	subl	$127, %ecx		/* ecx = unbiased exponent */
	cmpl	$22, %ecx
	jg	.Lceilf_big_exp		/* exponent > 22: no fraction bits left */
	testl	%ecx, %ecx
	js	.Lceilf_below_one	/* exponent < 0: |x| < 1 */

	/* 0 <= exponent <= 22: x may have both integer and fraction bits. */
	movl	$8388607, %r8d		/* 0x007fffff, the mantissa field */
	sarl	%cl, %r8d		/* r8d = mask of the fraction bits of x */
	testl	%eax, %r8d
	je	.Lceilf_ret		/* no fraction bits set: return x untouched */
	addss	.Lceilf_huge(%rip), %xmm0
	ucomiss	.Lceilf_zero(%rip), %xmm0
	jbe	.Lceilf_store		/* sum not above zero (or unordered): keep bits */
	testl	%eax, %eax
	jle	.Lceilf_mask		/* sign bit set: clearing the fraction is enough */
	movl	$8388608, %eax		/* 0x00800000 */
	sarl	%cl, %eax		/* eax = 1.0 expressed at x's exponent */
	addl	%eax, %edx		/* positive x: step up by one first */
.Lceilf_mask:
	movl	%r8d, %eax
	notl	%eax
	andl	%edx, %eax		/* eax = edx with the fraction bits cleared */
.Lceilf_store:
	movl	%eax, 8(%rsp)
	movss	8(%rsp), %xmm0		/* xmm0 = float whose bits are eax */
.Lceilf_ret:
	addq	$24, %rsp
	ret

	.p2align 4,,10
.Lceilf_big_exp:
	/* exponent > 22: return x itself, unless the exponent field was 255. */
	cmpl	$128, %ecx		/* 128 = 255 - 127 */
	jne	.Lceilf_ret
	addss	%xmm0, %xmm0		/* exponent field all ones: return x + x */
	addq	$24, %rsp
	ret

	.p2align 4,,10
.Lceilf_below_one:
	/* |x| < 1: the result bits are 0x80000000, 0x3f800000 or x's own. */
	addss	.Lceilf_huge(%rip), %xmm0
	ucomiss	.Lceilf_zero(%rip), %xmm0
	jbe	.Lceilf_store_small	/* sum not above zero (or unordered): keep bits */
	testl	%eax, %eax
	js	.Lceilf_neg_zero	/* sign bit set: result is -0.0 */
	movl	$1065353216, %edx	/* 0x3f800000 = 1.0f; movl leaves flags alone */
	cmovne	%edx, %eax		/* bits non-zero (flags from testl): result is 1.0 */
.Lceilf_store_small:
	movl	%eax, 8(%rsp)
	movss	8(%rsp), %xmm0
	addq	$24, %rsp
	ret

	.p2align 4,,10
.Lceilf_neg_zero:
	movl	$-2147483648, 8(%rsp)	/* 0x80000000 = -0.0f */
	movss	8(%rsp), %xmm0
	addq	$24, %rsp
	ret
	.seh_endproc

	/* Read-only float constants used by the comparisons above. */
	.section .rdata,"dr"
	.align 4
.Lceilf_huge:
	.long	1900671690		/* 0x7149f2ca, about 1.0e30 as a float */
	.align 4
.Lceilf_zero:
	.long	0			/* 0.0f */
|
||
|
#elif defined(_ARM_) || defined(__arm__)
	/*
	 * ARM (VFP) variant: switch FPSCR to round-towards-plus-infinity,
	 * convert s0 to a signed 32-bit integer and back, then restore the
	 * caller's FPSCR.
	 *
	 * In:   s0 = x
	 * Out:  s0 = result
	 * Uses: r0 (modified FPSCR), r1 (saved FPSCR)
	 *
	 * NOTE(review): the value round-trips through a signed 32-bit
	 * integer, so inputs whose ceiling does not fit in that range (and
	 * NaN) do not come back unchanged.
	 */
	vmrs	r1, fpscr		/* r1 = caller's FPSCR, restored before return */
	bic	r0, r1, #0x00c00000	/* clear the rounding-mode field */
	orr	r0, r0, #0x00400000	/* Round towards Plus Infinity */
	vmsr	fpscr, r0
	/*
	 * VCVTR uses the rounding mode selected in FPSCR.  Plain VCVT to
	 * integer always rounds towards zero, which would ignore the mode
	 * set above and turn this routine into a truncation.
	 */
	vcvtr.s32.f32	s0, s0
	vcvt.f32.s32	s0, s0
	vmsr	fpscr, r1
	bx	lr
|
||
|
#elif defined(_X86_) || defined(__i386__)
	/*
	 * i386 (x87) variant: round st(0) with frndint while the FPU control
	 * word temporarily selects rounding towards +infinity.
	 *
	 * In:   4(%esp) on entry = x
	 * Out:  st(0) = result
	 * Uses: %edx, flags, 8 bytes of stack scratch:
	 *       0(%esp) = modified control word, 4(%esp) = saved control word
	 */
	subl	$8, %esp
	flds	12(%esp)		/* x: was 4(%esp) before the 8-byte frame */

	fstcw	4(%esp)			/* store fpu control word */

	/* %edx is used here although only the low 16 bits are defined
	   (fstcw stores a 16-bit word).  None of the operations care,
	   and they are faster than the 16 bit operations.  */
	movl	4(%esp), %edx
	orl	$0x0800, %edx		/* round towards +oo */
	andl	$0xfbff, %edx		/* clear bit 10 and everything above bit 15 */
	movl	%edx, (%esp)
	fldcw	(%esp)			/* load modified control word */

	frndint				/* round */

	fldcw	4(%esp)			/* restore original control word */

	addl	$8, %esp
	ret
#endif
|