4
0
mirror of git://sourceware.org/git/newlib-cygwin.git synced 2025-01-25 08:37:33 +08:00

101 lines
2.4 KiB
ArmAsm
Raw Normal View History

/* a-memset.s -- memset, optimised for fido asm
*
* Copyright (c) 2007 mocom software GmbH & Co KG)
*
* The authors hereby grant permission to use, copy, modify, distribute,
* and license this software and its documentation for any purpose, provided
* that existing copyright notices are retained in all copies and that this
* notice is included verbatim in any distributions. No written agreement,
* license, or royalty fee is required for any of the authorized uses.
* Modifications to this software may be copyrighted by their authors
* and need not follow the licensing terms described here, provided that
* the new terms are clearly indicated on the first page of each file where
* they apply.
*/
#include "m68kasm.h"
| Section, alignment and symbol declarations for memset.
.text
.align 4
| SYM() is not defined in this file; presumably it comes from m68kasm.h
| and applies the target symbol prefix - TODO confirm.
.globl SYM(memset)
.type SYM(memset), @function
| memset, optimised
|
| strategy:
| - no argument testing (the original memcpy from the GNU lib does
| no checking either)
| - make sure the destination pointer (the write pointer) is long word
| aligned. This is the best you can do, because writing to unaligned
| addresses can be the most costly thing one could do.
| - we fill long word wise if possible
|
| VG, 2006
|
| bugfixes:
| - distribution of byte value improved - in case someone passes a
| non-byte value
| - residue byte transfer was not working
|
| VG, April 2007
|
| void *memset (void *dest, int value, size_t len)
|
| The three arguments are read from the stack at 4(sp), 8(sp) and
| 12(sp); the original dest pointer is handed back in d0.
|
| Register roles:
|   a0  write pointer
|   d0  fill value (low byte replicated into all four bytes on the
|       long word path)
|   d1  bytes still to be written
|   d2  scratch (saved on the stack and restored on the long word path)
|
SYM(memset):
	move.l	4(sp),a0		| a0 = dest
	move.l	8(sp),d0		| d0 = value
	move.l	12(sp),d1		| d1 = len
	cmp.l	#16,d1
	blo	.Lbset			| len < 16 (unsigned): plain byte loop

	| Long word path: d2 is needed as scratch, so preserve it.
	move.l	d2,-(sp)

	| Replicate the low byte of d0 into all four bytes.
	move.b	d0,d2			| d2.b = fill byte
	lsl.l	#8,d0
	move.b	d2,d0			| d0.w = fill byte twice
	move.w	d0,d2
	swap	d0			| move that word to the upper half
	move.w	d2,d0			| d0.l = fill byte four times

	| d2 = (-dest) & 3 = bytes missing to the next long word boundary
	move.l	a0,d2
	neg.l	d2
	and.l	#3,d2
	beq	.Laligned		| already long word aligned

	sub.l	d2,d1			| these bytes come off the length
	lsr.l	#1,d2			| bit 0 set: one byte is needed
	bcc	.Lalign_word
	move.b	d0,(a0)+
.Lalign_word:
	lsr.l	#1,d2			| bit 1 set: one word is needed
	bcc	.Laligned
	move.w	d0,(a0)+

.Laligned:
	| d2 = (remaining bytes / 4) - 1; len was >= 16 and at most 3 bytes
	| were used for alignment, so at least 3 long words remain.
	move.l	d1,d2
	lsr.l	#2,d2
	subq.l	#1,d2

.Llfill:
	move.l	d0,(a0)+		| store one long word
.Llset:
#if !defined (__mcoldfire__)
	dbra	d2,.Llfill		| counts on the low word of d2 only
	sub.l	#0x10000,d2		| low word expired: step the upper word
#else
	subq.l	#1,d2
#endif
	bpl	.Llfill			| continue until d2 goes negative

	and.l	#3,d1			| 0..3 bytes are left over
	move.l	(sp)+,d2		| restore d2
	bra	.Lbset

.Lbfill:
	move.b	d0,(a0)+		| store one byte
.Lbset:
#if !defined (__mcoldfire__)
	dbra	d1,.Lbfill		| d1 is below 16 here, so the word count suffices
#else
	subq.l	#1,d1
	bpl	.Lbfill
#endif
	move.l	4(sp),d0		| return the original dest pointer
	rts