Minor improvement to H8 startup code

The startup code needs to clear memory from _edata to _end. In the past this was done with a fairly naive word-at-a-time store loop. This change makes the code simply call memset and lets memset figure out a sensible way to handle the operation given the size and alignment requirements. I don't have performance data on this. I cobbled it together some time ago after seeing some of the GCC tests with larger .bss sections take an insane amount of time just to get from _start to main. With the fixes to the H8 decoder in the simulator, it may not matter nearly as much anymore. This has been in my tester for months. Naturally, it does not cause any regressions in the H8 port.
2023-12-31 09:08:21 -07:00 · 2023-12-31 09:08:21 -07:00 · ec119e1084
parent 3d10b04f1e
commit ec119e1084
1 changed files with 12 additions and 18 deletions
--- a/newlib/libc/sys/h8300hms/crt0.S
+++ b/newlib/libc/sys/h8300hms/crt0.S
@@ -9,12 +9,10 @@
 _start:
 	mov.w	#_stack,sp
 	mov.w	#_edata,r0
-	mov.w	#_end,r1
-	sub.w   r2,r2
-.Loop:	mov.w	r2,@r0
-	adds	#2,r0
-	cmp	r1,r0
-	blo	.Loop
+	mov.w	#_end,r2	; r2 = _end (r0 already holds _edata)
+	sub.w   r1,r1	; r1 = 0, the fill value
+	sub.w	r0,r2	; r2 = _end - _edata, the size of the region to clear
+	jsr	@_memset	; zero _edata.._end; presumably memset(r0, r1, r2) -- confirm against the H8 ABI
 #ifdef __ELF__
 	mov.l   #__fini,r0
 	jsr     @_atexit
@@ -43,12 +41,10 @@ _stack:	.word 	1
 _start:
 	mov.l	#_stack,sp
 	mov.l	#_edata,er0
-	mov.l	#_end,er1
-	sub.w   r2,r2           ; not sure about alignment requirements
-.Loop:	mov.w	r2,@er0		; playing it safe for now
-	adds	#2,er0
-	cmp.l	er1,er0
-	blo	.Loop
+	mov.l	#_end,er2	; er2 = _end (er0 already holds _edata)
+	sub.w   r1,r1	; r1 = 0, the fill value; only the low 16 bits of er1 are cleared
+	sub.l	er0,er2	; er2 = _end - _edata, the size of the region to clear
+	jsr	@_memset	; zero _edata.._end; presumably memset(er0, r1, er2) -- confirm against the H8 ABI
 #ifdef __ELF__
 	mov.l   #__fini,er0
 	jsr     @_atexit
@@ -77,12 +73,10 @@ _stack:	.long 	1
 _start:
 	mov.l	#_stack,sp
 	mov.l	#_edata,er0
-	mov.l	#_end,er1
-	sub.w   r2,r2           ; not sure about alignment requirements
-.Loop:	mov.w	r2,@er0		; playing it safe for now
-	adds	#2,er0
-	cmp.l	er1,er0
-	blo	.Loop
+	mov.l	#_end,er2	; er2 = _end (er0 already holds _edata)
+	sub.w   r1,r1	; r1 = 0, the fill value; only the low 16 bits of er1 are cleared
+	sub.l	er0,er2	; er2 = _end - _edata, the size of the region to clear
+	jsr	@_memset	; zero _edata.._end; presumably memset(er0, r1, er2) -- confirm against the H8 ABI
 #ifdef __ELF__
 	mov.l   #__fini,er0
 	jsr     @_atexit