345 lines
9.3 KiB
ArmAsm

/*
* Copyright (c) 2006-2023, RT-Thread Development Team
*
* SPDX-License-Identifier: Apache-2.0
*
* Date Author Notes
* 2020-01-15 bigmagic the first version
* 2020-08-10 SummerGift support clang compiler
* 2023-04-29 GuEe-GUI support kernel's ARM64 boot header
*/
#ifndef __ASSEMBLY__
#define __ASSEMBLY__
#endif
#include <mmu.h>
#include <rtconfig.h>
#define ARM64_IMAGE_FLAG_BE_SHIFT 0
#define ARM64_IMAGE_FLAG_PAGE_SIZE_SHIFT (ARM64_IMAGE_FLAG_BE_SHIFT + 1)
#define ARM64_IMAGE_FLAG_PHYS_BASE_SHIFT (ARM64_IMAGE_FLAG_PAGE_SIZE_SHIFT + 2)
#define ARM64_IMAGE_FLAG_LE 0
#define ARM64_IMAGE_FLAG_BE 1
#define ARM64_IMAGE_FLAG_PAGE_SIZE_4K 1
#define ARM64_IMAGE_FLAG_PAGE_SIZE_16K 2
#define ARM64_IMAGE_FLAG_PAGE_SIZE_64K 3
#define ARM64_IMAGE_FLAG_PHYS_BASE 1
#define _HEAD_FLAG(field) (_HEAD_FLAG_##field << ARM64_IMAGE_FLAG_##field##_SHIFT)
#ifdef ARCH_CPU_BIG_ENDIAN
#define _HEAD_FLAG_BE ARM64_IMAGE_FLAG_BE
#else
#define _HEAD_FLAG_BE ARM64_IMAGE_FLAG_LE
#endif
#define _HEAD_FLAG_PAGE_SIZE ((ARCH_PAGE_SHIFT - 10) / 2)
#define _HEAD_FLAG_PHYS_BASE 1
#define _HEAD_FLAGS (_HEAD_FLAG(BE) | _HEAD_FLAG(PAGE_SIZE) | _HEAD_FLAG(PHYS_BASE))
.macro get_phy, reg, symbol
adrp \reg, \symbol
add \reg, \reg, #:lo12:\symbol
.endm
.macro get_pvoff, tmp, out
ldr \tmp, =.boot_cpu_stack_top
get_phy \out, .boot_cpu_stack_top
sub \out, \out, \tmp
.endm
.section ".text.entrypoint","ax"
#ifdef RT_USING_OFW
/*
* Our goal is to boot the rt-thread as possible without modifying the
* bootloader's config, so we use the kernel's boot header for ARM64:
* https://www.kernel.org/doc/html/latest/arm64/booting.html#call-the-kernel-image
*/
_head:
b _start /* Executable code */
.long 0 /* Executable code */
.quad _text_offset /* Image load offset from start of RAM, little endian */
.quad _end - _head /* Effective Image size, little endian (_end defined in link.lds) */
.quad _HEAD_FLAGS /* Kernel flags, little endian */
.quad 0 /* Reserved */
.quad 0 /* Reserved */
.quad 0 /* Reserved */
.ascii "ARM\x64" /* Magic number */
.long 0 /* Reserved (used for PE COFF offset) */
#endif
/* Variable registers: x21~x28 */
dtb_paddr .req x21
boot_arg0 .req x22
boot_arg1 .req x23
boot_arg2 .req x24
stack_top .req x25
.global _start
_start:
/*
* Boot CPU general-purpose register settings:
* x0 = physical address of device tree blob (dtb) in system RAM.
* x1 = 0 (reserved for future use)
* x2 = 0 (reserved for future use)
* x3 = 0 (reserved for future use)
*/
mov dtb_paddr, x0
mov boot_arg0, x1
mov boot_arg1, x2
mov boot_arg2, x3
/* Save cpu stack */
get_phy stack_top, .boot_cpu_stack_top
/* Save cpu id temp */
msr tpidr_el1, xzr
bl init_cpu_el
bl init_kernel_bss
bl init_cpu_stack_early
#ifdef RT_USING_OFW
/* Save devicetree info */
mov x0, dtb_paddr
bl rt_hw_fdt_install_early
#endif
/* Now we are in the end of boot cpu process */
ldr x8, =rtthread_startup
b init_mmu_early
kernel_start:
/* jump to the PE's system entry */
mov x29, xzr
mov x30, x8
br x8
cpu_idle:
wfe
b cpu_idle
#ifdef RT_USING_SMP
.globl _secondary_cpu_entry
_secondary_cpu_entry:
#ifdef RT_USING_OFW
/* Read cpu id */
mrs x5, mpidr_el1
ldr x1, =rt_cpu_mpidr_table
get_pvoff x4 x2
add x1, x1, x2
mov x2, #0
ldr x4, =0xff00ffffff
and x0, x5, x4
.cpu_id_confirm:
add x2, x2, #1 /* Next cpu id inc */
ldr x3, [x1], #8
cmp x3, #0
beq cpu_idle
and x3, x3, x4
cmp x3, x0
bne .cpu_id_confirm
/* Save this mpidr */
str x5, [x1, #-8]
/* Get cpu id success */
sub x0, x2, #1
msr tpidr_el1, x0 /* Save cpu id global */
#else
bl rt_hw_cpu_id_set
mrs x0, tpidr_el1
#endif
/* Set current cpu's stack top */
sub x0, x0, #1
mov x1, #ARCH_SECONDARY_CPU_STACK_SIZE
get_phy x2, .secondary_cpu_stack_top
msub stack_top, x0, x1, x2
bl init_cpu_el
bl init_cpu_stack_early
/* secondary cpu start to startup */
ldr x8, =rt_hw_secondary_cpu_bsp_start
b enable_mmu_early
#endif /* RT_USING_SMP */
init_cpu_el:
mrs x0, CurrentEL /* CurrentEL Register. bit 2, 3. Others reserved */
lsr x0, x0, #2
and x0, x0, #3
cmp x0, #3
bne .init_cpu_hyp
mov x1, #(1 << 0) /* EL0 and EL1 are in Non-Secure state */
orr x1, x1, #(1 << 4) /* RES1 */
orr x1, x1, #(1 << 5) /* RES1 */
orr x1, x1, #(1 << 10) /* The next lower level is AArch64 */
msr scr_el3, x1
mov x1, #9 /* Next level is 0b1001->EL2h */
orr x1, x1, #(1 << 6) /* Mask FIQ */
orr x1, x1, #(1 << 7) /* Mask IRQ */
orr x1, x1, #(1 << 8) /* Mask SError */
orr x1, x1, #(1 << 9) /* Mask Debug Exception */
msr spsr_el3, x1
get_phy x1, .init_cpu_hyp
msr elr_el3, x1
eret
.init_cpu_hyp:
cmp x0, #2 /* EL1 = 0100 */
bne .init_cpu_sys
/* Enable CNTP for EL1 */
mrs x0, cnthctl_el2 /* Counter-timer Hypervisor Control register */
orr x0, x0, #(1 << 0) /* Don't traps NS EL0/1 accesses to the physical counter */
orr x0, x0, #(1 << 1) /* Don't traps NS EL0/1 accesses to the physical timer */
msr cnthctl_el2, x0
msr cntvoff_el2, xzr
mov x0, #(1 << 31) /* Enable AArch64 in EL1 */
orr x0, x0, #(1 << 1) /* SWIO hardwired */
msr hcr_el2, x0
mov x0, #5 /* Next level is 0b0101->EL1h */
orr x0, x0, #(1 << 6) /* Mask FIQ */
orr x0, x0, #(1 << 7) /* Mask IRQ */
orr x0, x0, #(1 << 8) /* Mask SError */
orr x0, x0, #(1 << 9) /* Mask Debug Exception */
msr spsr_el2, x0
get_phy x0, .init_cpu_sys
msr elr_el2, x0
eret
.init_cpu_sys:
mrs x0, sctlr_el1
orr x0, x0, #(1 << 12) /* Enable Instruction */
bic x0, x0, #(3 << 3) /* Disable SP Alignment check */
bic x0, x0, #(1 << 1) /* Disable Alignment check */
msr sctlr_el1, x0
/* Avoid trap from SIMD or float point instruction */
mov x0, #0x00300000 /* Don't trap any SIMD/FP instructions in both EL0 and EL1 */
msr cpacr_el1, x0
/* Applying context change */
dsb ish
isb
ret
init_kernel_bss:
get_phy x1, __bss_start
get_phy x2, __bss_end
sub x2, x2, x1 /* Get bss size */
and x3, x2, #7 /* x3 is < 7 */
ldr x4, =~0x7
and x2, x2, x4 /* Mask ~7 */
.clean_bss_loop_quad:
cbz x2, .clean_bss_loop_byte
str xzr, [x1], #8
sub x2, x2, #8
b .clean_bss_loop_quad
.clean_bss_loop_byte:
cbz x3, .clean_bss_end
strb wzr, [x1], #1
sub x3, x3, #1
b .clean_bss_loop_byte
.clean_bss_end:
ret
init_cpu_stack_early:
msr spsel, #1
mov sp, stack_top
ret
init_mmu_early:
get_phy x0, .early_page_array
bl set_free_page
get_phy x0, .early_tbl0_page
get_phy x1, .early_tbl1_page
get_pvoff x2 x3
ldr x2, =0x40000000 /* Map 1G memory for kernel space */
bl rt_hw_mem_setup_early
b enable_mmu_early
enable_mmu_early:
get_phy x0, .early_tbl0_page
get_phy x1, .early_tbl1_page
msr ttbr0_el1, x0
msr ttbr1_el1, x1
dsb sy
bl mmu_tcr_init
/*
* OK, now, we don't use sp before jump to kernel, set sp to current cpu's
* stack top to visual address
*/
get_pvoff x1 x0
mov x1, stack_top
sub x1, x1, x0
mov sp, x1
ldr x30, =kernel_start /* Set LR to kernel_start function, it's virtual addresses */
mrs x1, sctlr_el1
orr x1, x1, #(1 << 2) /* Cacheable Normal memory in stage1 */
orr x1, x1, #(1 << 0) /* MMU Enable */
msr sctlr_el1, x1
dsb ish
isb
ic ialluis /* Invalidate all instruction caches in Inner Shareable domain to Point of Unification */
dsb ish
isb
tlbi vmalle1 /* Invalidate all stage 1 translations used at EL1 with the current VMID */
dsb ish
isb
ret
/*
* CPU stack builtin
*/
.section ".bss.noclean.cpus_stack"
.align 12
.cpus_stack:
#if defined(RT_USING_SMP) && RT_CPUS_NR > 1
.space (ARCH_SECONDARY_CPU_STACK_SIZE * (RT_CPUS_NR - 1))
.secondary_cpu_stack_top:
#endif
.space ARCH_SECONDARY_CPU_STACK_SIZE
.boot_cpu_stack_top:
/*
* Early page builtin
*/
.section ".bss.noclean.early_page"
.align 12
.early_tbl0_page:
.space ARCH_PAGE_SIZE
.early_tbl1_page:
/* Map 4G -> 2M * 512 entries */
.space 4 * ARCH_PAGE_SIZE
.early_page_array:
.space 24 * ARCH_PAGE_SIZE