From b233e29bb63eb5fdab2a7c217c3a13584d4ab878 Mon Sep 17 00:00:00 2001
From: fangjianzhou <73834245+fangjianzhou@users.noreply.github.com>
Date: Sat, 16 Dec 2023 18:08:11 +0800
Subject: [PATCH] [libcpu][aarch64] use the device tree to initialize memory
 (#8320)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 bsp/qemu-virt64-aarch64/drivers/board.c      |  96 +---
 bsp/qemu-virt64-aarch64/drivers/board.h      |   8 -
 bsp/qemu-virt64-aarch64/drivers/drv_gpio.c   |   2 -
 bsp/qemu-virt64-aarch64/drivers/drv_rtc.c    |   2 -
 bsp/qemu-virt64-aarch64/drivers/drv_virtio.c |   2 -
 components/mm/ioremap.c                      |  23 +-
 libcpu/aarch64/common/interrupt.c            |   2 +-
 libcpu/aarch64/common/mmu.c                  |  47 +-
 libcpu/aarch64/common/mmu.h                  |  11 +-
 libcpu/aarch64/common/setup.c                | 239 ++++++++-
 libcpu/aarch64/cortex-a/entry_point.S        | 522 +++++++++----------
 11 files changed, 540 insertions(+), 414 deletions(-)

diff --git a/bsp/qemu-virt64-aarch64/drivers/board.c b/bsp/qemu-virt64-aarch64/drivers/board.c
index a86f171e6c..ade7de67b8 100644
--- a/bsp/qemu-virt64-aarch64/drivers/board.c
+++ b/bsp/qemu-virt64-aarch64/drivers/board.c
@@ -11,104 +11,22 @@
  * add smp ipi init
  */
 
-#include 
-#include 
 #include 
-#include 
-
-#ifdef RT_USING_SMART
-#include 
-#endif
-
-#include "board.h"
+#include 
 #include 
 #include 
-#include 
 #include 
 
-#ifdef RT_USING_SMART
-struct mem_desc platform_mem_desc[] = {
-    {KERNEL_VADDR_START, KERNEL_VADDR_START + 0x0fffffff, (rt_size_t)ARCH_MAP_FAILED, NORMAL_MEM}
-};
-#else
-
-#define PAGE_POOL_SIZE (2ul << 20)
-#define PHYMEM_END (0x48000000ul)
-
-struct mem_desc platform_mem_desc[] =
-{
-    {0x40000000, PHYMEM_END - 1, 0x40000000, NORMAL_MEM},
-    // {PL031_RTC_BASE, PL031_RTC_BASE + 0x1000 - 1, PL031_RTC_BASE, DEVICE_MEM},
-    // {PL061_GPIO_BASE, PL061_GPIO_BASE + 0x1000 - 1, PL061_GPIO_BASE, DEVICE_MEM},
-    {PL011_UART0_BASE, PL011_UART0_BASE + ARCH_SECTION_SIZE - 1, PL011_UART0_BASE, DEVICE_MEM},
-    {VIRTIO_MMIO_BASE, RT_ALIGN(VIRTIO_MMIO_BASE + VIRTIO_MAX_NR * VIRTIO_MMIO_SIZE, ARCH_SECTION_SIZE) - 1, VIRTIO_MMIO_BASE, DEVICE_MEM},
-#ifdef BSP_USING_GICV2
-    {GIC_PL390_DISTRIBUTOR_PPTR, GIC_PL390_DISTRIBUTOR_PPTR + ARCH_SECTION_SIZE - 1, GIC_PL390_DISTRIBUTOR_PPTR, DEVICE_MEM},
-#endif
-#ifdef BSP_USING_GICV3
-    {GIC_PL500_DISTRIBUTOR_PPTR, GIC_PL500_DISTRIBUTOR_PPTR + 0x1000 - 1, GIC_PL500_DISTRIBUTOR_PPTR, DEVICE_MEM},
-    {GIC_PL500_REDISTRIBUTOR_PPTR, GIC_PL500_REDISTRIBUTOR_PPTR + 0xf60000 - 1, GIC_PL500_REDISTRIBUTOR_PPTR, DEVICE_MEM},
-#endif
-};
-#endif
-
-const rt_uint32_t platform_mem_desc_size = sizeof(platform_mem_desc)/sizeof(platform_mem_desc[0]);
-
-void idle_wfi(void)
-{
-    asm volatile ("wfi");
-}
-
-/**
- * This function will initialize board
- */
  extern size_t MMUTable[];
 
-#ifdef RT_USING_SMART
-rt_region_t init_page_region = {
-    PAGE_START,
-    PAGE_END,
-};
-#else
-rt_region_t init_page_region = {
-    PHYMEM_END - PAGE_POOL_SIZE,
-    PHYMEM_END,
-};
-#endif
-
 void rt_hw_board_init(void)
 {
-#ifdef RT_USING_SMART
-    rt_hw_mmu_map_init(&rt_kernel_space, (void*)0xfffffffff0000000, 0x10000000, MMUTable, PV_OFFSET);
-#else
-    rt_hw_mmu_map_init(&rt_kernel_space, (void*)0x80000000, 0x10000000, MMUTable, 0);
-#endif
-    rt_page_init(init_page_region);
-    rt_hw_mmu_setup(&rt_kernel_space, platform_mem_desc, platform_mem_desc_size);
+    rt_fdt_commit_memregion_early(&(rt_region_t)
+    {
+        .name = "memheap",
+        .start = (rt_size_t)rt_kmem_v2p(HEAP_BEGIN),
+        .end =
(rt_size_t)rt_kmem_v2p(HEAP_END), + }, RT_TRUE); - /* initialize system heap */ - rt_system_heap_init((void *)HEAP_BEGIN, (void *)HEAP_END); - - /* initialize hardware interrupt */ - rt_hw_interrupt_init(); - - rt_hw_gtimer_init(); - - /* support debug feature before components init */ - rt_hw_uart_init(); - rt_console_set_device(RT_CONSOLE_DEVICE_NAME); - -#ifdef RT_USING_OFW rt_hw_common_setup(); -#endif - - rt_components_board_init(); - - rt_thread_idle_sethook(idle_wfi); - -#ifdef RT_USING_SMP - /* install IPI handle */ - rt_hw_ipi_handler_install(RT_SCHEDULE_IPI, rt_scheduler_ipi_handler); -#endif } diff --git a/bsp/qemu-virt64-aarch64/drivers/board.h b/bsp/qemu-virt64-aarch64/drivers/board.h index cb0aaaea60..e9fc17a283 100644 --- a/bsp/qemu-virt64-aarch64/drivers/board.h +++ b/bsp/qemu-virt64-aarch64/drivers/board.h @@ -18,15 +18,7 @@ extern unsigned char __bss_start; extern unsigned char __bss_end; #define HEAP_BEGIN (void *)&__bss_end - -#ifdef RT_USING_SMART -#define HEAP_END (rt_size_t)(KERNEL_VADDR_START + 64 * 1024 * 1024) -#define PAGE_START HEAP_END + 1 * 1024 * 1024 -#define PAGE_END ((rt_size_t)KERNEL_VADDR_START + 128 * 1024 * 1024) -#else #define HEAP_END ((void *)HEAP_BEGIN + 64 * 1024 * 1024) -#define KERNEL_VADDR_START 0x40000000 -#endif void rt_hw_board_init(void); diff --git a/bsp/qemu-virt64-aarch64/drivers/drv_gpio.c b/bsp/qemu-virt64-aarch64/drivers/drv_gpio.c index 09712f098b..73088927a9 100644 --- a/bsp/qemu-virt64-aarch64/drivers/drv_gpio.c +++ b/bsp/qemu-virt64-aarch64/drivers/drv_gpio.c @@ -305,9 +305,7 @@ int rt_hw_gpio_init(void) rt_spin_lock_init(&_pl061.spinlock); #endif -#ifdef RT_USING_LWP pl061_gpio_base = (rt_size_t)rt_ioremap((void *)pl061_gpio_base, PL061_GPIO_SIZE); -#endif rt_device_pin_register("gpio", &ops, RT_NULL); rt_hw_interrupt_install(PL061_GPIO_IRQNUM, rt_hw_gpio_isr, RT_NULL, "gpio"); diff --git a/bsp/qemu-virt64-aarch64/drivers/drv_rtc.c b/bsp/qemu-virt64-aarch64/drivers/drv_rtc.c index a899c2a6c9..238e50074b 100644 --- a/bsp/qemu-virt64-aarch64/drivers/drv_rtc.c +++ b/bsp/qemu-virt64-aarch64/drivers/drv_rtc.c @@ -102,9 +102,7 @@ const static struct rt_device_ops pl031_rtc_ops = int rt_hw_rtc_init(void) { -#ifdef RT_USING_LWP pl031_rtc_base = (rt_size_t)rt_ioremap((void *)pl031_rtc_base, PL031_RTC_SIZE); -#endif rt_memset(&rtc_device, 0, sizeof(rtc_device)); diff --git a/bsp/qemu-virt64-aarch64/drivers/drv_virtio.c b/bsp/qemu-virt64-aarch64/drivers/drv_virtio.c index 6fe84973e5..2634da6773 100644 --- a/bsp/qemu-virt64-aarch64/drivers/drv_virtio.c +++ b/bsp/qemu-virt64-aarch64/drivers/drv_virtio.c @@ -62,14 +62,12 @@ int rt_virtio_devices_init(void) return 0; } -#ifdef RT_USING_LWP mmio_base = (rt_ubase_t)rt_ioremap((void *)mmio_base, VIRTIO_MMIO_SIZE * VIRTIO_MAX_NR); if (mmio_base == RT_NULL) { return -RT_ERROR; } -#endif for (i = 0; i < VIRTIO_MAX_NR; ++i, ++irq, mmio_base += VIRTIO_MMIO_SIZE) { diff --git a/components/mm/ioremap.c b/components/mm/ioremap.c index 5d31185e57..49e7a5fee9 100644 --- a/components/mm/ioremap.c +++ b/components/mm/ioremap.c @@ -18,8 +18,9 @@ void *rt_ioremap_start; size_t rt_ioremap_size; #ifdef RT_USING_SMART - #include +#endif + #define DBG_TAG "mm.ioremap" #define DBG_LVL DBG_LOG #include @@ -111,23 +112,3 @@ void rt_iounmap(volatile void *vaddr) rt_aspace_unmap(&rt_kernel_space, (void *)vaddr); } -#else -void *rt_ioremap(void *paddr, size_t size) -{ - return paddr; -} - -void *rt_ioremap_nocache(void *paddr, size_t size) -{ - return paddr; -} - -void *rt_ioremap_cached(void *paddr, size_t size) -{ - 
return paddr; -} - -void rt_iounmap(volatile void *vaddr) -{ -} -#endif diff --git a/libcpu/aarch64/common/interrupt.c b/libcpu/aarch64/common/interrupt.c index 8a5ed7bdd9..570859aee6 100644 --- a/libcpu/aarch64/common/interrupt.c +++ b/libcpu/aarch64/common/interrupt.c @@ -104,7 +104,7 @@ void rt_hw_interrupt_init(void) rt_memset(isr_table, 0x00, sizeof(isr_table)); /* initialize ARM GIC */ -#ifdef RT_USING_SMART +#if defined(RT_USING_SMART) || defined(RT_USING_OFW) gic_dist_base = (rt_uint64_t)rt_ioremap((void*)platform_get_gic_dist_base(), 0x40000); gic_cpu_base = (rt_uint64_t)rt_ioremap((void*)platform_get_gic_cpu_base(), 0x1000); #ifdef BSP_USING_GICV3 diff --git a/libcpu/aarch64/common/mmu.c b/libcpu/aarch64/common/mmu.c index 82bdce6820..95cff08476 100644 --- a/libcpu/aarch64/common/mmu.c +++ b/libcpu/aarch64/common/mmu.c @@ -15,13 +15,15 @@ #include #include +#define __MMU_INTERNAL + #include "mm_aspace.h" #include "mm_page.h" #include "mmu.h" #include "tlb.h" -#ifdef RT_USING_SMART #include "ioremap.h" +#ifdef RT_USING_SMART #include #endif @@ -45,6 +47,10 @@ #define MMU_TBL_PAGE_4k_LEVEL 3 #define MMU_TBL_LEVEL_NR 4 +#ifndef KERNEL_VADDR_START +#define KERNEL_VADDR_START ARCH_TEXT_OFFSET +#endif + volatile unsigned long MMUTable[512] __attribute__((aligned(4 * 1024))); struct mmu_level_info @@ -423,21 +429,13 @@ void rt_hw_mmu_setup(rt_aspace_t aspace, struct mem_desc *mdesc, int desc_nr) rt_page_cleanup(); } -#ifdef RT_USING_SMART static void _init_region(void *vaddr, size_t size) { rt_ioremap_start = vaddr; rt_ioremap_size = size; rt_mpr_start = (char *)rt_ioremap_start - rt_mpr_size; } -#else -#define RTOS_VEND (0xfffffffff000UL) -static inline void _init_region(void *vaddr, size_t size) -{ - rt_mpr_start = (void *)(RTOS_VEND - rt_mpr_size); -} -#endif /** * This function will initialize rt_mmu_info structure. 
@@ -476,12 +474,8 @@ int rt_hw_mmu_map_init(rt_aspace_t aspace, void *v_address, size_t size, return -1; } -#ifdef RT_USING_SMART rt_aspace_init(aspace, (void *)KERNEL_VADDR_START, 0 - KERNEL_VADDR_START, vtable); -#else - rt_aspace_init(aspace, (void *)0x1000, RTOS_VEND - 0x1000ul, vtable); -#endif _init_region(v_address, size); @@ -497,10 +491,14 @@ int rt_hw_mmu_map_init(rt_aspace_t aspace, void *v_address, size_t size, void mmu_tcr_init(void) { unsigned long val64; + unsigned long pa_range; val64 = 0x00447fUL; __asm__ volatile("msr MAIR_EL1, %0\n dsb sy\n" ::"r"(val64)); + __asm__ volatile ("mrs %0, ID_AA64MMFR0_EL1":"=r"(val64)); + pa_range = val64 & 0xf; /* PARange */ + /* TCR_EL1 */ val64 = (16UL << 0) /* t0sz 48bit */ | (0x0UL << 6) /* reserved */ @@ -516,7 +514,7 @@ void mmu_tcr_init(void) | (0x3UL << 26) /* t1 outer wb cacheable */ | (0x2UL << 28) /* t1 outer shareable */ | (0x2UL << 30) /* t1 4k */ - | (0x1UL << 32) /* 001b 64GB PA */ + | (pa_range << 32) /* PA range */ | (0x0UL << 35) /* reserved */ | (0x1UL << 36) /* as: 0:8bit 1:16bit */ | (0x0UL << 37) /* tbi0 */ @@ -530,17 +528,21 @@ struct page_table }; /* */ -static struct page_table __init_page_array[6] rt_align(0x1000); -static unsigned long __page_off = 2UL; /* 0, 1 for ttbr0, ttrb1 */ +static struct page_table* __init_page_array; +static unsigned long __page_off = 0UL; unsigned long get_ttbrn_base(void) { return (unsigned long) __init_page_array; } +void set_free_page(void *page_array) +{ + __init_page_array = page_array; +} + unsigned long get_free_page(void) { - __page_off++; - return (unsigned long) (__init_page_array[__page_off - 1].page); + return (unsigned long) (__init_page_array[__page_off++].page); } static int _map_single_page_2M(unsigned long *lv0_tbl, unsigned long va, @@ -594,6 +596,7 @@ static int _map_single_page_2M(unsigned long *lv0_tbl, unsigned long va, void *rt_ioremap_early(void *paddr, size_t size) { size_t count; + rt_ubase_t base; static void *tbl = RT_NULL; if (!size) @@ -607,10 +610,16 @@ void *rt_ioremap_early(void *paddr, size_t size) } count = (size + ARCH_SECTION_MASK) >> ARCH_SECTION_SHIFT; + base = (rt_ubase_t)paddr & (~ARCH_SECTION_MASK); while (count --> 0) { - _map_single_page_2M(tbl, (unsigned long)paddr, (unsigned long)paddr, MMU_MAP_K_DEVICE); + if (_map_single_page_2M(tbl, base, base, MMU_MAP_K_DEVICE)) + { + return RT_NULL; + } + + base += ARCH_SECTION_SIZE; } return paddr; diff --git a/libcpu/aarch64/common/mmu.h b/libcpu/aarch64/common/mmu.h index b58e58e929..5b6d3a78e6 100644 --- a/libcpu/aarch64/common/mmu.h +++ b/libcpu/aarch64/common/mmu.h @@ -11,6 +11,8 @@ #ifndef __MMU_H_ #define __MMU_H_ +#ifndef __ASSEMBLY__ + #include #include @@ -30,6 +32,8 @@ struct mem_desc struct rt_varea varea; }; +#endif /* !__ASSEMBLY__ */ + #define RT_HW_MMU_PROT_READ 1 #define RT_HW_MMU_PROT_WRITE 2 #define RT_HW_MMU_PROT_EXECUTE 4 @@ -85,6 +89,8 @@ struct mem_desc #define ARCH_MAP_FAILED ((void *)0x1ffffffffffff) +#ifndef __ASSEMBLY__ + struct rt_aspace; void rt_hw_mmu_ktbl_set(unsigned long tbl); @@ -93,8 +99,7 @@ void rt_hw_mem_setup_early(unsigned long *tbl0, unsigned long *tbl1, void rt_hw_mmu_setup(struct rt_aspace *aspace, struct mem_desc *mdesc, int desc_nr); -int rt_hw_mmu_map_init(struct rt_aspace *aspace, void *v_address, - rt_size_t size, rt_size_t *vtable, rt_size_t pv_off); +int rt_hw_mmu_map_init(rt_aspace_t aspace, void *v_address, size_t size, size_t *vtable, size_t pv_off); void *rt_hw_mmu_map(struct rt_aspace *aspace, void *v_addr, void *p_addr, size_t size, size_t attr); 
void rt_hw_mmu_unmap(struct rt_aspace *aspace, void *v_addr, size_t size); @@ -204,4 +209,6 @@ rt_inline rt_bool_t rt_hw_mmu_attr_test_perm(size_t attr, rt_base_t prot) int rt_hw_mmu_control(struct rt_aspace *aspace, void *vaddr, size_t size, enum rt_mmu_cntl cmd); +#endif /* !__ASSEMBLY__ */ + #endif diff --git a/libcpu/aarch64/common/setup.c b/libcpu/aarch64/common/setup.c index 9f9709db47..6a91f131f6 100644 --- a/libcpu/aarch64/common/setup.c +++ b/libcpu/aarch64/common/setup.c @@ -18,7 +18,8 @@ #include #include #include - +#include +#include #include #include #include @@ -31,13 +32,18 @@ #define rt_sysreg_read(sysreg, val) \ __asm__ volatile ("mrs %0, "RT_STRINGIFY(sysreg)"":"=r"((val))) +#define SIZE_KB 1024 +#define SIZE_MB (1024 * SIZE_KB) +#define SIZE_GB (1024 * SIZE_MB) +extern rt_ubase_t _start, _end; extern void _secondary_cpu_entry(void); extern size_t MMUTable[]; extern void *system_vectors; static void *fdt_ptr = RT_NULL; static rt_size_t fdt_size = 0; +static rt_uint64_t initrd_ranges[3] = { }; #ifdef RT_USING_SMP extern struct cpu_ops_t cpu_psci_ops; @@ -63,15 +69,18 @@ static struct rt_ofw_node *cpu_np[RT_CPUS_NR] = { }; void rt_hw_fdt_install_early(void *fdt) { - void *fdt_vaddr = fdt - PV_OFFSET; - - if (fdt != RT_NULL && !fdt_check_header(fdt_vaddr)) + if (fdt != RT_NULL && !fdt_check_header(fdt)) { - fdt_ptr = fdt_vaddr; - fdt_size = fdt_totalsize(fdt_vaddr); + fdt_ptr = fdt; + fdt_size = fdt_totalsize(fdt); } } +rt_weak void rt_hw_idle_wfi(void) +{ + __asm__ volatile ("wfi"); +} + static void system_vectors_init(void) { rt_hw_set_current_vbar((rt_ubase_t)&system_vectors); @@ -124,17 +133,235 @@ rt_inline void cpu_info_init(void) rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, rt_cpu_mpidr_table, sizeof(rt_cpu_mpidr_table)); } +rt_inline rt_bool_t is_kernel_aspace(const char *name) +{ + static char * const names[] = + { + "kernel", + "memheap", + }; + + if (!name) + { + return RT_FALSE; + } + + for (int i = 0; i < RT_ARRAY_SIZE(names); ++i) + { + if (!rt_strcmp(names[i], name)) + { + return RT_TRUE; + } + } + + return RT_FALSE; +} + void rt_hw_common_setup(void) { + rt_size_t mem_region_nr; + rt_region_t *mem_region; + rt_size_t page_best_start; + rt_region_t platform_mem_region; + static struct mem_desc platform_mem_desc; + void *kernel_start, *kernel_end, *memheap_start = RT_NULL, *memheap_end = RT_NULL; + +#ifdef RT_USING_SMART + rt_hw_mmu_map_init(&rt_kernel_space, (void*)0xfffffffff0000000, 0x10000000, MMUTable, PV_OFFSET); +#else + rt_hw_mmu_map_init(&rt_kernel_space, (void*)0xffffd0000000, 0x10000000, MMUTable, 0); +#endif + + kernel_start = rt_kmem_v2p((void *)&_start) - 64; + kernel_end = rt_kmem_v2p((void *)&_end); + + if (!rt_fdt_commit_memregion_request(&mem_region, &mem_region_nr, RT_TRUE)) + { + const char *name = "memheap"; + + while (mem_region_nr --> 0) + { + if (mem_region->name == name || !rt_strcmp(mem_region->name, name)) + { + memheap_start = (void *)mem_region->start; + memheap_end = (void *)mem_region->end; + + break; + } + } + } + + page_best_start = (rt_size_t)(memheap_end ? 
: kernel_end);
+
+    if (memheap_end && fdt_ptr > kernel_start)
+    {
+        rt_memmove(memheap_end - PV_OFFSET, fdt_ptr - PV_OFFSET, fdt_size);
+
+        fdt_ptr = memheap_end;
+
+        page_best_start = (rt_size_t)fdt_ptr + fdt_size;
+    }
+
+    rt_fdt_commit_memregion_early(&(rt_region_t)
+    {
+        .name = "fdt",
+        .start = (rt_size_t)fdt_ptr,
+        .end = (rt_size_t)(fdt_ptr + fdt_size),
+    }, RT_TRUE);
+
+    fdt_ptr -= PV_OFFSET;
+
+    rt_fdt_commit_memregion_early(&(rt_region_t)
+    {
+        .name = "kernel",
+        .start = (rt_size_t)kernel_start,
+        .end = (rt_size_t)kernel_end,
+    }, RT_TRUE);
+
     if (rt_fdt_prefetch(fdt_ptr))
     {
         /* Platform cannot be initialized */
         RT_ASSERT(0);
     }
 
+    rt_fdt_scan_chosen_stdout();
+
+    rt_fdt_scan_initrd(initrd_ranges);
+
+    rt_fdt_scan_memory();
+
+    if (memheap_start && memheap_end)
+    {
+        rt_system_heap_init(memheap_start - PV_OFFSET, memheap_end - PV_OFFSET);
+    }
+
+    platform_mem_region.start = ~0UL;
+    platform_mem_region.end = 0;
+
+    if (!rt_fdt_commit_memregion_request(&mem_region, &mem_region_nr, RT_TRUE))
+    {
+        LOG_I("Reserved memory:");
+
+        while (mem_region_nr --> 0)
+        {
+            if (is_kernel_aspace(mem_region->name))
+            {
+                if (platform_mem_region.start > mem_region->start)
+                {
+                    platform_mem_region.start = mem_region->start;
+                }
+
+                if (platform_mem_region.end < mem_region->end)
+                {
+                    platform_mem_region.end = mem_region->end;
+                }
+            }
+
+            LOG_I("  %-*s [%p, %p]", RT_NAME_MAX, mem_region->name, mem_region->start, mem_region->end);
+
+            ++mem_region;
+        }
+    }
+
+    if (!rt_fdt_commit_memregion_request(&mem_region, &mem_region_nr, RT_FALSE))
+    {
+        rt_ubase_t best_offset = ~0UL;
+        rt_region_t *usable_mem_region = mem_region, *page_region = RT_NULL, init_page_region = { 0 };
+
+        LOG_I("Usable memory:");
+
+        for (int i = 0; i < mem_region_nr; ++i, ++mem_region)
+        {
+            if (!mem_region->name)
+            {
+                continue;
+            }
+
+            if (platform_mem_region.start > mem_region->start)
+            {
+                platform_mem_region.start = mem_region->start;
+            }
+
+            if (platform_mem_region.end < mem_region->end)
+            {
+                platform_mem_region.end = mem_region->end;
+            }
+
+            if (mem_region->start >= page_best_start &&
+                mem_region->start - page_best_start < best_offset &&
+                /* MUST >= 1MB */
+                mem_region->end - mem_region->start >= SIZE_MB)
+            {
+                page_region = mem_region;
+
+                best_offset = page_region->start - page_best_start;
+            }
+
+            LOG_I("  %-*s [%p, %p]", RT_NAME_MAX, mem_region->name, mem_region->start, mem_region->end);
+        }
+
+        RT_ASSERT(page_region != RT_NULL);
+
+        init_page_region.start = page_region->start - PV_OFFSET;
+        init_page_region.end = page_region->end - PV_OFFSET;
+
+        rt_page_init(init_page_region);
+
+        platform_mem_region.start = RT_ALIGN(platform_mem_region.start, ARCH_PAGE_SIZE);
+        platform_mem_region.end = RT_ALIGN_DOWN(platform_mem_region.end, ARCH_PAGE_SIZE);
+        RT_ASSERT(platform_mem_region.end - platform_mem_region.start != 0);
+
+        platform_mem_desc.paddr_start = platform_mem_region.start;
+        platform_mem_desc.vaddr_start = platform_mem_region.start - PV_OFFSET;
+        platform_mem_desc.vaddr_end = platform_mem_region.end - PV_OFFSET - 1;
+        platform_mem_desc.attr = NORMAL_MEM;
+
+        rt_hw_mmu_setup(&rt_kernel_space, &platform_mem_desc, 1);
+
+        rt_fdt_earlycon_kick(FDT_EARLYCON_KICK_UPDATE);
+
+        mem_region = usable_mem_region;
+
+        for (int i = 0; i < mem_region_nr; ++i, ++mem_region)
+        {
+            if (mem_region != page_region)
+            {
+                rt_page_install(*mem_region);
+            }
+        }
+    }
+
     rt_fdt_unflatten();
 
     cpu_info_init();
+
+    /* initialize hardware interrupt */
+    rt_hw_interrupt_init();
+
+    /* initialize uart */
+    rt_hw_uart_init();
+
+    /* initialize timer
for os tick */ + rt_hw_gtimer_init(); + + #ifdef RT_USING_COMPONENTS_INIT + rt_components_board_init(); +#endif + +#if defined(RT_USING_CONSOLE) && defined(RT_USING_DEVICE) + rt_ofw_console_setup(); +#endif + + rt_thread_idle_sethook(rt_hw_idle_wfi); + +#ifdef RT_USING_SMP + /* Install the IPI handle */ + rt_hw_ipi_handler_install(RT_SCHEDULE_IPI, rt_scheduler_ipi_handler); + rt_hw_ipi_handler_install(RT_STOP_IPI, rt_scheduler_ipi_handler); + rt_hw_interrupt_umask(RT_SCHEDULE_IPI); + rt_hw_interrupt_umask(RT_STOP_IPI); +#endif } #ifdef RT_USING_SMP diff --git a/libcpu/aarch64/cortex-a/entry_point.S b/libcpu/aarch64/cortex-a/entry_point.S index 08e4b03768..d68f31a89b 100644 --- a/libcpu/aarch64/cortex-a/entry_point.S +++ b/libcpu/aarch64/cortex-a/entry_point.S @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006-2020, RT-Thread Development Team + * Copyright (c) 2006-2023, RT-Thread Development Team * * SPDX-License-Identifier: Apache-2.0 * @@ -9,7 +9,46 @@ * 2023-04-29 GuEe-GUI support kernel's ARM64 boot header */ -#include "rtconfig.h" +#ifndef __ASSEMBLY__ +#define __ASSEMBLY__ +#endif + +#include +#include + +#define ARM64_IMAGE_FLAG_BE_SHIFT 0 +#define ARM64_IMAGE_FLAG_PAGE_SIZE_SHIFT (ARM64_IMAGE_FLAG_BE_SHIFT + 1) +#define ARM64_IMAGE_FLAG_PHYS_BASE_SHIFT (ARM64_IMAGE_FLAG_PAGE_SIZE_SHIFT + 2) + +#define ARM64_IMAGE_FLAG_LE 0 +#define ARM64_IMAGE_FLAG_BE 1 +#define ARM64_IMAGE_FLAG_PAGE_SIZE_4K 1 +#define ARM64_IMAGE_FLAG_PAGE_SIZE_16K 2 +#define ARM64_IMAGE_FLAG_PAGE_SIZE_64K 3 +#define ARM64_IMAGE_FLAG_PHYS_BASE 1 + +#define _HEAD_FLAG(field) (_HEAD_FLAG_##field << ARM64_IMAGE_FLAG_##field##_SHIFT) + +#ifdef ARCH_CPU_BIG_ENDIAN +#define _HEAD_FLAG_BE ARM64_IMAGE_FLAG_BE +#else +#define _HEAD_FLAG_BE ARM64_IMAGE_FLAG_LE +#endif +#define _HEAD_FLAG_PAGE_SIZE ((ARCH_PAGE_SHIFT - 10) / 2) +#define _HEAD_FLAG_PHYS_BASE 1 + +#define _HEAD_FLAGS (_HEAD_FLAG(BE) | _HEAD_FLAG(PAGE_SIZE) | _HEAD_FLAG(PHYS_BASE)) + +.macro get_phy, reg, symbol + adrp \reg, \symbol + add \reg, \reg, #:lo12:\symbol +.endm + +.macro get_pvoff, tmp, out + ldr \tmp, =.boot_cpu_stack_top + get_phy \out, .boot_cpu_stack_top + sub \out, \out, \tmp +.endm .section ".text.entrypoint","ax" @@ -24,7 +63,7 @@ _head: .long 0 /* Executable code */ .quad _text_offset /* Image load offset from start of RAM, little endian */ .quad _end - _head /* Effective Image size, little endian (_end defined in link.lds) */ - .quad 0xa /* Kernel flags, little endian */ + .quad _HEAD_FLAGS /* Kernel flags, little endian */ .quad 0 /* Reserved */ .quad 0 /* Reserved */ .quad 0 /* Reserved */ @@ -38,7 +77,7 @@ boot_arg1 .req x23 boot_arg2 .req x24 stack_top .req x25 -.global _start + .global _start _start: /* * Boot CPU general-purpose register settings: @@ -51,296 +90,255 @@ _start: mov boot_arg0, x1 mov boot_arg1, x2 mov boot_arg2, x3 -#ifdef ARCH_ARM_BOOTWITH_FLUSH_CACHE - bl __asm_flush_dcache_all -#endif - bl rt_hw_cpu_id_set - /* read cpu id, stop slave cores */ - mrs x0, tpidr_el1 - cbz x0, .L__cpu_0 /* .L prefix is the local label in ELF */ -#ifndef RT_AMP_SLAVE - /* cpu id > 0, stop */ - /* cpu id == 0 will also goto here after returned from entry() if possible */ -.L__current_cpu_idle: - wfe - b .L__current_cpu_idle -#endif + /* Save cpu stack */ + get_phy stack_top, .boot_cpu_stack_top + /* Save cpu id temp */ + msr tpidr_el1, xzr -.L__cpu_0: - /* set stack before our code, Define stack pointer for current exception level */ - adr x1, .el_stack_top - - /* set up EL1 */ - mrs x0, CurrentEL /* CurrentEL Register. bit 2, 3. 
Others reserved */
-    and x0, x0, #12 /* clear reserved bits */
-
-    /* running at EL3? */
-    cmp x0, #12 /* 1100b. So, EL3 */
-    bne .L__not_in_el3 /* 11? !EL3 -> 5: */
-
-    /* should never be executed, just for completeness. (EL3) */
-    mov x2, #0x5b1
-    msr scr_el3, x2 /* SCR_ELn Secure Configuration Register */
-    mov x2, #0x3c9
-    msr spsr_el3, x2 /* SPSR_ELn. Saved Program Status Register. 1111001001 */
-    adr x2, .L__not_in_el3
-    msr elr_el3, x2
-    eret /* Exception Return: from EL3, continue from .L__not_in_el3 */
-
-.L__not_in_el3: /* running at EL2 or EL1 */
-    cmp x0, #4 /* 0x04 0100 EL1 */
-    beq .L__in_el1 /* EL1 -> 5: */
-
-    mrs x0, hcr_el2
-    bic x0, x0, #0xff
-    msr hcr_el2, x0
-
-    msr sp_el1, x1 /* in EL2, set sp of EL1 to _start */
-
-    /* enable CNTP for EL1 */
-    mrs x0, cnthctl_el2 /* Counter-timer Hypervisor Control register */
-    orr x0, x0, #3
-    msr cnthctl_el2, x0
-    msr cntvoff_el2, xzr
-
-    /* enable AArch64 in EL1 */
-    mov x0, #(1 << 31) /* AArch64 */
-    orr x0, x0, #(1 << 1) /* SWIO hardwired on Pi3 */
-    msr hcr_el2, x0
-    mrs x0, hcr_el2
-
-    /* change execution level to EL1 */
-    mov x2, #0x3c4
-    msr spsr_el2, x2 /* 1111000100 */
-    adr x2, .L__in_el1
-    msr elr_el2, x2
-
-    eret /* exception return. from EL2. continue from .L__in_el1 */
-
-.macro GET_PHY reg, symbol
-    adrp \reg, \symbol
-    add \reg, \reg, #:lo12:\symbol
-.endm
-
-.L__in_el1:
-    mov sp, x1 /* in EL1. Set sp to _start */
-
-    /* Set CPACR_EL1 (Architecture Feature Access Control Register) to avoid trap from SIMD or float point instruction */
-    mov x1, #0x00300000 /* Don't trap any SIMD/FP instructions in both EL0 and EL1 */
-    msr cpacr_el1, x1
-    /* applying context change */
-    dsb ish
-    isb
-
-    /* clear bss */
-    GET_PHY x1, __bss_start
-    GET_PHY x2, __bss_end
-    sub x2, x2, x1 /* get bss size */
-
-    and x3, x2, #7 /* x3 is < 7 */
-    ldr x4, =~0x7
-    and x2, x2, x4 /* mask ~7 */
-
-.L__clean_bss_loop:
-    cbz x2, .L__clean_bss_loop_1
-    str xzr, [x1], #8
-    sub x2, x2, #8
-    b .L__clean_bss_loop
-
-.L__clean_bss_loop_1:
-    cbz x3, .L__jump_to_entry
-    strb wzr, [x1], #1
-    sub x3, x3, #1
-    b .L__clean_bss_loop_1
-
-.L__jump_to_entry: /* jump to C code, should not return */
-    bl mmu_tcr_init
-
-    bl get_ttbrn_base
-    add x1, x0, #0x1000
-
-    msr ttbr0_el1, x0
-    msr ttbr1_el1, x1
-    dsb sy
-
-#ifdef RT_USING_SMART
-    ldr x2, =_start
-    GET_PHY x3, _start
-    sub x3, x3, x2
-#else
-    mov x3,0
-#endif
-
-    ldr x2, =0x10000000 /* map 256M memory for kernel space */
-    bl rt_hw_mem_setup_early
-
-    ldr x30, =after_mmu_enable /* set LR to after_mmu_enable function, it's a v_addr */
-
-    mrs x1, sctlr_el1
-    bic x1, x1, #(3 << 3) /* dis SA, SA0 */
-    bic x1, x1, #(1 << 1) /* dis A */
-    orr x1, x1, #(1 << 12) /* I */
-    orr x1, x1, #(1 << 2) /* C */
-    orr x1, x1, #(1 << 0) /* M */
-    msr sctlr_el1, x1 /* enable MMU */
-
-    dsb ish
-    isb
-    ic ialluis /* Invalidate all instruction caches in Inner Shareable domain to Point of Unification */
-    dsb ish
-    isb
-    tlbi vmalle1 /* Invalidate all stage 1 translations used at EL1 with the current VMID */
-    dsb ish
-    isb
-    ret
-
-after_mmu_enable:
-#ifdef RT_USING_SMART
-    mrs x0, tcr_el1 /* disable ttbr0, only using kernel space */
-    orr x0, x0, #(1 << 7)
-    msr tcr_el1, x0
-    msr ttbr0_el1, xzr
-    dsb sy
-#endif
-
-    mov x0, #1
-    msr spsel, x0
-    adr x1, .el_stack_top
-    mov sp, x1 /* sp_el1 set to _start */
+    bl init_cpu_el
+    bl init_kernel_bss
+    bl init_cpu_stack_early
 
 #ifdef RT_USING_OFW
     /* Save devicetree info */
     mov x0, dtb_paddr
     bl rt_hw_fdt_install_early
 #endif
-    b rtthread_startup
+
+    /* Now we are at the end of the boot cpu startup process */
+    ldr x8, =rtthread_startup
+    b init_mmu_early
+
+kernel_start:
+    /* jump to the PE's system entry */
+    mov x29, xzr
+    mov x30, x8
+    br x8
+
+cpu_idle:
+    wfe
+    b cpu_idle
 
 #ifdef RT_USING_SMP
-/**
- * secondary cpu
- */
-
-.global _secondary_cpu_entry
+    .globl _secondary_cpu_entry
 _secondary_cpu_entry:
+#ifdef RT_USING_OFW
+    /* Read cpu id */
+    mrs x5, mpidr_el1
+    ldr x1, =rt_cpu_mpidr_table
+    get_pvoff x4 x2
+    add x1, x1, x2
+    mov x2, #0
+    ldr x4, =0xff00ffffff
+    and x0, x5, x4
+
+.cpu_id_confirm:
+    add x2, x2, #1 /* Advance to the next cpu id */
+    ldr x3, [x1], #8
+    cmp x3, #0
+    beq cpu_idle
+    and x3, x3, x4
+    cmp x3, x0
+    bne .cpu_id_confirm
+
+    /* Save this mpidr */
+    str x5, [x1, #-8]
+
+    /* Got the cpu id */
+    sub x0, x2, #1
+    msr tpidr_el1, x0 /* Save cpu id globally */
+#else
     bl rt_hw_cpu_id_set
-    adr x1, .el_stack_top
+    mrs x0, tpidr_el1
+#endif
 
-    /* set up EL1 */
+    /* Set current cpu's stack top */
+    sub x0, x0, #1
+    mov x1, #ARCH_SECONDARY_CPU_STACK_SIZE
+    get_phy x2, .secondary_cpu_stack_top
+    msub stack_top, x0, x1, x2
+
+    bl init_cpu_el
+    bl init_cpu_stack_early
+
+    /* The secondary cpu enters the kernel startup */
+    ldr x8, =rt_hw_secondary_cpu_bsp_start
+    b enable_mmu_early
+#endif /* RT_USING_SMP */
+
+init_cpu_el:
     mrs x0, CurrentEL /* CurrentEL Register. bit 2, 3. Others reserved */
-    and x0, x0, #12 /* clear reserved bits */
+    lsr x0, x0, #2
+    and x0, x0, #3
 
-    /* running at EL3? */
-    cmp x0, #12 /* 1100b. So, EL3 */
-    bne .L__not_in_el3_cpux /* 11? !EL3 -> 5: */
+    cmp x0, #3 /* EL3 */
+    bne .init_cpu_hyp
 
-    /* should never be executed, just for completeness. (EL3) */
-    mov x2, #0x5b1
-    msr scr_el3, x2 /* SCR_ELn Secure Configuration Register */
-    mov x2, #0x3c9
-    msr spsr_el3, x2 /* SPSR_ELn. Saved Program Status Register. 1111001001 */
-    adr x2, .L__not_in_el3_cpux
-    msr elr_el3, x2
-    eret /* Exception Return: from EL3, continue from .L__not_in_el3 */
+    mov x1, #(1 << 0) /* EL0 and EL1 are in Non-Secure state */
+    orr x1, x1, #(1 << 4) /* RES1 */
+    orr x1, x1, #(1 << 5) /* RES1 */
+    orr x1, x1, #(1 << 10) /* The next lower level is AArch64 */
+    msr scr_el3, x1
 
-.L__not_in_el3_cpux: /* running at EL2 or EL1 */
-    cmp x0, #4 /* 0x04 0100 EL1 */
-    beq .L__in_el1_cpux /* EL1 -> 5: */
+    mov x1, #9 /* Next level is 0b1001->EL2h */
+    orr x1, x1, #(1 << 6) /* Mask FIQ */
+    orr x1, x1, #(1 << 7) /* Mask IRQ */
+    orr x1, x1, #(1 << 8) /* Mask SError */
+    orr x1, x1, #(1 << 9) /* Mask Debug Exception */
+    msr spsr_el3, x1
 
-    mrs x0, hcr_el2
-    bic x0, x0, #0xff
-    msr hcr_el2, x0
+    get_phy x1, .init_cpu_hyp
+    msr elr_el3, x1
+    eret
 
-    msr sp_el1, x1 /* in EL2, set sp of EL1 to _start */
+.init_cpu_hyp:
+    cmp x0, #2 /* EL2 */
+    bne .init_cpu_sys
 
-    /* enable CNTP for EL1 */
+    /* Enable CNTP for EL1 */
     mrs x0, cnthctl_el2 /* Counter-timer Hypervisor Control register */
-    orr x0, x0, #3
+    orr x0, x0, #(1 << 0) /* Don't trap NS EL0/1 accesses to the physical counter */
+    orr x0, x0, #(1 << 1) /* Don't trap NS EL0/1 accesses to the physical timer */
     msr cnthctl_el2, x0
     msr cntvoff_el2, xzr
 
-    /* enable AArch64 in EL1 */
-    mov x0, #(1 << 31) /* AArch64 */
-    orr x0, x0, #(1 << 1) /* SWIO hardwired on Pi3 */
+    mov x0, #(1 << 31) /* Enable AArch64 in EL1 */
+    orr x0, x0, #(1 << 1) /* SWIO hardwired */
     msr hcr_el2, x0
-    mrs x0, hcr_el2
 
-    /* change execution level to EL1 */
-    mov x2, #0x3c4
-    msr spsr_el2, x2 /* 1111000100 */
-    adr x2, .L__in_el1_cpux
-    msr elr_el2, x2
+    mov x0, #5 /* Next level is 0b0101->EL1h */
+    orr x0, x0, #(1 << 6) /* Mask FIQ */
+    orr x0, x0, #(1 << 7) /* Mask IRQ */
+    orr x0, x0, #(1 << 8) /* Mask SError */
+    orr x0, x0, #(1 << 9) /* Mask Debug Exception */
+    msr spsr_el2, x0
 
-    eret /* exception return. from EL2. continue from .L__in_el1 */
+    get_phy x0, .init_cpu_sys
+    msr elr_el2, x0
+    eret
 
-.L__in_el1_cpux:
-    mrs x0, tpidr_el1
-    /* each cpu init stack is 8k */
-    sub x1, x1, x0, lsl #13
-    mov sp, x1 /* in EL1. Set sp to _start */
+.init_cpu_sys:
+    mrs x0, sctlr_el1
+    orr x0, x0, #(1 << 12) /* Enable instruction cache */
+    bic x0, x0, #(3 << 3) /* Disable SP Alignment check */
+    bic x0, x0, #(1 << 1) /* Disable Alignment check */
+    msr sctlr_el1, x0
 
-    /* Set CPACR_EL1 (Architecture Feature Access Control Register) to avoid trap from SIMD or float point instruction */
-    mov x1, #0x00300000 /* Don't trap any SIMD/FP instructions in both EL0 and EL1 */
-    msr cpacr_el1, x1
+    /* Avoid traps from SIMD or floating point instructions */
+    mov x0, #0x00300000 /* Don't trap any SIMD/FP instructions in both EL0 and EL1 */
+    msr cpacr_el1, x0
 
-.L__jump_to_entry_cpux: /* jump to C code, should not return */
+    /* Applying context change */
+    dsb ish
+    isb
 
-    /* init mmu early */
-
-    bl mmu_tcr_init
-
-    bl get_ttbrn_base
-    add x1, x0, #0x1000
-
-    msr ttbr0_el1, x0
-    msr ttbr1_el1, x1
-    dsb sy
-
-    ldr x30, =after_mmu_enable_cpux /* set LR to after_mmu_enable function, it's a v_addr */
-
-    mrs x1, sctlr_el1
-    bic x1, x1, #(3 << 3) /* dis SA, SA0 */
-    bic x1, x1, #(1 << 1) /* dis A */
-    orr x1, x1, #(1 << 12) /* I */
-    orr x1, x1, #(1 << 2) /* C */
-    orr x1, x1, #(1 << 0) /* M */
-    msr sctlr_el1, x1 /* enable MMU */
-
-    dsb sy
-    isb sy
-    ic ialluis /* Invalidate all instruction caches in Inner Shareable domain to Point of Unification */
-    dsb sy
-    isb sy
-    tlbi vmalle1 /* Invalidate all stage 1 translations used at EL1 with the current VMID */
-    dsb sy
-    isb sy
     ret
 
-after_mmu_enable_cpux:
-#ifdef RT_USING_SMART
-    mrs x0, tcr_el1 /* disable ttbr0, only using kernel space */
-    orr x0, x0, #(1 << 7)
-    msr tcr_el1, x0
-    msr ttbr0_el1, xzr
-    dsb sy
+init_kernel_bss:
+    get_phy x1, __bss_start
+    get_phy x2, __bss_end
+    sub x2, x2, x1 /* Get bss size */
+
+    and x3, x2, #7 /* x3 is < 7 */
+    ldr x4, =~0x7
+    and x2, x2, x4 /* Mask ~7 */
+
+.clean_bss_loop_quad:
+    cbz x2, .clean_bss_loop_byte
+    str xzr, [x1], #8
+    sub x2, x2, #8
+    b .clean_bss_loop_quad
+
+.clean_bss_loop_byte:
+    cbz x3, .clean_bss_end
+    strb wzr, [x1], #1
+    sub x3, x3, #1
+    b .clean_bss_loop_byte
+
+.clean_bss_end:
+    ret
+
+init_cpu_stack_early:
+    msr spsel, #1
+    mov sp, stack_top
+
+    ret
+
+init_mmu_early:
+    get_phy x0, .early_page_array
+    bl set_free_page
+
+    get_phy x0, .early_tbl0_page
+    get_phy x1, .early_tbl1_page
+
+    get_pvoff x2 x3
+    ldr x2, =0x40000000 /* Map 1G memory for kernel space */
+    bl rt_hw_mem_setup_early
+
+    b enable_mmu_early
+
+enable_mmu_early:
+    get_phy x0, .early_tbl0_page
+    get_phy x1, .early_tbl1_page
+
+    msr ttbr0_el1, x0
+    msr ttbr1_el1, x1
+    dsb sy
+
+    bl mmu_tcr_init
+
+    /*
+     * OK, now, sp is not used before we jump to the kernel; set sp to the
+     * current cpu's stack top by its virtual address
+     */
+    get_pvoff x1 x0
+    mov x1, stack_top
+    sub x1, x1, x0
+    mov sp, x1
+
+    ldr x30, =kernel_start /* Set LR to kernel_start function, it's a virtual address */
+
+    mrs x1, sctlr_el1
+    orr x1, x1, #(1 << 2) /* Cacheable Normal memory in stage1 */
+    orr x1, x1, #(1 << 0) /* MMU Enable */
+    msr sctlr_el1, x1
+
+    dsb ish
+    isb
+
+    ic ialluis /* Invalidate all instruction caches in Inner Shareable domain to Point of Unification */
+    dsb ish
+    isb
+
+    tlbi vmalle1 /* Invalidate all stage 1 translations used at EL1 with the current VMID */
dsb ish + isb + + ret + +/* + * CPU stack builtin + */ + .section ".bss.noclean.cpus_stack" + .align 12 +.cpus_stack: +#if defined(RT_USING_SMP) && RT_CPUS_NR > 1 + .space (ARCH_SECONDARY_CPU_STACK_SIZE * (RT_CPUS_NR - 1)) +.secondary_cpu_stack_top: #endif + .space ARCH_SECONDARY_CPU_STACK_SIZE +.boot_cpu_stack_top: - mov x0, #1 - msr spsel, x0 - mrs x0, tpidr_el1 - /* each cpu init stack is 8k */ - adr x1, .el_stack_top - sub x1, x1, x0, lsl #13 - mov sp, x1 /* in EL1. Set sp to _start */ - - b rt_hw_secondary_cpu_bsp_start -#endif - -#ifndef RT_CPUS_NR -#define RT_CPUS_NR 1 -#endif - -.align 12 -.el_stack: -.space (8192 * RT_CPUS_NR) -.el_stack_top: +/* + * Early page builtin + */ + .section ".bss.noclean.early_page" + .align 12 +.early_tbl0_page: + .space ARCH_PAGE_SIZE +.early_tbl1_page: + /* Map 4G -> 2M * 512 entries */ + .space 4 * ARCH_PAGE_SIZE +.early_page_array: + .space 24 * ARCH_PAGE_SIZE
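
---

For reference, a minimal usage sketch of rt_ioremap_early(), the early
2M-section mapping helper whose base alignment is fixed in the mmu.c hunk
above. The UART address and the function/variable names below are
hypothetical examples and are not part of this patch:

    #include <rtthread.h>

    /* Defined in libcpu/aarch64/common/mmu.c: identity-maps whole 2M
     * sections with MMU_MAP_K_DEVICE attributes in the early page tables. */
    void *rt_ioremap_early(void *paddr, size_t size);

    #define EXAMPLE_UART_PADDR 0x09000000UL /* hypothetical PL011 base */

    static volatile rt_uint32_t *example_uart_dr;

    void example_early_console_init(void)
    {
        /* The mapping is 1:1, so the returned pointer equals paddr;
         * RT_NULL means size was 0 or the early page pool ran out. */
        example_uart_dr = (volatile rt_uint32_t *)
                rt_ioremap_early((void *)EXAMPLE_UART_PADDR, 0x1000);

        if (example_uart_dr)
        {
            *example_uart_dr = 'A'; /* PL011 data register at offset 0x0 */
        }
    }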