From 470454d201ebb89aa72525f8c3fb562e372f4f56 Mon Sep 17 00:00:00 2001 From: Shell Date: Sat, 22 Apr 2023 23:59:11 +0800 Subject: [PATCH] [components/mm] support for scalable memory management (#7277) * [mm/page] multi-list page manager [mm/page] page debugger [libcpu/aarch64] hugepage support * [quality] remove void-arith * [format] remove kasan codes --- components/drivers/virtio/virtio_net.c | 3 +- .../lwp/arch/aarch64/cortex-a/lwp_arch.c | 2 +- components/lwp/arch/risc-v/rv64/lwp_arch.c | 2 +- components/lwp/arch/x86/i386/lwp_arch.c | 2 +- components/lwp/lwp.c | 15 +- components/lwp/lwp_shm.c | 7 +- components/lwp/lwp_syscall.c | 10 +- components/lwp/lwp_user_mm.c | 15 +- components/mm/avl_adpt.c | 4 +- components/mm/ioremap.c | 6 +- components/mm/mm_aspace.c | 81 ++-- components/mm/mm_fault.c | 2 +- components/mm/mm_object.c | 6 +- components/mm/mm_page.c | 416 +++++++++++++----- components/mm/mm_page.h | 31 ++ libcpu/aarch64/common/cache.h | 5 +- libcpu/aarch64/common/cpu.c | 5 +- libcpu/aarch64/common/mmu.c | 173 ++++++-- libcpu/aarch64/common/mmu.h | 22 + src/Kconfig | 26 +- 20 files changed, 603 insertions(+), 230 deletions(-) diff --git a/components/drivers/virtio/virtio_net.c b/components/drivers/virtio/virtio_net.c index 553f532b86..93a30eebab 100644 --- a/components/drivers/virtio/virtio_net.c +++ b/components/drivers/virtio/virtio_net.c @@ -11,6 +11,7 @@ #include #include #include +#include #ifdef RT_USING_VIRTIO_NET @@ -106,7 +107,7 @@ static struct pbuf *virtio_net_rx(rt_device_t dev) #ifdef RT_USING_SMP level = rt_spin_lock_irqsave(&virtio_dev->spinlock); #endif - rt_memcpy(p->payload, (void *)VIRTIO_PA2VA(queue_rx->desc[id].addr), len); + rt_memcpy(p->payload, (void *)queue_rx->desc[id].addr - PV_OFFSET, len); queue_rx->used_idx++; diff --git a/components/lwp/arch/aarch64/cortex-a/lwp_arch.c b/components/lwp/arch/aarch64/cortex-a/lwp_arch.c index eb6d4dd4f2..da9e5f94bd 100644 --- a/components/lwp/arch/aarch64/cortex-a/lwp_arch.c +++ b/components/lwp/arch/aarch64/cortex-a/lwp_arch.c @@ -26,7 +26,7 @@ int arch_user_space_init(struct rt_lwp *lwp) { size_t *mmu_table; - mmu_table = (size_t *)rt_pages_alloc(0); + mmu_table = (size_t *)rt_pages_alloc_ext(0, PAGE_ANY_AVAILABLE); if (!mmu_table) { return -RT_ENOMEM; diff --git a/components/lwp/arch/risc-v/rv64/lwp_arch.c b/components/lwp/arch/risc-v/rv64/lwp_arch.c index 7d58294c61..b2315a614f 100644 --- a/components/lwp/arch/risc-v/rv64/lwp_arch.c +++ b/components/lwp/arch/risc-v/rv64/lwp_arch.c @@ -91,7 +91,7 @@ int arch_user_space_init(struct rt_lwp *lwp) { rt_ubase_t *mmu_table; - mmu_table = (rt_ubase_t *)rt_pages_alloc(0); + mmu_table = (rt_ubase_t *)rt_pages_alloc_ext(0, PAGE_ANY_AVAILABLE); if (!mmu_table) { return -RT_ENOMEM; diff --git a/components/lwp/arch/x86/i386/lwp_arch.c b/components/lwp/arch/x86/i386/lwp_arch.c index 9f9e846582..4b054f969b 100644 --- a/components/lwp/arch/x86/i386/lwp_arch.c +++ b/components/lwp/arch/x86/i386/lwp_arch.c @@ -82,7 +82,7 @@ int arch_user_space_init(struct rt_lwp *lwp) { rt_size_t *mmu_table; - mmu_table = (rt_size_t *)rt_pages_alloc(0); + mmu_table = (rt_size_t *)rt_pages_alloc_ext(0, PAGE_ANY_AVAILABLE); if (!mmu_table) { return -1; diff --git a/components/lwp/lwp.c b/components/lwp/lwp.c index 375ac9b317..a341c1b977 100644 --- a/components/lwp/lwp.c +++ b/components/lwp/lwp.c @@ -9,6 +9,7 @@ * 2018-11-02 heyuanjie fix complie error in iar * 2021-02-03 lizhirui add 64-bit arch support and riscv64 arch support * 2021-08-26 linzhenxing add lwp_setcwd\lwp_getcwd + * 2023-02-20 
wangxiaoyao inv icache before new app startup */ #include @@ -1097,10 +1098,22 @@ static void _lwp_thread_entry(void *parameter) icache_invalid_all(); } + /** + * without ASID support, it will be a special case when trying to run application + * and exit multiple times and a same page frame allocated to it bound to + * different text segment. Then we are in a situation where icache contains + * out-of-dated data and must be handle by the running core itself. + * with ASID support, this should be a rare case that ASID & page frame both + * identical to previous running application. + * + * For a new application loaded into memory, icache are seen as empty. And there + * should be nothing in the icache entry to match. So this icache invalidation + * operation should have barely influence. + */ rt_hw_icache_invalidate_all(); #ifdef ARCH_MM_MMU - arch_start_umode(lwp->args, lwp->text_entry, (void *)USER_STACK_VEND, tid->stack_addr + tid->stack_size); + arch_start_umode(lwp->args, lwp->text_entry, (void *)USER_STACK_VEND, (char *)tid->stack_addr + tid->stack_size); #else arch_start_umode(lwp->args, lwp->text_entry, lwp->data_entry, (void *)((uint32_t)lwp->data_entry + lwp->data_size)); #endif /* ARCH_MM_MMU */ diff --git a/components/lwp/lwp_shm.c b/components/lwp/lwp_shm.c index f2bacf1b13..7ee7e29d8c 100644 --- a/components/lwp/lwp_shm.c +++ b/components/lwp/lwp_shm.c @@ -6,6 +6,7 @@ * Change Logs: * Date Author Notes * 2019-10-12 Jesven first version + * 2023-02-20 wangxiaoyao adapt to mm */ #include #include @@ -17,8 +18,6 @@ #include #include -#include -#include /* the kernel structure to represent a share-memory */ struct lwp_shm_struct @@ -64,7 +63,7 @@ static void on_shm_page_fault(struct rt_varea *varea, struct rt_aspace_fault_msg /* map all share page frames to user space in a time */ void *page = (void *)shm->addr; - void *pg_paddr = page + PV_OFFSET; + void *pg_paddr = (char *)page + PV_OFFSET; err = rt_varea_map_range(varea, varea->start, pg_paddr, shm->size); if (err == RT_EOK) @@ -140,7 +139,7 @@ static int _lwp_shmget(size_t key, size_t size, int create) /* allocate pages up to 2's exponent to cover the required size */ bit = rt_page_bits(size); - page_addr = rt_pages_alloc(bit); /* virtual address */ + page_addr = rt_pages_alloc_ext(bit, PAGE_ANY_AVAILABLE); /* virtual address */ if (!page_addr) { goto err; diff --git a/components/lwp/lwp_syscall.c b/components/lwp/lwp_syscall.c index 8d07a9388d..6f8479e26a 100644 --- a/components/lwp/lwp_syscall.c +++ b/components/lwp/lwp_syscall.c @@ -306,7 +306,7 @@ static void _crt_thread_entry(void *parameter) user_stack &= ~7; //align 8 #ifdef ARCH_MM_MMU - arch_crt_start_umode(parameter, tid->user_entry, (void *)user_stack, tid->stack_addr + tid->stack_size); + arch_crt_start_umode(parameter, tid->user_entry, (void *)user_stack, (char *)tid->stack_addr + tid->stack_size); #else set_user_context((void*)user_stack); arch_start_umode(parameter, tid->user_entry, ((struct rt_lwp *)tid->lwp)->data_entry, (void*)user_stack); @@ -1861,7 +1861,7 @@ static char *_insert_args(int new_argc, char *new_argv[], struct lwp_args_info * { goto quit; } - page = rt_pages_alloc(0); /* 1 page */ + page = rt_pages_alloc_ext(0, PAGE_ANY_AVAILABLE); /* 1 page */ if (!page) { goto quit; @@ -2065,7 +2065,7 @@ int load_ldso(struct rt_lwp *lwp, char *exec_name, char *const argv[], char *con } } - page = rt_pages_alloc(0); /* 1 page */ + page = rt_pages_alloc_ext(0, PAGE_ANY_AVAILABLE); /* 1 page */ if (!page) { SET_ERRNO(ENOMEM); @@ -2252,7 +2252,7 @@ sysret_t 
sys_execve(const char *path, char *const argv[], char *const envp[]) SET_ERRNO(EINVAL); goto quit; } - page = rt_pages_alloc(0); /* 1 page */ + page = rt_pages_alloc_ext(0, PAGE_ANY_AVAILABLE); /* 1 page */ if (!page) { SET_ERRNO(ENOMEM); @@ -2396,7 +2396,7 @@ sysret_t sys_execve(const char *path, char *const argv[], char *const envp[]) arch_start_umode(lwp->args, lwp->text_entry, (void*)USER_STACK_VEND, - thread->stack_addr + thread->stack_size); + (char *)thread->stack_addr + thread->stack_size); /* never reach here */ } return -EINVAL; diff --git a/components/lwp/lwp_user_mm.c b/components/lwp/lwp_user_mm.c index 068073ec0b..dfb87aca9e 100644 --- a/components/lwp/lwp_user_mm.c +++ b/components/lwp/lwp_user_mm.c @@ -10,6 +10,7 @@ * 2021-02-12 lizhirui add 64-bit support for lwp_brk * 2021-02-19 lizhirui add riscv64 support for lwp_user_accessable and lwp_get_from_user * 2021-06-07 lizhirui modify user space bound check + * 2022-12-25 wangxiaoyao adapt to new mm */ #include @@ -122,7 +123,7 @@ static void _user_do_page_fault(struct rt_varea *varea, if (lwp_objs->source) { - void *paddr = rt_hw_mmu_v2p(lwp_objs->source, msg->fault_vaddr); + char *paddr = rt_hw_mmu_v2p(lwp_objs->source, msg->fault_vaddr); if (paddr != ARCH_MAP_FAILED) { void *vaddr; @@ -130,7 +131,7 @@ static void _user_do_page_fault(struct rt_varea *varea, if (!(varea->flag & MMF_TEXT)) { - void *cp = rt_pages_alloc(0); + void *cp = rt_pages_alloc_ext(0, PAGE_ANY_AVAILABLE); if (cp) { memcpy(cp, vaddr, ARCH_PAGE_SIZE); @@ -220,9 +221,9 @@ int lwp_unmap_user(struct rt_lwp *lwp, void *va) static void _dup_varea(rt_varea_t varea, struct rt_lwp *src_lwp, rt_aspace_t dst) { - void *vaddr = varea->start; - void *vend = vaddr + varea->size; - if (vaddr < (void *)USER_STACK_VSTART || vaddr >= (void *)USER_STACK_VEND) + char *vaddr = varea->start; + char *vend = vaddr + varea->size; + if (vaddr < (char *)USER_STACK_VSTART || vaddr >= (char *)USER_STACK_VEND) { while (vaddr != vend) { @@ -430,7 +431,7 @@ void *lwp_map_user_phy(struct rt_lwp *lwp, void *map_va, void *map_pa, size_t map_size, int cached) { int err; - void *va; + char *va; size_t offset = 0; if (!map_size) @@ -458,7 +459,7 @@ void *lwp_map_user_phy(struct rt_lwp *lwp, void *map_va, void *map_pa, rt_size_t attr = cached ? 
MMU_MAP_U_RWCB : MMU_MAP_U_RW; err = - rt_aspace_map_phy(lwp->aspace, &hint, attr, MM_PA_TO_OFF(map_pa), &va); + rt_aspace_map_phy(lwp->aspace, &hint, attr, MM_PA_TO_OFF(map_pa), (void **)&va); if (err != RT_EOK) { va = RT_NULL; diff --git a/components/mm/avl_adpt.c b/components/mm/avl_adpt.c index 22e4ff8330..df19418002 100644 --- a/components/mm/avl_adpt.c +++ b/components/mm/avl_adpt.c @@ -78,7 +78,7 @@ static struct rt_varea *search(struct util_avl_root *root, { rt_varea_t varea = VAREA_ENTRY(node); int cmp = compare(range.start, range.end, varea->start, - varea->start + varea->size - 1); + (char *)varea->start + varea->size - 1); if (cmp < 0) { @@ -118,7 +118,7 @@ rt_varea_t _aspace_bst_search_exceed(struct rt_aspace *aspace, void *start) if (cmp < 0) { /* varae exceed start */ - ptrdiff_t off = va_s - start; + ptrdiff_t off = (char *)va_s - (char *)start; if (off < min_off) { min_off = off; diff --git a/components/mm/ioremap.c b/components/mm/ioremap.c index 93dd695d1c..04f09fdbaf 100644 --- a/components/mm/ioremap.c +++ b/components/mm/ioremap.c @@ -33,12 +33,12 @@ enum ioremap_type static void *_ioremap_type(void *paddr, size_t size, enum ioremap_type type) { - void *v_addr = NULL; + char *v_addr = NULL; size_t attr; size_t lo_off; int err; - lo_off = (uintptr_t)paddr & ARCH_PAGE_MASK; + lo_off = (rt_ubase_t)paddr & ARCH_PAGE_MASK; struct rt_mm_va_hint hint = { .prefer = RT_NULL, @@ -62,7 +62,7 @@ static void *_ioremap_type(void *paddr, size_t size, enum ioremap_type type) default: return v_addr; } - err = rt_aspace_map_phy(&rt_kernel_space, &hint, attr, MM_PA_TO_OFF(paddr), &v_addr); + err = rt_aspace_map_phy(&rt_kernel_space, &hint, attr, MM_PA_TO_OFF(paddr), (void **)&v_addr); if (err) { diff --git a/components/mm/mm_aspace.c b/components/mm/mm_aspace.c index 1a8429c8f9..d79ceacc16 100644 --- a/components/mm/mm_aspace.c +++ b/components/mm/mm_aspace.c @@ -134,24 +134,16 @@ static int _do_named_map(rt_aspace_t aspace, void *vaddr, rt_size_t length, int err = RT_EOK; /* it's ensured by caller that (void*)end will not overflow */ - void *end = vaddr + length; void *phyaddr = (void *)(offset << MM_PAGE_SHIFT); - while (vaddr != end) + + void *ret = rt_hw_mmu_map(aspace, vaddr, phyaddr, length, attr); + if (ret == RT_NULL) { - /* TODO try to map with huge TLB, when flag & HUGEPAGE */ - rt_size_t pgsz = ARCH_PAGE_SIZE; - void *ret = rt_hw_mmu_map(aspace, vaddr, phyaddr, pgsz, attr); - if (ret == RT_NULL) - { - err = -RT_ERROR; - break; - } - vaddr += pgsz; - phyaddr += pgsz; + err = -RT_ERROR; } if (err == RT_EOK) - rt_hw_tlb_invalidate_range(aspace, end - length, length, ARCH_PAGE_SIZE); + rt_hw_tlb_invalidate_range(aspace, vaddr, length, ARCH_PAGE_SIZE); return err; } @@ -164,7 +156,7 @@ rt_inline void _do_page_fault(struct rt_aspace_fault_msg *msg, rt_size_t off, msg->fault_vaddr = vaddr; msg->fault_op = MM_FAULT_OP_READ; msg->fault_type = MM_FAULT_TYPE_PAGE_FAULT; - msg->response.status = -1; + msg->response.status = MM_FAULT_STATUS_UNRECOVERABLE; msg->response.vaddr = 0; msg->response.size = 0; @@ -180,9 +172,9 @@ int _varea_map_with_msg(rt_varea_t varea, struct rt_aspace_fault_msg *msg) * the page returned by handler is not checked * cause no much assumption can make on it */ - void *store = msg->response.vaddr; + char *store = msg->response.vaddr; rt_size_t store_sz = msg->response.size; - if (msg->fault_vaddr + store_sz > varea->start + varea->size) + if ((char *)msg->fault_vaddr + store_sz > (char *)varea->start + varea->size) { LOG_W("%s: too much (0x%lx) of buffer on vaddr 
%p is provided", __func__, store_sz, msg->fault_vaddr); @@ -232,9 +224,9 @@ static int _do_prefetch(rt_aspace_t aspace, rt_varea_t varea, void *start, int err = RT_EOK; /* it's ensured by caller that start & size ara page-aligned */ - void *end = start + size; - void *vaddr = start; - rt_size_t off = varea->offset + ((start - varea->start) >> ARCH_PAGE_SHIFT); + char *end = (char *)start + size; + char *vaddr = start; + rt_size_t off = varea->offset + ((vaddr - (char *)varea->start) >> ARCH_PAGE_SHIFT); while (vaddr != end) { @@ -243,8 +235,10 @@ static int _do_prefetch(rt_aspace_t aspace, rt_varea_t varea, void *start, _do_page_fault(&msg, off, vaddr, varea->mem_obj, varea); if (_varea_map_with_msg(varea, &msg)) + { + err = -RT_ENOMEM; break; - + } /** * It's hard to identify the mapping pattern on a customized handler * So we terminate the prefetch process on that case @@ -386,7 +380,7 @@ rt_varea_t _varea_create(void *start, rt_size_t size) } #define _IS_OVERFLOW(start, length) ((length) > (0ul - (uintptr_t)(start))) -#define _IS_OVERSIZE(start, length, limit_s, limit_sz) (((length) + (rt_size_t)((start) - (limit_start))) > (limit_size)) +#define _IS_OVERSIZE(start, length, limit_s, limit_sz) (((length) + (rt_size_t)((char *)(start) - (char *)(limit_start))) > (limit_size)) static inline int _not_in_range(void *start, rt_size_t length, void *limit_start, rt_size_t limit_size) @@ -449,6 +443,10 @@ int rt_aspace_map(rt_aspace_t aspace, void **addr, rt_size_t length, { rt_free(varea); } + else + { + *addr = varea->start; + } } else { @@ -461,10 +459,7 @@ int rt_aspace_map(rt_aspace_t aspace, void **addr, rt_size_t length, { *addr = NULL; } - else - { - *addr = varea->start; - } + return err; } @@ -642,7 +637,7 @@ int rt_aspace_unmap(rt_aspace_t aspace, void *addr) if (_not_in_range(addr, 1, aspace->start, aspace->size)) { LOG_I("%s: %lx not in range of aspace[%lx:%lx]", __func__, addr, - aspace->start, aspace->start + aspace->size); + aspace->start, (char *)aspace->start + aspace->size); return -RT_EINVAL; } @@ -658,7 +653,7 @@ static inline void *_lower(void *a, void *b) static inline void *_align(void *va, rt_ubase_t align_mask) { - return (void *)((rt_ubase_t)(va + ~align_mask) & align_mask); + return (void *)((rt_ubase_t)((char *)va + ~align_mask) & align_mask); } static void *_ascending_search(rt_varea_t varea, rt_size_t req_size, @@ -667,17 +662,17 @@ static void *_ascending_search(rt_varea_t varea, rt_size_t req_size, void *ret = RT_NULL; while (varea && varea->start < limit.end) { - void *candidate = varea->start + varea->size; + char *candidate = (char *)varea->start + varea->size; candidate = _align(candidate, align_mask); - if (candidate > limit.end || limit.end - candidate + 1 < req_size) + if (candidate > (char *)limit.end || (char *)limit.end - candidate + 1 < req_size) break; rt_varea_t nx_va = ASPACE_VAREA_NEXT(varea); if (nx_va) { rt_size_t gap_size = - _lower(limit.end, nx_va->start - 1) - candidate + 1; + (char *)_lower(limit.end, (char *)nx_va->start - 1) - candidate + 1; if (gap_size >= req_size) { ret = candidate; @@ -703,15 +698,15 @@ static void *_find_head_and_asc_search(rt_aspace_t aspace, rt_size_t req_size, rt_varea_t varea = _aspace_bst_search_exceed(aspace, limit.start); if (varea) { - void *candidate = _align(limit.start, align_mask); - rt_size_t gap_size = varea->start - candidate; + char *candidate = _align(limit.start, align_mask); + rt_size_t gap_size = (char *)varea->start - candidate; if (gap_size >= req_size) { rt_varea_t former = 
_aspace_bst_search(aspace, limit.start); if (former) { - candidate = _align(former->start + former->size, align_mask); - gap_size = varea->start - candidate; + candidate = _align((char *)former->start + former->size, align_mask); + gap_size = (char *)varea->start - candidate; if (gap_size >= req_size) va = candidate; @@ -730,12 +725,12 @@ static void *_find_head_and_asc_search(rt_aspace_t aspace, rt_size_t req_size, } else { - void *candidate; + char *candidate; rt_size_t gap_size; candidate = limit.start; candidate = _align(candidate, align_mask); - gap_size = limit.end - candidate + 1; + gap_size = (char *)limit.end - candidate + 1; if (gap_size >= req_size) va = candidate; @@ -750,7 +745,7 @@ static void *_find_free(rt_aspace_t aspace, void *prefer, rt_size_t req_size, { rt_varea_t varea = NULL; void *va = RT_NULL; - struct _mm_range limit = {limit_start, limit_start + limit_size - 1}; + struct _mm_range limit = {limit_start, (char *)limit_start + limit_size - 1}; rt_ubase_t align_mask = ~0ul; if (flags & MMF_REQUEST_ALIGN) @@ -762,7 +757,7 @@ static void *_find_free(rt_aspace_t aspace, void *prefer, rt_size_t req_size, { /* if prefer and free, just return the prefer region */ prefer = _align(prefer, align_mask); - struct _mm_range range = {prefer, prefer + req_size - 1}; + struct _mm_range range = {prefer, (char *)prefer + req_size - 1}; varea = _aspace_bst_search_overlap(aspace, range); if (!varea) @@ -780,7 +775,7 @@ static void *_find_free(rt_aspace_t aspace, void *prefer, rt_size_t req_size, if (va == RT_NULL) { /* rewind to first range */ - limit.end = varea->start - 1; + limit.end = (char *)varea->start - 1; va = _find_head_and_asc_search(aspace, req_size, align_mask, limit); } @@ -798,7 +793,7 @@ int rt_aspace_load_page(rt_aspace_t aspace, void *addr, rt_size_t npage) { int err = RT_EOK; rt_varea_t varea; - void *end = addr + (npage << ARCH_PAGE_SHIFT); + char *end = (char *)addr + (npage << ARCH_PAGE_SHIFT); WR_LOCK(aspace); varea = _aspace_bst_search(aspace, addr); @@ -809,7 +804,7 @@ int rt_aspace_load_page(rt_aspace_t aspace, void *addr, rt_size_t npage) LOG_W("%s: varea not exist", __func__); err = -RT_ENOENT; } - else if (addr >= end || (rt_size_t)addr & ARCH_PAGE_MASK || + else if ((char *)addr >= end || (rt_size_t)addr & ARCH_PAGE_MASK || _not_in_range(addr, npage << ARCH_PAGE_SHIFT, varea->start, varea->size)) { @@ -938,12 +933,12 @@ static int _dump(rt_varea_t varea, void *arg) { if (varea->mem_obj && varea->mem_obj->get_name) { - rt_kprintf("[%p - %p] %s\n", varea->start, varea->start + varea->size, + rt_kprintf("[%p - %p] %s\n", varea->start, (char *)varea->start + varea->size, varea->mem_obj->get_name(varea)); } else { - rt_kprintf("[%p - %p] phy-map\n", varea->start, varea->start + varea->size); + rt_kprintf("[%p - %p] phy-map\n", varea->start, (char *)varea->start + varea->size); rt_kprintf("\t\\_ paddr = %p\n", varea->offset << MM_PAGE_SHIFT); } return 0; diff --git a/components/mm/mm_fault.c b/components/mm/mm_fault.c index e904e2b5a4..155be9df4d 100644 --- a/components/mm/mm_fault.c +++ b/components/mm/mm_fault.c @@ -104,7 +104,7 @@ int rt_aspace_fault_try_fix(struct rt_aspace_fault_msg *msg) if (varea) { void *pa = rt_hw_mmu_v2p(aspace, msg->fault_vaddr); - msg->off = (msg->fault_vaddr - varea->start) >> ARCH_PAGE_SHIFT; + msg->off = ((char *)msg->fault_vaddr - (char *)varea->start) >> ARCH_PAGE_SHIFT; /* permission checked by fault op */ switch (msg->fault_op) diff --git a/components/mm/mm_object.c b/components/mm/mm_object.c index 5f6649ba7e..3b4ccddc89 
100644 --- a/components/mm/mm_object.c +++ b/components/mm/mm_object.c @@ -56,21 +56,21 @@ void rt_varea_pgmgr_pop_all(rt_varea_t varea) void rt_varea_pgmgr_pop(rt_varea_t varea, void *vaddr, rt_size_t size) { - void *vend = vaddr + size; + void *vend = (char *)vaddr + size; while (vaddr != vend) { rt_page_t page = rt_page_addr2page(vaddr); page->pre->next = page->next; page->next->pre = page->pre; rt_pages_free(vaddr, 0); - vaddr += ARCH_PAGE_SIZE; + vaddr = (char *)vaddr + ARCH_PAGE_SIZE; } } static void on_page_fault(struct rt_varea *varea, struct rt_aspace_fault_msg *msg) { void *page; - page = rt_pages_alloc(0); + page = rt_pages_alloc_ext(0, PAGE_ANY_AVAILABLE); if (!page) { diff --git a/components/mm/mm_page.c b/components/mm/mm_page.c index 1ec95598ae..68193fd13c 100644 --- a/components/mm/mm_page.c +++ b/components/mm/mm_page.c @@ -8,6 +8,7 @@ * 2019-11-01 Jesven The first version * 2022-12-13 WangXiaoyao Hot-pluggable, extensible * page management algorithm + * 2023-02-20 WangXiaoyao Multi-list page-management */ #include @@ -39,7 +40,8 @@ static void *init_mpr_cont_start; static struct rt_varea mpr_varea; -static struct rt_page *page_list[RT_PAGE_MAX_ORDER]; +static struct rt_page *page_list_low[RT_PAGE_MAX_ORDER]; +static struct rt_page *page_list_high[RT_PAGE_MAX_ORDER]; #define page_start ((rt_page_t)rt_mpr_start) @@ -61,21 +63,18 @@ static void hint_free(rt_mm_va_hint_t hint) static void on_page_fault(struct rt_varea *varea, struct rt_aspace_fault_msg *msg) { - void *init_start = (void *)init_mpr_align_start; - void *init_end = (void *)init_mpr_align_end; - if (msg->fault_vaddr < init_end && msg->fault_vaddr >= init_start) + char *init_start = (void *)init_mpr_align_start; + char *init_end = (void *)init_mpr_align_end; + if ((char *)msg->fault_vaddr < init_end && (char *)msg->fault_vaddr >= init_start) { - rt_size_t offset = msg->fault_vaddr - init_start; + rt_size_t offset = (char *)msg->fault_vaddr - init_start; msg->response.status = MM_FAULT_STATUS_OK; - msg->response.vaddr = init_mpr_cont_start + offset; + msg->response.vaddr = (char *)init_mpr_cont_start + offset; msg->response.size = ARCH_PAGE_SIZE; } else { - void *raw_page = rt_pages_alloc(0); - msg->response.status = MM_FAULT_STATUS_OK; - msg->response.vaddr = raw_page; - msg->response.size = ARCH_PAGE_SIZE; + rt_mm_dummy_mapper.on_page_fault(varea, msg); } } @@ -85,15 +84,123 @@ static struct rt_mem_obj mm_page_mapper = { .hint_free = hint_free, }; +#ifdef RT_DEBUG_PAGE_LEAK +static volatile int enable; +static rt_page_t _trace_head; +#define TRACE_ALLOC(pg, size) _trace_alloc(pg, __builtin_return_address(0), size) +#define TRACE_FREE(pgaddr, size) _trace_free(pgaddr, __builtin_return_address(0), size) + +void rt_page_leak_trace_start() +{ + // TODO multicore safety + _trace_head = NULL; + enable = 1; +} +MSH_CMD_EXPORT(rt_page_leak_trace_start, start page leak tracer); + +static void _collect() +{ + rt_page_t page = _trace_head; + if (!page) + { + rt_kputs("ok!\n"); + } + + while (page) + { + rt_page_t next = page->next; + void *pg_va = rt_page_page2addr(page); + LOG_W("LEAK: %p, allocator: %p, size bits: %lx", pg_va, page->caller, page->trace_size); + rt_pages_free(pg_va, page->trace_size); + page = next; + } +} + +void rt_page_leak_trace_stop() +{ + // TODO multicore safety + enable = 0; + _collect(); +} +MSH_CMD_EXPORT(rt_page_leak_trace_stop, stop page leak tracer); + +static void _trace_alloc(rt_page_t page, void *caller, size_t size_bits) +{ + if (enable) + { + page->caller = caller; + page->trace_size = 
size_bits; + page->tl_prev = NULL; + page->tl_next = NULL; + + if (_trace_head == NULL) + { + _trace_head = page; + } + else + { + _trace_head->tl_prev = page; + page->tl_next = _trace_head; + _trace_head = page; + } + } +} + +void _report(rt_page_t page, size_bits, char *msg) +{ + void *pg_va = rt_page_page2addr(page); + LOG_W("%s: %p, allocator: %p, size bits: %lx", msg, pg_va, page->caller, page->trace_size); + rt_kputs("backtrace\n"); + rt_hw_backtrace(0, 0); +} + +static void _trace_free(rt_page_t page, void *caller, size_t size_bits) +{ + if (enable) + { + /* free after free */ + if (page->trace_size == 0xabadcafe) + { + _report("free after free") + return ; + } + else if (page->trace_size != size_bits) + { + rt_kprintf("free with size bits %lx\n", size_bits); + _report("incompatible size bits parameter"); + return ; + } + + if (page->ref_cnt == 1) + { + if (page->tl_prev) + page->tl_prev->tl_next = page->tl_next; + if (page->tl_next) + page->tl_next->tl_prev = page->tl_prev; + + if (page == _trace_head) + _trace_head = page->next; + + page->tl_prev = NULL; + page->tl_next = NULL; + page->trace_size = 0xabadcafe; + } + } +} +#else +#define TRACE_ALLOC(x, y) +#define TRACE_FREE(x, y) +#endif + static inline void *page_to_addr(rt_page_t page) { - return (void *)((page - page_start) << ARCH_PAGE_SHIFT) - PV_OFFSET; + return (void *)(((page - page_start) << ARCH_PAGE_SHIFT) - PV_OFFSET); } static inline rt_page_t addr_to_page(rt_page_t pg_start, void *addr) { - addr += PV_OFFSET; - return &pg_start[((uintptr_t)addr >> ARCH_PAGE_SHIFT)]; + addr = (char *)addr + PV_OFFSET; + return &pg_start[((rt_ubase_t)addr >> ARCH_PAGE_SHIFT)]; } #define FLOOR(val, align) (((rt_size_t)(val) + (align)-1) & ~((align)-1)) @@ -143,7 +250,7 @@ void *rt_page_page2addr(struct rt_page *p) return page_to_addr(p); } -static inline struct rt_page *buddy_get(struct rt_page *p, +static inline struct rt_page *_buddy_get(struct rt_page *p, rt_uint32_t size_bits) { rt_size_t addr; @@ -153,7 +260,7 @@ static inline struct rt_page *buddy_get(struct rt_page *p, return rt_page_addr2page((void *)addr); } -static void page_remove(struct rt_page *p, rt_uint32_t size_bits) +static void _page_remove(rt_page_t page_list[], struct rt_page *p, rt_uint32_t size_bits) { if (p->pre) { @@ -172,7 +279,7 @@ static void page_remove(struct rt_page *p, rt_uint32_t size_bits) p->size_bits = ARCH_ADDRESS_WIDTH_BITS; } -static void page_insert(struct rt_page *p, rt_uint32_t size_bits) +static void _page_insert(rt_page_t page_list[], struct rt_page *p, rt_uint32_t size_bits) { p->next = page_list[size_bits]; if (p->next) @@ -194,7 +301,7 @@ static void _pages_ref_inc(struct rt_page *p, rt_uint32_t size_bits) idx = idx & ~((1UL << size_bits) - 1); page_head = page_start + idx; - page_head = (void *)page_head + early_offset; + page_head = (void *)((char *)page_head + early_offset); page_head->ref_cnt++; } @@ -211,13 +318,13 @@ static int _pages_ref_get(struct rt_page *p, rt_uint32_t size_bits) return page_head->ref_cnt; } -static int _pages_free(struct rt_page *p, rt_uint32_t size_bits) +static int _pages_free(rt_page_t page_list[], struct rt_page *p, rt_uint32_t size_bits) { rt_uint32_t level = size_bits; struct rt_page *buddy; RT_ASSERT(p >= page_start); - RT_ASSERT((void *)p < rt_mpr_start + rt_mpr_size); + RT_ASSERT((char *)p < (char *)rt_mpr_start + rt_mpr_size); RT_ASSERT(rt_kmem_v2p(p)); RT_ASSERT(p->ref_cnt > 0); RT_ASSERT(p->size_bits == ARCH_ADDRESS_WIDTH_BITS); @@ -231,10 +338,10 @@ static int _pages_free(struct rt_page *p, 
rt_uint32_t size_bits) while (level < RT_PAGE_MAX_ORDER - 1) { - buddy = buddy_get(p, level); + buddy = _buddy_get(p, level); if (buddy && buddy->size_bits == level) { - page_remove(buddy, level); + _page_remove(page_list, buddy, level); p = (p < buddy) ? p : buddy; level++; } @@ -243,18 +350,18 @@ static int _pages_free(struct rt_page *p, rt_uint32_t size_bits) break; } } - page_insert(p, level); + _page_insert(page_list, p, level); return 1; } -static struct rt_page *_pages_alloc(rt_uint32_t size_bits) +static struct rt_page *_pages_alloc(rt_page_t page_list[], rt_uint32_t size_bits) { struct rt_page *p; if (page_list[size_bits]) { p = page_list[size_bits]; - page_remove(p, size_bits); + _page_remove(page_list, p, size_bits); } else { @@ -273,11 +380,11 @@ static struct rt_page *_pages_alloc(rt_uint32_t size_bits) } p = page_list[level]; - page_remove(p, level); + _page_remove(page_list, p, level); while (level > size_bits) { - page_insert(p, level - 1); - p = buddy_get(p, level - 1); + _page_insert(page_list, p, level - 1); + p = _buddy_get(p, level - 1); level--; } } @@ -286,12 +393,12 @@ static struct rt_page *_pages_alloc(rt_uint32_t size_bits) return p; } -static void _early_page_remove(rt_page_t page, rt_uint32_t size_bits) +static void _early_page_remove(rt_page_t page_list[], rt_page_t page, rt_uint32_t size_bits) { - rt_page_t page_cont = (void *)page + early_offset; + rt_page_t page_cont = (rt_page_t)((char *)page + early_offset); if (page_cont->pre) { - rt_page_t pre_cont = (void *)page_cont->pre + early_offset; + rt_page_t pre_cont = (rt_page_t)((char *)page_cont->pre + early_offset); pre_cont->next = page_cont->next; } else @@ -301,23 +408,23 @@ static void _early_page_remove(rt_page_t page, rt_uint32_t size_bits) if (page_cont->next) { - rt_page_t next_cont = (void *)page_cont->next + early_offset; + rt_page_t next_cont = (rt_page_t)((char *)page_cont->next + early_offset); next_cont->pre = page_cont->pre; } page_cont->size_bits = ARCH_ADDRESS_WIDTH_BITS; } -static void _early_page_insert(rt_page_t page, int size_bits) +static void _early_page_insert(rt_page_t page_list[], rt_page_t page, int size_bits) { RT_ASSERT((void *)page >= rt_mpr_start && - (void *)page - rt_mpr_start < +rt_mpr_size); - rt_page_t page_cont = (void *)page + early_offset; + ((char *)page - (char *)rt_mpr_start) < rt_mpr_size); + rt_page_t page_cont = (rt_page_t)((char *)page + early_offset); page_cont->next = page_list[size_bits]; if (page_cont->next) { - rt_page_t next_cont = (void *)page_cont->next + early_offset; + rt_page_t next_cont = (rt_page_t)((char *)page_cont->next + early_offset); next_cont->pre = page; } page_cont->pre = 0; @@ -325,14 +432,14 @@ static void _early_page_insert(rt_page_t page, int size_bits) page_cont->size_bits = size_bits; } -static struct rt_page *_early_pages_alloc(rt_uint32_t size_bits) +static struct rt_page *_early_pages_alloc(rt_page_t page_list[], rt_uint32_t size_bits) { struct rt_page *p; if (page_list[size_bits]) { p = page_list[size_bits]; - _early_page_remove(p, size_bits); + _early_page_remove(page_list, p, size_bits); } else { @@ -351,20 +458,35 @@ static struct rt_page *_early_pages_alloc(rt_uint32_t size_bits) } p = page_list[level]; - _early_page_remove(p, level); + _early_page_remove(page_list, p, level); while (level > size_bits) { - _early_page_insert(p, level - 1); - p = buddy_get(p, level - 1); + _early_page_insert(page_list, p, level - 1); + p = _buddy_get(p, level - 1); level--; } } - rt_page_t page_cont = (void *)p + early_offset; + rt_page_t 
page_cont = (rt_page_t)((char *)p + early_offset); page_cont->size_bits = ARCH_ADDRESS_WIDTH_BITS; page_cont->ref_cnt = 1; return p; } +static rt_page_t *_get_page_list(void *vaddr) +{ + rt_ubase_t pa_int = (rt_ubase_t)vaddr + PV_OFFSET; + rt_page_t *list; + if (pa_int > UINT32_MAX) + { + list = page_list_high; + } + else + { + list = page_list_low; + } + return list; +} + int rt_page_ref_get(void *addr, rt_uint32_t size_bits) { struct rt_page *p; @@ -389,27 +511,73 @@ void rt_page_ref_inc(void *addr, rt_uint32_t size_bits) rt_hw_interrupt_enable(level); } -static rt_page_t (*pages_alloc_handler)(rt_uint32_t size_bits); +static rt_page_t (*pages_alloc_handler)(rt_page_t page_list[], rt_uint32_t size_bits); -void *rt_pages_alloc(rt_uint32_t size_bits) +/* if not, we skip the finding on page_list_high */ +static size_t _high_page_configured = 0; + +static rt_page_t *_flag_to_page_list(size_t flags) +{ + rt_page_t *page_list; + if (_high_page_configured && (flags & PAGE_ANY_AVAILABLE)) + { + page_list = page_list_high; + } + else + { + page_list = page_list_low; + } + return page_list; +} + +static void *_do_pages_alloc(rt_uint32_t size_bits, size_t flags) { void *alloc_buf = RT_NULL; struct rt_page *p; rt_base_t level; + rt_page_t *page_list = _flag_to_page_list(flags); level = rt_hw_interrupt_disable(); - p = pages_alloc_handler(size_bits); + p = pages_alloc_handler(page_list, size_bits); rt_hw_interrupt_enable(level); + + if (!p && page_list != page_list_low) + { + /* fall back */ + page_list = page_list_low; + + level = rt_hw_interrupt_disable(); + p = pages_alloc_handler(page_list, size_bits); + rt_hw_interrupt_enable(level); + } + if (p) { alloc_buf = page_to_addr(p); + + #ifdef RT_DEBUG_PAGE_LEAK + level = rt_hw_interrupt_disable(); + TRACE_ALLOC(p, size_bits); + rt_hw_interrupt_enable(level); + #endif } return alloc_buf; } +void *rt_pages_alloc(rt_uint32_t size_bits) +{ + return _do_pages_alloc(size_bits, 0); +} + +void *rt_pages_alloc_ext(rt_uint32_t size_bits, size_t flags) +{ + return _do_pages_alloc(size_bits, flags); +} + int rt_pages_free(void *addr, rt_uint32_t size_bits) { struct rt_page *p; + rt_page_t *page_list = _get_page_list(addr); int real_free = 0; p = rt_page_addr2page(addr); @@ -417,14 +585,18 @@ int rt_pages_free(void *addr, rt_uint32_t size_bits) { rt_base_t level; level = rt_hw_interrupt_disable(); - real_free = _pages_free(p, size_bits); + real_free = _pages_free(page_list, p, size_bits); + if (real_free) + TRACE_FREE(p, size_bits); rt_hw_interrupt_enable(level); } + return real_free; } void rt_page_list(void) __attribute__((alias("list_page"))); +#warning TODO: improve list page void list_page(void) { int i; @@ -435,7 +607,7 @@ void list_page(void) for (i = 0; i < RT_PAGE_MAX_ORDER; i++) { - struct rt_page *p = page_list[i]; + struct rt_page *p = page_list_low[i]; rt_kprintf("level %d ", i); @@ -447,6 +619,21 @@ void list_page(void) } rt_kprintf("\n"); } + for (i = 0; i < RT_PAGE_MAX_ORDER; i++) + { + struct rt_page *p = page_list_high[i]; + + rt_kprintf("level %d ", i); + + while (p) + { + total += (1UL << i); + rt_kprintf("[0x%08p]", rt_page_page2addr(p)); + p = p->next; + } + rt_kprintf("\n"); + } + rt_hw_interrupt_enable(level); rt_kprintf("free pages is 0x%08lx (%ld KB)\n", total, total * ARCH_PAGE_SIZE / 1024); rt_kprintf("-------------------------------\n"); @@ -462,7 +649,17 @@ void rt_page_get_info(rt_size_t *total_nr, rt_size_t *free_nr) level = rt_hw_interrupt_disable(); for (i = 0; i < RT_PAGE_MAX_ORDER; i++) { - struct rt_page *p = page_list[i]; + 
struct rt_page *p = page_list_low[i]; + + while (p) + { + total_free += (1UL << i); + p = p->next; + } + } + for (i = 0; i < RT_PAGE_MAX_ORDER; i++) + { + struct rt_page *p = page_list_high[i]; while (p) { @@ -475,6 +672,62 @@ void rt_page_get_info(rt_size_t *total_nr, rt_size_t *free_nr) *free_nr = total_free; } +static void _install_page(rt_page_t mpr_head, rt_region_t region, void *insert_handler) +{ + void (*insert)(rt_page_t *page_list, rt_page_t page, int size_bits) = insert_handler; + rt_region_t shadow; + shadow.start = region.start & ~shadow_mask; + shadow.end = FLOOR(region.end, shadow_mask + 1); + + if (shadow.end > UINT32_MAX) + _high_page_configured = 1; + + rt_page_t shad_head = addr_to_page(mpr_head, (void *)shadow.start); + rt_page_t shad_tail = addr_to_page(mpr_head, (void *)shadow.end); + rt_page_t head = addr_to_page(mpr_head, (void *)region.start); + rt_page_t tail = addr_to_page(mpr_head, (void *)region.end); + + /* mark shadow pages as illegal */ + for (rt_page_t iter = shad_head; iter < head; iter++) + { + iter->size_bits = ARCH_ADDRESS_WIDTH_BITS; + } + for (rt_page_t iter = tail; iter < shad_tail; iter++) + { + iter->size_bits = ARCH_ADDRESS_WIDTH_BITS; + } + + /* insert reserved pages to list */ + const int max_order = RT_PAGE_MAX_ORDER + ARCH_PAGE_SHIFT - 1; + while (region.start != region.end) + { + struct rt_page *p; + int align_bits; + int size_bits; + + size_bits = + ARCH_ADDRESS_WIDTH_BITS - 1 - rt_hw_clz(region.end - region.start); + align_bits = rt_hw_ctz(region.start); + if (align_bits < size_bits) + { + size_bits = align_bits; + } + if (size_bits > max_order) + { + size_bits = max_order; + } + + p = addr_to_page(mpr_head, (void *)region.start); + p->size_bits = ARCH_ADDRESS_WIDTH_BITS; + p->ref_cnt = 0; + + /* insert to list */ + rt_page_t *page_list = _get_page_list((void *)region.start); + insert(page_list, (rt_page_t)((char *)p - early_offset), size_bits - ARCH_PAGE_SHIFT); + region.start += (1UL << size_bits); + } +} + void rt_page_init(rt_region_t reg) { int i; @@ -500,7 +753,8 @@ void rt_page_init(rt_region_t reg) /* init free list */ for (i = 0; i < RT_PAGE_MAX_ORDER; i++) { - page_list[i] = 0; + page_list_low[i] = 0; + page_list_high[i] = 0; } /* map MPR area */ @@ -524,9 +778,9 @@ void rt_page_init(rt_region_t reg) rt_size_t init_mpr_npage = init_mpr_size >> ARCH_PAGE_SHIFT; init_mpr_cont_start = (void *)reg.start; - void *init_mpr_cont_end = init_mpr_cont_start + init_mpr_size; - early_offset = init_mpr_cont_start - (void *)init_mpr_align_start; - rt_page_t mpr_cont = rt_mpr_start + early_offset; + rt_size_t init_mpr_cont_end = (rt_size_t)init_mpr_cont_start + init_mpr_size; + early_offset = (rt_size_t)init_mpr_cont_start - init_mpr_align_start; + rt_page_t mpr_cont = (void *)((char *)rt_mpr_start + early_offset); /* mark init mpr pages as illegal */ rt_page_t head_cont = addr_to_page(mpr_cont, (void *)reg.start); @@ -536,48 +790,8 @@ void rt_page_init(rt_region_t reg) iter->size_bits = ARCH_ADDRESS_WIDTH_BITS; } - /* mark shadow pages as illegal */ - rt_page_t shad_head_cont = addr_to_page(mpr_cont, (void *)shadow.start); - for (rt_page_t iter = shad_head_cont; iter < head_cont; iter++) - { - iter->size_bits = ARCH_ADDRESS_WIDTH_BITS; - } - rt_page_t shad_tail_cont = addr_to_page(mpr_cont, (void *)shadow.end); - for (rt_page_t iter = tail_cont; iter < shad_tail_cont; iter++) - { - iter->size_bits = ARCH_ADDRESS_WIDTH_BITS; - } - - /* insert reserved pages to list */ - reg.start = (rt_size_t)init_mpr_cont_end; - const int max_order = 
RT_PAGE_MAX_ORDER + ARCH_PAGE_SHIFT - 1; - while (reg.start != reg.end) - { - struct rt_page *p; - int align_bits; - int size_bits; - - size_bits = - ARCH_ADDRESS_WIDTH_BITS - 1 - rt_hw_clz(reg.end - reg.start); - align_bits = rt_hw_ctz(reg.start); - if (align_bits < size_bits) - { - size_bits = align_bits; - } - if (size_bits > max_order) - { - size_bits = max_order; - } - - p = addr_to_page(mpr_cont, (void *)reg.start); - p->size_bits = ARCH_ADDRESS_WIDTH_BITS; - p->ref_cnt = 0; - - /* insert to list */ - _early_page_insert((void *)p - early_offset, - size_bits - ARCH_PAGE_SHIFT); - reg.start += (1UL << size_bits); - } + reg.start = init_mpr_cont_end; + _install_page(mpr_cont, reg, _early_page_insert); pages_alloc_handler = _early_pages_alloc; /* doing the page table bushiness */ @@ -594,7 +808,7 @@ void rt_page_init(rt_region_t reg) static int _load_mpr_area(void *head, void *tail) { int err = 0; - void *iter = (void *)((uintptr_t)head & ~ARCH_PAGE_MASK); + char *iter = (char *)((rt_ubase_t)head & ~ARCH_PAGE_MASK); tail = (void *)FLOOR(tail, ARCH_PAGE_SIZE); while (iter != tail) @@ -630,19 +844,7 @@ int rt_page_install(rt_region_t region) if (err == RT_EOK) { - while (region.start != region.end) - { - struct rt_page *p; - int size_bits; - - size_bits = RT_PAGE_MAX_ORDER - 1; - p = addr_to_page(page_start, (void *)region.start); - p->size_bits = ARCH_ADDRESS_WIDTH_BITS; - p->ref_cnt = 1; - - _pages_free(p, size_bits); - region.start += (1UL << (size_bits + ARCH_PAGE_SHIFT)); - } + _install_page(rt_mpr_start, region, _page_insert); } } return err; diff --git a/components/mm/mm_page.h b/components/mm/mm_page.h index 788a5ef5b0..4ced90f9aa 100644 --- a/components/mm/mm_page.h +++ b/components/mm/mm_page.h @@ -23,10 +23,35 @@ union {struct {fields}; char _padding[GET_FLOOR(struct {fields})];};\ } *rt_page_t +/** + * @brief PAGE ALLOC FLAGS + * + * @info PAGE_ANY_AVAILABLE + * page allocation default to use lower region, this behavior can change by setting + * PAGE_ANY_AVAILABLE + */ + +#define PAGE_ANY_AVAILABLE 0x1ul + + +#ifdef RT_DEBUG_PAGE_LEAK +#define DEBUG_FIELD { \ + /* trace list */ \ + struct rt_page *tl_next; \ + struct rt_page *tl_prev; \ + void *caller; \ + size_t trace_size; \ +} +#else +#define DEBUG_FIELD +#endif + DEF_PAGE_T( struct rt_page *next; /* same level next */ struct rt_page *pre; /* same level pre */ + DEBUG_FIELD + rt_uint32_t size_bits; /* if is ARCH_ADDRESS_WIDTH_BITS, means not free */ rt_uint32_t ref_cnt; /* page group ref count */ ); @@ -49,6 +74,8 @@ void rt_page_cleanup(void); void *rt_pages_alloc(rt_uint32_t size_bits); +void *rt_pages_alloc_ext(rt_uint32_t size_bits, size_t flags); + void rt_page_ref_inc(void *addr, rt_uint32_t size_bits); int rt_page_ref_get(void *addr, rt_uint32_t size_bits); @@ -78,4 +105,8 @@ struct rt_page *rt_page_addr2page(void *addr); */ int rt_page_install(rt_region_t region); +void rt_page_leak_trace_start(void); + +void rt_page_leak_trace_stop(void); + #endif /* __MM_PAGE_H__ */ diff --git a/libcpu/aarch64/common/cache.h b/libcpu/aarch64/common/cache.h index e94e4b65da..06a2805c48 100644 --- a/libcpu/aarch64/common/cache.h +++ b/libcpu/aarch64/common/cache.h @@ -23,7 +23,10 @@ void rt_hw_cpu_dcache_invalidate(void *start_addr, unsigned long size); static inline void rt_hw_icache_invalidate_all(void) { - __asm_invalidate_icache_all(); + /* wait for any modification complete */ + __asm__ volatile ("dsb ishst"); + __asm__ volatile ("ic iallu"); + __asm__ volatile ("isb"); } void rt_hw_cpu_icache_invalidate(void *addr, rt_size_t 
size); diff --git a/libcpu/aarch64/common/cpu.c b/libcpu/aarch64/common/cpu.c index f5f96a5d90..8f7f2843a9 100644 --- a/libcpu/aarch64/common/cpu.c +++ b/libcpu/aarch64/common/cpu.c @@ -132,7 +132,10 @@ static rt_uint64_t _read_be_number(void *start, int size) { rt_uint64_t buf = 0; for (; size > 0; size--) - buf = (buf << 32) | fdt32_to_cpu(*(uint32_t *)start++); + { + buf = (buf << 32) | fdt32_to_cpu(*(uint32_t *)start); + start = (uint32_t *)start + 1; + } return buf; } diff --git a/libcpu/aarch64/common/mmu.c b/libcpu/aarch64/common/mmu.c index 1e94fdca97..a08f3fca53 100644 --- a/libcpu/aarch64/common/mmu.c +++ b/libcpu/aarch64/common/mmu.c @@ -1,13 +1,14 @@ /* - * Copyright (c) 2006-2018, RT-Thread Development Team + * Copyright (c) 2006-2023, RT-Thread Development Team * * SPDX-License-Identifier: Apache-2.0 * * Change Logs: * Date Author Notes * 2012-01-10 bernard porting to AM1808 + * 2021-11-28 GuEe-GUI first version + * 2022-12-10 WangXiaoyao porting to MM */ - #include #include #include @@ -79,6 +80,7 @@ static void _kenrel_unmap_4K(unsigned long *lv0_tbl, void *v_addr) { break; } + /* next table entry in current level */ level_info[level].pos = cur_lv_tbl + off; cur_lv_tbl = (unsigned long *)(page & MMU_ADDRESS_MASK); cur_lv_tbl = (unsigned long *)((unsigned long)cur_lv_tbl - PV_OFFSET); @@ -119,8 +121,7 @@ static void _kenrel_unmap_4K(unsigned long *lv0_tbl, void *v_addr) return; } -static int _kenrel_map_4K(unsigned long *lv0_tbl, void *vaddr, void *paddr, - unsigned long attr) +static int _kernel_map_4K(unsigned long *lv0_tbl, void *vaddr, void *paddr, unsigned long attr) { int ret = 0; int level; @@ -145,7 +146,7 @@ static int _kenrel_map_4K(unsigned long *lv0_tbl, void *vaddr, void *paddr, off &= MMU_LEVEL_MASK; if (!(cur_lv_tbl[off] & MMU_TYPE_USED)) { - page = (unsigned long)rt_pages_alloc(0); + page = (unsigned long)rt_pages_alloc_ext(0, PAGE_ANY_AVAILABLE); if (!page) { ret = MMU_MAP_ERROR_NOPAGE; @@ -188,19 +189,104 @@ err: return ret; } +static int _kernel_map_2M(unsigned long *lv0_tbl, void *vaddr, void *paddr, unsigned long attr) +{ + int ret = 0; + int level; + unsigned long *cur_lv_tbl = lv0_tbl; + unsigned long page; + unsigned long off; + unsigned long va = (unsigned long)vaddr; + unsigned long pa = (unsigned long)paddr; + + int level_shift = MMU_ADDRESS_BITS; + + if (va & ARCH_SECTION_MASK) + { + return MMU_MAP_ERROR_VANOTALIGN; + } + if (pa & ARCH_SECTION_MASK) + { + return MMU_MAP_ERROR_PANOTALIGN; + } + for (level = 0; level < MMU_TBL_BLOCK_2M_LEVEL; level++) + { + off = (va >> level_shift); + off &= MMU_LEVEL_MASK; + if (!(cur_lv_tbl[off] & MMU_TYPE_USED)) + { + page = (unsigned long)rt_pages_alloc_ext(0, PAGE_ANY_AVAILABLE); + if (!page) + { + ret = MMU_MAP_ERROR_NOPAGE; + goto err; + } + rt_memset((char *)page, 0, ARCH_PAGE_SIZE); + rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, (void *)page, ARCH_PAGE_SIZE); + cur_lv_tbl[off] = (page + PV_OFFSET) | MMU_TYPE_TABLE; + rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, cur_lv_tbl + off, sizeof(void *)); + } + else + { + page = cur_lv_tbl[off]; + page &= MMU_ADDRESS_MASK; + /* page to va */ + page -= PV_OFFSET; + rt_page_ref_inc((void *)page, 0); + } + page = cur_lv_tbl[off]; + if ((page & MMU_TYPE_MASK) == MMU_TYPE_BLOCK) + { + /* is block! error! 
*/ + ret = MMU_MAP_ERROR_CONFLICT; + goto err; + } + cur_lv_tbl = (unsigned long *)(page & MMU_ADDRESS_MASK); + cur_lv_tbl = (unsigned long *)((unsigned long)cur_lv_tbl - PV_OFFSET); + level_shift -= MMU_LEVEL_SHIFT; + } + /* now is level page */ + attr &= MMU_ATTRIB_MASK; + pa |= (attr | MMU_TYPE_BLOCK); /* block */ + off = (va >> ARCH_SECTION_SHIFT); + off &= MMU_LEVEL_MASK; + cur_lv_tbl[off] = pa; + rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, cur_lv_tbl + off, sizeof(void *)); + return ret; +err: + _kenrel_unmap_4K(lv0_tbl, (void *)va); + return ret; +} + void *rt_hw_mmu_map(rt_aspace_t aspace, void *v_addr, void *p_addr, size_t size, size_t attr) { int ret = -1; void *unmap_va = v_addr; - size_t npages = size >> ARCH_PAGE_SHIFT; + size_t npages; + size_t stride; + int (*mapper)(unsigned long *lv0_tbl, void *vaddr, void *paddr, unsigned long attr); + + if (((rt_ubase_t)v_addr & ARCH_SECTION_MASK) || (size & ARCH_SECTION_MASK)) + { + /* legacy 4k mapping */ + npages = size >> ARCH_PAGE_SHIFT; + stride = ARCH_PAGE_SIZE; + mapper = _kernel_map_4K; + } + else + { + /* 2m huge page */ + npages = size >> ARCH_SECTION_SHIFT; + stride = ARCH_SECTION_SIZE; + mapper = _kernel_map_2M; + } - // TODO trying with HUGEPAGE here while (npages--) { MM_PGTBL_LOCK(aspace); - ret = _kenrel_map_4K(aspace->page_table, v_addr, p_addr, attr); + ret = mapper(aspace->page_table, v_addr, p_addr, attr); MM_PGTBL_UNLOCK(aspace); if (ret != 0) @@ -213,12 +299,12 @@ void *rt_hw_mmu_map(rt_aspace_t aspace, void *v_addr, void *p_addr, size_t size, MM_PGTBL_LOCK(aspace); _kenrel_unmap_4K(aspace->page_table, (void *)unmap_va); MM_PGTBL_UNLOCK(aspace); - unmap_va += ARCH_PAGE_SIZE; + unmap_va = (char *)unmap_va + stride; } break; } - v_addr += ARCH_PAGE_SIZE; - p_addr += ARCH_PAGE_SIZE; + v_addr = (char *)v_addr + stride; + p_addr = (char *)p_addr + stride; } if (ret == 0) @@ -244,7 +330,7 @@ void rt_hw_mmu_unmap(rt_aspace_t aspace, void *v_addr, size_t size) MM_PGTBL_LOCK(aspace); _kenrel_unmap_4K(aspace->page_table, v_addr); MM_PGTBL_UNLOCK(aspace); - v_addr += ARCH_PAGE_SIZE; + v_addr = (char *)v_addr + ARCH_PAGE_SIZE; } } @@ -254,7 +340,7 @@ void rt_hw_aspace_switch(rt_aspace_t aspace) { void *pgtbl = aspace->page_table; pgtbl = rt_kmem_v2p(pgtbl); - uintptr_t tcr; + rt_ubase_t tcr; __asm__ volatile("msr ttbr0_el1, %0" ::"r"(pgtbl) : "memory"); @@ -337,20 +423,19 @@ void rt_hw_mmu_setup(rt_aspace_t aspace, struct mem_desc *mdesc, int desc_nr) rt_page_cleanup(); } - #ifdef RT_USING_SMART -static inline void _init_region(void *vaddr, size_t size) +static void _init_region(void *vaddr, size_t size) { rt_ioremap_start = vaddr; rt_ioremap_size = size; - rt_mpr_start = rt_ioremap_start - rt_mpr_size; + rt_mpr_start = (char *)rt_ioremap_start - rt_mpr_size; } #else -#define RTOS_VEND ((void *)0xfffffffff000UL) +#define RTOS_VEND (0xfffffffff000UL) static inline void _init_region(void *vaddr, size_t size) { - rt_mpr_start = RTOS_VEND - rt_mpr_size; + rt_mpr_start = (void *)(RTOS_VEND - rt_mpr_size); } #endif @@ -395,7 +480,7 @@ int rt_hw_mmu_map_init(rt_aspace_t aspace, void *v_address, size_t size, rt_aspace_init(aspace, (void *)KERNEL_VADDR_START, 0 - KERNEL_VADDR_START, vtable); #else - rt_aspace_init(aspace, (void *)0x1000, RTOS_VEND - (void *)0x1000, vtable); + rt_aspace_init(aspace, (void *)0x1000, RTOS_VEND - 0x1000ul, vtable); #endif _init_region(v_address, size); @@ -586,26 +671,35 @@ void *rt_hw_mmu_v2p(rt_aspace_t aspace, void *v_addr) { int level_shift; unsigned long paddr; - unsigned long *pte = _query(aspace, 
v_addr, &level_shift); - if (pte) + if (aspace == &rt_kernel_space) { - paddr = *pte & MMU_ADDRESS_MASK; - paddr |= (uintptr_t)v_addr & ((1ul << level_shift) - 1); + paddr = (unsigned long)rt_hw_mmu_kernel_v2p(v_addr); } else { - paddr = (unsigned long)ARCH_MAP_FAILED; + unsigned long *pte = _query(aspace, v_addr, &level_shift); + + if (pte) + { + paddr = *pte & MMU_ADDRESS_MASK; + paddr |= (rt_ubase_t)v_addr & ((1ul << level_shift) - 1); + } + else + { + paddr = (unsigned long)ARCH_MAP_FAILED; + } } + return (void *)paddr; } -static int _noncache(uintptr_t *pte) +static int _noncache(rt_ubase_t *pte) { int err = 0; - const uintptr_t idx_shift = 2; - const uintptr_t idx_mask = 0x7 << idx_shift; - uintptr_t entry = *pte; + const rt_ubase_t idx_shift = 2; + const rt_ubase_t idx_mask = 0x7 << idx_shift; + rt_ubase_t entry = *pte; if ((entry & idx_mask) == (NORMAL_MEM << idx_shift)) { *pte = (entry & ~idx_mask) | (NORMAL_NOCACHE_MEM << idx_shift); @@ -618,12 +712,12 @@ static int _noncache(uintptr_t *pte) return err; } -static int _cache(uintptr_t *pte) +static int _cache(rt_ubase_t *pte) { int err = 0; - const uintptr_t idx_shift = 2; - const uintptr_t idx_mask = 0x7 << idx_shift; - uintptr_t entry = *pte; + const rt_ubase_t idx_shift = 2; + const rt_ubase_t idx_mask = 0x7 << idx_shift; + rt_ubase_t entry = *pte; if ((entry & idx_mask) == (NORMAL_NOCACHE_MEM << idx_shift)) { *pte = (entry & ~idx_mask) | (NORMAL_MEM << idx_shift); @@ -636,7 +730,7 @@ static int _cache(uintptr_t *pte) return err; } -static int (*control_handler[MMU_CNTL_DUMMY_END])(uintptr_t *pte) = { +static int (*control_handler[MMU_CNTL_DUMMY_END])(rt_ubase_t *pte) = { [MMU_CNTL_CACHE] = _cache, [MMU_CNTL_NONCACHE] = _noncache, }; @@ -646,17 +740,18 @@ int rt_hw_mmu_control(struct rt_aspace *aspace, void *vaddr, size_t size, { int level_shift; int err = -RT_EINVAL; - void *vend = vaddr + size; + rt_ubase_t vstart = (rt_ubase_t)vaddr; + rt_ubase_t vend = vstart + size; - int (*handler)(uintptr_t * pte); + int (*handler)(rt_ubase_t * pte); if (cmd >= 0 && cmd < MMU_CNTL_DUMMY_END) { handler = control_handler[cmd]; - while (vaddr < vend) + while (vstart < vend) { - uintptr_t *pte = _query(aspace, vaddr, &level_shift); - void *range_end = vaddr + (1ul << level_shift); + rt_ubase_t *pte = _query(aspace, (void *)vstart, &level_shift); + rt_ubase_t range_end = vstart + (1ul << level_shift); RT_ASSERT(range_end <= vend); if (pte) @@ -664,7 +759,7 @@ int rt_hw_mmu_control(struct rt_aspace *aspace, void *vaddr, size_t size, err = handler(pte); RT_ASSERT(err == RT_EOK); } - vaddr = range_end; + vstart = range_end; } } else diff --git a/libcpu/aarch64/common/mmu.h b/libcpu/aarch64/common/mmu.h index af9505ea6a..d85f79e792 100644 --- a/libcpu/aarch64/common/mmu.h +++ b/libcpu/aarch64/common/mmu.h @@ -97,6 +97,28 @@ static inline void *rt_hw_mmu_tbl_get() return (void *)(tbl & ((1ul << 48) - 2)); } +static inline void *rt_hw_mmu_kernel_v2p(void *v_addr) +{ + rt_ubase_t par; + void *paddr; + asm volatile("at s1e1w, %0"::"r"(v_addr):"memory"); + asm volatile("mrs %0, par_el1":"=r"(par)::"memory"); + + if (par & 0x1) + { + paddr = ARCH_MAP_FAILED; + } + else + { + #define MMU_ADDRESS_MASK 0x0000fffffffff000UL + par &= MMU_ADDRESS_MASK; + par |= (rt_ubase_t)v_addr & ARCH_PAGE_MASK; + paddr = (void *)par; + } + + return paddr; +} + int rt_hw_mmu_control(struct rt_aspace *aspace, void *vaddr, size_t size, enum rt_mmu_cntl cmd); diff --git a/src/Kconfig b/src/Kconfig index a6f8a14bb0..ad6528048d 100644 --- a/src/Kconfig +++ b/src/Kconfig @@ 
-261,6 +261,12 @@ if RT_DEBUG int default 1 if RT_DEBUG_MEMHEAP_CONFIG + if ARCH_MM_MMU + config RT_DEBUG_PAGE_LEAK + bool "Enable page leaking tracer" + default n + endif + config RT_DEBUG_MODULE_CONFIG bool "Enable debugging of Application Module" default n @@ -305,15 +311,17 @@ endmenu menu "Memory Management" - config RT_PAGE_MAX_ORDER - int "Max order of pages allocatable by page allocator" - default 11 - help - For example, A value of 11 means the maximum chunk of contiguous memory - allocatable by page system is 2^(11 + ARCH_PAGE_BITS - 1) Bytes. - Large memory requirement can consume all system resource, and should - consider reserved memory instead to enhance system endurance. - Max order should at least satisfied usage by huge page. + if ARCH_MM_MMU + config RT_PAGE_MAX_ORDER + int "Max order of pages allocatable by page allocator" + default 11 + help + For example, A value of 11 means the maximum chunk of contiguous memory + allocatable by page system is 2^(11 + ARCH_PAGE_BITS - 1) Bytes. + Large memory requirement can consume all system resource, and should + consider reserved memory instead to enhance system endurance. + Max order should at least satisfied usage by huge page. + endif config RT_USING_MEMPOOL bool "Using memory pool"
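
Below are illustrative usage sketches for the interfaces this patch introduces; they are not part of the commit, and the include paths shown (<mm_page.h>, the arch's <mmu.h>, <mm_aspace.h>) are assumptions about the consuming module's build setup.

The new rt_pages_alloc_ext() keeps the old rt_pages_alloc() behaviour available while letting callers opt in to the high free lists. Passing PAGE_ANY_AVAILABLE allows a request to be served from physical memory above 4 GiB first, with an automatic fall-back to the low lists when the high lists are empty or were never configured; a flags value of 0 preserves the former "low region only" behaviour, which callers with 32-bit DMA constraints still rely on.

/* Sketch: allocate and release an order-2 page group (4 contiguous frames). */
#include <rtthread.h>
#include <mm_page.h>    /* assumed include path for the mm component */

static void *demo_alloc(void)
{
    /* size_bits is the buddy order: 2 means 2^2 = 4 contiguous page frames */
    void *buf = rt_pages_alloc_ext(2, PAGE_ANY_AVAILABLE);

    if (buf)
        rt_memset(buf, 0, 4ul << ARCH_PAGE_SHIFT);

    return buf;
}

static void demo_free(void *buf)
{
    /* the order passed to rt_pages_free() must match the one used at allocation */
    if (buf)
        rt_pages_free(buf, 2);
}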
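
The page-leak tracer added behind RT_DEBUG_PAGE_LEAK records the caller and order of every page allocation made while it is enabled and, on stop, logs (and reclaims) each page group that was never freed; double frees and mismatched orders inside the traced window are reported as well. The same entry points are exported as the MSH commands rt_page_leak_trace_start and rt_page_leak_trace_stop, so the helper below (hypothetical, not part of the patch) simply drives them from C around a suspect code path.

/* Sketch: audit a code path for leaked page frames. */
#include <rtthread.h>
#include <mm_page.h>

void check_page_leaks(void (*suspect)(void))
{
#ifdef RT_DEBUG_PAGE_LEAK
    rt_page_leak_trace_start();
    suspect();                      /* run the workload being audited */
    rt_page_leak_trace_stop();      /* prints "LEAK: ..." for unbalanced allocations */
#else
    suspect();
#endif
}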
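
On AArch64 the mapper now emits 2 MiB block descriptors when it can: rt_hw_mmu_map() takes the huge-page path only when both the virtual address and the size are section (2 MiB) aligned, and within that path _kernel_map_2M() additionally rejects a physical address that is not section aligned. A misaligned virtual address or size silently falls back to the 4 KiB page loop, while a misaligned physical address makes the 2 MiB path fail outright. The predicate below is a sketch of that decision, a hypothetical helper mirroring the patch's checks rather than an exported API.

/* Sketch: will a mapping request be satisfied with 2 MiB blocks? */
#include <rtthread.h>
#include <mmu.h>    /* AArch64 mmu.h, provides ARCH_SECTION_MASK / MMU_MAP_ERROR_* */

static int request_uses_2m_blocks(void *v_addr, void *p_addr, rt_size_t size)
{
    if (((rt_ubase_t)v_addr & ARCH_SECTION_MASK) || (size & ARCH_SECTION_MASK))
        return 0;   /* falls back to the legacy 4 KiB page loop */

    if ((rt_ubase_t)p_addr & ARCH_SECTION_MASK)
        return 0;   /* _kernel_map_2M() would return MMU_MAP_ERROR_PANOTALIGN */

    return 1;       /* mapped with one 2 MiB block entry per ARCH_SECTION_SIZE */
}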
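
rt_hw_mmu_v2p() now short-circuits lookups in rt_kernel_space through the new rt_hw_mmu_kernel_v2p() inline, which asks the hardware walker via AT S1E1W / PAR_EL1 instead of walking the page tables in software. Because S1E1W translates for a write access at EL1, a read-only kernel mapping also reports ARCH_MAP_FAILED; the helper below is only a sketch of how a caller consumes the result.

/* Sketch: translate a kernel virtual address to its physical address. */
#include <rtthread.h>
#include <mm_aspace.h>
#include <mmu.h>

void *kernel_va_to_pa(void *kva)
{
    void *pa = rt_hw_mmu_v2p(&rt_kernel_space, kva);

    if (pa == ARCH_MAP_FAILED)
        return RT_NULL;     /* not mapped (or not writable at EL1) */

    return pa;
}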