From b424169e1742c1b19d039f9871dd3c672ea7b729 Mon Sep 17 00:00:00 2001 From: GUI <39894654+GuEe-GUI@users.noreply.github.com> Date: Sat, 15 Jul 2023 15:21:23 +0800 Subject: [PATCH] AArch64: fixup fpu storage's size in stack and append Q16 ~ Q31 (#7815) Signed-off-by: GuEe-GUI --- .../lwp/arch/aarch64/cortex-a/lwp_gcc.S | 110 +----------- libcpu/aarch64/common/armv8.h | 161 ++++++++++++++---- libcpu/aarch64/common/asm-fpu.h | 35 +++- libcpu/aarch64/common/stack.c | 41 +---- 4 files changed, 171 insertions(+), 176 deletions(-) diff --git a/components/lwp/arch/aarch64/cortex-a/lwp_gcc.S b/components/lwp/arch/aarch64/cortex-a/lwp_gcc.S index 43cac34edd..a8f81ac248 100644 --- a/components/lwp/arch/aarch64/cortex-a/lwp_gcc.S +++ b/components/lwp/arch/aarch64/cortex-a/lwp_gcc.S @@ -8,9 +8,14 @@ * 2021-05-18 Jesven first version */ +#ifndef __ASSEMBLY__ +#define __ASSEMBLY__ +#endif + #include "rtconfig.h" #include "asm-generic.h" #include "asm-fpu.h" +#include "armv8.h" /********************* * SPSR BIT * @@ -32,67 +37,6 @@ #define SPSR_Z(v) ((v) << 30) #define SPSR_N(v) ((v) << 31) -/********************* - * CONTEXT_OFFSET * - *********************/ - -#define CONTEXT_OFFSET_ELR_EL1 0x0 -#define CONTEXT_OFFSET_SPSR_EL1 0x8 -#define CONTEXT_OFFSET_SP_EL0 0x10 -#define CONTEXT_OFFSET_X30 0x18 -#define CONTEXT_OFFSET_FPCR 0x20 -#define CONTEXT_OFFSET_FPSR 0x28 -#define CONTEXT_OFFSET_X28 0x30 -#define CONTEXT_OFFSET_X29 0x38 -#define CONTEXT_OFFSET_X26 0x40 -#define CONTEXT_OFFSET_X27 0x48 -#define CONTEXT_OFFSET_X24 0x50 -#define CONTEXT_OFFSET_X25 0x58 -#define CONTEXT_OFFSET_X22 0x60 -#define CONTEXT_OFFSET_X23 0x68 -#define CONTEXT_OFFSET_X20 0x70 -#define CONTEXT_OFFSET_X21 0x78 -#define CONTEXT_OFFSET_X18 0x80 -#define CONTEXT_OFFSET_X19 0x88 -#define CONTEXT_OFFSET_X16 0x90 -#define CONTEXT_OFFSET_X17 0x98 -#define CONTEXT_OFFSET_X14 0xa0 -#define CONTEXT_OFFSET_X15 0xa8 -#define CONTEXT_OFFSET_X12 0xb0 -#define CONTEXT_OFFSET_X13 0xb8 -#define CONTEXT_OFFSET_X10 
0xc0 -#define CONTEXT_OFFSET_X11 0xc8 -#define CONTEXT_OFFSET_X8 0xd0 -#define CONTEXT_OFFSET_X9 0xd8 -#define CONTEXT_OFFSET_X6 0xe0 -#define CONTEXT_OFFSET_X7 0xe8 -#define CONTEXT_OFFSET_X4 0xf0 -#define CONTEXT_OFFSET_X5 0xf8 -#define CONTEXT_OFFSET_X2 0x100 -#define CONTEXT_OFFSET_X3 0x108 -#define CONTEXT_OFFSET_X0 0x110 -#define CONTEXT_OFFSET_X1 0x118 - -#define CONTEXT_OFFSET_Q15 0x120 -#define CONTEXT_OFFSET_Q14 0x130 -#define CONTEXT_OFFSET_Q13 0x140 -#define CONTEXT_OFFSET_Q12 0x150 -#define CONTEXT_OFFSET_Q11 0x160 -#define CONTEXT_OFFSET_Q10 0x170 -#define CONTEXT_OFFSET_Q9 0x180 -#define CONTEXT_OFFSET_Q8 0x190 -#define CONTEXT_OFFSET_Q7 0x1a0 -#define CONTEXT_OFFSET_Q6 0x1b0 -#define CONTEXT_OFFSET_Q5 0x1c0 -#define CONTEXT_OFFSET_Q4 0x1d0 -#define CONTEXT_OFFSET_Q3 0x1e0 -#define CONTEXT_OFFSET_Q2 0x1f0 -#define CONTEXT_OFFSET_Q1 0x200 -#define CONTEXT_OFFSET_Q0 0x210 - -#define CONTEXT_FPU_SIZE 0x100 -#define CONTEXT_SIZE 0x220 - /**************************************************/ .text @@ -370,50 +314,6 @@ arch_ret_to_user: 1: eret -/* -struct rt_hw_exp_stack -{ - unsigned long pc; 0 - unsigned long cpsr; - unsigned long sp_el0; 0x10 - unsigned long x30; - unsigned long fpcr; 0x20 - unsigned long fpsr; - unsigned long x28; 0x30 - unsigned long x29; - unsigned long x26; 0x40 - unsigned long x27; - unsigned long x24; 0x50 - unsigned long x25; - unsigned long x22; 0x60 - unsigned long x23; - unsigned long x20; 0x70 - unsigned long x21; - unsigned long x18; 0x80 - unsigned long x19; - unsigned long x16; 0x90 - unsigned long x17; - unsigned long x14; 0xa0 - unsigned long x15; - unsigned long x12; 0xb0 - unsigned long x13; - unsigned long x10; 0xc0 - unsigned long x11; - unsigned long x8; 0xd0 - unsigned long x9; - unsigned long x6; 0xe0 - unsigned long x7; - unsigned long x4; 0xf0 - unsigned long x5; - unsigned long x2; 0x100 - unsigned long x3; - unsigned long x0; 0x110 - unsigned long x1; - - unsigned long long fpu[16]; 0x120 - 0x220 = 0x120 + 0x10 
* 0x10 -}; -*/ .global lwp_check_debug lwp_check_debug: ldr x0, =rt_dbg_ops diff --git a/libcpu/aarch64/common/armv8.h b/libcpu/aarch64/common/armv8.h index d96a5af72a..a6d084f620 100644 --- a/libcpu/aarch64/common/armv8.h +++ b/libcpu/aarch64/common/armv8.h @@ -11,47 +11,132 @@ #ifndef __ARMV8_H__ #define __ARMV8_H__ +#ifdef __ASSEMBLY__ + +/********************* + * CONTEXT_OFFSET * + *********************/ + +#define CONTEXT_OFFSET_ELR_EL1 0x0 +#define CONTEXT_OFFSET_SPSR_EL1 0x8 +#define CONTEXT_OFFSET_SP_EL0 0x10 +#define CONTEXT_OFFSET_X30 0x18 +#define CONTEXT_OFFSET_FPCR 0x20 +#define CONTEXT_OFFSET_FPSR 0x28 +#define CONTEXT_OFFSET_X28 0x30 +#define CONTEXT_OFFSET_X29 0x38 +#define CONTEXT_OFFSET_X26 0x40 +#define CONTEXT_OFFSET_X27 0x48 +#define CONTEXT_OFFSET_X24 0x50 +#define CONTEXT_OFFSET_X25 0x58 +#define CONTEXT_OFFSET_X22 0x60 +#define CONTEXT_OFFSET_X23 0x68 +#define CONTEXT_OFFSET_X20 0x70 +#define CONTEXT_OFFSET_X21 0x78 +#define CONTEXT_OFFSET_X18 0x80 +#define CONTEXT_OFFSET_X19 0x88 +#define CONTEXT_OFFSET_X16 0x90 +#define CONTEXT_OFFSET_X17 0x98 +#define CONTEXT_OFFSET_X14 0xa0 +#define CONTEXT_OFFSET_X15 0xa8 +#define CONTEXT_OFFSET_X12 0xb0 +#define CONTEXT_OFFSET_X13 0xb8 +#define CONTEXT_OFFSET_X10 0xc0 +#define CONTEXT_OFFSET_X11 0xc8 +#define CONTEXT_OFFSET_X8 0xd0 +#define CONTEXT_OFFSET_X9 0xd8 +#define CONTEXT_OFFSET_X6 0xe0 +#define CONTEXT_OFFSET_X7 0xe8 +#define CONTEXT_OFFSET_X4 0xf0 +#define CONTEXT_OFFSET_X5 0xf8 +#define CONTEXT_OFFSET_X2 0x100 +#define CONTEXT_OFFSET_X3 0x108 +#define CONTEXT_OFFSET_X0 0x110 +#define CONTEXT_OFFSET_X1 0x118 + +#define CONTEXT_OFFSET_Q31 0x120 +#define CONTEXT_OFFSET_Q30 0x130 +#define CONTEXT_OFFSET_Q29 0x140 +#define CONTEXT_OFFSET_Q28 0x150 +#define CONTEXT_OFFSET_Q27 0x160 +#define CONTEXT_OFFSET_Q26 0x170 +#define CONTEXT_OFFSET_Q25 0x180 +#define CONTEXT_OFFSET_Q24 0x190 +#define CONTEXT_OFFSET_Q23 0x1a0 +#define CONTEXT_OFFSET_Q22 0x1b0 +#define CONTEXT_OFFSET_Q21 0x1c0 +#define 
CONTEXT_OFFSET_Q20 0x1d0 +#define CONTEXT_OFFSET_Q19 0x1e0 +#define CONTEXT_OFFSET_Q18 0x1f0 +#define CONTEXT_OFFSET_Q17 0x200 +#define CONTEXT_OFFSET_Q16 0x210 +#define CONTEXT_OFFSET_Q15 0x220 +#define CONTEXT_OFFSET_Q14 0x230 +#define CONTEXT_OFFSET_Q13 0x240 +#define CONTEXT_OFFSET_Q12 0x250 +#define CONTEXT_OFFSET_Q11 0x260 +#define CONTEXT_OFFSET_Q10 0x270 +#define CONTEXT_OFFSET_Q9 0x280 +#define CONTEXT_OFFSET_Q8 0x290 +#define CONTEXT_OFFSET_Q7 0x2a0 +#define CONTEXT_OFFSET_Q6 0x2b0 +#define CONTEXT_OFFSET_Q5 0x2c0 +#define CONTEXT_OFFSET_Q4 0x2d0 +#define CONTEXT_OFFSET_Q3 0x2e0 +#define CONTEXT_OFFSET_Q2 0x2f0 +#define CONTEXT_OFFSET_Q1 0x300 +#define CONTEXT_OFFSET_Q0 0x310 + +#define CONTEXT_FPU_SIZE (32 * 16) +#define CONTEXT_SIZE (0x120 + CONTEXT_FPU_SIZE) + +#else /* !__ASSEMBLY__ */ + +#include <rtdef.h> + +typedef struct { rt_uint64_t value[2]; } rt_uint128_t; + /* the exception stack without VFP registers */ struct rt_hw_exp_stack { - unsigned long pc; - unsigned long cpsr; - unsigned long sp_el0; - unsigned long x30; - unsigned long fpcr; - unsigned long fpsr; - unsigned long x28; - unsigned long x29; - unsigned long x26; - unsigned long x27; - unsigned long x24; - unsigned long x25; - unsigned long x22; - unsigned long x23; - unsigned long x20; - unsigned long x21; - unsigned long x18; - unsigned long x19; - unsigned long x16; - unsigned long x17; - unsigned long x14; - unsigned long x15; - unsigned long x12; - unsigned long x13; - unsigned long x10; - unsigned long x11; - unsigned long x8; - unsigned long x9; - unsigned long x6; - unsigned long x7; - unsigned long x4; - unsigned long x5; - unsigned long x2; - unsigned long x3; - unsigned long x0; - unsigned long x1; + rt_uint64_t pc; + rt_uint64_t cpsr; + rt_uint64_t sp_el0; + rt_uint64_t x30; + rt_uint64_t fpcr; + rt_uint64_t fpsr; + rt_uint64_t x28; + rt_uint64_t x29; + rt_uint64_t x26; + rt_uint64_t x27; + rt_uint64_t x24; + rt_uint64_t x25; + rt_uint64_t x22; + rt_uint64_t x23; + rt_uint64_t x20; +
rt_uint64_t x21; + rt_uint64_t x18; + rt_uint64_t x19; + rt_uint64_t x16; + rt_uint64_t x17; + rt_uint64_t x14; + rt_uint64_t x15; + rt_uint64_t x12; + rt_uint64_t x13; + rt_uint64_t x10; + rt_uint64_t x11; + rt_uint64_t x8; + rt_uint64_t x9; + rt_uint64_t x6; + rt_uint64_t x7; + rt_uint64_t x4; + rt_uint64_t x5; + rt_uint64_t x2; + rt_uint64_t x3; + rt_uint64_t x0; + rt_uint64_t x1; - unsigned long long fpu[16]; + rt_uint128_t fpu[32]; }; #define SP_ELx ((unsigned long)0x01) @@ -64,4 +149,6 @@ rt_ubase_t rt_hw_get_current_el(void); void rt_hw_set_elx_env(void); void rt_hw_set_current_vbar(rt_ubase_t addr); +#endif /* __ASSEMBLY__ */ + #endif diff --git a/libcpu/aarch64/common/asm-fpu.h b/libcpu/aarch64/common/asm-fpu.h index 8ac4ab9bd8..9729f8c1b2 100644 --- a/libcpu/aarch64/common/asm-fpu.h +++ b/libcpu/aarch64/common/asm-fpu.h @@ -1,11 +1,12 @@ /* - * Copyright (c) 2006-2021, RT-Thread Development Team + * Copyright (c) 2006-2023, RT-Thread Development Team * * SPDX-License-Identifier: Apache-2.0 * * Change Logs: * Date Author Notes * 2021-05-18 Jesven the first version + * 2023-07-13 GuEe-GUI append Q16 ~ Q31 */ .macro SAVE_FPU, reg @@ -25,8 +26,40 @@ STR Q13, [\reg, #-0x10]! STR Q14, [\reg, #-0x10]! STR Q15, [\reg, #-0x10]! + STR Q16, [\reg, #-0x10]! + STR Q17, [\reg, #-0x10]! + STR Q18, [\reg, #-0x10]! + STR Q19, [\reg, #-0x10]! + STR Q20, [\reg, #-0x10]! + STR Q21, [\reg, #-0x10]! + STR Q22, [\reg, #-0x10]! + STR Q23, [\reg, #-0x10]! + STR Q24, [\reg, #-0x10]! + STR Q25, [\reg, #-0x10]! + STR Q26, [\reg, #-0x10]! + STR Q27, [\reg, #-0x10]! + STR Q28, [\reg, #-0x10]! + STR Q29, [\reg, #-0x10]! + STR Q30, [\reg, #-0x10]! + STR Q31, [\reg, #-0x10]! 
.endm .macro RESTORE_FPU, reg + LDR Q31, [\reg], #0x10 + LDR Q30, [\reg], #0x10 + LDR Q29, [\reg], #0x10 + LDR Q28, [\reg], #0x10 + LDR Q27, [\reg], #0x10 + LDR Q26, [\reg], #0x10 + LDR Q25, [\reg], #0x10 + LDR Q24, [\reg], #0x10 + LDR Q23, [\reg], #0x10 + LDR Q22, [\reg], #0x10 + LDR Q21, [\reg], #0x10 + LDR Q20, [\reg], #0x10 + LDR Q19, [\reg], #0x10 + LDR Q18, [\reg], #0x10 + LDR Q17, [\reg], #0x10 + LDR Q16, [\reg], #0x10 LDR Q15, [\reg], #0x10 LDR Q14, [\reg], #0x10 LDR Q13, [\reg], #0x10 diff --git a/libcpu/aarch64/common/stack.c b/libcpu/aarch64/common/stack.c index acb07dba6e..18d6271a79 100644 --- a/libcpu/aarch64/common/stack.c +++ b/libcpu/aarch64/common/stack.c @@ -1,11 +1,12 @@ /* - * Copyright (c) 2006-2021, RT-Thread Development Team + * Copyright (c) 2006-2023, RT-Thread Development Team * * SPDX-License-Identifier: Apache-2.0 * * Change Logs: * Date Author Notes * 2021-05-12 RT-Thread init + * 2023-07-13 GuEe-GUI append fpu: Q16 ~ Q31 */ #include #include @@ -32,38 +33,12 @@ rt_uint8_t *rt_hw_stack_init(void *tentry, void *parameter, stk = (rt_ubase_t *)stack_addr; - *(--stk) = (rt_ubase_t)0; /* Q0 */ - *(--stk) = (rt_ubase_t)0; /* Q0 */ - *(--stk) = (rt_ubase_t)0; /* Q1 */ - *(--stk) = (rt_ubase_t)0; /* Q1 */ - *(--stk) = (rt_ubase_t)0; /* Q2 */ - *(--stk) = (rt_ubase_t)0; /* Q2 */ - *(--stk) = (rt_ubase_t)0; /* Q3 */ - *(--stk) = (rt_ubase_t)0; /* Q3 */ - *(--stk) = (rt_ubase_t)0; /* Q4 */ - *(--stk) = (rt_ubase_t)0; /* Q4 */ - *(--stk) = (rt_ubase_t)0; /* Q5 */ - *(--stk) = (rt_ubase_t)0; /* Q5 */ - *(--stk) = (rt_ubase_t)0; /* Q6 */ - *(--stk) = (rt_ubase_t)0; /* Q6 */ - *(--stk) = (rt_ubase_t)0; /* Q7 */ - *(--stk) = (rt_ubase_t)0; /* Q7 */ - *(--stk) = (rt_ubase_t)0; /* Q8 */ - *(--stk) = (rt_ubase_t)0; /* Q8 */ - *(--stk) = (rt_ubase_t)0; /* Q9 */ - *(--stk) = (rt_ubase_t)0; /* Q9 */ - *(--stk) = (rt_ubase_t)0; /* Q10 */ - *(--stk) = (rt_ubase_t)0; /* Q10 */ - *(--stk) = (rt_ubase_t)0; /* Q11 */ - *(--stk) = (rt_ubase_t)0; /* Q11 */ - 
*(--stk) = (rt_ubase_t)0; /* Q12 */ - *(--stk) = (rt_ubase_t)0; /* Q12 */ - *(--stk) = (rt_ubase_t)0; /* Q13 */ - *(--stk) = (rt_ubase_t)0; /* Q13 */ - *(--stk) = (rt_ubase_t)0; /* Q14 */ - *(--stk) = (rt_ubase_t)0; /* Q14 */ - *(--stk) = (rt_ubase_t)0; /* Q15 */ - *(--stk) = (rt_ubase_t)0; /* Q15 */ + for (int i = 0; i < 32; ++i) + { + stk -= sizeof(rt_uint128_t) / sizeof(rt_ubase_t); + + *(rt_uint128_t *)stk = (rt_uint128_t) { 0 }; + } *(--stk) = (rt_ubase_t)0; /* X1 */ *(--stk) = (rt_ubase_t)parameter; /* X0 */