AArch64: fixup fpu storage's size in stack and append Q16 ~ Q31 (#7815)

Signed-off-by: GuEe-GUI <GuEe-GUI@github.com>
This commit is contained in:
GUI 2023-07-15 15:21:23 +08:00 committed by GitHub
parent 2741bec8f7
commit b424169e17
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 171 additions and 176 deletions

View File

@ -8,9 +8,14 @@
* 2021-05-18 Jesven first version * 2021-05-18 Jesven first version
*/ */
#ifndef __ASSEMBLY__
#define __ASSEMBLY__
#endif
#include "rtconfig.h" #include "rtconfig.h"
#include "asm-generic.h" #include "asm-generic.h"
#include "asm-fpu.h" #include "asm-fpu.h"
#include "armv8.h"
/********************* /*********************
* SPSR BIT * * SPSR BIT *
@ -32,67 +37,6 @@
#define SPSR_Z(v) ((v) << 30) #define SPSR_Z(v) ((v) << 30)
#define SPSR_N(v) ((v) << 31) #define SPSR_N(v) ((v) << 31)
/*********************
* CONTEXT_OFFSET *
*********************/
#define CONTEXT_OFFSET_ELR_EL1 0x0
#define CONTEXT_OFFSET_SPSR_EL1 0x8
#define CONTEXT_OFFSET_SP_EL0 0x10
#define CONTEXT_OFFSET_X30 0x18
#define CONTEXT_OFFSET_FPCR 0x20
#define CONTEXT_OFFSET_FPSR 0x28
#define CONTEXT_OFFSET_X28 0x30
#define CONTEXT_OFFSET_X29 0x38
#define CONTEXT_OFFSET_X26 0x40
#define CONTEXT_OFFSET_X27 0x48
#define CONTEXT_OFFSET_X24 0x50
#define CONTEXT_OFFSET_X25 0x58
#define CONTEXT_OFFSET_X22 0x60
#define CONTEXT_OFFSET_X23 0x68
#define CONTEXT_OFFSET_X20 0x70
#define CONTEXT_OFFSET_X21 0x78
#define CONTEXT_OFFSET_X18 0x80
#define CONTEXT_OFFSET_X19 0x88
#define CONTEXT_OFFSET_X16 0x90
#define CONTEXT_OFFSET_X17 0x98
#define CONTEXT_OFFSET_X14 0xa0
#define CONTEXT_OFFSET_X15 0xa8
#define CONTEXT_OFFSET_X12 0xb0
#define CONTEXT_OFFSET_X13 0xb8
#define CONTEXT_OFFSET_X10 0xc0
#define CONTEXT_OFFSET_X11 0xc8
#define CONTEXT_OFFSET_X8 0xd0
#define CONTEXT_OFFSET_X9 0xd8
#define CONTEXT_OFFSET_X6 0xe0
#define CONTEXT_OFFSET_X7 0xe8
#define CONTEXT_OFFSET_X4 0xf0
#define CONTEXT_OFFSET_X5 0xf8
#define CONTEXT_OFFSET_X2 0x100
#define CONTEXT_OFFSET_X3 0x108
#define CONTEXT_OFFSET_X0 0x110
#define CONTEXT_OFFSET_X1 0x118
#define CONTEXT_OFFSET_Q15 0x120
#define CONTEXT_OFFSET_Q14 0x130
#define CONTEXT_OFFSET_Q13 0x140
#define CONTEXT_OFFSET_Q12 0x150
#define CONTEXT_OFFSET_Q11 0x160
#define CONTEXT_OFFSET_Q10 0x170
#define CONTEXT_OFFSET_Q9 0x180
#define CONTEXT_OFFSET_Q8 0x190
#define CONTEXT_OFFSET_Q7 0x1a0
#define CONTEXT_OFFSET_Q6 0x1b0
#define CONTEXT_OFFSET_Q5 0x1c0
#define CONTEXT_OFFSET_Q4 0x1d0
#define CONTEXT_OFFSET_Q3 0x1e0
#define CONTEXT_OFFSET_Q2 0x1f0
#define CONTEXT_OFFSET_Q1 0x200
#define CONTEXT_OFFSET_Q0 0x210
#define CONTEXT_FPU_SIZE 0x100
#define CONTEXT_SIZE 0x220
/**************************************************/ /**************************************************/
.text .text
@ -370,50 +314,6 @@ arch_ret_to_user:
1: 1:
eret eret
/*
struct rt_hw_exp_stack
{
unsigned long pc; 0
unsigned long cpsr;
unsigned long sp_el0; 0x10
unsigned long x30;
unsigned long fpcr; 0x20
unsigned long fpsr;
unsigned long x28; 0x30
unsigned long x29;
unsigned long x26; 0x40
unsigned long x27;
unsigned long x24; 0x50
unsigned long x25;
unsigned long x22; 0x60
unsigned long x23;
unsigned long x20; 0x70
unsigned long x21;
unsigned long x18; 0x80
unsigned long x19;
unsigned long x16; 0x90
unsigned long x17;
unsigned long x14; 0xa0
unsigned long x15;
unsigned long x12; 0xb0
unsigned long x13;
unsigned long x10; 0xc0
unsigned long x11;
unsigned long x8; 0xd0
unsigned long x9;
unsigned long x6; 0xe0
unsigned long x7;
unsigned long x4; 0xf0
unsigned long x5;
unsigned long x2; 0x100
unsigned long x3;
unsigned long x0; 0x110
unsigned long x1;
unsigned long long fpu[16]; 0x120
0x220 = 0x120 + 0x10 * 0x10
};
*/
.global lwp_check_debug .global lwp_check_debug
lwp_check_debug: lwp_check_debug:
ldr x0, =rt_dbg_ops ldr x0, =rt_dbg_ops

View File

@ -11,47 +11,132 @@
#ifndef __ARMV8_H__ #ifndef __ARMV8_H__
#define __ARMV8_H__ #define __ARMV8_H__
#ifdef __ASSEMBLY__
/*********************
* CONTEXT_OFFSET *
*********************/
#define CONTEXT_OFFSET_ELR_EL1 0x0
#define CONTEXT_OFFSET_SPSR_EL1 0x8
#define CONTEXT_OFFSET_SP_EL0 0x10
#define CONTEXT_OFFSET_X30 0x18
#define CONTEXT_OFFSET_FPCR 0x20
#define CONTEXT_OFFSET_FPSR 0x28
#define CONTEXT_OFFSET_X28 0x30
#define CONTEXT_OFFSET_X29 0x38
#define CONTEXT_OFFSET_X26 0x40
#define CONTEXT_OFFSET_X27 0x48
#define CONTEXT_OFFSET_X24 0x50
#define CONTEXT_OFFSET_X25 0x58
#define CONTEXT_OFFSET_X22 0x60
#define CONTEXT_OFFSET_X23 0x68
#define CONTEXT_OFFSET_X20 0x70
#define CONTEXT_OFFSET_X21 0x78
#define CONTEXT_OFFSET_X18 0x80
#define CONTEXT_OFFSET_X19 0x88
#define CONTEXT_OFFSET_X16 0x90
#define CONTEXT_OFFSET_X17 0x98
#define CONTEXT_OFFSET_X14 0xa0
#define CONTEXT_OFFSET_X15 0xa8
#define CONTEXT_OFFSET_X12 0xb0
#define CONTEXT_OFFSET_X13 0xb8
#define CONTEXT_OFFSET_X10 0xc0
#define CONTEXT_OFFSET_X11 0xc8
#define CONTEXT_OFFSET_X8 0xd0
#define CONTEXT_OFFSET_X9 0xd8
#define CONTEXT_OFFSET_X6 0xe0
#define CONTEXT_OFFSET_X7 0xe8
#define CONTEXT_OFFSET_X4 0xf0
#define CONTEXT_OFFSET_X5 0xf8
#define CONTEXT_OFFSET_X2 0x100
#define CONTEXT_OFFSET_X3 0x108
#define CONTEXT_OFFSET_X0 0x110
#define CONTEXT_OFFSET_X1 0x118
#define CONTEXT_OFFSET_Q31 0x120
#define CONTEXT_OFFSET_Q30 0x130
#define CONTEXT_OFFSET_Q29 0x140
#define CONTEXT_OFFSET_Q28 0x150
#define CONTEXT_OFFSET_Q27 0x160
#define CONTEXT_OFFSET_Q26 0x170
#define CONTEXT_OFFSET_Q25 0x180
#define CONTEXT_OFFSET_Q24 0x190
#define CONTEXT_OFFSET_Q23 0x1a0
#define CONTEXT_OFFSET_Q22 0x1b0
#define CONTEXT_OFFSET_Q21 0x1c0
#define CONTEXT_OFFSET_Q20 0x1d0
#define CONTEXT_OFFSET_Q19 0x1e0
#define CONTEXT_OFFSET_Q18 0x1f0
#define CONTEXT_OFFSET_Q17 0x200
#define CONTEXT_OFFSET_Q16 0x210
#define CONTEXT_OFFSET_Q15 0x220
#define CONTEXT_OFFSET_Q14 0x230
#define CONTEXT_OFFSET_Q13 0x240
#define CONTEXT_OFFSET_Q12 0x250
#define CONTEXT_OFFSET_Q11 0x260
#define CONTEXT_OFFSET_Q10 0x270
#define CONTEXT_OFFSET_Q9 0x280
#define CONTEXT_OFFSET_Q8 0x290
#define CONTEXT_OFFSET_Q7 0x2a0
#define CONTEXT_OFFSET_Q6 0x2b0
#define CONTEXT_OFFSET_Q5 0x2c0
#define CONTEXT_OFFSET_Q4 0x2d0
#define CONTEXT_OFFSET_Q3 0x2e0
#define CONTEXT_OFFSET_Q2 0x2f0
#define CONTEXT_OFFSET_Q1 0x300
#define CONTEXT_OFFSET_Q0 0x310
#define CONTEXT_FPU_SIZE (32 * 16)
#define CONTEXT_SIZE (0x120 + CONTEXT_FPU_SIZE)
#else /* !__ASSEMBLY__ */
#include <rtdef.h>
typedef struct { rt_uint64_t value[2]; } rt_uint128_t;
/* the exception stack without VFP registers */ /* the exception stack without VFP registers */
struct rt_hw_exp_stack struct rt_hw_exp_stack
{ {
unsigned long pc; rt_uint64_t pc;
unsigned long cpsr; rt_uint64_t cpsr;
unsigned long sp_el0; rt_uint64_t sp_el0;
unsigned long x30; rt_uint64_t x30;
unsigned long fpcr; rt_uint64_t fpcr;
unsigned long fpsr; rt_uint64_t fpsr;
unsigned long x28; rt_uint64_t x28;
unsigned long x29; rt_uint64_t x29;
unsigned long x26; rt_uint64_t x26;
unsigned long x27; rt_uint64_t x27;
unsigned long x24; rt_uint64_t x24;
unsigned long x25; rt_uint64_t x25;
unsigned long x22; rt_uint64_t x22;
unsigned long x23; rt_uint64_t x23;
unsigned long x20; rt_uint64_t x20;
unsigned long x21; rt_uint64_t x21;
unsigned long x18; rt_uint64_t x18;
unsigned long x19; rt_uint64_t x19;
unsigned long x16; rt_uint64_t x16;
unsigned long x17; rt_uint64_t x17;
unsigned long x14; rt_uint64_t x14;
unsigned long x15; rt_uint64_t x15;
unsigned long x12; rt_uint64_t x12;
unsigned long x13; rt_uint64_t x13;
unsigned long x10; rt_uint64_t x10;
unsigned long x11; rt_uint64_t x11;
unsigned long x8; rt_uint64_t x8;
unsigned long x9; rt_uint64_t x9;
unsigned long x6; rt_uint64_t x6;
unsigned long x7; rt_uint64_t x7;
unsigned long x4; rt_uint64_t x4;
unsigned long x5; rt_uint64_t x5;
unsigned long x2; rt_uint64_t x2;
unsigned long x3; rt_uint64_t x3;
unsigned long x0; rt_uint64_t x0;
unsigned long x1; rt_uint64_t x1;
unsigned long long fpu[16]; rt_uint128_t fpu[32];
}; };
#define SP_ELx ((unsigned long)0x01) #define SP_ELx ((unsigned long)0x01)
@ -64,4 +149,6 @@ rt_ubase_t rt_hw_get_current_el(void);
void rt_hw_set_elx_env(void); void rt_hw_set_elx_env(void);
void rt_hw_set_current_vbar(rt_ubase_t addr); void rt_hw_set_current_vbar(rt_ubase_t addr);
#endif /* __ASSEMBLY__ */
#endif #endif

View File

@ -1,11 +1,12 @@
/* /*
* Copyright (c) 2006-2021, RT-Thread Development Team * Copyright (c) 2006-2023, RT-Thread Development Team
* *
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
* *
* Change Logs: * Change Logs:
* Date Author Notes * Date Author Notes
* 2021-05-18 Jesven the first version * 2021-05-18 Jesven the first version
* 2023-07-13 GuEe-GUI append Q16 ~ Q31
*/ */
.macro SAVE_FPU, reg .macro SAVE_FPU, reg
@ -25,8 +26,40 @@
STR Q13, [\reg, #-0x10]! STR Q13, [\reg, #-0x10]!
STR Q14, [\reg, #-0x10]! STR Q14, [\reg, #-0x10]!
STR Q15, [\reg, #-0x10]! STR Q15, [\reg, #-0x10]!
STR Q16, [\reg, #-0x10]!
STR Q17, [\reg, #-0x10]!
STR Q18, [\reg, #-0x10]!
STR Q19, [\reg, #-0x10]!
STR Q20, [\reg, #-0x10]!
STR Q21, [\reg, #-0x10]!
STR Q22, [\reg, #-0x10]!
STR Q23, [\reg, #-0x10]!
STR Q24, [\reg, #-0x10]!
STR Q25, [\reg, #-0x10]!
STR Q26, [\reg, #-0x10]!
STR Q27, [\reg, #-0x10]!
STR Q28, [\reg, #-0x10]!
STR Q29, [\reg, #-0x10]!
STR Q30, [\reg, #-0x10]!
STR Q31, [\reg, #-0x10]!
.endm .endm
.macro RESTORE_FPU, reg .macro RESTORE_FPU, reg
LDR Q31, [\reg], #0x10
LDR Q30, [\reg], #0x10
LDR Q29, [\reg], #0x10
LDR Q28, [\reg], #0x10
LDR Q27, [\reg], #0x10
LDR Q26, [\reg], #0x10
LDR Q25, [\reg], #0x10
LDR Q24, [\reg], #0x10
LDR Q23, [\reg], #0x10
LDR Q22, [\reg], #0x10
LDR Q21, [\reg], #0x10
LDR Q20, [\reg], #0x10
LDR Q19, [\reg], #0x10
LDR Q18, [\reg], #0x10
LDR Q17, [\reg], #0x10
LDR Q16, [\reg], #0x10
LDR Q15, [\reg], #0x10 LDR Q15, [\reg], #0x10
LDR Q14, [\reg], #0x10 LDR Q14, [\reg], #0x10
LDR Q13, [\reg], #0x10 LDR Q13, [\reg], #0x10

View File

@ -1,11 +1,12 @@
/* /*
* Copyright (c) 2006-2021, RT-Thread Development Team * Copyright (c) 2006-2023, RT-Thread Development Team
* *
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
* *
* Change Logs: * Change Logs:
* Date Author Notes * Date Author Notes
* 2021-05-12 RT-Thread init * 2021-05-12 RT-Thread init
* 2023-07-13 GuEe-GUI append fpu: Q16 ~ Q31
*/ */
#include <board.h> #include <board.h>
#include <rtthread.h> #include <rtthread.h>
@ -32,38 +33,12 @@ rt_uint8_t *rt_hw_stack_init(void *tentry, void *parameter,
stk = (rt_ubase_t *)stack_addr; stk = (rt_ubase_t *)stack_addr;
*(--stk) = (rt_ubase_t)0; /* Q0 */ for (int i = 0; i < 32; ++i)
*(--stk) = (rt_ubase_t)0; /* Q0 */ {
*(--stk) = (rt_ubase_t)0; /* Q1 */ stk -= sizeof(rt_uint128_t) / sizeof(rt_ubase_t);
*(--stk) = (rt_ubase_t)0; /* Q1 */
*(--stk) = (rt_ubase_t)0; /* Q2 */ *(rt_uint128_t *)stk = (rt_uint128_t) { 0 };
*(--stk) = (rt_ubase_t)0; /* Q2 */ }
*(--stk) = (rt_ubase_t)0; /* Q3 */
*(--stk) = (rt_ubase_t)0; /* Q3 */
*(--stk) = (rt_ubase_t)0; /* Q4 */
*(--stk) = (rt_ubase_t)0; /* Q4 */
*(--stk) = (rt_ubase_t)0; /* Q5 */
*(--stk) = (rt_ubase_t)0; /* Q5 */
*(--stk) = (rt_ubase_t)0; /* Q6 */
*(--stk) = (rt_ubase_t)0; /* Q6 */
*(--stk) = (rt_ubase_t)0; /* Q7 */
*(--stk) = (rt_ubase_t)0; /* Q7 */
*(--stk) = (rt_ubase_t)0; /* Q8 */
*(--stk) = (rt_ubase_t)0; /* Q8 */
*(--stk) = (rt_ubase_t)0; /* Q9 */
*(--stk) = (rt_ubase_t)0; /* Q9 */
*(--stk) = (rt_ubase_t)0; /* Q10 */
*(--stk) = (rt_ubase_t)0; /* Q10 */
*(--stk) = (rt_ubase_t)0; /* Q11 */
*(--stk) = (rt_ubase_t)0; /* Q11 */
*(--stk) = (rt_ubase_t)0; /* Q12 */
*(--stk) = (rt_ubase_t)0; /* Q12 */
*(--stk) = (rt_ubase_t)0; /* Q13 */
*(--stk) = (rt_ubase_t)0; /* Q13 */
*(--stk) = (rt_ubase_t)0; /* Q14 */
*(--stk) = (rt_ubase_t)0; /* Q14 */
*(--stk) = (rt_ubase_t)0; /* Q15 */
*(--stk) = (rt_ubase_t)0; /* Q15 */
*(--stk) = (rt_ubase_t)0; /* X1 */ *(--stk) = (rt_ubase_t)0; /* X1 */
*(--stk) = (rt_ubase_t)parameter; /* X0 */ *(--stk) = (rt_ubase_t)parameter; /* X0 */