[arm64] add hw thread self (#8942)

* [libcpu] arm64: Add hardware thread_self support

This patch introduces hardware-based thread self-identification
for the AArch64 architecture. It optimizes thread management by
using hardware registers to store and access the current thread's
pointer, reducing overhead and improving overall performance.
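For illustration, a minimal sketch of the mechanism, mirroring the rt_hw_thread_self()/rt_hw_thread_set_self() helpers this patch adds to cpuport.h (the sketch names below are illustrative, not part of the patch):

    #include <rtthread.h>

    /* The scheduler stores the current thread pointer in an AArch64 system
     * register at context-switch time; reading it back later is a single
     * mrs instruction and needs no interrupt masking. */
    rt_inline struct rt_thread *hw_thread_self_sketch(void)
    {
        struct rt_thread *thread;
        __asm__ volatile ("mrs %0, tpidr_el1" : "=r"(thread));
        return thread;
    }

    rt_inline void hw_thread_set_self_sketch(struct rt_thread *thread)
    {
        /* written once per context switch, see rt_sched_post_ctx_switch() */
        __asm__ volatile ("msr tpidr_el1, %0" : : "r"(thread));
    }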

Changes include:
- Added `ARCH_USING_HW_THREAD_SELF` configuration option.
- Modified `rtdef.h`, `rtsched.h` to conditionally include
  `critical_switch_flag` based on the new config.
- Updated context management in `context_gcc.S`, `cpuport.h`
  to support hardware-based thread self.
- Enhanced `scheduler_mp.c` and `thread.c` to leverage the new
  hardware thread self feature.

These modifications ensure better scheduling and thread handling,
particularly in multi-core environments, by minimizing the
software overhead associated with thread management.
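As a rough sketch of where the savings come from (assuming an SMP build with ARCH_USING_HW_THREAD_SELF enabled; both wrapper names are hypothetical), the software path must mask interrupts around the per-CPU lookup, while the hardware path is one register read:

    #include <rthw.h>
    #include <rtthread.h>

    /* pre-patch SMP path: interrupts masked around the per-CPU lookup */
    static rt_thread_t thread_self_software_path(void)
    {
        rt_thread_t self;
        rt_base_t lock = rt_hw_local_irq_disable();
        self = rt_cpu_self()->current_thread;
        rt_hw_local_irq_enable(lock);
        return self;
    }

    /* post-patch path: a single system-register read */
    static rt_thread_t thread_self_hardware_path(void)
    {
        return rt_hw_thread_self();
    }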

Signed-off-by: Shell <smokewood@qq.com>

* fixup: address suggestion

* fixup: rt_current_thread as global

* scheduler: add cpu object for UP scheduler

Also, maintain rt_current_thread in the cpu object on the UP scheduler.
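A hypothetical usage sketch (function name and output are illustrative): with the UP cpu object in place, kernel code can use the same accessor on UP and SMP builds, and rt_current_thread remains a read-only alias for rt_thread_self():

    #include <rtthread.h>

    static void show_current_thread(void)
    {
        /* on a UP build this returns the single static cpu object */
        struct rt_cpu *pcpu = rt_cpu_self();

        rt_kprintf("current thread: %p\n", pcpu->current_thread);

        /* rt_current_thread is now a macro alias for rt_thread_self() */
        RT_ASSERT(rt_current_thread == pcpu->current_thread);
    }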

---------

Signed-off-by: Shell <smokewood@qq.com>
Shell 2024-05-28 14:27:55 +08:00 committed by GitHub
parent b45fb59df2
commit f17f994f8c
12 changed files with 211 additions and 80 deletions

View File

@ -740,8 +740,10 @@ struct rt_cpu
struct rt_thread *current_thread;
rt_uint8_t irq_switch_flag:1;
rt_uint8_t critical_switch_flag:1;
rt_uint8_t sched_lock_flag:1;
#ifndef ARCH_USING_HW_THREAD_SELF
rt_uint8_t critical_switch_flag:1;
#endif /* ARCH_USING_HW_THREAD_SELF */
rt_uint8_t current_priority;
rt_list_t priority_table[RT_THREAD_PRIORITY_MAX];
@ -763,10 +765,19 @@ struct rt_cpu
struct rt_cpu_usage_stats cpu_stat;
#endif
};
typedef struct rt_cpu *rt_cpu_t;
#else /* !RT_USING_SMP */
struct rt_cpu
{
struct rt_thread *current_thread;
};
#endif /* RT_USING_SMP */
typedef struct rt_cpu *rt_cpu_t;
/* Note: provided as an API so that application code cannot write to this variable */
#define rt_current_thread rt_thread_self()
struct rt_thread;
#ifdef RT_USING_SMART

View File

@ -56,6 +56,10 @@ struct rt_sched_thread_ctx
rt_uint8_t sched_flag_locked:1; /**< calling thread have the scheduler locked */
rt_uint8_t sched_flag_ttmr_set:1; /**< thread timer is start */
#ifdef ARCH_USING_HW_THREAD_SELF
rt_uint8_t critical_switch_flag:1; /**< critical switch pending */
#endif /* ARCH_USING_HW_THREAD_SELF */
#ifdef RT_USING_SMP
rt_uint8_t bind_cpu; /**< thread is bind to cpu */
rt_uint8_t oncpu; /**< process on cpu */
@ -170,6 +174,7 @@ rt_err_t rt_sched_thread_timer_stop(struct rt_thread *thread);
rt_err_t rt_sched_thread_timer_start(struct rt_thread *thread);
void rt_sched_insert_thread(struct rt_thread *thread);
void rt_sched_remove_thread(struct rt_thread *thread);
struct rt_thread *rt_sched_thread_self(void);
#endif /* defined(__RT_KERNEL_SOURCE__) || defined(__RT_IPC_SOURCE__) */

View File

@ -669,6 +669,12 @@ rt_err_t rt_device_control(rt_device_t dev, int cmd, void *arg);
void rt_interrupt_enter(void);
void rt_interrupt_leave(void);
/**
* CPU object
*/
struct rt_cpu *rt_cpu_self(void);
struct rt_cpu *rt_cpu_index(int index);
#ifdef RT_USING_SMP
/*
@ -679,9 +685,6 @@ rt_base_t rt_cpus_lock(void);
void rt_cpus_unlock(rt_base_t level);
void rt_cpus_lock_status_restore(struct rt_thread *thread);
struct rt_cpu *rt_cpu_self(void);
struct rt_cpu *rt_cpu_index(int index);
#ifdef RT_USING_DEBUG
rt_base_t rt_cpu_get_id(void);
#else /* !RT_USING_DEBUG */

View File

@ -14,6 +14,8 @@ if ARCH_ARMV8 && ARCH_CPU_64BIT
default y
config ARCH_USING_GENERIC_CPUID
bool "Using generic cpuid implemenation"
select ARCH_USING_HW_THREAD_SELF
default y if RT_USING_OFW
default n
endmenu
endif
@ -270,3 +272,7 @@ config ARCH_HOST_SIMULATOR
config ARCH_CPU_STACK_GROWS_UPWARD
bool
default n
config ARCH_USING_HW_THREAD_SELF
bool
default n

View File

@ -8,6 +8,7 @@
* 2021-05-18 Jesven the first version
* 2023-06-24 WangXiaoyao Support backtrace for user thread
* 2024-01-06 Shell Fix barrier on irq_disable/enable
* 2024-01-18 Shell fix implicit dependency of cpuid management
*/
#ifndef __ASSEMBLY__
@ -27,15 +28,35 @@ rt_thread_switch_interrupt_flag: .zero 8
#endif
.text
.weak rt_hw_cpu_id_set
/**
* #ifdef RT_USING_OFW
* void rt_hw_cpu_id_set(long cpuid)
* #else
* void rt_hw_cpu_id_set(void)
* #endif
*/
.type rt_hw_cpu_id_set, @function
rt_hw_cpu_id_set:
mrs x0, mpidr_el1 /* MPIDR_EL1: Multi-Processor Affinity Register */
#ifdef ARCH_USING_GENERIC_CPUID
.globl rt_hw_cpu_id_set
#else /* !ARCH_USING_GENERIC_CPUID */
.weak rt_hw_cpu_id_set
#endif /* ARCH_USING_GENERIC_CPUID */
#ifndef RT_USING_OFW
mrs x0, mpidr_el1 /* MPIDR_EL1: Multi-Processor Affinity Register */
#ifdef ARCH_ARM_CORTEX_A55
lsr x0, x0, #8
#endif
and x0, x0, #15
msr tpidr_el1, x0
lsr x0, x0, #8
#endif /* ARCH_ARM_CORTEX_A55 */
and x0, x0, #15
#endif /* !RT_USING_OFW */
#ifdef ARCH_USING_HW_THREAD_SELF
msr tpidrro_el0, x0
#else /* !ARCH_USING_HW_THREAD_SELF */
msr tpidr_el1, x0
#endif /* ARCH_USING_HW_THREAD_SELF */
ret
/*

View File

@ -231,6 +231,24 @@ int rt_hw_cpu_boot_secondary(int num_cpus, rt_uint64_t *cpu_hw_ids, struct cpu_o
#endif /*RT_USING_SMP*/
/**
* Generic hw-cpu-id
*/
#ifdef ARCH_USING_GENERIC_CPUID
int rt_hw_cpu_id(void)
{
#if RT_CPUS_NR > 1
long cpuid;
__asm__ volatile("mrs %0, tpidrro_el0":"=r"(cpuid));
return cpuid;
#else
return 0;
#endif /* RT_CPUS_NR > 1 */
}
#endif /* ARCH_USING_GENERIC_CPUID */
/**
* @addtogroup ARM CPU
*/

View File

@ -7,6 +7,7 @@
* Date Author Notes
* 2023-10-25 Shell Move ffs to cpuport, add general implementation
* by inline assembly
* 2024-01-18 Shell support rt_hw_thread_self to improve overall performance
*/
#ifndef CPUPORT_H__
@ -27,31 +28,6 @@ typedef struct
rt_uint32_t value;
} rt_hw_spinlock_t;
/**
* Generic hw-cpu-id
*/
#ifdef ARCH_USING_GENERIC_CPUID
#if RT_CPUS_NR > 1
rt_inline int rt_hw_cpu_id(void)
{
long cpuid;
__asm__ volatile("mrs %0, tpidr_el1":"=r"(cpuid));
return cpuid;
}
#else
rt_inline int rt_hw_cpu_id(void)
{
return 0;
}
#endif /* RT_CPUS_NR > 1 */
#endif /* ARCH_USING_GENERIC_CPUID */
#endif /* RT_USING_SMP */
#define rt_hw_barrier(cmd, ...) \
@ -107,4 +83,20 @@ rt_inline int __rt_ffs(int value)
#endif /* RT_USING_CPU_FFS */
#ifdef ARCH_USING_HW_THREAD_SELF
rt_inline struct rt_thread *rt_hw_thread_self(void)
{
struct rt_thread *thread;
__asm__ volatile ("mrs %0, tpidr_el1":"=r"(thread));
return thread;
}
rt_inline void rt_hw_thread_set_self(struct rt_thread *thread)
{
__asm__ volatile ("msr tpidr_el1, %0"::"r"(thread));
}
#endif /* ARCH_USING_HW_THREAD_SELF */
#endif /*CPUPORT_H__*/

View File

@ -7,6 +7,7 @@
* 2020-01-15 bigmagic the first version
* 2020-08-10 SummerGift support clang compiler
* 2023-04-29 GuEe-GUI support kernel's ARM64 boot header
* 2024-01-18 Shell fix implicit dependency of cpuid management
*/
#ifndef __ASSEMBLY__
@ -95,6 +96,10 @@ _start:
/* Save cpu stack */
get_phy stack_top, .boot_cpu_stack_top
/* Save cpu id temp */
#ifdef ARCH_USING_HW_THREAD_SELF
msr tpidrro_el0, xzr
/* Save thread self */
#endif /* ARCH_USING_HW_THREAD_SELF */
msr tpidr_el1, xzr
bl init_cpu_el
@ -149,11 +154,10 @@ _secondary_cpu_entry:
/* Get cpu id success */
sub x0, x2, #1
msr tpidr_el1, x0 /* Save cpu id global */
#else
bl rt_hw_cpu_id_set
mrs x0, tpidr_el1
#endif /* RT_USING_OFW */
/* Save cpu id global */
bl rt_hw_cpu_id_set
bl rt_hw_cpu_id
/* Set current cpu's stack top */
sub x0, x0, #1

View File

@ -6,10 +6,14 @@
* Change Logs:
* Date Author Notes
* 2024-04-19 Shell Fixup UP irq spinlock
* 2024-05-22 Shell Add UP cpu object and
* maintain the rt_current_thread inside it
*/
#include <rthw.h>
#include <rtthread.h>
static struct rt_cpu _cpu;
/**
* @brief Initialize a static spinlock object.
*
@ -80,3 +84,25 @@ void rt_spin_unlock_irqrestore(struct rt_spinlock *lock, rt_base_t level)
rt_exit_critical_safe(critical_level);
rt_hw_interrupt_enable(level);
}
/**
* @brief This function will return the current cpu object.
*
* @return Return a pointer to the current cpu object.
*/
struct rt_cpu *rt_cpu_self(void)
{
return &_cpu;
}
/**
* @brief This function will return the cpu object corresponding to index.
*
* @param index is the index of target cpu object.
*
* @return Return a pointer to the cpu object corresponding to index.
*/
struct rt_cpu *rt_cpu_index(int index)
{
return index == 0 ? &_cpu : RT_NULL;
}

View File

@ -33,6 +33,7 @@
* 2023-12-10 xqyjlj use rt_hw_spinlock
* 2024-01-05 Shell Fixup of data racing in rt_critical_level
* 2024-01-18 Shell support rt_sched_thread of scheduling status for better mt protection
* 2024-01-18 Shell support rt_hw_thread_self to improve overall performance
*/
#include <rtthread.h>
@ -99,6 +100,18 @@ static struct rt_spinlock _mp_scheduler_lock;
rt_hw_local_irq_enable(level); \
} while (0)
#ifdef ARCH_USING_HW_THREAD_SELF
#define IS_CRITICAL_SWITCH_PEND(pcpu, curthr) (RT_SCHED_CTX(curthr).critical_switch_flag)
#define SET_CRITICAL_SWITCH_FLAG(pcpu, curthr) (RT_SCHED_CTX(curthr).critical_switch_flag = 1)
#define CLR_CRITICAL_SWITCH_FLAG(pcpu, curthr) (RT_SCHED_CTX(curthr).critical_switch_flag = 0)
#else /* !ARCH_USING_HW_THREAD_SELF */
#define IS_CRITICAL_SWITCH_PEND(pcpu, curthr) ((pcpu)->critical_switch_flag)
#define SET_CRITICAL_SWITCH_FLAG(pcpu, curthr) ((pcpu)->critical_switch_flag = 1)
#define CLR_CRITICAL_SWITCH_FLAG(pcpu, curthr) ((pcpu)->critical_switch_flag = 0)
#endif /* ARCH_USING_HW_THREAD_SELF */
static rt_uint32_t rt_thread_ready_priority_group;
#if RT_THREAD_PRIORITY_MAX > 32
/* Maximum priority level, 256 */
@ -749,7 +762,7 @@ rt_err_t rt_sched_unlock_n_resched(rt_sched_lock_level_t level)
/* leaving critical region of global context since we can't schedule */
SCHEDULER_CONTEXT_UNLOCK(pcpu);
pcpu->critical_switch_flag = 1;
SET_CRITICAL_SWITCH_FLAG(pcpu, current_thread);
error = -RT_ESCHEDLOCKED;
SCHEDULER_EXIT_CRITICAL(current_thread);
@ -757,7 +770,7 @@ rt_err_t rt_sched_unlock_n_resched(rt_sched_lock_level_t level)
else
{
/* flush critical switch flag since a scheduling is done */
pcpu->critical_switch_flag = 0;
CLR_CRITICAL_SWITCH_FLAG(pcpu, current_thread);
/* pick the highest runnable thread, and pass the control to it */
to_thread = _prepare_context_switch_locked(cpu_id, pcpu, current_thread);
@ -828,7 +841,7 @@ void rt_schedule(void)
/* whether caller had locked the local scheduler already */
if (RT_SCHED_CTX(current_thread).critical_lock_nest > 1)
{
pcpu->critical_switch_flag = 1;
SET_CRITICAL_SWITCH_FLAG(pcpu, current_thread);
SCHEDULER_EXIT_CRITICAL(current_thread);
@ -837,7 +850,7 @@ void rt_schedule(void)
else
{
/* flush critical switch flag since a scheduling is done */
pcpu->critical_switch_flag = 0;
CLR_CRITICAL_SWITCH_FLAG(pcpu, current_thread);
pcpu->irq_switch_flag = 0;
/**
@ -912,13 +925,13 @@ void rt_scheduler_do_irq_switch(void *context)
/* whether caller had locked the local scheduler already */
if (RT_SCHED_CTX(current_thread).critical_lock_nest > 1)
{
pcpu->critical_switch_flag = 1;
SET_CRITICAL_SWITCH_FLAG(pcpu, current_thread);
SCHEDULER_EXIT_CRITICAL(current_thread);
}
else if (rt_atomic_load(&(pcpu->irq_nest)) == 0)
{
/* flush critical & irq switch flag since a scheduling is done */
pcpu->critical_switch_flag = 0;
CLR_CRITICAL_SWITCH_FLAG(pcpu, current_thread);
pcpu->irq_switch_flag = 0;
SCHEDULER_CONTEXT_LOCK(pcpu);
@ -1056,6 +1069,9 @@ void rt_sched_post_ctx_switch(struct rt_thread *thread)
}
/* safe to access since irq is masked out */
pcpu->current_thread = thread;
#ifdef ARCH_USING_HW_THREAD_SELF
rt_hw_thread_set_self(thread);
#endif /* ARCH_USING_HW_THREAD_SELF */
}
#ifdef RT_DEBUGING_CRITICAL
@ -1096,14 +1112,28 @@ void rt_exit_critical_safe(rt_base_t critical_level)
#endif /* RT_DEBUGING_CRITICAL */
RTM_EXPORT(rt_exit_critical_safe);
#ifdef ARCH_USING_HW_THREAD_SELF
#define FREE_THREAD_SELF(lvl)
#else /* !ARCH_USING_HW_THREAD_SELF */
#define FREE_THREAD_SELF(lvl) \
do \
{ \
rt_hw_local_irq_enable(lvl); \
} while (0)
#endif /* ARCH_USING_HW_THREAD_SELF */
/**
* @brief This function will lock the thread scheduler.
*/
rt_base_t rt_enter_critical(void)
{
rt_base_t level;
rt_base_t critical_level;
struct rt_thread *current_thread;
#ifndef ARCH_USING_HW_THREAD_SELF
rt_base_t level;
struct rt_cpu *pcpu;
/* disable interrupt */
@ -1111,9 +1141,15 @@ rt_base_t rt_enter_critical(void)
pcpu = rt_cpu_self();
current_thread = pcpu->current_thread;
#else /* !ARCH_USING_HW_THREAD_SELF */
current_thread = rt_hw_thread_self();
#endif /* ARCH_USING_HW_THREAD_SELF */
if (!current_thread)
{
rt_hw_local_irq_enable(level);
FREE_THREAD_SELF(level);
/* scheduler unavailable */
return -RT_EINVAL;
}
@ -1122,8 +1158,7 @@ rt_base_t rt_enter_critical(void)
RT_SCHED_CTX(current_thread).critical_lock_nest++;
critical_level = RT_SCHED_CTX(current_thread).critical_lock_nest;
/* enable interrupt */
rt_hw_local_irq_enable(level);
FREE_THREAD_SELF(level);
return critical_level;
}
@ -1134,9 +1169,11 @@ RTM_EXPORT(rt_enter_critical);
*/
void rt_exit_critical(void)
{
rt_base_t level;
struct rt_thread *current_thread;
rt_bool_t need_resched;
#ifndef ARCH_USING_HW_THREAD_SELF
rt_base_t level;
struct rt_cpu *pcpu;
/* disable interrupt */
@ -1144,9 +1181,15 @@ void rt_exit_critical(void)
pcpu = rt_cpu_self();
current_thread = pcpu->current_thread;
#else /* !ARCH_USING_HW_THREAD_SELF */
current_thread = rt_hw_thread_self();
#endif /* ARCH_USING_HW_THREAD_SELF */
if (!current_thread)
{
rt_hw_local_irq_enable(level);
FREE_THREAD_SELF(level);
return;
}
@ -1157,11 +1200,10 @@ void rt_exit_critical(void)
if (RT_SCHED_CTX(current_thread).critical_lock_nest == 0)
{
/* is there any scheduling request unfinished? */
need_resched = pcpu->critical_switch_flag;
pcpu->critical_switch_flag = 0;
need_resched = IS_CRITICAL_SWITCH_PEND(pcpu, current_thread);
CLR_CRITICAL_SWITCH_FLAG(pcpu, current_thread);
/* enable interrupt */
rt_hw_local_irq_enable(level);
FREE_THREAD_SELF(level);
if (need_resched)
rt_schedule();
@ -1171,8 +1213,7 @@ void rt_exit_critical(void)
/* each exit_critical is strictly corresponding to an enter_critical */
RT_ASSERT(RT_SCHED_CTX(current_thread).critical_lock_nest > 0);
/* enable interrupt */
rt_hw_local_irq_enable(level);
FREE_THREAD_SELF(level);
}
}
RTM_EXPORT(rt_exit_critical);

View File

@ -48,7 +48,6 @@ rt_uint8_t rt_thread_ready_table[32];
extern volatile rt_uint8_t rt_interrupt_nest;
static rt_int16_t rt_scheduler_lock_nest;
struct rt_thread *rt_current_thread = RT_NULL;
rt_uint8_t rt_current_priority;
#if defined(RT_USING_HOOK) && defined(RT_HOOK_USING_FUNC_PTR)
@ -175,7 +174,7 @@ void rt_system_scheduler_start(void)
to_thread = _scheduler_get_highest_priority_thread(&highest_ready_priority);
rt_current_thread = to_thread;
rt_cpu_self()->current_thread = to_thread;
rt_sched_remove_thread(to_thread);
RT_SCHED_CTX(to_thread).stat = RT_THREAD_RUNNING;
@ -203,6 +202,8 @@ void rt_schedule(void)
rt_base_t level;
struct rt_thread *to_thread;
struct rt_thread *from_thread;
/* using local variable to avoid unnecessary function call */
struct rt_thread *curr_thread = rt_thread_self();
/* disable interrupt */
level = rt_hw_interrupt_disable();
@ -219,15 +220,16 @@ void rt_schedule(void)
to_thread = _scheduler_get_highest_priority_thread(&highest_ready_priority);
if ((RT_SCHED_CTX(rt_current_thread).stat & RT_THREAD_STAT_MASK) == RT_THREAD_RUNNING)
if ((RT_SCHED_CTX(curr_thread).stat & RT_THREAD_STAT_MASK) == RT_THREAD_RUNNING)
{
if (RT_SCHED_PRIV(rt_current_thread).current_priority < highest_ready_priority)
if (RT_SCHED_PRIV(curr_thread).current_priority < highest_ready_priority)
{
to_thread = rt_current_thread;
to_thread = curr_thread;
}
else if (RT_SCHED_PRIV(rt_current_thread).current_priority == highest_ready_priority && (RT_SCHED_CTX(rt_current_thread).stat & RT_THREAD_STAT_YIELD_MASK) == 0)
else if (RT_SCHED_PRIV(curr_thread).current_priority == highest_ready_priority
&& (RT_SCHED_CTX(curr_thread).stat & RT_THREAD_STAT_YIELD_MASK) == 0)
{
to_thread = rt_current_thread;
to_thread = curr_thread;
}
else
{
@ -235,12 +237,12 @@ void rt_schedule(void)
}
}
if (to_thread != rt_current_thread)
if (to_thread != curr_thread)
{
/* if the destination thread is not the same as current thread */
rt_current_priority = (rt_uint8_t)highest_ready_priority;
from_thread = rt_current_thread;
rt_current_thread = to_thread;
from_thread = curr_thread;
rt_cpu_self()->current_thread = to_thread;
RT_OBJECT_HOOK_CALL(rt_scheduler_hook, (from_thread, to_thread));
@ -282,11 +284,11 @@ void rt_schedule(void)
#ifdef RT_USING_SIGNALS
/* check stat of thread for signal */
level = rt_hw_interrupt_disable();
if (RT_SCHED_CTX(rt_current_thread).stat & RT_THREAD_STAT_SIGNAL_PENDING)
if (RT_SCHED_CTX(curr_thread).stat & RT_THREAD_STAT_SIGNAL_PENDING)
{
extern void rt_thread_handle_sig(rt_bool_t clean_state);
RT_SCHED_CTX(rt_current_thread).stat &= ~RT_THREAD_STAT_SIGNAL_PENDING;
RT_SCHED_CTX(curr_thread).stat &= ~RT_THREAD_STAT_SIGNAL_PENDING;
rt_hw_interrupt_enable(level);
@ -310,8 +312,8 @@ void rt_schedule(void)
}
else
{
rt_sched_remove_thread(rt_current_thread);
RT_SCHED_CTX(rt_current_thread).stat = RT_THREAD_RUNNING | (RT_SCHED_CTX(rt_current_thread).stat & ~RT_THREAD_STAT_MASK);
rt_sched_remove_thread(curr_thread);
RT_SCHED_CTX(curr_thread).stat = RT_THREAD_RUNNING | (RT_SCHED_CTX(curr_thread).stat & ~RT_THREAD_STAT_MASK);
}
}
}

View File

@ -355,20 +355,22 @@ RTM_EXPORT(rt_thread_init);
*/
rt_thread_t rt_thread_self(void)
{
#ifdef RT_USING_SMP
rt_base_t lock;
#ifndef RT_USING_SMP
return rt_cpu_self()->current_thread;
#elif defined (ARCH_USING_HW_THREAD_SELF)
return rt_hw_thread_self();
#else /* !ARCH_USING_HW_THREAD_SELF */
rt_thread_t self;
rt_base_t lock;
lock = rt_hw_local_irq_disable();
self = rt_cpu_self()->current_thread;
rt_hw_local_irq_enable(lock);
return self;
#else /* !RT_USING_SMP */
extern rt_thread_t rt_current_thread;
return rt_current_thread;
#endif /* RT_USING_SMP */
#endif /* ARCH_USING_HW_THREAD_SELF */
}
RTM_EXPORT(rt_thread_self);