[libcpu/arm64] add C11 atomic ticket spinlock (#8882)
* [libcpu/arm64] add C11 atomic ticket spinlock

This replaces the former flag-based spinlock implementation, which is unfair. Besides, the C11 atomic implementation is more readable (it is plain C) and more maintainable, since the toolchain can apply its built-in optimizations and tune for different micro-architectures. For example, ARMv8.5 introduces a better instruction for this pattern, and the compiler can take advantage of it when it knows the target platform supports it.

Signed-off-by: Shell <smokewood@qq.com>

* fixup: RT_CPUS_NR

---------

Signed-off-by: Shell <smokewood@qq.com>
This commit is contained in:
parent e46333496f, commit e25fc8b511
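The gist of a ticket spinlock is that each contender atomically takes a ticket number and then waits until the owner counter reaches that number, so the lock is granted in FIFO order instead of going to whichever core happens to win the store. A minimal host-side sketch of that pattern in C11 atomics follows; the names (ticket_lock_t, ticket_lock, ticket_unlock) are illustrative only and are not the rt_hw_spin_* API introduced by this commit.

/* Standalone sketch of the ticket-lock idea (host build, illustrative names). */
#include <stdatomic.h>
#include <stdint.h>

typedef struct
{
    _Atomic(uint16_t) owner; /* ticket currently being served */
    _Atomic(uint16_t) next;  /* next ticket to hand out */
} ticket_lock_t;

static void ticket_lock(ticket_lock_t *lock)
{
    /* take a ticket; the increment itself needs no ordering */
    uint16_t ticket = atomic_fetch_add_explicit(&lock->next, 1, memory_order_relaxed);

    /* spin until our ticket is served; acquire pairs with the release in unlock */
    while (atomic_load_explicit(&lock->owner, memory_order_acquire) != ticket)
        ;
}

static void ticket_unlock(ticket_lock_t *lock)
{
    /* serve the next waiter; release publishes the critical section */
    atomic_fetch_add_explicit(&lock->owner, 1, memory_order_release);
}

int main(void)
{
    ticket_lock_t lock = {0, 0};
    ticket_lock(&lock);
    ticket_unlock(&lock);
    return 0;
}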
Kernel scheduler utest:

@@ -29,7 +29,7 @@
 #error the thread priority should at least be greater than idle
 #endif
 
-static rt_atomic_t _star_counter = 1;
+static rt_atomic_t _star_counter;
 static struct rt_semaphore _thr_exit_sem;
 static struct rt_semaphore _level_waiting[TEST_LEVEL_COUNTS];
 static rt_thread_t _thread_matrix[TEST_LEVEL_COUNTS][KERN_TEST_CONCURRENT_THREADS];

@@ -157,6 +157,8 @@ static void scheduler_tc(void)
 static rt_err_t utest_tc_init(void)
 {
     LOG_I("Setup environment...");
+    _star_counter = 1;
+    rt_memset(_load_average, 0, sizeof(_load_average));
     rt_sem_init(&_thr_exit_sem, "test", 0, RT_IPC_FLAG_PRIO);
 
     for (size_t i = 0; i < TEST_LEVEL_COUNTS; i++)
AArch64 Kconfig:

@@ -12,6 +12,9 @@ if ARCH_ARMV8 && ARCH_CPU_64BIT
 config ARCH_HAVE_EFFICIENT_UNALIGNED_ACCESS
     bool
     default y
+
+config ARCH_USING_GENERIC_CPUID
+    bool "Using generic cpuid implemenation"
+    default n
 endmenu
 endif
AArch64 cpu-id assembly:

@@ -44,7 +44,11 @@ int rt_hw_cpu_id(void)
 .weak rt_hw_cpu_id
 .type rt_hw_cpu_id, @function
 rt_hw_cpu_id:
+#if RT_CPUS_NR > 1
     mrs x0, tpidr_el1
+#else
+    mov x0, xzr
+#endif
     ret
 
 /*
AArch64 cpu support (C11 ticket spinlock):

@@ -8,6 +8,7 @@
 * 2011-09-15     Bernard      first version
 * 2019-07-28     zdzn         add smp support
 * 2023-02-21     GuEe-GUI     mov cpu ofw init to setup
+* 2024-04-29     Shell        Add generic ticket spinlock using C11 atomic
 */
 
 #include <rthw.h>

@@ -55,65 +56,101 @@ rt_weak rt_uint64_t rt_cpu_mpidr_early[] =
 };
 #endif /* RT_USING_SMART */
 
-static inline void arch_spin_lock(arch_spinlock_t *lock)
-{
-    unsigned int tmp;
-
-    asm volatile(
-        "   sevl\n"
-        "1: wfe\n"
-        "2: ldaxr   %w0, %1\n"
-        "   cbnz    %w0, 1b\n"
-        "   stxr    %w0, %w2, %1\n"
-        "   cbnz    %w0, 2b\n"
-        : "=&r" (tmp), "+Q" (lock->lock)
-        : "r" (1)
-        : "cc", "memory");
-}
-
-static inline int arch_spin_trylock(arch_spinlock_t *lock)
-{
-    unsigned int tmp;
-
-    asm volatile(
-        "   ldaxr   %w0, %1\n"
-        "   cbnz    %w0, 1f\n"
-        "   stxr    %w0, %w2, %1\n"
-        "1:\n"
-        : "=&r" (tmp), "+Q" (lock->lock)
-        : "r" (1)
-        : "cc", "memory");
-
-    return !tmp;
-}
-
-static inline void arch_spin_unlock(arch_spinlock_t *lock)
-{
-    asm volatile(
-        "   stlr    %w1, %0\n"
-        : "=Q" (lock->lock) : "r" (0) : "memory");
-}
-
-void rt_hw_spin_lock_init(arch_spinlock_t *lock)
-{
-    lock->lock = 0;
-}
-
-void rt_hw_spin_lock(rt_hw_spinlock_t *lock)
-{
-    arch_spin_lock(lock);
-}
-
-void rt_hw_spin_unlock(rt_hw_spinlock_t *lock)
-{
-    arch_spin_unlock(lock);
-}
-
-rt_bool_t rt_hw_spin_trylock(rt_hw_spinlock_t *lock)
-{
-    return arch_spin_trylock(lock);
-}
+/* in support of C11 atomic */
+#if __STDC_VERSION__ >= 201112L
+#include <stdatomic.h>
+
+union _spinlock
+{
+    _Atomic(rt_uint32_t) _value;
+    struct
+    {
+        _Atomic(rt_uint16_t) owner;
+        _Atomic(rt_uint16_t) next;
+    } ticket;
+};
+
+void rt_hw_spin_lock_init(rt_hw_spinlock_t *_lock)
+{
+    union _spinlock *lock = (void *)_lock;
+
+    /**
+     * just a dummy note that this is an atomic operation, though it alway is
+     * even without usage of atomic API in arm64
+     */
+    atomic_store_explicit(&lock->_value, 0, memory_order_relaxed);
+}
+
+rt_bool_t rt_hw_spin_trylock(rt_hw_spinlock_t *_lock)
+{
+    rt_bool_t rc;
+    rt_uint32_t readonce;
+    union _spinlock temp;
+    union _spinlock *lock = (void *)_lock;
+
+    readonce = atomic_load_explicit(&lock->_value, memory_order_acquire);
+    temp._value = readonce;
+
+    if (temp.ticket.owner != temp.ticket.next)
+    {
+        rc = RT_FALSE;
+    }
+    else
+    {
+        temp.ticket.next += 1;
+        rc = atomic_compare_exchange_strong_explicit(
+            &lock->_value, &readonce, temp._value,
+            memory_order_acquire, memory_order_relaxed);
+    }
+    return rc;
+}
+
+rt_inline rt_base_t _load_acq_exclusive(_Atomic(rt_uint16_t) *halfword)
+{
+    rt_uint32_t old;
+    __asm__ volatile("ldaxrh %w0, [%1]"
+                     : "=&r"(old)
+                     : "r"(halfword)
+                     : "memory");
+    return old;
+}
+
+rt_inline void _send_event_local(void)
+{
+    __asm__ volatile("sevl");
+}
+
+rt_inline void _wait_for_event(void)
+{
+    __asm__ volatile("wfe" ::: "memory");
+}
+
+void rt_hw_spin_lock(rt_hw_spinlock_t *_lock)
+{
+    union _spinlock *lock = (void *)_lock;
+    rt_uint16_t ticket =
+        atomic_fetch_add_explicit(&lock->ticket.next, 1, memory_order_relaxed);
+
+    if (atomic_load_explicit(&lock->ticket.owner, memory_order_acquire) !=
+        ticket)
+    {
+        _send_event_local();
+        do
+        {
+            _wait_for_event();
+        }
+        while (_load_acq_exclusive(&lock->ticket.owner) != ticket);
+    }
+}
+
+void rt_hw_spin_unlock(rt_hw_spinlock_t *_lock)
+{
+    union _spinlock *lock = (void *)_lock;
+    atomic_fetch_add_explicit(&lock->ticket.owner, 1, memory_order_release);
+}
+
+#endif
 
 static int _cpus_init_data_hardcoded(int num_cpus, rt_uint64_t *cpu_hw_ids, struct cpu_ops_t *cpu_ops[])
 {
     // load in cpu_hw_ids in cpuid_to_hwid,
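The replacement keeps the existing rt_hw_spinlock_t API, so callers do not change. The relaxed fetch_add on ticket.next merely takes a ticket, while the acquire load of ticket.owner and the release increment in rt_hw_spin_unlock carry the ownership hand-off, which is the usual ticket-lock ordering. A hypothetical usage sketch of that API (illustrative names, not part of this commit):

/* Hypothetical caller of the rt_hw_spin_* API; not part of this commit. */
#include <rthw.h>

static rt_hw_spinlock_t _dev_lock;   /* illustrative name */
static int _shared_counter;

void dev_init(void)
{
    rt_hw_spin_lock_init(&_dev_lock);
}

void dev_touch(void)
{
    rt_hw_spin_lock(&_dev_lock);     /* acquire: waiters are served in ticket order */
    _shared_counter++;               /* critical section */
    rt_hw_spin_unlock(&_dev_lock);   /* release: hands the lock to the next ticket */
}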
AArch64 common assembly (symbol directives, weak spinlock fallback):

@@ -1,10 +1,11 @@
 /*
- * Copyright (c) 2006-2020, RT-Thread Development Team
+ * Copyright (c) 2006-2024, RT-Thread Development Team
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Date           Author       Notes
 * 2018-10-06     ZhaoXiaowei    the first version
+* 2024-04-28     Shell          add generic spinlock implementation
 */
 
 .text

@@ -80,7 +81,7 @@ rt_hw_set_elx_env:
 0:
     RET
 
-.global rt_cpu_vector_set_base
+.globl rt_cpu_vector_set_base
 rt_cpu_vector_set_base:
     MSR     VBAR_EL1,X0
     RET

@@ -89,7 +90,7 @@ rt_cpu_vector_set_base:
 /**
  * unsigned long rt_hw_ffz(unsigned long x)
  */
-.global rt_hw_ffz
+.globl rt_hw_ffz
 rt_hw_ffz:
     mvn x1, x0
     clz x0, x1

@@ -97,7 +98,80 @@ rt_hw_ffz:
     sub x0, x1, x0
     ret
 
-.global rt_hw_clz
+.globl rt_hw_clz
 rt_hw_clz:
     clz x0, x0
     ret
+
+/**
+ * Spinlock (fallback implementation)
+ */
+
+rt_hw_spin_lock_init:
+    .weak   rt_hw_spin_lock_init
+    stlr    wzr, [x0]
+    ret
+
+rt_hw_spin_trylock:
+    .weak   rt_hw_spin_trylock
+    sub     sp, sp, #16
+    ldar    w2, [x0]
+    add     x1, sp, 8
+    stlr    w2, [x1]
+    ldarh   w1, [x1]
+    and     w1, w1, 65535
+    add     x3, sp, 10
+    ldarh   w3, [x3]
+    cmp     w1, w3, uxth
+    beq     1f
+    mov     w0, 0
+    add     sp, sp, 16
+    ret
+1:
+    add     x1, sp, 10
+2:
+    ldaxrh  w3, [x1]
+    add     w3, w3, 1
+    stlxrh  w4, w3, [x1]
+    cbnz    w4, 2b
+    add     x1, sp, 8
+    ldar    w1, [x1]
+3:
+    ldaxr   w3, [x0]
+    cmp     w3, w2
+    bne     4f
+    stxr    w4, w1, [x0]
+    cbnz    w4, 3b
+4:
+    cset    w0, eq
+    add     sp, sp, 16
+    ret
+
+rt_hw_spin_lock:
+    .weak   rt_hw_spin_lock
+    add     x1, x0, 2
+1:
+    ldxrh   w2, [x1]
+    add     w3, w2, 1
+    stxrh   w4, w3, [x1]
+    cbnz    w4, 1b
+    and     w2, w2, 65535
+    ldarh   w1, [x0]
+    cmp     w2, w1, uxth
+    beq     3f
+    sevl
+2:
+    wfe
+    ldaxrh  w1, [x0]
+    cmp     w2, w1
+    bne     2b
+3:
+    ret
+
+rt_hw_spin_unlock:
+    .weak   rt_hw_spin_unlock
+    ldxrh   w1, [x0]
+    add     w1, w1, 1
+    stlxrh  w2, w1, [x0]
+    cbnz    w2, rt_hw_spin_unlock
+    ret
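This weak assembly fallback assumes the same lock-word layout as the C11 union above: on little-endian AArch64 the 16-bit owner field sits at offset 0 of the 32-bit lock word and next at offset 2, which is why rt_hw_spin_lock first adds 2 to the lock address to bump the ticket counter and then polls the halfword at offset 0. A small host-side sketch of that layout assumption (illustrative only, not part of the commit):

/* Illustrative layout check, little-endian assumption; not part of the commit. */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

typedef union
{
    uint32_t value;
    struct
    {
        uint16_t owner; /* offset 0: polled by "ldarh w1, [x0]" */
        uint16_t next;  /* offset 2: incremented via "add x1, x0, 2" */
    } ticket;
} demo_spinlock_t;

int main(void)
{
    assert(offsetof(demo_spinlock_t, ticket.owner) == 0);
    assert(offsetof(demo_spinlock_t, ticket.next) == 2);
    return 0;
}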
AArch64 cpu port header:

@@ -17,10 +17,42 @@
 #include <rtdef.h>
 
 #ifdef RT_USING_SMP
-typedef struct {
-    volatile unsigned int lock;
+
+/**
+ * Spinlock
+ */
+
+typedef struct
+{
+    rt_uint32_t value;
 } rt_hw_spinlock_t;
-#endif
+
+/**
+ * Generic hw-cpu-id
+ */
+#ifdef ARCH_USING_GENERIC_CPUID
+
+#if RT_CPUS_NR > 1
+
+rt_inline int rt_hw_cpu_id(void)
+{
+    long cpuid;
+    __asm__ volatile("mrs %0, tpidr_el1":"=r"(cpuid));
+    return cpuid;
+}
+
+#else
+
+rt_inline int rt_hw_cpu_id(void)
+{
+    return 0;
+}
+
+#endif /* RT_CPUS_NR > 1 */
+
+#endif /* ARCH_USING_GENERIC_CPUID */
+
+#endif /* RT_USING_SMP */
 
 #define rt_hw_barrier(cmd, ...) \
     __asm__ volatile (RT_STRINGIFY(cmd) " "RT_STRINGIFY(__VA_ARGS__):::"memory")
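The generic rt_hw_cpu_id() simply reads tpidr_el1, which the AArch64 boot path is assumed to have loaded with the logical CPU index (an assumption here; the register choice is what the code above relies on). A hedged sketch of the typical consumer pattern, indexing per-CPU data by CPU id (hypothetical names, not part of this commit):

/* Illustrative per-CPU indexing pattern; names are hypothetical, not RT-Thread API. */
#include <rthw.h>
#include <rtdef.h>

#define MY_MAX_CPUS 8 /* hypothetical bound; RT-Thread itself uses RT_CPUS_NR */

static rt_uint32_t _percpu_tick_count[MY_MAX_CPUS];

void my_tick_hook(void)
{
    /* rt_hw_cpu_id() returns the index of the core executing this code */
    _percpu_tick_count[rt_hw_cpu_id()]++;
}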
Kernel SMP lock (rt_cpus_lock):

@@ -146,7 +146,7 @@ rt_base_t rt_cpus_lock(void)
     pcpu = rt_cpu_self();
     if (pcpu->current_thread != RT_NULL)
     {
-        register rt_ubase_t lock_nest = rt_atomic_load(&(pcpu->current_thread->cpus_lock_nest));
+        rt_ubase_t lock_nest = rt_atomic_load(&(pcpu->current_thread->cpus_lock_nest));
 
         rt_atomic_add(&(pcpu->current_thread->cpus_lock_nest), 1);
         if (lock_nest == 0)
Kernel scheduler (rt_exit_critical_safe):

@@ -1089,6 +1089,7 @@ void rt_exit_critical_safe(rt_base_t critical_level)
 
 void rt_exit_critical_safe(rt_base_t critical_level)
 {
+    RT_UNUSED(critical_level);
     return rt_exit_critical();
 }
Kernel thread (rt_thread_self):

@@ -363,7 +363,8 @@ rt_thread_t rt_thread_self(void)
     self = rt_cpu_self()->current_thread;
     rt_hw_local_irq_enable(lock);
     return self;
-#else
+
+#else /* !RT_USING_SMP */
     extern rt_thread_t rt_current_thread;
 
     return rt_current_thread;