feat: smp_call: added signaling call_req

This patch introduces `rt_smp_call_request` API to handle queued
requests across cores with user provided data buffer, which provides a
way to request IPI through a non-blocking pattern.

It also resolved several issues in the old implementation:
- Multiple requests from different cores can not be queued in the work
  object of the target core.
- Data race on the `rt_smp_work` of the same core. If multiple requests
  arrive in succession, or if the call data is still in use by the target
  CPU while a new request arrives, the value will be overwritten.
- Memory vulnerability. The rt_smp_event is allocated on stack, though
  the caller may not wait until the call is done.
- API naming problem. Actually we don't provide a way to issue an IPI to
  ANY core in the mask. What the API does matches the MANY pattern.
- FUNC_IPI registering to PIC.

Changes:
- Declared and configured the new `RT_SMP_CALL_IPI` to support
  functional IPIs for task requests across cores.
- Replaced the single `rt_smp_work` array with `call_req_cores` to
  manage per-core call requests safely.
- Added `_call_req_take` and `_call_req_release` functions for atomic
  handling of request lifetimes, preventing data race conditions.
- Replaced single event handling with a queue-based approach
  (`call_queue`) for efficient multi-request processing per core.
- Introduced `rt_smp_call_ipi_handler` to process queued requests,
  reducing IPI contention by only sending new requests when needed.
- Implemented `_smp_call_remote_request` to handle remote requests
  with specific flags, enabling more flexible core-to-core task
  signaling.
- Refined `rt_smp_call_req_init` to initialize and track requests
  with atomic usage flags, mitigating potential memory vulnerabilities.

Signed-off-by: Shell <smokewood@qq.com>
This commit is contained in:
Shell 2024-10-31 11:57:04 +08:00 committed by GuEe-GUI
parent ca9aa271a2
commit 733109996a
6 changed files with 364 additions and 95 deletions

View File

@ -71,8 +71,9 @@ void gic_common_sgi_config(void *base, void *data, int irq_base)
pirq = rt_pic_find_ipi(data, ipi); \
pirq->mode = RT_IRQ_MODE_EDGE_RISING; \
DECLARE_GIC_IPI(RT_SCHEDULE_IPI, 0);
DECLARE_GIC_IPI(RT_STOP_IPI, 1);
DECLARE_GIC_IPI(RT_SCHEDULE_IPI, RT_SCHEDULE_IPI);
DECLARE_GIC_IPI(RT_STOP_IPI, RT_STOP_IPI);
DECLARE_GIC_IPI(RT_SMP_CALL_IPI, RT_SMP_CALL_IPI);
#undef DECLARE_GIC_IPI
}

View File

@ -31,6 +31,7 @@ static int _ipi_hash[] =
#ifdef RT_USING_SMP
[RT_SCHEDULE_IPI] = RT_SCHEDULE_IPI,
[RT_STOP_IPI] = RT_STOP_IPI,
[RT_SMP_CALL_IPI] = RT_SMP_CALL_IPI,
#endif
};

View File

@ -6,6 +6,8 @@
* Change Logs:
* Date Author Notes
* 2024/9/12 zhujiale the first version
* 2024/10/24 Shell added non-blocking IPI calling method;
* fixup data racing
*/
#include "smp.h"
@ -14,37 +16,299 @@
#define DBG_LVL DBG_INFO
#include <rtdbg.h>
static struct rt_smp_call rt_smp_work[RT_CPUS_NR];
static rt_atomic_t rt_smp_wait;
static struct smp_data
{
/* call request data to each cores */
struct rt_smp_call_req call_req_cores[RT_CPUS_NR];
static rt_err_t smp_call_handler(struct rt_smp_event *event)
/* call queue of this core */
rt_ll_slist_t call_queue;
} _smp_data_cores[RT_CPUS_NR];
#define _CALL_REQ_USAGE_FREED 0
#define _CALL_REQ_USAGE_BUSY 1
static void _call_req_take(struct rt_smp_call_req *req)
{
switch (event->event_id)
rt_base_t exp;
do
{
case SMP_CALL_EVENT_FUNC:
exp = _CALL_REQ_USAGE_FREED;
}
while (!rt_atomic_compare_exchange_strong(&req->event.typed.usage_tracer, &exp, _CALL_REQ_USAGE_BUSY));
}
/**
 * Mark @req as free again by atomically storing _CALL_REQ_USAGE_FREED into
 * its usage tracer.
 *
 * @param req the request whose usage tracer is reset
 */
static void _call_req_release(struct rt_smp_call_req *req)
{
rt_atomic_store(&req->event.typed.usage_tracer, _CALL_REQ_USAGE_FREED);
}
/**
 * @brief Wait until the usage tracer of @req reads as freed, yielding the
 *        current thread between polls.
 *
 * @param req the request to wait on
 */
void rt_smp_request_wait_freed(struct rt_smp_call_req *req)
{
    RT_DEBUG_IN_THREAD_CONTEXT;

    /* poll the tracer; give up the processor while it is still in use */
    while (rt_atomic_load(&req->event.typed.usage_tracer) != _CALL_REQ_USAGE_FREED)
    {
        rt_thread_yield();
    }
}
/**
 * Atomically clear the bit of @oncpu in the calling-CPU mask referenced by
 * @event.
 *
 * @param event event whose typed.calling_cpu_mask points at the shared mask
 * @param oncpu index of the bit to clear
 */
static void _mask_out_cpu(struct rt_smp_event *event, int oncpu)
{
    rt_atomic_t *pending = event->typed.calling_cpu_mask;

    for (;;)
    {
        /* re-read the mask on every attempt so a lost race is retried */
        rt_base_t seen = rt_atomic_load(pending);
        rt_base_t wanted = seen & ~(1ul << oncpu);

        if (rt_atomic_compare_exchange_strong(pending, &seen, wanted))
        {
            break;
        }
    }
}
/**
 * Run one request taken from a shared request slot.
 *
 * The request is copied into the caller-provided @req_local, the slot's
 * freed_lock is unlocked, and the callback is then invoked through the
 * local copy.
 *
 * @param req_global the request slot to read from and unlock
 * @param req_local  caller-provided storage that receives the copy
 */
static void _do_glob_request(struct rt_smp_call_req *req_global,
struct rt_smp_call_req *req_local)
{
struct rt_smp_event *event;
/* release the global request data */
rt_memcpy(req_local, req_global, sizeof(struct rt_smp_call_req));
rt_hw_spin_unlock(&req_global->freed_lock);
/* only the local copy is used from here on */
event = &req_local->event;
RT_ASSERT(!!event->func);
event->func(event->data);
/* NOTE(review): rt_smp_wait looks like the completion counter of the
 * previous implementation - confirm whether this increment is still wanted */
rt_atomic_add(&rt_smp_wait, 1);
return ;
}
/**
 * Run the callback of a user-provided request, then release the request.
 *
 * @param req the request to execute; its event.func must not be NULL
 */
static void _do_request(struct rt_smp_call_req *req)
{
    RT_ASSERT(!!req->event.func);
    req->event.func(req->event.data);

    /* the callback has returned; mark the request as free */
    _call_req_release(req);
}
/**
 * Dispatch one dequeued request according to its event id.
 *
 * @param req   the request to handle
 * @param oncpu id of the CPU the handler runs on
 * @return RT_EOK when the event id is known, -RT_ERROR otherwise
 */
static rt_err_t _smp_call_handler(struct rt_smp_call_req *req, int oncpu)
{
    /* read the id once, before any handler touches the request */
    const int id = req->event.event_id;

    if (id == SMP_CALL_EVENT_REQUEST)
    {
        _do_request(req);
        return RT_EOK;
    }

    if (id == SMP_CALL_EVENT_GLOB_SYNC || id == SMP_CALL_EVENT_GLOB_ASYNC)
    {
        struct rt_smp_call_req req_local;

        _do_glob_request(req, &req_local);
        if (id == SMP_CALL_EVENT_GLOB_SYNC)
        {
            /* synchronous callers get this CPU's bit cleared in their mask */
            _mask_out_cpu(&req_local.event, oncpu);
        }
        return RT_EOK;
    }

    LOG_E("error event id\n");
    return -RT_ERROR;
}
void rt_smp_call_ipi_handler(int vector, void *param)
{
int cur_cpu = rt_hw_cpu_id();
int oncpu = rt_hw_cpu_id();
struct rt_smp_call_req *request;
rt_spin_lock(&rt_smp_work[cur_cpu].lock);
if (rt_smp_work[cur_cpu].event.event_id)
RT_ASSERT(rt_interrupt_get_nest());
while (1)
{
if (smp_call_handler(&rt_smp_work[cur_cpu].event) != RT_EOK)
rt_ll_slist_t *node = rt_ll_slist_dequeue(&_smp_data_cores[oncpu].call_queue);
if (node)
{
LOG_E("Have no event\n");
request = rt_list_entry(node, struct rt_smp_call_req, slist_node);
_smp_call_handler(request, oncpu);
}
rt_memset(&rt_smp_work[cur_cpu].event, 0, sizeof(struct rt_smp_event));
else
{
break;
}
}
}
/**
 * Queue @call_req on the call queue of @callcpu and send it RT_SMP_CALL_IPI.
 *
 * @param callcpu  id of the target CPU
 * @param func     not used by this function
 * @param data     not used by this function
 * @param flags    not used by this function
 * @param call_req the request to take, enqueue and signal
 */
static void _smp_call_remote_request(int callcpu, rt_smp_call_cb_t func,
                                     void *data, rt_uint8_t flags,
                                     struct rt_smp_call_req *call_req)
{
    rt_base_t cpu_mask;

    /* take the request before it becomes visible on the target's queue */
    _call_req_take(call_req);
    rt_ll_slist_enqueue(&_smp_data_cores[callcpu].call_queue, &call_req->slist_node);

    cpu_mask = 1ul << callcpu;
    rt_hw_ipi_send(RT_SMP_CALL_IPI, cpu_mask);
}
/**
 * @brief SMP call request with user provided @call_req. Compared to the
 *        rt_smp_call_func* family, you can call it in ISR or IRQ-masked
 *        environment.
 *
 *        If the target is the current core and SMP_CALL_NO_LOCAL is not set,
 *        the callback runs immediately with local IRQs disabled. Otherwise
 *        the request is queued on the target core and an IPI is sent.
 *
 * @param callcpu the logical core id of the target, in [0, RT_CPUS_NR)
 * @param flags control flags of your request; SMP_CALL_WAIT_ALL is rejected
 * @param call_req the pre-initialized request data
 * @return rt_err_t RT_EOK on success, -RT_EBUSY if @call_req is still in
 *         use, -RT_EINVAL if SMP_CALL_WAIT_ALL is set or @callcpu is out of
 *         range
 */
rt_err_t rt_smp_call_request(int callcpu, rt_uint8_t flags, struct rt_smp_call_req *call_req)
{
    rt_ubase_t clvl;
    int oncpu;

    if (rt_atomic_load(&call_req->event.typed.usage_tracer) ==
        _CALL_REQ_USAGE_BUSY)
    {
        return -RT_EBUSY;
    }

    if (flags & SMP_CALL_WAIT_ALL)
    {
        return -RT_EINVAL;
    }

    /*
     * Reject invalid targets explicitly: a negative id would index outside
     * the per-core data, and a too-large id would drop the request while
     * still reporting success.
     */
    if (callcpu < 0 || callcpu >= RT_CPUS_NR)
    {
        return -RT_EINVAL;
    }

    clvl = rt_enter_critical();
    oncpu = rt_hw_cpu_id();

    if (oncpu == callcpu && !(flags & SMP_CALL_NO_LOCAL))
    {
        rt_ubase_t level;

        /* handle IPI on irq-masked environment */
        level = rt_hw_local_irq_disable();
        call_req->event.func(call_req->event.data);
        rt_hw_local_irq_enable(level);
    }
    else
    {
        _smp_call_remote_request(callcpu, call_req->event.func, call_req->event.data, flags, call_req);
    }

    rt_exit_critical_safe(clvl);

    return RT_EOK;
}
/**
 * @brief Initialize a user-provided call request.
 *
 * @param call_req the request to initialize
 * @param func     the callback stored in the request
 * @param data     the argument stored alongside @func
 */
void rt_smp_call_req_init(struct rt_smp_call_req *call_req,
                          rt_smp_call_cb_t func, void *data)
{
    call_req->event.event_id = SMP_CALL_EVENT_REQUEST;
    call_req->event.func = func;
    call_req->event.data = data;

    /* the usage tracer starts at zero */
    call_req->event.typed.usage_tracer = 0;
}
/**
 * Queue @func on every remote CPU selected by @cpu_mask (filtered by @cond),
 * send them RT_SMP_CALL_IPI, optionally run @func on the local CPU, and, for
 * SMP_CALL_WAIT_ALL, spin until every signaled remote CPU has cleared its
 * bit in the shared mask.
 *
 * @param oncpu    id of the CPU this function is running on
 * @param cpu_mask bitmask of target CPUs
 * @param func     callback to run on each selected CPU
 * @param data     argument passed to @func and to @cond
 * @param flags    SMP_CALL_WAIT_ALL and/or SMP_CALL_NO_LOCAL
 * @param cond     optional per-CPU filter; a CPU is skipped when it returns
 *                 false for that CPU
 */
static void _smp_call_func_cond(int oncpu, rt_ubase_t cpu_mask,
                                rt_smp_call_cb_t func, void *data,
                                rt_uint8_t flags, rt_smp_cond_t cond)
{
    rt_ubase_t tmp_mask;
    rt_bool_t sync_call = RT_FALSE;
    /* shift in unsigned long, consistent with the other mask arithmetic */
    rt_ubase_t oncpu_mask = 1ul << oncpu;
    rt_atomic_t calling_cpu_mask, *maskp = RT_NULL;
    int tmp_id = 0, rcpu_cnt = 0, event_id, call_local;

    if (!(flags & SMP_CALL_NO_LOCAL) && (oncpu_mask & cpu_mask))
    {
        /* the local CPU is served by a direct call, not by an IPI */
        call_local = RT_TRUE;
        cpu_mask = cpu_mask & (~oncpu_mask);
    }
    else
    {
        call_local = RT_FALSE;
    }

    if (cpu_mask)
    {
        tmp_mask = cpu_mask;

        if (flags & SMP_CALL_WAIT_ALL)
        {
            sync_call = RT_TRUE;
            maskp = &calling_cpu_mask;
            event_id = SMP_CALL_EVENT_GLOB_SYNC;
            rt_atomic_store(maskp, cpu_mask);
        }
        else
        {
            event_id = SMP_CALL_EVENT_GLOB_ASYNC;
            maskp = RT_NULL;
        }

        while (tmp_mask)
        {
            struct rt_smp_call_req *call_req;
            struct rt_smp_event *event;
            int lz_bit = __rt_ffsl(tmp_mask);

            tmp_id = lz_bit - 1;
            tmp_mask &= ~(1ul << tmp_id);

            if (cond && !cond(tmp_id, data))
            {
                /* filtered out: neither signaled nor waited for */
                cpu_mask &= ~(1ul << tmp_id);
                continue;
            }

            /* need to wait one more */
            rcpu_cnt++;

            call_req = &_smp_data_cores[oncpu].call_req_cores[tmp_id];

            /* very careful here, spinning wait on previous occupation */
            rt_hw_spin_lock(&call_req->freed_lock);

            event = &call_req->event;
            event->event_id = event_id;
            event->func = func;
            event->data = data;
            event->typed.calling_cpu_mask = maskp;

            rt_ll_slist_enqueue(&_smp_data_cores[tmp_id].call_queue, &call_req->slist_node);
        }

        if (cpu_mask)
        {
            RT_ASSERT(rcpu_cnt);

            rt_hw_ipi_send(RT_SMP_CALL_IPI, cpu_mask);
        }
    }

    /* the condition for the local call must be evaluated for the local CPU */
    if (call_local && (!cond || cond(oncpu, data)))
    {
        rt_ubase_t level;

        /* run the callback locally with IRQs masked, simulating an ISR */
        level = rt_hw_local_irq_disable();
        func(data);
        rt_hw_local_irq_enable(level);
    }

    if (sync_call && rcpu_cnt)
    {
        /* each remote handler clears its own bit when it is done */
        while (rt_atomic_load(maskp) & cpu_mask)
            ;
    }
}
/**
@ -58,86 +322,53 @@ void rt_smp_call_ipi_handler(int vector, void *param)
* else it will call function on specified CPU and return immediately
* @param cond the condition function pointer; if it is set, the function is called on a CPU only when cond returns true for that CPU
*/
void rt_smp_call_func_cond(int cpu_mask, rt_smp_call_func_back func, void *data, rt_uint8_t flag, rt_smp_cond cond)
void rt_smp_call_func_cond(rt_ubase_t cpu_mask, rt_smp_call_cb_t func, void *data, rt_uint8_t flag, rt_smp_cond_t cond)
{
RT_DEBUG_NOT_IN_INTERRUPT;
struct rt_smp_event event;
rt_bool_t need_call = RT_TRUE, need_wait = RT_FALSE;
int cur_cpu = rt_hw_cpu_id();
int cpuid = 1 << cur_cpu;
int tmp_id = 0, cpu_nr = 0;
int tmp_mask;
int irq_flag;
int oncpu;
rt_ubase_t clvl;
if (flag == SMP_CALL_WAIT_ALL)
RT_ASSERT(!rt_hw_interrupt_is_disabled());
clvl = rt_enter_critical();
oncpu = rt_hw_cpu_id();
if (cpu_mask <= RT_ALL_CPU)
{
need_wait = RT_TRUE;
rt_atomic_store(&rt_smp_wait, 0);
_smp_call_func_cond(oncpu, cpu_mask, func, data, flag, cond);
}
if (cpuid & cpu_mask)
{
func(data);
cpu_mask = cpu_mask & (~cpuid);
rt_exit_critical_safe(clvl);
}
if (!cpu_mask)
need_call = RT_FALSE;
tmp_mask = cpu_mask;
if (need_call)
{
while (tmp_mask)
{
if ((tmp_mask & 1) && (tmp_id < RT_CPUS_NR))
{
if (cond && !cond(tmp_id, data))
continue;
cpu_nr++;
event.event_id = SMP_CALL_EVENT_FUNC;
event.func = func;
event.data = data;
event.cpu_mask = cpu_mask;
irq_flag = rt_spin_lock_irqsave(&rt_smp_work[tmp_id].lock);
rt_smp_work[tmp_id].event = event;
rt_spin_unlock_irqrestore(&rt_smp_work[tmp_id].lock,irq_flag);
}
tmp_id++;
tmp_mask = tmp_mask >> 1;
}
rt_hw_ipi_send(RT_FUNC_IPI, cpu_mask);
}
if (need_wait)
{
while (rt_atomic_load(&rt_smp_wait) != cpu_nr);
}
}
void rt_smp_call_each_cpu(rt_smp_call_func_back func, void *data, rt_uint8_t flag)
void rt_smp_call_each_cpu(rt_smp_call_cb_t func, void *data, rt_uint8_t flag)
{
rt_smp_call_func_cond(RT_ALL_CPU, func, data, flag, RT_NULL);
}
void rt_smp_call_each_cpu_cond(rt_smp_call_func_back func, void *data, rt_uint8_t flag, rt_smp_cond cond_func)
void rt_smp_call_each_cpu_cond(rt_smp_call_cb_t func, void *data, rt_uint8_t flag, rt_smp_cond_t cond_func)
{
rt_smp_call_func_cond(RT_ALL_CPU, func, data, flag, cond_func);
}
void rt_smp_call_any_cpu(int cpu_mask, rt_smp_call_func_back func, void *data, rt_uint8_t flag)
void rt_smp_call_cpu_mask(rt_ubase_t cpu_mask, rt_smp_call_cb_t func, void *data, rt_uint8_t flag)
{
rt_smp_call_func_cond(cpu_mask, func, data, flag, RT_NULL);
}
void rt_smp_call_any_cpu_cond(int cpu_mask, rt_smp_call_func_back func, void *data, rt_uint8_t flag, rt_smp_cond cond_func)
void rt_smp_call_cpu_mask_cond(rt_ubase_t cpu_mask, rt_smp_call_cb_t func, void *data, rt_uint8_t flag, rt_smp_cond_t cond_func)
{
rt_smp_call_func_cond(cpu_mask, func, data, flag, cond_func);
}
void rt_smp_init(void)
void rt_smp_call_init(void)
{
rt_memset(&_smp_data_cores, 0, sizeof(_smp_data_cores));
for (int i = 0; i < RT_CPUS_NR; i++)
{
rt_memset(&rt_smp_work[i], 0, sizeof(struct rt_smp_call));
rt_spin_lock_init(&rt_smp_work[i].lock);
for (int j = 0; j < RT_CPUS_NR; j++)
{
rt_hw_spin_lock_init(&_smp_data_cores[i].call_req_cores[j].freed_lock);
}
}
}

View File

@ -1,34 +1,69 @@
/*
* Copyright (c) 2006-2024 RT-Thread Development Team
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2024/9/12 zhujiale the first version
* 2024/10/24 Shell added non-blocking IPI calling method
*/
#ifndef __SMP_IPI_H__
#define __SMP_IPI_H__
#include <rtthread.h>
typedef void (*rt_smp_call_func_back)(void *data);
typedef rt_bool_t (*rt_smp_cond)(int cpu, void *info);
#define SMP_CALL_EVENT_FUNC 0x1
/* callback of smp call */
typedef void (*rt_smp_call_cb_t)(void *data);
typedef rt_bool_t (*rt_smp_cond_t)(int cpu, void *info);
#define SMP_CALL_WAIT_ALL (1 << 0)
#define SMP_CALL_NO_WAIT (1 << 1)
#define SMP_CALL_EVENT_GLOB_ASYNC 0x1
#define SMP_CALL_EVENT_GLOB_SYNC 0x2
#define SMP_CALL_EVENT_REQUEST 0x4
#define SMP_CALL_WAIT_ALL (1ul << 0)
#define SMP_CALL_NO_LOCAL (1ul << 1)
#define SMP_CALL_SIGNAL (1ul << 2)
#define RT_ALL_CPU ((1 << RT_CPUS_NR) - 1)
struct rt_smp_event
{
int cpu_mask;
int event_id;
void *data;
rt_smp_call_func_back func;
};
struct rt_smp_call
rt_smp_call_cb_t func;
union
{
struct rt_spinlock lock;
struct rt_smp_event event;
rt_atomic_t *calling_cpu_mask;
rt_atomic_t usage_tracer;
} typed;
};
/* One cross-core call request: the event to run plus the node used to queue it */
struct rt_smp_call_req
{
/* handle the busy status synchronization */
rt_hw_spinlock_t freed_lock;
/* event id, callback, callback argument and per-kind tracking data */
struct rt_smp_event event;
/* list node through which the request is enqueued on a core's call queue */
rt_ll_slist_t slist_node;
};
void rt_smp_call_ipi_handler(int vector, void *param);
void rt_smp_call_each_cpu(rt_smp_call_func_back func, void *data, rt_uint8_t flag);
void rt_smp_call_each_cpu_cond(rt_smp_call_func_back func, void *data, rt_uint8_t flag, rt_smp_cond cond_func);
void rt_smp_call_any_cpu(int cpu_mask, rt_smp_call_func_back func, void *data, rt_uint8_t flag);
void rt_smp_call_any_cpu_cond(int cpu_mask, rt_smp_call_func_back func, void *data, rt_uint8_t flag, rt_smp_cond cond_func);
void rt_smp_init(void);
void rt_smp_call_each_cpu(rt_smp_call_cb_t func, void *data, rt_uint8_t flags);
void rt_smp_call_each_cpu_cond(rt_smp_call_cb_t func, void *data, rt_uint8_t flag, rt_smp_cond_t cond_func);
void rt_smp_call_cpu_mask(rt_ubase_t cpu_mask, rt_smp_call_cb_t func, void *data, rt_uint8_t flags);
void rt_smp_call_cpu_mask_cond(rt_ubase_t cpu_mask, rt_smp_call_cb_t func, void *data, rt_uint8_t flag, rt_smp_cond_t cond_func);
void rt_smp_call_init(void);
rt_err_t rt_smp_call_request(int callcpu, rt_uint8_t flags, struct rt_smp_call_req *call_req);
void rt_smp_call_req_init(struct rt_smp_call_req *call_req,
rt_smp_call_cb_t func, void *data);
void rt_smp_request_wait_freed(struct rt_smp_call_req *req);
#define rt_smp_for_each_cpu(_iter) for (_iter = 0; (_iter) < RT_CPUS_NR; (_iter)++)
/**
 * Advance @iter to the next index, skipping over @cpuid.
 *
 * @param iter  the current index; (size_t)-1 yields the first index
 * @param cpuid the index to skip
 * @return iter + 1, or iter + 2 when iter + 1 equals @cpuid
 */
rt_inline size_t rt_smp_get_next_remote(size_t iter, size_t cpuid)
{
    size_t next = iter + 1;

    if (next == cpuid)
    {
        next++;
    }

    return next;
}
#define rt_smp_for_each_remote_cpu(_iter, _cpuid) for (_iter = rt_smp_get_next_remote(-1, _cpuid); (_iter) < RT_CPUS_NR; _iter=rt_smp_get_next_remote(_iter, _cpuid))
#endif

View File

@ -674,8 +674,8 @@ typedef struct rt_cpu_usage_stats *rt_cpu_usage_stats_t;
#define RT_STOP_IPI 1
#endif /* RT_STOP_IPI */
#ifndef RT_FUNC_IPI
#define RT_FUNC_IPI 2
#ifndef RT_SMP_CALL_IPI
#define RT_SMP_CALL_IPI 2
#endif
#define RT_MAX_IPI 3

View File

@ -302,14 +302,14 @@ void rt_hw_common_setup(void)
rt_thread_idle_sethook(rt_hw_idle_wfi);
#ifdef RT_USING_SMP
rt_smp_init();
rt_smp_call_init();
/* Install the IPI handle */
rt_hw_ipi_handler_install(RT_SCHEDULE_IPI, rt_scheduler_ipi_handler);
rt_hw_ipi_handler_install(RT_STOP_IPI, rt_scheduler_ipi_handler);
rt_hw_ipi_handler_install(RT_FUNC_IPI, rt_smp_call_ipi_handler);
rt_hw_ipi_handler_install(RT_SMP_CALL_IPI, rt_smp_call_ipi_handler);
rt_hw_interrupt_umask(RT_SCHEDULE_IPI);
rt_hw_interrupt_umask(RT_STOP_IPI);
rt_hw_interrupt_umask(RT_FUNC_IPI);
rt_hw_interrupt_umask(RT_SMP_CALL_IPI);
#endif
}
@ -391,6 +391,7 @@ rt_weak void rt_hw_secondary_cpu_bsp_start(void)
rt_hw_interrupt_umask(RT_SCHEDULE_IPI);
rt_hw_interrupt_umask(RT_STOP_IPI);
rt_hw_interrupt_umask(RT_SMP_CALL_IPI);
LOG_I("Call cpu %d on %s", cpu_id, "success");