/*
 * Copyright (c) 2006-2023, RT-Thread Development Team
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2023-02-25     GuEe-GUI     the first version
 */

#include <rthw.h>
#include <rtthread.h>
#include <rtdevice.h>

#define DBG_TAG "rtdm.nvme"
#define DBG_LVL DBG_INFO
#include <rtdbg.h>

static struct rt_dm_ida nvme_controller_ida = RT_DM_IDA_INIT(CUSTOM);
static struct rt_dm_ida nvme_ida = RT_DM_IDA_INIT(NVME);

static struct rt_spinlock nvme_lock = {};
static rt_list_t nvme_nodes = RT_LIST_OBJECT_INIT(nvme_nodes);

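/*
 * MMIO helpers for the NVMe BAR registers: 32-bit values are accessed
 * directly, while 64-bit registers (CAP, ASQ, ACQ) are split into two
 * 32-bit accesses, low dword first.
 */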
rt_inline rt_uint32_t nvme_readl(struct rt_nvme_controller *nvme, int offset)
{
    return HWREG32(nvme->regs + offset);
}

rt_inline void nvme_writel(struct rt_nvme_controller *nvme, int offset, rt_uint32_t value)
{
    HWREG32(nvme->regs + offset) = value;
}

rt_inline rt_uint64_t nvme_readq(struct rt_nvme_controller *nvme, int offset)
{
    rt_uint32_t lo32, hi32;

    lo32 = HWREG32(nvme->regs + offset);
    hi32 = HWREG32(nvme->regs + offset + 4);

    return ((rt_uint64_t)hi32 << 32) + lo32;
}

rt_inline void nvme_writeq(struct rt_nvme_controller *nvme, int offset, rt_uint64_t value)
{
    nvme_writel(nvme, offset, (rt_uint32_t)(value & 0xffffffff));
    nvme_writel(nvme, offset + 4, (rt_uint32_t)(value >> 32));
}

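/*
 * Poll CSTS until (CSTS & mask) == value or the controller times out.
 * CAP.TO is expressed in 500 ms units, so the deadline below is
 * RT_NVME_CAP_TIMEOUT(cap) * 500 milliseconds from now.
 */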
static rt_err_t nvme_poll_csts(struct rt_nvme_controller *nvme,
        rt_uint32_t mask, rt_uint32_t value)
{
    rt_tick_t timeout;

    timeout = rt_tick_from_millisecond(RT_NVME_CAP_TIMEOUT(nvme->cap) * 500);
    timeout += rt_tick_get();

    do {
        if ((nvme_readl(nvme, RT_NVME_REG_CSTS) & mask) == value)
        {
            return RT_EOK;
        }

        rt_hw_cpu_relax();
    } while (rt_tick_get() < timeout);

    return -RT_ETIMEOUT;
}

static rt_err_t nvme_enable_ctrl(struct rt_nvme_controller *nvme)
{
    nvme->ctrl_config &= ~RT_NVME_CC_SHN_MASK;
    nvme->ctrl_config |= RT_NVME_CC_ENABLE;
    nvme_writel(nvme, RT_NVME_REG_CC, nvme->ctrl_config);

    return nvme_poll_csts(nvme, RT_NVME_CSTS_RDY, RT_NVME_CSTS_RDY);
}

static rt_err_t nvme_disable_ctrl(struct rt_nvme_controller *nvme)
{
    nvme->ctrl_config &= ~RT_NVME_CC_SHN_MASK;
    nvme->ctrl_config &= ~RT_NVME_CC_ENABLE;
    nvme_writel(nvme, RT_NVME_REG_CC, nvme->ctrl_config);

    return nvme_poll_csts(nvme, RT_NVME_CSTS_RDY, 0);
}

static rt_err_t nvme_shutdown_ctrl(struct rt_nvme_controller *nvme)
{
    nvme->ctrl_config &= ~RT_NVME_CC_SHN_MASK;
    nvme->ctrl_config |= RT_NVME_CC_SHN_NORMAL;
    nvme_writel(nvme, RT_NVME_REG_CC, nvme->ctrl_config);

    return nvme_poll_csts(nvme, RT_NVME_CSTS_SHST_MASK, RT_NVME_CSTS_SHST_CMPLT);
}

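/* Command identifiers are taken from a free-running 16-bit counter. */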
rt_inline rt_le16_t nvme_next_cmdid(struct rt_nvme_controller *nvme)
{
    return rt_cpu_to_le16((rt_uint16_t)rt_atomic_add(&nvme->cmdid, 1));
}

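/*
 * Synchronous command submission: copy the command into the next free
 * submission queue slot, ring the SQ tail doorbell, then block on the
 * queue's completion object until nvme_queue_isr() delivers the result.
 */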
static rt_err_t nvme_submit_cmd(struct rt_nvme_queue *queue,
        struct rt_nvme_command *cmd)
{
    rt_ubase_t level;
    rt_err_t err = RT_EOK;
    rt_uint16_t tail, head;
    struct rt_nvme_controller *nvme = queue->nvme;

_retry:
    level = rt_spin_lock_irqsave(&queue->lock);

    tail = queue->sq_tail;
    head = queue->cq_head;

    if (tail + 1 == head)
    {
        /* IO queue is full, waiting for the last IO command to complete. */
        rt_spin_unlock_irqrestore(&queue->lock, level);

        rt_thread_yield();

        goto _retry;
    }

    cmd->common.cmdid = nvme_next_cmdid(nvme);
    rt_memcpy(&queue->sq_cmds[tail], cmd, sizeof(*cmd));

    if (nvme->ops->submit_cmd)
    {
        if ((err = nvme->ops->submit_cmd(queue, cmd)))
        {
            /* Don't leak the queue lock on the error path */
            rt_spin_unlock_irqrestore(&queue->lock, level);

            return err;
        }
    }

    if (++tail == queue->depth)
    {
        tail = 0;
    }
    HWREG32(queue->doorbell) = tail;
    queue->sq_tail = tail;

    queue->cmd = cmd;
    queue->err = RT_EOK;

    rt_spin_unlock_irqrestore(&queue->lock, level);

    err = rt_completion_wait(&queue->done,
            rt_tick_from_millisecond(queue->qid != 0 ? RT_WAITING_FOREVER : 60));

    return err ? : queue->err;
}

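/*
 * Build a Set Features admin command carrying only FID and DWORD11,
 * which is enough for the simple features used in this driver
 * (number of queues, volatile write cache).
 */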
static rt_err_t nvme_set_features_simple(struct rt_nvme_controller *nvme,
        rt_uint32_t fid, rt_uint32_t dword11)
{
    struct rt_nvme_command cmd;

    rt_memset(&cmd, 0, sizeof(cmd));
    cmd.features.opcode = RT_NVME_ADMIN_OPCODE_SET_FEATURES;
    cmd.features.fid = rt_cpu_to_le32(fid);
    cmd.features.dword11 = rt_cpu_to_le32(dword11);

    return nvme_submit_cmd(&nvme->admin_queue, &cmd);
}

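/*
 * Pick an I/O queue for this submission: each CPU advances its own
 * counter and the result is reduced modulo the number of I/O queues,
 * spreading submissions across queues and CPUs.
 */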
static rt_err_t nvme_submit_io_cmd(struct rt_nvme_controller *nvme,
        struct rt_nvme_command *cmd)
{
    rt_uint16_t qid;

    qid = rt_atomic_add(&nvme->ioqid[rt_hw_cpu_id()], RT_CPUS_NR);
    qid %= nvme->io_queue_max;

    return nvme_submit_cmd(&nvme->io_queues[qid], cmd);
}

/*
 * PRP Mode:
 *
 * |63                                   n+1|n        0|
 * +----------------------------------------+----------+---+---+
 * |           Page Base Address            |  Offset  | 0 | 0 |
 * +----------------------------------------+----------+---+---+
 *                      |
 *                      v
 *             Host Physical Pages
 *
 *                                +----------------------------+
 * +--------------+----------+    |           Page k           |
 * |  PRP Entry1  |  Offset  +--->+----------------------------+
 * +--------------+----------+    |         Page k + 1         |
 *                                +----------------------------+
 *                                             ...
 *                                +----------------------------+
 * +--------------+----------+    |         Page k + m         |
 * |  PRP Entry2  |    0     +--->+----------------------------+
 * +--------------+----------+    |       Page k + m + 1       |
 *                                +----------------------------+
 *
 * PRP List (In PRP Entry2):
 *
 * |63                                   n+1|n                0|
 * +----------------------------------------+------------------+
 * |           Page Base Address k          |        0h        |
 * +----------------------------------------+------------------+
 * |         Page Base Address k + 1        |        0h        |
 * +----------------------------------------+------------------+
 * |                    ...                                    |
 * +----------------------------------------+------------------+
 * |         Page Base Address k + m        |        0h        |
 * +----------------------------------------+------------------+
 * |       Page Base Address k + m + 1      |        0h        |
 * +----------------------------------------+------------------+
 *
 * SGL Mode:
 *
 *                                          +----- Non-transport
 *       LBA                               /
 * +---------------+---------------+-------/-------+---------------+
 * |      3KB      |      4KB      |      2KB      |      4KB      |
 * +-------+-------+-------+-------+---------------+--------+------+
 *         |               |                                |
 *         |               +-+                     +--------+
 *         |                 |                     |
 *         v                 v                     v
 * +---------------+ +---------------+     +---------------+
 * |  A MEM BLOCK  | |  B MEM BLOCK  |     |  C MEM BLOCK  |
 * +---------------+ +---------------+     +---------------+
 *
 * The transferred data blocks are described by a chain of SGL segments;
 * the non-transport 2KB hole is covered by a bit bucket descriptor:
 *
 * Segment(0)
 * +----------+----------+
 * | Address: A          |  ---> A MEM BLOCK
 * +----------+----------+
 * | Type: 0h | Len: 3KB |
 * +----------+----------+
 * | Address: Segment(1) |  ---> Segment(1)
 * +----------+----------+
 * | Type: 2h | Len: 48  |
 * +----------+----------+
 *
 * Segment(1)
 * +----------+----------+
 * | Address: B          |  ---> B MEM BLOCK
 * +----------+----------+
 * | Type: 0h | Len: 4KB |
 * +----------+----------+
 * | Address: <NULL>     |
 * +----------+----------+
 * | Type: 1h | Len: 2KB |
 * +----------+----------+
 * | Address: Segment(2) |  ---> Segment(2)
 * +----------+----------+
 * | Type: 0h | Len: 16  |
 * +----------+----------+
 *
 * Segment(2)
 * +----------+----------+
 * | Address: C          |  ---> C MEM BLOCK
 * +----------+----------+
 * | Type: 0h | Len: 4KB |
 * +----------+----------+
 */

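/*
 * Example (PRP, 4KB controller pages): a page-aligned two-page transfer
 * needs no PRP list, PRP1 holds the first page and PRP2 the second page
 * address; a three-page (or larger) transfer makes PRP2 point to a PRP
 * list holding the remaining page addresses, as built below.
 */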
static rt_ssize_t nvme_blk_rw(struct rt_nvme_device *ndev, rt_off_t slba,
        rt_ubase_t buffer_dma, rt_size_t lbas, rt_uint8_t opcode)
{
    rt_err_t err;
    rt_uint16_t max_lbas;
    rt_uint32_t lba_shift;
    rt_size_t tlbas;
    rt_ssize_t data_length;
    struct rt_nvme_command cmd;
    struct rt_nvme_controller *nvme = ndev->ctrl;

    rt_memset(&cmd, 0, sizeof(cmd));
    cmd.rw.opcode = opcode;
    cmd.rw.flags = nvme->sgl_mode << RT_NVME_CMD_FLAGS_PSDT_SHIFT;
    cmd.rw.nsid = rt_cpu_to_le32(ndev->nsid);

    tlbas = lbas;
    lba_shift = ndev->lba_shift;
    max_lbas = 1 << (nvme->max_transfer_shift - lba_shift);

    if (nvme->sgl_mode)
    {
        while ((rt_ssize_t)lbas > 0)
        {
            if (lbas < max_lbas)
            {
                max_lbas = (rt_uint16_t)lbas;
            }

            data_length = max_lbas << lba_shift;

            cmd.rw.sgl.adddress = rt_cpu_to_le64(buffer_dma);
            cmd.rw.sgl.length = rt_cpu_to_le32(data_length);
            cmd.rw.sgl.sgl_identify = SGL_DESC_TYPE_DATA_BLOCK;
            /* SLBA is a 64-bit field */
            cmd.rw.slba = rt_cpu_to_le64(slba);
            cmd.rw.length = rt_cpu_to_le16(max_lbas - 1);

            if ((err = nvme_submit_io_cmd(nvme, &cmd)))
            {
                tlbas -= lbas;
                break;
            }

            lbas -= max_lbas;
            slba += max_lbas;
            buffer_dma += data_length;
        }
    }
    else
    {
        void *prp_list = RT_NULL;
        rt_size_t prp_list_size = 0, page_size;

        page_size = nvme->page_size;

        while ((rt_ssize_t)lbas > 0)
        {
            rt_uint64_t prp2_addr, dma_addr;
            rt_ssize_t remain_length, page_offset;

            if (lbas < max_lbas)
            {
                max_lbas = (rt_uint16_t)lbas;
            }

            /*
             * PRP transfer:
             *  1. data_length <= 4KB:
             *      prp1 = buffer_dma
             *      prp2 = 0
             *
             *  2. 4KB < data_length <= 8KB:
             *      prp1 = buffer_dma
             *      prp2 = buffer_dma
             *
             *  3. 8KB < data_length:
             *      prp1 = buffer_dma(0, 4k)
             *      prp2 = buffer_dma(4k, ~)
             */
            dma_addr = buffer_dma;
            page_offset = buffer_dma & (page_size - 1);
            data_length = max_lbas << lba_shift;
            remain_length = data_length - (page_size - page_offset);

            do {
                rt_size_t prps_per_page, prps, pages;
                rt_uint64_t *prp_list_ptr, prp_list_dma;

                if (remain_length <= 0)
                {
                    prp2_addr = 0;
                    break;
                }

                if (remain_length)
                {
                    dma_addr += (page_size - page_offset);
                }

                if (remain_length <= page_size)
                {
                    prp2_addr = dma_addr;
                    break;
                }

                prps_per_page = page_size / sizeof(rt_uint64_t);
                prps = RT_DIV_ROUND_UP(remain_length, page_size);
                pages = RT_DIV_ROUND_UP(prps - 1, prps_per_page - 1);

                if (prps > prp_list_size)
                {
                    if (prp_list)
                    {
                        rt_free_align(prp_list);
                    }

                    prp_list = rt_malloc_align(pages * page_size, page_size);

                    if (!prp_list)
                    {
                        LOG_D("No memory to create a PRP List");

                        /* Ask user to try again */
                        return tlbas - lbas;
                    }

                    prp_list_size = pages * (prps_per_page - 1) + 1;
                }
                prp_list_ptr = prp_list;
                prp_list_dma = (rt_uint64_t)rt_kmem_v2p(prp_list_ptr);

                prp2_addr = prp_list_dma;

                for (int i = 0; prps; --prps, ++i)
                {
                    /* End of the entry, fill the next entry addr if remain */
                    if ((i == (prps_per_page - 1)) && prps > 1)
                    {
                        prp_list_dma += page_size;
                        *prp_list_ptr = rt_cpu_to_le64(prp_list_dma);

                        /* Start to fill the next PRP */
                        i = 0;
                    }

                    *prp_list_ptr = rt_cpu_to_le64(dma_addr);
                    dma_addr += page_size;
                }

                rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, prp_list_ptr, prp_list_size);
            } while (0);

            cmd.rw.prp1 = rt_cpu_to_le64(buffer_dma);
            cmd.rw.prp2 = rt_cpu_to_le64(prp2_addr);
            /* SLBA is a 64-bit field */
            cmd.rw.slba = rt_cpu_to_le64(slba);
            cmd.rw.length = rt_cpu_to_le16(max_lbas - 1);

            if ((err = nvme_submit_io_cmd(nvme, &cmd)))
            {
                tlbas -= lbas;
                break;
            }

            lbas -= max_lbas;
            slba += max_lbas;
            buffer_dma += data_length;
        }

        if (prp_list)
        {
            rt_free_align(prp_list);
        }
    }

    return tlbas;
}

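/*
 * Block-device read path: if the caller's buffer does not meet the DMA
 * alignment the transfer mode requires (4-byte aligned for SGL,
 * page-aligned for PRP), a page-aligned bounce buffer is used and the
 * data is copied back after the cache invalidate.
 */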
static rt_ssize_t nvme_blk_read(struct rt_blk_disk *disk, rt_off_t sector,
        void *buffer, rt_size_t sector_count)
{
    rt_ssize_t res;
    rt_uint32_t page_bits;
    rt_size_t buffer_size;
    rt_ubase_t buffer_dma;
    void *temp_buffer = RT_NULL;
    struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk);
    struct rt_nvme_controller *nvme = ndev->ctrl;

    buffer_size = (1 << ndev->lba_shift) * sector_count;
    buffer_dma = (rt_ubase_t)rt_kmem_v2p(buffer);

    if ((nvme->sgl_mode && (buffer_dma & RT_GENMASK(1, 0))) ||
        (!nvme->sgl_mode && (buffer_dma & ARCH_PAGE_MASK)))
    {
        LOG_D("DMA direct %s buffer must be 4-byte or page aligned", "read");

        page_bits = rt_page_bits(buffer_size);
        temp_buffer = rt_pages_alloc(page_bits);

        if (!temp_buffer)
        {
            return -RT_ENOMEM;
        }

        buffer_dma = (rt_ubase_t)rt_kmem_v2p(temp_buffer);
    }

    res = nvme_blk_rw(ndev, sector, buffer_dma, sector_count, RT_NVME_CMD_READ);

    if (res > 0)
    {
        if (res != sector_count)
        {
            /*
             * Only recompute the size when the transfer came up short;
             * the equality check is cheaper than always doing the
             * multiplication.
             */
            buffer_size = res * (1 << ndev->lba_shift);
        }

        if (temp_buffer)
        {
            rt_hw_cpu_dcache_ops(RT_HW_CACHE_INVALIDATE, temp_buffer, buffer_size);
            rt_memcpy(buffer, temp_buffer, buffer_size);
        }
        else
        {
            rt_hw_cpu_dcache_ops(RT_HW_CACHE_INVALIDATE, buffer, buffer_size);
        }
    }

    if (temp_buffer)
    {
        rt_pages_free(temp_buffer, page_bits);
    }

    return res;
}

static rt_ssize_t nvme_blk_write(struct rt_blk_disk *disk, rt_off_t sector,
        const void *buffer, rt_size_t sector_count)
{
    rt_ssize_t res;
    rt_uint32_t page_bits;
    rt_size_t buffer_size;
    rt_ubase_t buffer_dma;
    void *temp_buffer = RT_NULL;
    struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk);
    struct rt_nvme_controller *nvme = ndev->ctrl;

    buffer_size = (1 << ndev->lba_shift) * sector_count;
    buffer_dma = (rt_ubase_t)rt_kmem_v2p((void *)buffer);

    if ((nvme->sgl_mode && (buffer_dma & RT_GENMASK(1, 0))) ||
        (!nvme->sgl_mode && (buffer_dma & ARCH_PAGE_MASK)))
    {
        LOG_D("DMA direct %s buffer must be 4-byte or page aligned", "write");

        page_bits = rt_page_bits(buffer_size);
        temp_buffer = rt_pages_alloc(page_bits);

        if (!temp_buffer)
        {
            return -RT_ENOMEM;
        }

        buffer_dma = (rt_ubase_t)rt_kmem_v2p(temp_buffer);

        rt_memcpy(temp_buffer, buffer, buffer_size);
        buffer = temp_buffer;
    }

    rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, (void *)buffer, buffer_size);

    res = nvme_blk_rw(ndev, sector, buffer_dma, sector_count, RT_NVME_CMD_WRITE);

    if (temp_buffer)
    {
        rt_pages_free(temp_buffer, page_bits);
    }

    return res;
}

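/*
 * Disk geometry comes straight from the Identify Namespace data: the
 * sector size is 2^lba_shift bytes and the capacity is NSZE sectors.
 */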
static rt_err_t nvme_blk_getgeome(struct rt_blk_disk *disk,
        struct rt_device_blk_geometry *geometry)
{
    struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk);

    geometry->bytes_per_sector = 1 << ndev->lba_shift;
    geometry->block_size = 1 << ndev->lba_shift;
    geometry->sector_count = rt_le64_to_cpu(ndev->id.nsze);

    return RT_EOK;
}

static rt_err_t nvme_blk_sync(struct rt_blk_disk *disk)
{
    struct rt_nvme_command cmd;
    struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk);

    rt_memset(&cmd, 0, sizeof(cmd));
    cmd.common.opcode = RT_NVME_CMD_FLUSH;
    cmd.common.nsid = rt_cpu_to_le32(ndev->nsid);

    return nvme_submit_io_cmd(ndev->ctrl, &cmd);
}

static rt_err_t nvme_blk_erase(struct rt_blk_disk *disk)
{
    rt_err_t err = RT_EOK;
    rt_ssize_t slba, lbas, max_lbas;
    struct rt_nvme_command cmd;
    struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk);
    struct rt_nvme_controller *nvme = ndev->ctrl;

    if (!nvme->write_zeroes)
    {
        return -RT_ENOSYS;
    }

    rt_memset(&cmd, 0, sizeof(cmd));
    cmd.write_zeroes.opcode = RT_NVME_CMD_WRITE_ZEROES;
    cmd.write_zeroes.nsid = rt_cpu_to_le32(ndev->nsid);

    slba = 0;
    lbas = rt_le64_to_cpu(ndev->id.nsze);
    max_lbas = 1 << (nvme->max_transfer_shift - ndev->lba_shift);

    while ((rt_ssize_t)lbas > 0)
    {
        if (lbas < max_lbas)
        {
            max_lbas = (rt_uint16_t)lbas;
        }

        /* SLBA is a 64-bit field */
        cmd.write_zeroes.slba = rt_cpu_to_le64(slba);
        cmd.write_zeroes.length = rt_cpu_to_le16(max_lbas - 1);

        if ((err = nvme_submit_io_cmd(nvme, &cmd)))
        {
            break;
        }

        lbas -= max_lbas;
        slba += max_lbas;
    }

    return err;
}

static rt_err_t nvme_blk_autorefresh(struct rt_blk_disk *disk, rt_bool_t is_auto)
{
    struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk);
    struct rt_nvme_controller *nvme = ndev->ctrl;

    if (nvme->volatile_write_cache & RT_NVME_CTRL_VWC_PRESENT)
    {
        return nvme_set_features_simple(nvme, RT_NVME_FEAT_VOLATILE_WC, !!is_auto);
    }
    else if (!is_auto)
    {
        return RT_EOK;
    }

    return -RT_ENOSYS;
}

static const struct rt_blk_disk_ops nvme_blk_ops =
{
    .read = nvme_blk_read,
    .write = nvme_blk_write,
    .getgeome = nvme_blk_getgeome,
    .sync = nvme_blk_sync,
    .erase = nvme_blk_erase,
    .autorefresh = nvme_blk_autorefresh,
};

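/*
 * Completion interrupt handler: a completion entry is new when its phase
 * bit matches the queue's expected phase; consume it, advance (and wrap)
 * the CQ head, ring the CQ head doorbell and wake the waiting submitter.
 */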
static void nvme_queue_isr(int irqno, void *param)
{
    rt_ubase_t level;
    rt_uint16_t head, phase, status;
    struct rt_nvme_queue *queue = param;
    struct rt_nvme_controller *nvme = queue->nvme;

    level = rt_spin_lock_irqsave(&queue->lock);

    head = queue->cq_head;
    phase = queue->cq_phase;
    status = HWREG16(&queue->cq_entry[head].status);
    status = rt_le16_to_cpu(status);

    if ((status & 0x01) == phase)
    {
        if ((status >> 1))
        {
            queue->err = -RT_EIO;
            goto _end_cmd;
        }

        if (nvme->ops->complete_cmd)
        {
            nvme->ops->complete_cmd(queue, queue->cmd);
        }

    _end_cmd:
        if (++head == queue->depth)
        {
            head = 0;
            phase = !phase;
        }

        HWREG32(queue->doorbell + nvme->doorbell_stride) = head;
        queue->cq_head = head;
        queue->cq_phase = phase;

        rt_completion_done(&queue->done);
    }

    rt_spin_unlock_irqrestore(&queue->lock, level);
}

static rt_err_t nvme_identify(struct rt_nvme_controller *nvme,
        rt_uint32_t nsid, rt_uint32_t cns, void *data)
{
    rt_err_t err;
    rt_uint32_t page_size = nvme->page_size;
    rt_ubase_t data_phy = (rt_ubase_t)rt_kmem_v2p(data);
    int offset = data_phy & (page_size - 1);
    struct rt_nvme_command cmd;

    rt_memset(&cmd, 0, sizeof(cmd));
    cmd.identify.opcode = RT_NVME_ADMIN_OPCODE_IDENTIFY;
    cmd.identify.nsid = rt_cpu_to_le32(nsid);
    cmd.identify.prp1 = rt_cpu_to_le64(data_phy);

    if (sizeof(struct rt_nvme_id_ctrl) <= page_size - offset)
    {
        cmd.identify.prp2 = 0;
    }
    else
    {
        data_phy += (page_size - offset);
        cmd.identify.prp2 = rt_cpu_to_le64(data_phy);
    }
    cmd.identify.cns = rt_cpu_to_le32(cns);

    rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, data, sizeof(struct rt_nvme_id_ctrl));

    if (!(err = nvme_submit_cmd(&nvme->admin_queue, &cmd)))
    {
        rt_hw_cpu_dcache_ops(RT_HW_CACHE_INVALIDATE, data, sizeof(struct rt_nvme_id_ctrl));
    }

    return err;
}

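/*
 * Create an I/O completion or submission queue through the admin queue.
 * The completion queue must exist before the submission queue that is
 * bound to it (both use the same qid here).
 */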
static rt_err_t nvme_attach_queue(struct rt_nvme_queue *queue, rt_uint8_t opcode)
{
    struct rt_nvme_command cmd;
    struct rt_nvme_controller *nvme = queue->nvme;
    rt_uint16_t flags = RT_NVME_QUEUE_PHYS_CONTIG;

    rt_memset(&cmd, 0, sizeof(cmd));

    if (opcode == RT_NVME_ADMIN_OPCODE_CREATE_CQ)
    {
        cmd.create_cq.opcode = opcode;
        cmd.create_cq.prp1 = rt_cpu_to_le64(queue->cq_entry_phy);
        cmd.create_cq.cqid = rt_cpu_to_le16(queue->qid);
        cmd.create_cq.qsize = rt_cpu_to_le16(queue->depth - 1);
        cmd.create_cq.cq_flags = rt_cpu_to_le16(flags | RT_NVME_CQ_IRQ_ENABLED);
        cmd.create_cq.irq_vector = rt_cpu_to_le16(nvme->irqs_nr > 1 ? queue->qid : 0);
    }
    else if (opcode == RT_NVME_ADMIN_OPCODE_CREATE_SQ)
    {
        cmd.create_sq.opcode = opcode;
        cmd.create_sq.prp1 = rt_cpu_to_le64(queue->sq_cmds_phy);
        cmd.create_sq.sqid = rt_cpu_to_le16(queue->qid);
        cmd.create_sq.qsize = rt_cpu_to_le16(queue->depth - 1);
        cmd.create_sq.sq_flags = rt_cpu_to_le16(flags | RT_NVME_SQ_PRIO_MEDIUM);
        cmd.create_sq.cqid = rt_cpu_to_le16(queue->qid);
    }
    else
    {
        LOG_E("Unsupported queue create opcode = %x", opcode);
        RT_ASSERT(0);
    }

    return nvme_submit_cmd(&nvme->admin_queue, &cmd);
}

rt_inline rt_err_t nvme_attach_queue_sq(struct rt_nvme_queue *queue)
{
    return nvme_attach_queue(queue, RT_NVME_ADMIN_OPCODE_CREATE_SQ);
}

rt_inline rt_err_t nvme_attach_queue_cq(struct rt_nvme_queue *queue)
{
    return nvme_attach_queue(queue, RT_NVME_ADMIN_OPCODE_CREATE_CQ);
}

static rt_err_t nvme_detach_queue(struct rt_nvme_queue *queue,
        rt_uint8_t opcode)
{
    struct rt_nvme_command cmd;
    struct rt_nvme_controller *nvme = queue->nvme;

    rt_memset(&cmd, 0, sizeof(cmd));
    cmd.delete_queue.opcode = opcode;
    cmd.delete_queue.qid = rt_cpu_to_le16(queue->qid);

    return nvme_submit_cmd(&nvme->admin_queue, &cmd);
}

rt_inline rt_ubase_t nvme_queue_dma_flags(void)
{
    return RT_DMA_F_NOCACHE | RT_DMA_F_LINEAR;
}

static void nvme_free_queue(struct rt_nvme_queue *queue)
{
    rt_ubase_t dma_flags;
    struct rt_nvme_controller *nvme = queue->nvme;

    if (nvme->ops->cleanup_queue)
    {
        rt_err_t err;

        /* Only report a cleanup failure, not a successful cleanup */
        if ((err = nvme->ops->cleanup_queue(queue)))
        {
            LOG_W("Cleanup[%s] queue error = %s", nvme->ops->name, rt_strerror(err));
        }
    }

    dma_flags = nvme_queue_dma_flags();

    if (queue->sq_cmds)
    {
        rt_dma_free(nvme->dev, sizeof(*queue->sq_cmds) * queue->depth,
                queue->sq_cmds, queue->sq_cmds_phy, dma_flags);
    }

    if (queue->cq_entry)
    {
        rt_dma_free(nvme->dev, sizeof(*queue->cq_entry) * queue->depth,
                queue->cq_entry, queue->cq_entry_phy, dma_flags);
    }
}

static struct rt_nvme_queue *nvme_alloc_queue(struct rt_nvme_controller *nvme,
        int qid, int depth)
{
    rt_err_t err;
    rt_ubase_t dma_flags;
    struct rt_nvme_queue *queue = &nvme->queue[qid];

    rt_memset(queue, 0, sizeof(*queue));

    queue->nvme = nvme;
    queue->doorbell = &nvme->doorbell_tbl[qid * 2 * nvme->doorbell_stride];
    queue->qid = qid;
    queue->depth = depth;
    queue->cq_head = 0;
    queue->cq_phase = 1;
    rt_completion_init(&queue->done);
    rt_spin_lock_init(&queue->lock);

    dma_flags = nvme_queue_dma_flags();

    /* struct rt_nvme_command */
    queue->sq_cmds = rt_dma_alloc(nvme->dev,
            sizeof(*queue->sq_cmds) * depth, &queue->sq_cmds_phy, dma_flags);

    if (!queue->sq_cmds)
    {
        err = -RT_ENOMEM;
        goto _fail;
    }

    /* struct rt_nvme_completion */
    queue->cq_entry = rt_dma_alloc(nvme->dev,
            sizeof(*queue->cq_entry) * depth, &queue->cq_entry_phy, dma_flags);

    if (!queue->cq_entry)
    {
        err = -RT_ENOMEM;
        goto _fail;
    }

    rt_memset(queue->sq_cmds, 0, sizeof(struct rt_nvme_command) * depth);
    rt_memset(queue->cq_entry, 0, sizeof(struct rt_nvme_completion) * depth);

    if (nvme->ops->setup_queue)
    {
        /* A non-zero return value from the setup hook is an error */
        if ((err = nvme->ops->setup_queue(queue)))
        {
            LOG_E("Setup[%s] queue error = %s", nvme->ops->name, rt_strerror(err));

            goto _fail;
        }
    }

    return queue;

_fail:
    nvme_free_queue(queue);

    return rt_err_ptr(err);
}

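/*
 * Bring up the admin queue: pick a page size both the host and the
 * controller support, disable the controller, program AQA/ASQ/ACQ with
 * the admin queue size and its SQ/CQ addresses, re-enable the controller
 * and hook up the admin completion interrupt.
 */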
static rt_err_t nvme_configure_admin_queue(struct rt_nvme_controller *nvme)
{
    rt_err_t err;
    int irq;
    char name[RT_NAME_MAX];
    rt_uint32_t aqa;
    rt_uint32_t page_shift = ARCH_PAGE_SHIFT;
    rt_uint32_t page_min = RT_NVME_CAP_MPSMIN(nvme->cap) + 12;
    rt_uint32_t page_max = RT_NVME_CAP_MPSMAX(nvme->cap) + 12;
    struct rt_nvme_queue *admin_queue;

    if (page_shift < page_min)
    {
        LOG_E("Device %s page size (%u) %s than host (%u)",
                "minimum", 1 << page_min, "larger", 1 << page_shift);
        return -RT_EINVAL;
    }

    if (page_shift > page_max)
    {
        LOG_W("Device %s page size (%u) %s than host (%u)",
                "maximum", 1 << page_max, "smaller", 1 << page_shift);
        page_shift = page_max;
    }

    if ((err = nvme_disable_ctrl(nvme)))
    {
        return err;
    }

    admin_queue = nvme_alloc_queue(nvme, 0, RT_NVME_AQ_DEPTH);

    if (rt_is_err(admin_queue))
    {
        return rt_ptr_err(admin_queue);
    }

    aqa = admin_queue->depth - 1;
    aqa |= aqa << 16;

    nvme->page_shift = page_shift;
    nvme->page_size = 1U << page_shift;

    nvme->ctrl_config = RT_NVME_CC_CSS_NVM;
    nvme->ctrl_config |= (page_shift - 12) << RT_NVME_CC_MPS_SHIFT;
    nvme->ctrl_config |= RT_NVME_CC_ARB_RR | RT_NVME_CC_SHN_NONE;
    nvme->ctrl_config |= RT_NVME_CC_IOSQES | RT_NVME_CC_IOCQES;

    nvme_writel(nvme, RT_NVME_REG_AQA, aqa);
    nvme_writeq(nvme, RT_NVME_REG_ASQ, admin_queue->sq_cmds_phy);
    nvme_writeq(nvme, RT_NVME_REG_ACQ, admin_queue->cq_entry_phy);

    if ((err = nvme_enable_ctrl(nvme)))
    {
        nvme_free_queue(admin_queue);

        return err;
    }

    irq = nvme->irqs[0];

    rt_snprintf(name, RT_NAME_MAX, "%s-admin-queue", nvme->name);

    rt_hw_interrupt_install(irq, nvme_queue_isr, &nvme->admin_queue, name);
    rt_hw_interrupt_umask(irq);

    return RT_EOK;
}

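/*
 * Create the I/O queue pairs. The Number of Queues feature takes 0-based
 * counts in DWORD11 (requested SQ count in the low 16 bits, CQ count in
 * the high 16 bits); each queue's interrupt is then pinned to one CPU
 * where possible.
 */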
static rt_err_t nvme_setup_io_queues(struct rt_nvme_controller *nvme)
{
    rt_err_t err;
    rt_uint32_t value;
    int irq, cpuid = 0;
    char name[RT_NAME_MAX];
    rt_bool_t affinity_fixup = RT_FALSE;
    RT_DECLARE_IRQ_AFFINITY(affinity) = { 0 };
    struct rt_nvme_queue *queue;

    nvme->io_queue_max = nvme->irqs_nr > 1 ? nvme->irqs_nr - 1 : 1;
    value = (nvme->io_queue_max - 1) | ((nvme->io_queue_max - 1) << 16);

    if ((err = nvme_set_features_simple(nvme, RT_NVME_FEAT_NUM_QUEUES, value)))
    {
        return err;
    }

    for (int i = 0, q_idx = 1; i < nvme->io_queue_max; ++i, ++q_idx)
    {
        queue = nvme_alloc_queue(nvme, q_idx, nvme->queue_depth);

        /* nvme_alloc_queue() returns an error pointer, not RT_NULL, on failure */
        if (rt_is_err(queue))
        {
            return rt_ptr_err(queue);
        }

        if ((err = nvme_attach_queue_cq(queue)) ||
            (err = nvme_attach_queue_sq(queue)))
        {
            return err;
        }
    }

    for (int i = 0, irq_idx = 1; i < nvme->io_queue_max; ++i, ++irq_idx)
    {
        irq = nvme->irqs[irq_idx % nvme->irqs_nr];

        rt_snprintf(name, RT_NAME_MAX, "%s-io-queue%d", nvme->name, i);

        if (!affinity_fixup)
        {
            RT_IRQ_AFFINITY_SET(affinity, cpuid % RT_CPUS_NR);
            if (rt_pic_irq_set_affinity(irq, affinity))
            {
                /* Fixup in secondary CPU startup */
                affinity_fixup = RT_TRUE;
            }
            RT_IRQ_AFFINITY_CLEAR(affinity, cpuid++ % RT_CPUS_NR);
        }

        rt_hw_interrupt_install(irq, nvme_queue_isr, &nvme->io_queues[i], name);
        rt_hw_interrupt_umask(irq);
    }

    return RT_EOK;
}

static void nvme_remove_io_queues(struct rt_nvme_controller *nvme)
{
    int irq;
    struct rt_nvme_queue *queue;

    for (int i = 0, irq_idx = 1; i < nvme->io_queue_max; ++i, ++irq_idx)
    {
        queue = &nvme->io_queues[i];

        nvme_detach_queue(queue, RT_NVME_ADMIN_OPCODE_DELETE_SQ);
        nvme_detach_queue(queue, RT_NVME_ADMIN_OPCODE_DELETE_CQ);
        nvme_free_queue(queue);

        irq = nvme->irqs[irq_idx % nvme->irqs_nr];

        rt_hw_interrupt_mask(irq);
        rt_pic_detach_irq(irq, queue);
    }
}

static void nvme_remove_admin_queues(struct rt_nvme_controller *nvme)
{
    int irq = nvme->irqs[0];

    rt_hw_interrupt_mask(irq);
    rt_pic_detach_irq(irq, &nvme->admin_queue);

    nvme_free_queue(&nvme->admin_queue);
}

static void nvme_remove_devices(struct rt_nvme_controller *nvme)
{
    struct rt_nvme_device *ndev, *next_ndev;

    rt_list_for_each_entry_safe(ndev, next_ndev, &nvme->ns_nodes, list)
    {
        rt_list_remove(&ndev->list);

        rt_hw_blk_disk_unregister(&ndev->parent);
        rt_free(ndev);
    }
}

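/*
 * Scan the controller's namespaces (NSIDs are 1-based): identify each
 * one, skip namespaces with a zero size, and register the rest as block
 * disks named "<controller>n<NSID>".
 */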
static rt_err_t nvme_scan_device(struct rt_nvme_controller *nvme,
        rt_size_t number_of_ns)
{
    rt_err_t err = RT_EOK;
    rt_uint32_t lbaf;
    struct rt_nvme_id_ns *id = RT_NULL;

    if (!(id = rt_malloc_align(sizeof(*id), nvme->page_size)))
    {
        return -RT_ENOMEM;
    }

    /* NVMe namespace IDs start from 1 */
    for (rt_uint32_t nsid = 1; nsid <= number_of_ns; ++nsid)
    {
        struct rt_nvme_device *ndev = rt_calloc(1, sizeof(*ndev));

        if (!ndev)
        {
            err = -RT_ENOMEM;
            goto _free_res;
        }

        rt_memset(id, 0, sizeof(*id));
        if ((err = nvme_identify(nvme, nsid, 0, id)))
        {
            goto _free_res;
        }

        if (!id->nsze)
        {
            /* Inactive namespace, don't leak the unused device */
            rt_free(ndev);
            continue;
        }

        ndev->ctrl = nvme;

        rt_memcpy(&ndev->id, id, sizeof(ndev->id));
        lbaf = id->flbas & RT_NVME_NS_FLBAS_LBA_MASK;
        lbaf |= ((id->flbas & RT_NVME_NS_FLBAS_LBA_UMASK) >> RT_NVME_NS_FLBAS_LBA_SHIFT);

        ndev->nsid = nsid;
        ndev->lba_shift = id->lbaf[lbaf].ds;

        ndev->parent.ida = &nvme_ida;
        ndev->parent.parallel_io = RT_TRUE;
        ndev->parent.ops = &nvme_blk_ops;
        ndev->parent.max_partitions = RT_BLK_PARTITION_MAX;
        rt_dm_dev_set_name(&ndev->parent.parent, "%sn%u", nvme->name, nsid);

        if ((err = rt_hw_blk_disk_register(&ndev->parent)))
        {
            goto _free_res;
        }

        rt_list_init(&ndev->list);
        rt_list_insert_before(&nvme->ns_nodes, &ndev->list);
    }

_free_res:
    rt_free_align(id);

    return err;
}

rt_inline rt_size_t strip_len(const char *str, rt_size_t max_len)
{
    rt_size_t size = 0;

    for (int i = 0; *str && i < max_len; ++i, ++str)
    {
        if (*str != ' ')
        {
            size = i + 1;
        }
    }

    return size;
}

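/*
 * Register a controller: read CAP, set up the admin queue and the I/O
 * queues, identify the controller, then scan and register its
 * namespaces. A transport driver (PCIe, for example) is expected to fill
 * in "regs", "irqs"/"irqs_nr", "dev" and "ops" before calling this; the
 * sketch below is illustrative only (placeholder names, not a real
 * transport):
 *
 *     nvme->regs = mapped_bar0;
 *     nvme->dev = transport_device;
 *     nvme->irqs = irq_table;
 *     nvme->irqs_nr = irq_count;
 *     nvme->ops = &transport_nvme_ops;
 *     rt_nvme_controller_register(nvme);
 */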
rt_err_t rt_nvme_controller_register(struct rt_nvme_controller *nvme)
{
    rt_err_t err;
    struct rt_nvme_id_ctrl *ctrl = RT_NULL;

    if (!nvme || !nvme->ops)
    {
        return -RT_EINVAL;
    }

    if (nvme_readl(nvme, RT_NVME_REG_CSTS) == (rt_uint32_t)-1)
    {
        LOG_E("Controller registers are not accessible");

        return -RT_EINVAL;
    }

    if ((nvme->nvme_id = rt_dm_ida_alloc(&nvme_controller_ida)) < 0)
    {
        return -RT_EFULL;
    }

    rt_snprintf(nvme->name, RT_NAME_MAX, "nvme%u", nvme->nvme_id);

    nvme->cap = nvme_readq(nvme, RT_NVME_REG_CAP);
    nvme->queue_depth = RT_NVME_CAP_MQES(nvme->cap) + 1;
    nvme->doorbell_stride = 1 << RT_NVME_CAP_STRIDE(nvme->cap);
    nvme->doorbell_tbl = nvme->regs + RT_NVME_REG_DBS;

    if ((err = nvme_configure_admin_queue(nvme)))
    {
        LOG_E("Configure admin queue error = %s", rt_strerror(err));
        goto _free_admin_queue;
    }

    if ((err = nvme_setup_io_queues(nvme)))
    {
        LOG_E("Unable to setup I/O queues error = %s", rt_strerror(err));
        goto _free_admin_queue;
    }

    if (!(ctrl = rt_malloc_align(sizeof(*ctrl), nvme->page_size)))
    {
        err = -RT_ENOMEM;
        goto _fail;
    }

    if ((err = nvme_identify(nvme, 0, 1, ctrl)))
    {
        goto _fail;
    }

    if (ctrl->mdts)
    {
        nvme->max_transfer_shift = ctrl->mdts + (RT_NVME_CAP_MPSMIN(nvme->cap) + 12);
    }
    else
    {
        /* 1MB is recommended. */
        nvme->max_transfer_shift = 20;
    }
    nvme->volatile_write_cache = ctrl->vwc;
    nvme->write_zeroes = !!(rt_le64_to_cpu(ctrl->oncs) & RT_NVME_CTRL_ONCS_WRITE_ZEROES);

    if ((rt_le32_to_cpu(ctrl->sgls) & RT_NVME_ID_SGL_SUPPORT_MASK))
    {
        nvme->sgl_mode = RT_NVME_PSDT_SGL_MPTR_SGL;
    }

    LOG_I("NVM Express v%d.%d (%s, %-*.s, %-*.s)",
            nvme_readl(nvme, RT_NVME_REG_VS) >> 16,
            nvme_readl(nvme, RT_NVME_REG_VS) & 0xff,
            nvme->ops->name,
            strip_len(ctrl->mn, sizeof(ctrl->mn)), ctrl->mn,
            strip_len(ctrl->fr, sizeof(ctrl->fr)), ctrl->fr);

    rt_list_init(&nvme->ns_nodes);
    if ((err = nvme_scan_device(nvme, rt_le32_to_cpu(ctrl->nn))))
    {
        goto _fail;
    }

    rt_free_align(ctrl);

    rt_spin_lock(&nvme_lock);
    rt_list_insert_after(&nvme_nodes, &nvme->list);
    rt_spin_unlock(&nvme_lock);

    return RT_EOK;

_fail:
    if (ctrl)
    {
        rt_free_align(ctrl);
    }
    nvme_remove_devices(nvme);
    nvme_remove_io_queues(nvme);
_free_admin_queue:
    nvme_remove_admin_queues(nvme);

    rt_dm_ida_free(&nvme_controller_ida, nvme->nvme_id);

    return err;
}

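/*
 * Unregister: drop the controller from the global list, tear down the
 * namespaces and queues, then request a normal shutdown (CC.SHN) and
 * finally disable the controller.
 */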
rt_err_t rt_nvme_controller_unregister(struct rt_nvme_controller *nvme)
{
    rt_err_t err;

    if (!nvme)
    {
        return -RT_EINVAL;
    }

    rt_spin_lock(&nvme_lock);
    rt_list_remove(&nvme->list);
    rt_spin_unlock(&nvme_lock);

    nvme_remove_devices(nvme);
    nvme_remove_io_queues(nvme);
    nvme_remove_admin_queues(nvme);

    rt_dm_ida_free(&nvme_controller_ida, nvme->nvme_id);

    if (!(err = nvme_shutdown_ctrl(nvme)))
    {
        err = nvme_disable_ctrl(nvme);
    }
    else
    {
        LOG_E("%s: shutdown error = %s", nvme->name, rt_strerror(err));
    }

    return err;
}

/*
 * NVMe I/O queues are meant to be per-CPU. Fix up the IRQ affinity after
 * the secondary CPUs start up, the stage at which setting the affinity
 * is most likely to succeed.
 */
static int nvme_queue_affinify_fixup(void)
{
    int cpuid = rt_hw_cpu_id();
    struct rt_nvme_controller *nvme;
    RT_DECLARE_IRQ_AFFINITY(affinity) = { 0 };
    RT_DECLARE_IRQ_AFFINITY(current_affinity) = { 0 };

    RT_IRQ_AFFINITY_SET(affinity, cpuid);

    rt_hw_spin_lock(&nvme_lock.lock);
    rt_list_for_each_entry(nvme, &nvme_nodes, list)
    {
        for (int i = cpuid % RT_CPUS_NR; i < nvme->io_queue_max; i += RT_CPUS_NR)
        {
            int irq = nvme->irqs[i];

            if (!rt_pic_irq_get_affinity(irq, current_affinity) &&
                !rt_bitmap_test_bit(current_affinity, cpuid))
            {
                rt_ubase_t level = rt_hw_interrupt_disable();

                rt_pic_irq_set_affinity(irq, affinity);

                rt_hw_interrupt_enable(level);
            }
        }
    }
    rt_hw_spin_unlock(&nvme_lock.lock);

    return 0;
}
INIT_SECONDARY_CPU_EXPORT(nvme_queue_affinify_fixup);