diff --git a/components/drivers/Kconfig b/components/drivers/Kconfig index b940c482e8..56eec40d01 100755 --- a/components/drivers/Kconfig +++ b/components/drivers/Kconfig @@ -22,6 +22,7 @@ rsource "graphic/Kconfig" rsource "hwcrypto/Kconfig" rsource "wlan/Kconfig" rsource "block/Kconfig" +rsource "nvme/Kconfig" rsource "scsi/Kconfig" rsource "virtio/Kconfig" rsource "dma/Kconfig" diff --git a/components/drivers/include/drivers/nvme.h b/components/drivers/include/drivers/nvme.h new file mode 100644 index 0000000000..1eca5d9caa --- /dev/null +++ b/components/drivers/include/drivers/nvme.h @@ -0,0 +1,899 @@ +/* + * Copyright (c) 2006-2023, RT-Thread Development Team + * + * SPDX-License-Identifier: Apache-2.0 + * + * Change Logs: + * Date Author Notes + * 2023-02-25 GuEe-GUI the first version + */ + +#ifndef __NVME_H__ +#define __NVME_H__ + +#include +#include +#include + +#define NVME_RSVD(offset, bytes_size) rt_uint8_t __rsvd##offset[bytes_size] + +enum +{ + /* + * Generic Command Status: + */ + RT_NVME_SC_SUCCESS = 0x0, + RT_NVME_SC_INVALID_OPCODE = 0x1, + RT_NVME_SC_INVALID_FIELD = 0x2, + RT_NVME_SC_CMDID_CONFLICT = 0x3, + RT_NVME_SC_DATA_XFER_ERROR = 0x4, + RT_NVME_SC_POWER_LOSS = 0x5, + RT_NVME_SC_INTERNAL = 0x6, + RT_NVME_SC_ABORT_REQ = 0x7, + RT_NVME_SC_ABORT_QUEUE = 0x8, + RT_NVME_SC_FUSED_FAIL = 0x9, + RT_NVME_SC_FUSED_MISSING = 0xa, + RT_NVME_SC_INVALID_NS = 0xb, + RT_NVME_SC_CMD_SEQ_ERROR = 0xc, + RT_NVME_SC_SGL_INVALID_LAST = 0xd, + RT_NVME_SC_SGL_INVALID_COUNT = 0xe, + RT_NVME_SC_SGL_INVALID_DATA = 0xf, + RT_NVME_SC_SGL_INVALID_METADATA = 0x10, + RT_NVME_SC_SGL_INVALID_TYPE = 0x11, + RT_NVME_SC_CMB_INVALID_USE = 0x12, + RT_NVME_SC_PRP_INVALID_OFFSET = 0x13, + RT_NVME_SC_ATOMIC_WU_EXCEEDED = 0x14, + RT_NVME_SC_OP_DENIED = 0x15, + RT_NVME_SC_SGL_INVALID_OFFSET = 0x16, + RT_NVME_SC_RESERVED = 0x17, + RT_NVME_SC_HOST_ID_INCONSIST = 0x18, + RT_NVME_SC_KA_TIMEOUT_EXPIRED = 0x19, + RT_NVME_SC_KA_TIMEOUT_INVALID = 0x1a, + RT_NVME_SC_ABORTED_PREEMPT_ABORT = 0x1b, + RT_NVME_SC_SANITIZE_FAILED = 0x1c, + RT_NVME_SC_SANITIZE_IN_PROGRESS = 0x1d, + RT_NVME_SC_SGL_INVALID_GRANULARITY = 0x1e, + RT_NVME_SC_CMD_NOT_SUP_CMB_QUEUE = 0x1f, + RT_NVME_SC_NS_WRITE_PROTECTED = 0x20, + RT_NVME_SC_CMD_INTERRUPTED = 0x21, + RT_NVME_SC_TRANSIENT_TR_ERR = 0x22, + RT_NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY = 0x24, + RT_NVME_SC_INVALID_IO_CMD_SET = 0x2c, + + RT_NVME_SC_LBA_RANGE = 0x80, + RT_NVME_SC_CAP_EXCEEDED = 0x81, + RT_NVME_SC_NS_NOT_READY = 0x82, + RT_NVME_SC_RESERVATION_CONFLICT = 0x83, + RT_NVME_SC_FORMAT_IN_PROGRESS = 0x84, + + /* + * Command Specific Status: + */ + RT_NVME_SC_CQ_INVALID = 0x100, + RT_NVME_SC_QID_INVALID = 0x101, + RT_NVME_SC_QUEUE_SIZE = 0x102, + RT_NVME_SC_ABORT_LIMIT = 0x103, + RT_NVME_SC_ABORT_MISSING = 0x104, + RT_NVME_SC_ASYNC_LIMIT = 0x105, + RT_NVME_SC_FIRMWARE_SLOT = 0x106, + RT_NVME_SC_FIRMWARE_IMAGE = 0x107, + RT_NVME_SC_INVALID_VECTOR = 0x108, + RT_NVME_SC_INVALID_LOG_PAGE = 0x109, + RT_NVME_SC_INVALID_FORMAT = 0x10a, + RT_NVME_SC_FW_NEEDS_CONV_RESET = 0x10b, + RT_NVME_SC_INVALID_QUEUE = 0x10c, + RT_NVME_SC_FEATURE_NOT_SAVEABLE = 0x10d, + RT_NVME_SC_FEATURE_NOT_CHANGEABLE = 0x10e, + RT_NVME_SC_FEATURE_NOT_PER_NS = 0x10f, + RT_NVME_SC_FW_NEEDS_SUBSYS_RESET = 0x110, + RT_NVME_SC_FW_NEEDS_RESET = 0x111, + RT_NVME_SC_FW_NEEDS_MAX_TIME = 0x112, + RT_NVME_SC_FW_ACTIVATE_PROHIBITED = 0x113, + RT_NVME_SC_OVERLAPPING_RANGE = 0x114, + RT_NVME_SC_NS_INSUFFICIENT_CAP = 0x115, + RT_NVME_SC_NS_ID_UNAVAILABLE = 0x116, + RT_NVME_SC_NS_ALREADY_ATTACHED = 0x118, + RT_NVME_SC_NS_IS_PRIVATE = 
0x119, + RT_NVME_SC_NS_NOT_ATTACHED = 0x11a, + RT_NVME_SC_THIN_PROV_NOT_SUPP = 0x11b, + RT_NVME_SC_CTRL_LIST_INVALID = 0x11c, + RT_NVME_SC_SELT_TEST_IN_PROGRESS = 0x11d, + RT_NVME_SC_BP_WRITE_PROHIBITED = 0x11e, + RT_NVME_SC_CTRL_ID_INVALID = 0x11f, + RT_NVME_SC_SEC_CTRL_STATE_INVALID = 0x120, + RT_NVME_SC_CTRL_RES_NUM_INVALID = 0x121, + RT_NVME_SC_RES_ID_INVALID = 0x122, + RT_NVME_SC_PMR_SAN_PROHIBITED = 0x123, + RT_NVME_SC_ANA_GROUP_ID_INVALID = 0x124, + RT_NVME_SC_ANA_ATTACH_FAILED = 0x125, + + /* + * I/O Command Set Specific - NVM commands: + */ + RT_NVME_SC_BAD_ATTRIBUTES = 0x180, + RT_NVME_SC_INVALID_PI = 0x181, + RT_NVME_SC_READ_ONLY = 0x182, + RT_NVME_SC_ONCS_NOT_SUPPORTED = 0x183, + + /* + * I/O Command Set Specific - Fabrics commands: + */ + RT_NVME_SC_CONNECT_FORMAT = 0x180, + RT_NVME_SC_CONNECT_CTRL_BUSY = 0x181, + RT_NVME_SC_CONNECT_INVALID_PARAM = 0x182, + RT_NVME_SC_CONNECT_RESTART_DISC = 0x183, + RT_NVME_SC_CONNECT_INVALID_HOST = 0x184, + + RT_NVME_SC_DISCOVERY_RESTART = 0x190, + RT_NVME_SC_AUTH_REQUIRED = 0x191, + + /* + * I/O Command Set Specific - Zoned commands: + */ + RT_NVME_SC_ZONE_BOUNDARY_ERROR = 0x1b8, + RT_NVME_SC_ZONE_FULL = 0x1b9, + RT_NVME_SC_ZONE_READ_ONLY = 0x1ba, + RT_NVME_SC_ZONE_OFFLINE = 0x1bb, + RT_NVME_SC_ZONE_INVALID_WRITE = 0x1bc, + RT_NVME_SC_ZONE_TOO_MANY_ACTIVE = 0x1bd, + RT_NVME_SC_ZONE_TOO_MANY_OPEN = 0x1be, + RT_NVME_SC_ZONE_INVALID_TRANSITION = 0x1bf, + + /* + * Media and Data Integrity Errors: + */ + RT_NVME_SC_WRITE_FAULT = 0x280, + RT_NVME_SC_READ_ERROR = 0x281, + RT_NVME_SC_GUARD_CHECK = 0x282, + RT_NVME_SC_APPTAG_CHECK = 0x283, + RT_NVME_SC_REFTAG_CHECK = 0x284, + RT_NVME_SC_COMPARE_FAILED = 0x285, + RT_NVME_SC_ACCESS_DENIED = 0x286, + RT_NVME_SC_UNWRITTEN_BLOCK = 0x287, + + /* + * Path-related Errors: + */ + RT_NVME_SC_INTERNAL_PATH_ERROR = 0x300, + RT_NVME_SC_ANA_PERSISTENT_LOSS = 0x301, + RT_NVME_SC_ANA_INACCESSIBLE = 0x302, + RT_NVME_SC_ANA_TRANSITION = 0x303, + RT_NVME_SC_CTRL_PATH_ERROR = 0x360, + RT_NVME_SC_HOST_PATH_ERROR = 0x370, + RT_NVME_SC_HOST_ABORTED_CMD = 0x371, + + RT_NVME_SC_CRD = 0x1800, + RT_NVME_SC_MORE = 0x2000, + RT_NVME_SC_DNR = 0x4000, +}; + +/* Admin commands */ +enum +{ + RT_NVME_ADMIN_OPCODE_DELETE_SQ = 0x00, + RT_NVME_ADMIN_OPCODE_CREATE_SQ = 0x01, + RT_NVME_ADMIN_OPCODE_GET_LOG_PAGE = 0x02, + RT_NVME_ADMIN_OPCODE_DELETE_CQ = 0x04, + RT_NVME_ADMIN_OPCODE_CREATE_CQ = 0x05, + RT_NVME_ADMIN_OPCODE_IDENTIFY = 0x06, + RT_NVME_ADMIN_OPCODE_ABORT_CMD = 0x08, + RT_NVME_ADMIN_OPCODE_SET_FEATURES = 0x09, + RT_NVME_ADMIN_OPCODE_GET_FEATURES = 0x0a, + RT_NVME_ADMIN_OPCODE_ASYNC_EVENT = 0x0c, + RT_NVME_ADMIN_OPCODE_NS_MGMT = 0x0d, + RT_NVME_ADMIN_OPCODE_ACTIVATE_FW = 0x10, + RT_NVME_ADMIN_OPCODE_DOWNLOAD_FW = 0x11, + RT_NVME_ADMIN_OPCODE_DEV_SELF_TEST = 0x14, + RT_NVME_ADMIN_OPCODE_NS_ATTACH = 0x15, + RT_NVME_ADMIN_OPCODE_KEEP_ALIVE = 0x18, + RT_NVME_ADMIN_OPCODE_DIRECTIVE_SEND = 0x19, + RT_NVME_ADMIN_OPCODE_DIRECTIVE_RECV = 0x1a, + RT_NVME_ADMIN_OPCODE_VIRTUAL_MGMT = 0x1c, + RT_NVME_ADMIN_OPCODE_NVME_MI_SEND = 0x1d, + RT_NVME_ADMIN_OPCODE_NVME_MI_RECV = 0x1e, + RT_NVME_ADMIN_OPCODE_DBBUF = 0x7c, + RT_NVME_ADMIN_OPCODE_FORMAT_NVM = 0x80, + RT_NVME_ADMIN_OPCODE_SECURITY_SEND = 0x81, + RT_NVME_ADMIN_OPCODE_SECURITY_RECV = 0x82, + RT_NVME_ADMIN_OPCODE_SANITIZE_NVM = 0x84, + RT_NVME_ADMIN_OPCODE_GET_LBA_STATUS = 0x86, + RT_NVME_ADMIN_OPCODE_VENDOR_START = 0xc0, +}; + +/* I/O commands */ +enum +{ + RT_NVME_CMD_FLUSH = 0x00, + RT_NVME_CMD_WRITE = 0x01, + RT_NVME_CMD_READ = 0x02, + RT_NVME_CMD_WRITE_UNCOR = 0x04, + 
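+    /* Of this set, the block layer in nvme.c issues FLUSH, READ, WRITE and
+     * WRITE_ZEROES; the remaining opcodes are listed for completeness. */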
RT_NVME_CMD_COMPARE = 0x05, + RT_NVME_CMD_WRITE_ZEROES = 0x08, + RT_NVME_CMD_DSM = 0x09, + RT_NVME_CMD_VERIFY = 0x0c, + RT_NVME_CMD_RESV_REGISTER = 0x0d, + RT_NVME_CMD_RESV_REPORT = 0x0e, + RT_NVME_CMD_RESV_ACQUIRE = 0x11, + RT_NVME_CMD_RESV_RELEASE = 0x15, + RT_NVME_CMD_ZONE_MGMT_SEND = 0x79, + RT_NVME_CMD_ZONE_MGMT_RECV = 0x7a, + RT_NVME_CMD_ZONE_APPEND = 0x7d, + RT_NVME_CMD_VENDOR_START = 0x80, +}; + +enum +{ + RT_NVME_PSDT_PRP = 0x0, + RT_NVME_PSDT_SGL_MPTR_CONTIGUOUS = 0x1, + RT_NVME_PSDT_SGL_MPTR_SGL = 0x2, +}; + +/* Commands flags */ +enum +{ + RT_NVME_CMD_FLAGS_FUSE_SHIFT = 0x00, + RT_NVME_CMD_FLAGS_PSDT_SHIFT = 0x06, +}; + +struct rt_nvme_command_common +{ + rt_uint8_t opcode; + rt_uint8_t flags; + rt_uint16_t cmdid; + rt_le32_t nsid; + rt_le32_t cmd_dw2[2]; + rt_le64_t metadata; + rt_le64_t prp1; + rt_le64_t prp2; + rt_le32_t cmd_dw10[6]; +}; + +rt_packed(struct rt_nvme_sgl_desc +{ + rt_le64_t adddress; + rt_le32_t length; + rt_uint8_t reserved[3]; +#define SGL_DESC_TYPE_DATA_BLOCK 0x0 +#define SGL_DESC_TYPE_BIT_BUCKET 0x1 +#define SGL_DESC_TYPE_SEGMENT 0x2 +#define SGL_DESC_TYPE_LAST_SEGMENT 0x3 +#define SGL_DESC_TYPE_KEYED_DATA_BLOCK 0x4 +#define SGL_DESC_TYPE_VENDOR_SPECIFIC 0xf + rt_uint8_t sgl_identify; +}); + +struct rt_nvme_command_rw +{ + rt_uint8_t opcode; + rt_uint8_t flags; + rt_uint16_t cmdid; + rt_le32_t nsid; + NVME_RSVD(8, 8); + rt_le64_t metadata; + union + { + struct + { + rt_le64_t prp1; + rt_le64_t prp2; + }; + struct rt_nvme_sgl_desc sgl; + }; + rt_le64_t slba; + rt_le16_t length; + rt_le16_t control; + rt_le32_t dsmgmt; + rt_le32_t reftag; + rt_le16_t apptag; + rt_le16_t appmask; +}; + +enum +{ + RT_NVME_RW_LR = 1 << 15, + RT_NVME_RW_FUA = 1 << 14, + RT_NVME_RW_APPEND_PIREMAP = 1 << 9, + RT_NVME_RW_DSM_FREQ_UNSPEC = 0, + RT_NVME_RW_DSM_FREQ_TYPICAL = 1, + RT_NVME_RW_DSM_FREQ_RARE = 2, + RT_NVME_RW_DSM_FREQ_READS = 3, + RT_NVME_RW_DSM_FREQ_WRITES = 4, + RT_NVME_RW_DSM_FREQ_RW = 5, + RT_NVME_RW_DSM_FREQ_ONCE = 6, + RT_NVME_RW_DSM_FREQ_PREFETCH = 7, + RT_NVME_RW_DSM_FREQ_TEMP = 8, + RT_NVME_RW_DSM_LATENCY_NONE = 0 << 4, + RT_NVME_RW_DSM_LATENCY_IDLE = 1 << 4, + RT_NVME_RW_DSM_LATENCY_NORM = 2 << 4, + RT_NVME_RW_DSM_LATENCY_LOW = 3 << 4, + RT_NVME_RW_DSM_SEQ_REQ = 1 << 6, + RT_NVME_RW_DSM_COMPRESSED = 1 << 7, + RT_NVME_RW_PRINFO_PRCHK_REF = 1 << 10, + RT_NVME_RW_PRINFO_PRCHK_APP = 1 << 11, + RT_NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12, + RT_NVME_RW_PRINFO_PRACT = 1 << 13, + RT_NVME_RW_DTYPE_STREAMS = 1 << 4, + RT_NVME_WZ_DEAC = 1 << 9, +}; + +enum +{ + RT_NVME_QUEUE_PHYS_CONTIG = (1 << 0), + RT_NVME_CQ_IRQ_ENABLED = (1 << 1), + RT_NVME_SQ_PRIO_URGENT = (0 << 1), + RT_NVME_SQ_PRIO_HIGH = (1 << 1), + RT_NVME_SQ_PRIO_MEDIUM = (2 << 1), + RT_NVME_SQ_PRIO_LOW = (3 << 1), + RT_NVME_FEAT_ARBITRATION = 0x01, + RT_NVME_FEAT_POWER_MGMT = 0x02, + RT_NVME_FEAT_LBA_RANGE = 0x03, + RT_NVME_FEAT_TEMP_THRESH = 0x04, + RT_NVME_FEAT_ERR_RECOVERY = 0x05, + RT_NVME_FEAT_VOLATILE_WC = 0x06, + RT_NVME_FEAT_NUM_QUEUES = 0x07, + RT_NVME_FEAT_IRQ_COALESCE = 0x08, + RT_NVME_FEAT_IRQ_CONFIG = 0x09, + RT_NVME_FEAT_WRITE_ATOMIC = 0x0a, + RT_NVME_FEAT_ASYNC_EVENT = 0x0b, + RT_NVME_FEAT_AUTO_PST = 0x0c, + RT_NVME_FEAT_SW_PROGRESS = 0x80, + RT_NVME_FEAT_HOST_ID = 0x81, + RT_NVME_FEAT_RESV_MASK = 0x82, + RT_NVME_FEAT_RESV_PERSIST = 0x83, + RT_NVME_LOG_ERROR = 0x01, + RT_NVME_LOG_SMART = 0x02, + RT_NVME_LOG_FW_SLOT = 0x03, + RT_NVME_LOG_RESERVATION = 0x80, + RT_NVME_FWACT_REPL = (0 << 3), + RT_NVME_FWACT_REPL_ACTV = (1 << 3), + RT_NVME_FWACT_ACTV = (2 << 3), +}; + +struct rt_nvme_command_identify +{ 
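+    /* Identify (opcode 0x06): "cns" selects the data structure returned,
+     * e.g. CNS 0x00 = Identify Namespace (for the given nsid) and
+     * CNS 0x01 = Identify Controller, per the NVMe base specification. */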
+ rt_uint8_t opcode; + rt_uint8_t flags; + rt_uint16_t cmdid; + rt_le32_t nsid; + NVME_RSVD(8, 16); + rt_le64_t prp1; + rt_le64_t prp2; + rt_le32_t cns; + NVME_RSVD(64, 20); +}; + +struct rt_nvme_command_features +{ + rt_uint8_t opcode; + rt_uint8_t flags; + rt_uint16_t cmdid; + rt_le32_t nsid; + NVME_RSVD(8, 16); + rt_le64_t prp1; + rt_le64_t prp2; + rt_le32_t fid; + rt_le32_t dword11; + NVME_RSVD(68, 16); +}; + +struct rt_nvme_command_create_cq +{ + rt_uint8_t opcode; + rt_uint8_t flags; + rt_uint16_t cmdid; + NVME_RSVD(4, 20); + rt_le64_t prp1; + NVME_RSVD(32, 8); + rt_le16_t cqid; + rt_le16_t qsize; + rt_le16_t cq_flags; + rt_le16_t irq_vector; + NVME_RSVD(104, 16); +}; + +struct rt_nvme_command_create_sq +{ + rt_uint8_t opcode; + rt_uint8_t flags; + rt_uint16_t cmdid; + NVME_RSVD(4, 20); + rt_le64_t prp1; + NVME_RSVD(32, 8); + rt_le16_t sqid; + rt_le16_t qsize; + rt_le16_t sq_flags; + rt_le16_t cqid; + NVME_RSVD(104, 16); +}; + +struct rt_nvme_command_delete_queue +{ + rt_uint8_t opcode; + rt_uint8_t flags; + rt_uint16_t cmdid; + NVME_RSVD(4, 36); + rt_le16_t qid; + NVME_RSVD(42, 22); +}; + +struct rt_nvme_command_write_zeroes +{ + rt_uint8_t opcode; + rt_uint8_t flags; + rt_uint16_t cmdid; + rt_le32_t nsid; + NVME_RSVD(8, 8); + rt_le64_t metadata; + rt_le64_t prp1; + rt_le64_t prp2; + rt_le64_t slba; + rt_le16_t length; + rt_le16_t control; + rt_le32_t dsmgmt; + rt_le32_t reftag; + rt_le16_t apptag; + rt_le16_t appmask; +}; + +struct rt_nvme_command +{ + union + { + struct rt_nvme_command_common common; + struct rt_nvme_command_rw rw; + struct rt_nvme_command_identify identify; + struct rt_nvme_command_features features; + struct rt_nvme_command_create_cq create_cq; + struct rt_nvme_command_create_sq create_sq; + struct rt_nvme_command_delete_queue delete_queue; + struct rt_nvme_command_write_zeroes write_zeroes; + }; +}; + +struct rt_nvme_completion +{ + union + { + rt_le16_t u16; + rt_le32_t u32; + rt_le64_t u64; + } result; + rt_le16_t sq_head; /* How much of this queue may be reclaimed */ + rt_le16_t sq_id; /* Submission queue that generated this entry */ + rt_uint16_t cmdid; /* Which command completed */ + rt_le16_t status; /* Command status */ +}; + +enum +{ + RT_NVME_REG_CAP = 0x0000, /* Controller Capabilities */ + RT_NVME_REG_VS = 0x0008, /* Version */ + RT_NVME_REG_INTMS = 0x000c, /* Interrupt Mask Set */ + RT_NVME_REG_INTMC = 0x0010, /* Interrupt Mask Clear */ + RT_NVME_REG_CC = 0x0014, /* Controller Configuration */ + RT_NVME_REG_CSTS = 0x001c, /* Controller Status */ + RT_NVME_REG_NSSR = 0x0020, /* NVM Subsystem Reset */ + RT_NVME_REG_AQA = 0x0024, /* Admin Queue Attributes */ + RT_NVME_REG_ASQ = 0x0028, /* Admin SQ Base Address */ + RT_NVME_REG_ACQ = 0x0030, /* Admin CQ Base Address */ + RT_NVME_REG_CMBLOC = 0x0038, /* Controller Memory Buffer Location */ + RT_NVME_REG_CMBSZ = 0x003c, /* Controller Memory Buffer Size */ + RT_NVME_REG_BPINFO = 0x0040, /* Boot Partition Information */ + RT_NVME_REG_BPRSEL = 0x0044, /* Boot Partition Read Select */ + RT_NVME_REG_BPMBL = 0x0048, /* Boot Partition Memory Buffer Location */ + RT_NVME_REG_CMBMSC = 0x0050, /* Controller Memory Buffer Memory Space Control */ + RT_NVME_REG_CRTO = 0x0068, /* Controller Ready Timeouts */ + RT_NVME_REG_PMRCAP = 0x0e00, /* Persistent Memory Capabilities */ + RT_NVME_REG_PMRCTL = 0x0e04, /* Persistent Memory Region Control */ + RT_NVME_REG_PMRSTS = 0x0e08, /* Persistent Memory Region Status */ + RT_NVME_REG_PMREBS = 0x0e0c, /* Persistent Memory Region Elasticity Buffer Size */ + RT_NVME_REG_PMRSWTP = 
0x0e10, /* Persistent Memory Region Sustained Write Throughput */ + RT_NVME_REG_DBS = 0x1000, /* SQ 0 Tail Doorbell */ +}; + +#define RT_NVME_CAP_MQES(cap) ((cap) & 0xffff) +#define RT_NVME_CAP_TIMEOUT(cap) (((cap) >> 24) & 0xff) +#define RT_NVME_CAP_STRIDE(cap) (((cap) >> 32) & 0xf) +#define RT_NVME_CAP_MPSMIN(cap) (((cap) >> 48) & 0xf) +#define RT_NVME_CAP_MPSMAX(cap) (((cap) >> 52) & 0xf) + +#define RT_NVME_VS(major, minor) (((major) << 16) | ((minor) << 8)) + +#define RT_NVME_AQ_DEPTH 32 +#define RT_NVME_NR_AEN_COMMANDS 1 +#define RT_NVME_AQ_BLK_MQ_DEPTH (RT_NVME_AQ_DEPTH - RT_NVME_NR_AEN_COMMANDS) +#define RT_NVME_AQ_MQ_TAG_DEPTH (RT_NVME_AQ_BLK_MQ_DEPTH - 1) + +enum +{ + RT_NVME_CC_ENABLE = 1 << 0, + RT_NVME_CC_CSS_NVM = 0 << 4, + RT_NVME_CC_MPS_SHIFT = 7, + RT_NVME_CC_ARB_RR = 0 << 11, + RT_NVME_CC_ARB_WRRU = 1 << 11, + RT_NVME_CC_ARB_VS = 7 << 11, + RT_NVME_CC_SHN_NONE = 0 << 14, + RT_NVME_CC_SHN_NORMAL = 1 << 14, + RT_NVME_CC_SHN_ABRUPT = 2 << 14, + RT_NVME_CC_SHN_MASK = 3 << 14, + RT_NVME_CC_IOSQES = 6 << 16, + RT_NVME_CC_IOCQES = 4 << 20, + RT_NVME_CSTS_RDY = 1 << 0, + RT_NVME_CSTS_CFS = 1 << 1, + RT_NVME_CSTS_SHST_NORMAL = 0 << 2, + RT_NVME_CSTS_SHST_OCCUR = 1 << 2, + RT_NVME_CSTS_SHST_CMPLT = 2 << 2, + RT_NVME_CSTS_SHST_MASK = 3 << 2, +}; + +rt_packed(struct rt_nvme_id_power_state +{ + rt_le16_t mp; /* Maximum Power */ + NVME_RSVD(1, 1); + rt_uint8_t mxps_nops; /* Max Power Scale, Non-Operational State */ + rt_le32_t enlat; /* Entry Latency: microseconds */ + rt_le32_t exlat; /* Exit Latency: microseconds */ + rt_uint8_t rrt; /* Relative Read Throughput */ + rt_uint8_t rrl; /* Relative Read Latency */ + rt_uint8_t rwt; /* Relative Write Throughput */ + rt_uint8_t rwl; /* Relative Write Latency */ + rt_le16_t idlp; /* Idle Power */ + rt_uint8_t ips; /* Idle Power Scale */ + NVME_RSVD(19, 1); + rt_le16_t actp; /* Active Power */ + rt_uint8_t apw_aps; /* Active Power Workload, Active Power Scale */ + NVME_RSVD(23, 9); +}); + +rt_packed(struct rt_nvme_id_ctrl +{ + /* Controller Capabilities and Features */ + rt_le16_t vid; /* PCI Vendor ID */ + rt_le16_t ssvid; /* PCI Subsystem Vendor */ + char sn[20]; /* Serial Number */ + char mn[40]; /* Model Number */ + char fr[8]; /* Firmware Revision */ + rt_uint8_t rab; /* Recommended Arbitration Burst */ + rt_uint8_t ieee[3]; /* IEEE OUI Identifier */ + rt_uint8_t mic; /* Controller Multi-Path I/O and Namespace Sharing Capabilities */ + rt_uint8_t mdts; /* Maximum Data Transfer Size */ + rt_uint16_t cntlid; /* Controller ID */ + rt_uint32_t ver; /* Version */ + rt_uint32_t rtd3r; /* RTD3 Resume Latency */ + rt_uint32_t rtd3e; /* RTD3 Entry Latency */ + rt_uint32_t oaes; /* Optional Asynchronous Events Supported */ +#define RT_NVME_ID_CTRATT_ELBAS 15 /* Extended LBA Formats Supported */ +#define RT_NVME_ID_CTRATT_DNVMS 14 /* Delete NVM Set */ +#define RT_NVME_ID_CTRATT_DEG 13 /* Delete Endurance Group */ +#define RT_NVME_ID_CTRATT_VCM 12 /* Variable Capacity Management */ +#define RT_NVME_ID_CTRATT_FCM 11 /* Fixed Capacity Management */ +#define RT_NVME_ID_CTRATT_MDS 10 /* Multi-Domain Subsystem */ +#define RT_NVME_ID_CTRATT_UUIDL 9 /* UUID List */ +#define RT_NVME_ID_CTRATT_SQA 8 /* SQ Associations */ +#define RT_NVME_ID_CTRATT_NG 7 /* Namespace Granularity */ +#define RT_NVME_ID_CTRATT_TBKAS 6 /* Traffic Based Keep Alive Support */ +#define RT_NVME_ID_CTRATT_PLM 5 /* Predictable Latency Mode */ +#define RT_NVME_ID_CTRATT_EG 4 /* Endurance Groups */ +#define RT_NVME_ID_CTRATT_RRL 3 /* Read Recovery Levels */ +#define RT_NVME_ID_CTRATT_NVMS 
2 /* NVM Sets */ +#define RT_NVME_ID_CTRATT_NOPSPM 1 /* Non-Operational Power State Permissive Mode */ +#define RT_NVME_ID_CTRATT_HIS 0 /* Host Identifier Support */ + rt_uint32_t ctratt; /* Controller Attributes */ + rt_uint16_t rrls; /* Read Recovery Levels Supported */ + NVME_RSVD(102, 9); + rt_uint8_t cntrltype; /* Controller Type */ + rt_uint8_t fguid[16]; /* FRU Globally Unique Identifier */ + rt_uint16_t crdt1; /* Command Retry Delay Time 1 */ + rt_uint16_t crdt2; /* Command Retry Delay Time 2 */ + rt_uint16_t crdt3; /* Command Retry Delay Time 3 */ + NVME_RSVD(134, 119); +#define RT_NVME_ID_NVMSR_NVMEE 1 /* NVMe Enclosure */ +#define RT_NVME_ID_NVMSR_NVMESD 0 /* NVMe Storage Device */ + rt_uint8_t nvmsr; /* NVM Subsystem Report */ + +#define RT_NVME_ID_VWCI_VWCRV 7 /* VPD Write Cycles Remaining Valid */ +#define RT_NVME_ID_VWCI_VWCR 0 /* VPD Write Cycles Remaining */ + rt_uint8_t vwci; /* VPD Write Cycle Information */ +#define RT_NVME_ID_MEC_PCIEME 1 /* PCIe Port Management Endpoint */ +#define RT_NVME_ID_MEC_SMBUSME 0 /* SMBus/I2C Port Management Endpoint */ + rt_uint8_t mec; /* Management Endpoint Capabilities */ + + /* Admin Command Set Attributes & Optional Controller Capabilities */ + rt_le16_t oacs; /* Optional Admin Command Support */ + rt_uint8_t acl; /* Abort Command Limit */ + rt_uint8_t aerl; /* Asynchronous Event Request Limit */ +#define RT_NVME_ID_FRMW_SMUD 5 /* Support Multiple Update Detection */ +#define RT_NVME_ID_FRMW_FAWR 4 /* Firmware Activation Without Reset */ +#define RT_NVME_ID_FRMW_NOFS 1 /* Number Of Firmware Slots */ +#define RT_NVME_ID_FRMW_FFSRO 0 /* First Firmware Slot Read Only */ + rt_uint8_t frmw; /* Firmware Updates */ + rt_uint8_t lpa; /* Log Page Attributes */ + rt_uint8_t elpe; /* Error Log Page Entries */ + rt_uint8_t npss; /* Number of Power States Support */ + rt_uint8_t avscc; /* Admin Vendor Specific Command Configuration */ + rt_uint8_t apsta; /* Autonomous Power State Transition Attributes */ + rt_le16_t wctemp; /* Warning Composite Temperature Threshold */ + rt_le16_t cctemp; /* Critical Composite Temperature Threshold */ + rt_uint16_t mtfa; /* Maximum Time for Firmware Activation */ + rt_uint32_t hmpre; /* Host Memory Buffer Preferred Size */ + rt_uint32_t hmmin; /* Host Memory Buffer Minimum Size */ + rt_uint8_t tnvmcap[16]; /* Total NVM Capacity */ + rt_uint8_t unvmcap[16]; /* Unallocated NVM Capacity */ +#define RT_NVME_ID_RPMBS_ASZ 24 /* Access Size */ +#define RT_NVME_ID_RPMBS_TSZ 16 /* Total Size */ +#define RT_NVME_ID_RPMBS_AM 3 /* Authentication Method */ +#define RT_NVME_ID_RPMBS_NORPMBU 2 /* Number of RPMB Units */ + rt_uint32_t rpmbs; /* Replay Protected Memory Block Support */ + rt_uint16_t edstt; /* Extended Device Self-test Time */ + rt_uint8_t dsto; /* Device Self-test Options */ + rt_uint8_t fwug; /* Firmware Update Granularity */ + rt_uint16_t kas; /* Keep Alive Support */ + rt_uint16_t hctma; /* Host Controlled Thermal Management Attributes */ + rt_uint16_t mntmt; /* Minimum Thermal Management Temperature */ + rt_uint16_t mxtmt; /* Maximum Thermal Management Temperature */ +#define RT_NVME_ID_SANICAP_NODMMAS 30 /* No-Deallocate Modifies Media After Sanitize */ +#define RT_NVME_ID_SANICAP_NDI 29 /* No-Deallocate Inhibited */ +#define RT_NVME_ID_SANICAP_OWS 2 /* Overwrite Support */ +#define RT_NVME_ID_SANICAP_BES 1 /* Block Erase Support */ +#define RT_NVME_ID_SANICAP_CES 0 /* Crypto Erase Support */ + rt_uint32_t sanicap; /* Sanitize Capabilities */ + rt_uint32_t hmminds; /* Host Memory Buffer Minimum Descriptor 
Entry Size */ + rt_uint16_t hmmaxd; /* Host Memory Maximum Descriptors Entries */ + rt_uint16_t nsetidmax; /* NVM Set Identifier Maximum */ + rt_uint16_t endgidmax; /* Endurance Group Identifier Maximum */ + rt_uint8_t anatt; /* ANA Transition Time */ + rt_uint8_t anacap; /* Asymmetric Namespace Access Capabilities */ + rt_uint32_t anagrpmax; /* ANA Group Identifier Maximum */ + rt_uint32_t nanagrpid; /* Number of ANA Group Identifiers */ + rt_uint32_t pels; /* Persistent Event Log Size */ + rt_uint16_t dmid; /* Domain Identifier */ + NVME_RSVD(358, 10); + rt_uint8_t megcap[16]; /* Max Endurance Group Capacity */ + NVME_RSVD(384, 128); + + /* NVM Command Set Attributes */ + rt_uint8_t sqes; /* Submission Queue Entry Size */ + rt_uint8_t cqes; /* Completion Queue Entry Size */ + rt_le16_t maxcmd; /* Maximum Outstanding Commands */ + rt_le32_t nn; /* Number of Namespaces */ + rt_le16_t oncs; /* Optional NVM Command Support */ + rt_le16_t fuses; /* Fused Operation Support */ + rt_uint8_t fna; /* Format NVM Attributes */ + rt_uint8_t vwc; /* Volatile Write Cache */ + rt_le16_t awun; /* Atomic Write Unit Normal */ + rt_le16_t awupf; /* Atomic Write Unit Power Fail */ + rt_uint8_t nvscc; /* I/O Command Set Vendor Specific Command Configuration */ + rt_uint8_t nwpc; /* Namespace Write Protection Capabilities */ + rt_le16_t acwu; /* Atomic Compare & Write Unit */ + rt_le16_t cdfs; /* Copy Descriptor Formats Supported */ +#define RT_NVME_ID_SGL_SUPPORT_MASK 0x3 + rt_le32_t sgls; /* SGL Support */ + rt_uint32_t mnan; /* Maximum Number of Allowed Namespaces */ + char maxdna[16]; /* Maximum Domain Namespace Attachments */ + rt_le32_t maxcna; /* Maximum I/O Controller Namespace Attachments */ + NVME_RSVD(564, 204); + rt_uint8_t subnqn[256]; /* NVM Subsystem NVMe Qualified Name */ + NVME_RSVD(1024, 768); + rt_le32_t ioccsz; /* I/O Queue Command Capsule Supported Size */ + rt_le32_t iorcsz; /* I/O Queue Response Capsule Supported Size */ + rt_le16_t icdoff; /* In Capsule Data Offset */ + rt_uint8_t ctrattr; /* Fabrics Controller Attributes */ + rt_uint8_t msdbd; /* Maximum SGL Data Block Descriptors */ + rt_le16_t ofcs; /* Optional Fabric Commands Support */ + rt_uint8_t dctype; + NVME_RSVD(1807, 241); + + /* Power State Descriptors */ + struct rt_nvme_id_power_state psd[32]; + + /* Vendor Specific */ + rt_uint8_t vs[1024]; +}); + +enum +{ + RT_NVME_CTRL_CMIC_MULTI_PORT = 1 << 0, + RT_NVME_CTRL_CMIC_MULTI_CTRL = 1 << 1, + RT_NVME_CTRL_CMIC_ANA = 1 << 3, + RT_NVME_CTRL_ONCS_COMPARE = 1 << 0, + RT_NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1, + RT_NVME_CTRL_ONCS_DSM = 1 << 2, + RT_NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3, + RT_NVME_CTRL_ONCS_RESERVATIONS = 1 << 5, + RT_NVME_CTRL_ONCS_TIMESTAMP = 1 << 6, + RT_NVME_CTRL_VWC_PRESENT = 1 << 0, + RT_NVME_CTRL_OACS_SEC_SUPP = 1 << 0, + RT_NVME_CTRL_OACS_NS_MNGT_SUPP = 1 << 3, + RT_NVME_CTRL_OACS_DIRECTIVES = 1 << 5, + RT_NVME_CTRL_OACS_DBBUF_SUPP = 1 << 8, + RT_NVME_CTRL_LPA_CMD_EFFECTS_LOG = 1 << 1, + RT_NVME_CTRL_CTRATT_128_ID = 1 << 0, + RT_NVME_CTRL_CTRATT_NON_OP_PSP = 1 << 1, + RT_NVME_CTRL_CTRATT_NVM_SETS = 1 << 2, + RT_NVME_CTRL_CTRATT_READ_RECV_LVLS = 1 << 3, + RT_NVME_CTRL_CTRATT_ENDURANCE_GROUPS = 1 << 4, + RT_NVME_CTRL_CTRATT_PREDICTABLE_LAT = 1 << 5, + RT_NVME_CTRL_CTRATT_NAMESPACE_GRANULARITY = 1 << 7, + RT_NVME_CTRL_CTRATT_UUID_LIST = 1 << 9, +}; + +struct rt_nvme_lba_format +{ + rt_le16_t ms; /* Metadata size */ + rt_uint8_t ds; /* Data size */ + rt_uint8_t rp; /* Relative performance */ +}; + +rt_packed(struct rt_nvme_id_ns +{ + rt_le64_t nsze; /* 
Namespace size */ + rt_le64_t ncap; /* Namespace capacity */ + rt_le64_t nuse; /* Namespace utilization */ + rt_uint8_t nsfeat; /* Namespace features */ + rt_uint8_t nlbaf; /* Number of lba formats */ + rt_uint8_t flbas; /* Formatted lba size */ + rt_uint8_t mc; /* Metadata capabilities */ + rt_uint8_t dpc; /* End-to-end data protection capabilities */ + rt_uint8_t dps; /* End-to-end data protection type settings */ + rt_uint8_t nmic; /* Namespace Multi-path I/O and Namespace Sharing Capabilities */ + rt_uint8_t rescap; /* Reservation Capabilities */ + rt_uint8_t fpi; /* Format Progress Indicator */ + rt_uint8_t dlfeat; /* Deallocate Logical Block Features */ + rt_le16_t nawun; /* Namespace Atomic Write Unit Normal */ + rt_le16_t nawupf; /* Namespace Atomic Write Unit Power Fail */ + rt_le16_t nacwu; /* Namespace Atomic Compare & Write Unit */ + rt_le16_t nabsn; /* Namespace Atomic Boundary Size Normal */ + rt_le16_t nabo; /* Namespace Atomic Boundary Offset */ + rt_le16_t nabspf; /* Namespace Atomic Boundary Size Power Fail */ + rt_uint16_t noiob; /* Namespace Optimal IO Boundary */ + rt_le64_t nvmcap[2]; /* NVMe Capacity */ + rt_uint16_t npwg; /* Namespace Preferred Write Granularity */ + rt_uint16_t npwa; /* Namespace Preferred Write Alignment */ + rt_uint16_t npdg; /* Namespace Preferred Deallocate Granularity */ + rt_uint16_t npda; /* Namespace Preferred Deallocate Alignment */ + rt_uint16_t nows; /* Namespace Optimal Write Size */ + NVME_RSVD(118, 18); + rt_uint32_t anagrpid; /* ANA Group Identifier */ + NVME_RSVD(139, 3); + rt_uint8_t nsattr; /* Namespace Attributes */ + rt_uint16_t nvmsetid; /* NVMe Set Identifier */ + rt_uint16_t endgid; /* Endurance Group Identifier */ + rt_uint8_t nguid[16]; /* Namespace Globally Unique Identifier */ + rt_uint8_t eui64[8]; /* IEEE Extended Unique Identifier */ + + /* Logical Block Address Format */ + struct rt_nvme_lba_format lbaf[16]; + NVME_RSVD(171, 192); + + /* Vendor specific */ + rt_uint8_t vs[3712]; +}); + +enum +{ + RT_NVME_NS_FEAT_THIN = 1 << 0, + RT_NVME_NS_FLBAS_LBA_MASK = 0xf, + RT_NVME_NS_FLBAS_LBA_UMASK = 0x60, + RT_NVME_NS_FLBAS_LBA_SHIFT = 1, + RT_NVME_NS_FLBAS_META_EXT = 0x10, + RT_NVME_LBAF_RP_BEST = 0, + RT_NVME_LBAF_RP_BETTER = 1, + RT_NVME_LBAF_RP_GOOD = 2, + RT_NVME_LBAF_RP_DEGRADED = 3, + RT_NVME_NS_DPC_PI_LAST = 1 << 4, + RT_NVME_NS_DPC_PI_FIRST = 1 << 3, + RT_NVME_NS_DPC_PI_TYPE3 = 1 << 2, + RT_NVME_NS_DPC_PI_TYPE2 = 1 << 1, + RT_NVME_NS_DPC_PI_TYPE1 = 1 << 0, + RT_NVME_NS_DPS_PI_FIRST = 1 << 3, + RT_NVME_NS_DPS_PI_MASK = 0x7, + RT_NVME_NS_DPS_PI_TYPE1 = 1, + RT_NVME_NS_DPS_PI_TYPE2 = 2, + RT_NVME_NS_DPS_PI_TYPE3 = 3, +}; + +struct rt_nvme_ops; +struct rt_nvme_controller; + +/* + * An NVM Express queue. Each device has at least two (one for admin commands + * and one for I/O commands). 
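+ * Commands are placed in the submission queue (SQ) and the controller posts
+ * results to the paired completion queue (CQ); the host advances both by
+ * writing the queue's doorbell registers (see nvme_submit_cmd() and
+ * nvme_queue_isr() in nvme.c).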
+ */ +struct rt_nvme_queue +{ + struct rt_nvme_controller *nvme; + struct rt_nvme_command *sq_cmds; + struct rt_nvme_completion *cq_entry; + + rt_ubase_t sq_cmds_phy; + rt_ubase_t cq_entry_phy; + + rt_uint32_t *doorbell; + rt_uint16_t qid; + rt_uint16_t depth; + rt_uint16_t sq_head; + rt_uint16_t sq_tail; + rt_uint16_t cq_head; + rt_uint16_t cq_phase; + + rt_err_t err; + struct rt_nvme_command *cmd; + + struct rt_completion done; + struct rt_spinlock lock; +}; + +struct rt_nvme_controller +{ + rt_list_t list; + struct rt_device *dev; + + int nvme_id; + char name[RT_NAME_MAX]; + + void *regs; + rt_uint64_t cap; + rt_uint32_t page_shift; + rt_uint32_t page_size; + rt_uint32_t queue_depth; + rt_uint32_t io_queue_max; + rt_uint32_t ctrl_config; + rt_uint32_t max_transfer_shift:8; + rt_uint32_t volatile_write_cache:8; + rt_uint32_t write_zeroes:1; + rt_uint32_t sgl_mode:2; + rt_uint32_t doorbell_stride; + rt_uint32_t *doorbell_tbl; + + const struct rt_nvme_ops *ops; + +#define RT_USING_NVME_QUEUE (1 + (RT_USING_NVME_IO_QUEUE * RT_CPUS_NR)) + int irqs_nr; + int irqs[RT_USING_NVME_QUEUE]; + union + { + struct + { + struct rt_nvme_queue admin_queue; + struct rt_nvme_queue io_queues[RT_USING_NVME_IO_QUEUE * RT_CPUS_NR]; + }; + struct rt_nvme_queue queue[RT_USING_NVME_QUEUE]; + }; + + volatile rt_atomic_t cmdid; + volatile rt_atomic_t ioqid[RT_CPUS_NR]; + + rt_list_t ns_nodes; +}; + +struct rt_nvme_device +{ + struct rt_blk_disk parent; + struct rt_nvme_controller *ctrl; + + rt_list_t list; + + rt_uint32_t nsid; + rt_uint32_t lba_shift; + struct rt_nvme_id_ns id; +}; +#define rt_disk_to_nvme_device(disk) rt_container_of(disk, struct rt_nvme_device, parent) + +struct rt_nvme_ops +{ + const char *name; + + /* Controller-specific NVM Express queue setup */ + rt_err_t (*setup_queue)(struct rt_nvme_queue *queue); + /* Controller-specific NVM Express queue cleanup */ + rt_err_t (*cleanup_queue)(struct rt_nvme_queue *queue); + /* Controller-specific NVM Express command submission */ + rt_err_t (*submit_cmd)(struct rt_nvme_queue *queue, struct rt_nvme_command *cmd); + /* Controller-specific NVM Express command completion */ + void (*complete_cmd)(struct rt_nvme_queue *queue, struct rt_nvme_command *cmd); +}; + +rt_err_t rt_nvme_controller_register(struct rt_nvme_controller *nvme); +rt_err_t rt_nvme_controller_unregister(struct rt_nvme_controller *nvme); + +#endif /* __NVME_H__ */ diff --git a/components/drivers/include/rtdevice.h b/components/drivers/include/rtdevice.h index 19e9de0467..18a64e259c 100644 --- a/components/drivers/include/rtdevice.h +++ b/components/drivers/include/rtdevice.h @@ -55,6 +55,10 @@ extern "C" { #include "drivers/iio.h" +#ifdef RT_USING_NVME +#include "drivers/nvme.h" +#endif + #ifdef RT_USING_OFW #include "drivers/ofw.h" #include "drivers/ofw_fdt.h" diff --git a/components/drivers/nvme/Kconfig b/components/drivers/nvme/Kconfig new file mode 100644 index 0000000000..83c731c0af --- /dev/null +++ b/components/drivers/nvme/Kconfig @@ -0,0 +1,23 @@ +menuconfig RT_USING_NVME + bool "Using Non-Volatile Memory Express (NVME) device drivers" + depends on RT_USING_DM + depends on RT_USING_BLK + depends on RT_USING_DMA + default n + +config RT_USING_NVME_IO_QUEUE + int "Number of I/O Command queue" + depends on RT_USING_NVME + default 2 if RT_THREAD_PRIORITY_8 + default 4 if RT_THREAD_PRIORITY_32 + default 8 if RT_THREAD_PRIORITY_256 + +config RT_NVME_PCI + bool "NVME support on PCI bus" + depends on RT_USING_NVME + depends on RT_USING_PCI + default y + +if RT_USING_NVME + osource 
"$(SOC_DM_NVME_DIR)/Kconfig" +endif diff --git a/components/drivers/nvme/SConscript b/components/drivers/nvme/SConscript new file mode 100644 index 0000000000..6fc699be24 --- /dev/null +++ b/components/drivers/nvme/SConscript @@ -0,0 +1,18 @@ +from building import * + +group = [] + +if not GetDepend(['RT_USING_NVME']): + Return('group') + +cwd = GetCurrentDir() +CPPPATH = [cwd + '/../include'] + +src = ['nvme.c'] + +if GetDepend(['RT_NVME_PCI']): + src += ['nvme-pci.c'] + +group = DefineGroup('DeviceDrivers', src, depend = [''], CPPPATH = CPPPATH) + +Return('group') diff --git a/components/drivers/nvme/nvme-pci.c b/components/drivers/nvme/nvme-pci.c new file mode 100644 index 0000000000..d73260ffd1 --- /dev/null +++ b/components/drivers/nvme/nvme-pci.c @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2006-2023, RT-Thread Development Team + * + * SPDX-License-Identifier: Apache-2.0 + * + * Change Logs: + * Date Author Notes + * 2023-02-25 GuEe-GUI the first version + */ + +#include +#include + +#define NVME_REG_BAR 0 + +struct pci_nvme_quirk +{ + const struct rt_nvme_ops *ops; +}; + +struct pci_nvme_controller +{ + struct rt_nvme_controller parent; + const struct pci_nvme_quirk *quirk; + + rt_bool_t is_msi; + struct rt_pci_msix_entry msix_entries[RT_USING_NVME_QUEUE]; +}; + +static const struct rt_nvme_ops pci_nvme_std_ops = +{ + .name = "PCI", +}; + +static rt_err_t pci_nvme_probe(struct rt_pci_device *pdev) +{ + rt_err_t err; + rt_ssize_t msi_nr; + struct rt_nvme_controller *nvme; + struct pci_nvme_controller *pci_nvme = rt_calloc(1, sizeof(*pci_nvme)); + const struct pci_nvme_quirk *quirk = pdev->id->data; + + if (!pci_nvme) + { + return -RT_ENOMEM; + } + + pci_nvme->quirk = quirk; + nvme = &pci_nvme->parent; + nvme->dev = &pdev->parent; + nvme->regs = rt_pci_iomap(pdev, NVME_REG_BAR); + + if (!nvme->regs) + { + err = -RT_EIO; + goto _fail; + } + + nvme->ops = quirk && quirk->ops ? 
quirk->ops : &pci_nvme_std_ops; + + if ((msi_nr = rt_pci_msix_vector_count(pdev)) <= 0) + { + msi_nr = rt_pci_msi_vector_count(pdev); + } + if (msi_nr > 0) + { + nvme->irqs_nr = RT_ARRAY_SIZE(pci_nvme->msix_entries); + nvme->irqs_nr = rt_min_t(rt_size_t, msi_nr, nvme->irqs_nr); + } + + if (nvme->irqs_nr > 0) + { + rt_pci_msix_entry_index_linear(pci_nvme->msix_entries, nvme->irqs_nr); + + if (rt_pci_msix_enable(pdev, pci_nvme->msix_entries, nvme->irqs_nr) > 0) + { + pci_nvme->is_msi = RT_TRUE; + + for (int i = 0; i < nvme->irqs_nr; ++i) + { + nvme->irqs[i] = pci_nvme->msix_entries[i].irq; + } + } + } + + if (!pci_nvme->is_msi) + { + nvme->irqs_nr = 1; + nvme->irqs[0] = pdev->irq; + rt_pci_irq_unmask(pdev); + } + + rt_pci_set_master(pdev); + + if ((err = rt_nvme_controller_register(nvme))) + { + goto _disable; + } + + pdev->parent.user_data = pci_nvme; + + return RT_EOK; + +_disable: + if (pci_nvme->is_msi) + { + rt_pci_msix_disable(pdev); + } + else + { + rt_pci_irq_mask(pdev); + } + rt_pci_clear_master(pdev); + rt_iounmap(nvme->regs); + +_fail: + rt_free(pci_nvme); + + return err; +} + +static rt_err_t pci_nvme_remove(struct rt_pci_device *pdev) +{ + struct rt_nvme_controller *nvme; + struct pci_nvme_controller *pci_nvme = pdev->parent.user_data; + + nvme = &pci_nvme->parent; + + rt_nvme_controller_unregister(nvme); + + if (pci_nvme->is_msi) + { + rt_pci_msix_disable(pdev); + } + else + { + /* INTx is shared, don't mask all */ + rt_hw_interrupt_umask(pdev->irq); + rt_pci_irq_mask(pdev); + } + + rt_pci_clear_master(pdev); + + rt_iounmap(nvme->regs); + rt_free(pci_nvme); + + return RT_EOK; +} + +static rt_err_t pci_nvme_shutdown(struct rt_pci_device *pdev) +{ + return pci_nvme_remove(pdev); +} + +static const struct rt_pci_device_id pci_nvme_ids[] = +{ + { RT_PCI_DEVICE_ID(PCI_VENDOR_ID_REDHAT, 0x0010) }, + { RT_PCI_DEVICE_CLASS(PCIS_STORAGE_EXPRESS, ~0) }, + { /* sentinel */ } +}; + +static struct rt_pci_driver pci_nvme_driver = +{ + .name = "nvme-pci", + + .ids = pci_nvme_ids, + .probe = pci_nvme_probe, + .remove = pci_nvme_remove, + .shutdown = pci_nvme_shutdown, +}; +RT_PCI_DRIVER_EXPORT(pci_nvme_driver); diff --git a/components/drivers/nvme/nvme.c b/components/drivers/nvme/nvme.c new file mode 100644 index 0000000000..68a47a89c1 --- /dev/null +++ b/components/drivers/nvme/nvme.c @@ -0,0 +1,1302 @@ +/* + * Copyright (c) 2006-2023, RT-Thread Development Team + * + * SPDX-License-Identifier: Apache-2.0 + * + * Change Logs: + * Date Author Notes + * 2023-02-25 GuEe-GUI the first version + */ + +#include +#include +#include + +#define DBG_TAG "rtdm.nvme" +#define DBG_LVL DBG_INFO +#include + +static struct rt_dm_ida nvme_controller_ida = RT_DM_IDA_INIT(CUSTOM); +static struct rt_dm_ida nvme_ida = RT_DM_IDA_INIT(NVME); + +static struct rt_spinlock nvme_lock = {}; +static rt_list_t nvme_nodes = RT_LIST_OBJECT_INIT(nvme_nodes); + +rt_inline rt_uint32_t nvme_readl(struct rt_nvme_controller *nvme, int offset) +{ + return HWREG32(nvme->regs + offset); +} + +rt_inline void nvme_writel(struct rt_nvme_controller *nvme, int offset, rt_uint32_t value) +{ + HWREG32(nvme->regs + offset) = value; +} + +rt_inline rt_uint64_t nvme_readq(struct rt_nvme_controller *nvme, int offset) +{ + rt_uint32_t lo32, hi32; + + lo32 = HWREG32(nvme->regs + offset); + hi32 = HWREG32(nvme->regs + offset + 4); + + return ((rt_uint64_t)hi32 << 32) + lo32; +} + +rt_inline void nvme_writeq(struct rt_nvme_controller *nvme, int offset, rt_uint64_t value) +{ + nvme_writel(nvme, offset, (rt_uint32_t)(value & 0xffffffff)); + 
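+    /* 64-bit registers (e.g. the ASQ/ACQ base addresses) are written as two
+     * 32-bit accesses: low dword first, then high dword. */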
nvme_writel(nvme, offset + 4, (rt_uint32_t)(value >> 32));
+}
+
+static rt_err_t nvme_poll_csts(struct rt_nvme_controller *nvme,
+        rt_uint32_t mask, rt_uint32_t value)
+{
+    rt_tick_t timeout;
+
+    timeout = rt_tick_from_millisecond(RT_NVME_CAP_TIMEOUT(nvme->cap) * 500);
+    timeout += rt_tick_get();
+
+    do {
+        if ((nvme_readl(nvme, RT_NVME_REG_CSTS) & mask) == value)
+        {
+            return RT_EOK;
+        }
+
+        rt_hw_cpu_relax();
+    } while (rt_tick_get() < timeout);
+
+    return -RT_ETIMEOUT;
+}
+
+static rt_err_t nvme_enable_ctrl(struct rt_nvme_controller *nvme)
+{
+    nvme->ctrl_config &= ~RT_NVME_CC_SHN_MASK;
+    nvme->ctrl_config |= RT_NVME_CC_ENABLE;
+    nvme_writel(nvme, RT_NVME_REG_CC, nvme->ctrl_config);
+
+    return nvme_poll_csts(nvme, RT_NVME_CSTS_RDY, RT_NVME_CSTS_RDY);
+}
+
+static rt_err_t nvme_disable_ctrl(struct rt_nvme_controller *nvme)
+{
+    nvme->ctrl_config &= ~RT_NVME_CC_SHN_MASK;
+    nvme->ctrl_config &= ~RT_NVME_CC_ENABLE;
+    nvme_writel(nvme, RT_NVME_REG_CC, nvme->ctrl_config);
+
+    return nvme_poll_csts(nvme, RT_NVME_CSTS_RDY, 0);
+}
+
+static rt_err_t nvme_shutdown_ctrl(struct rt_nvme_controller *nvme)
+{
+    nvme->ctrl_config &= ~RT_NVME_CC_SHN_MASK;
+    nvme->ctrl_config |= RT_NVME_CC_SHN_NORMAL;
+    nvme_writel(nvme, RT_NVME_REG_CC, nvme->ctrl_config);
+
+    return nvme_poll_csts(nvme, RT_NVME_CSTS_SHST_MASK, RT_NVME_CSTS_SHST_CMPLT);
+}
+
+rt_inline rt_le16_t nvme_next_cmdid(struct rt_nvme_controller *nvme)
+{
+    return rt_cpu_to_le16((rt_uint16_t)rt_atomic_add(&nvme->cmdid, 1));
+}
+
+static rt_err_t nvme_submit_cmd(struct rt_nvme_queue *queue,
+        struct rt_nvme_command *cmd)
+{
+    rt_ubase_t level;
+    rt_err_t err = RT_EOK;
+    rt_uint16_t tail, head;
+    struct rt_nvme_controller *nvme = queue->nvme;
+
+_retry:
+    level = rt_spin_lock_irqsave(&queue->lock);
+
+    tail = queue->sq_tail;
+    head = queue->cq_head;
+
+    if (tail + 1 == head)
+    {
+        /* IO queue is full, wait for the last IO command to complete. */
+        rt_spin_unlock_irqrestore(&queue->lock, level);
+
+        rt_thread_yield();
+
+        goto _retry;
+    }
+
+    cmd->common.cmdid = nvme_next_cmdid(nvme);
+    rt_memcpy(&queue->sq_cmds[tail], cmd, sizeof(*cmd));
+
+    if (nvme->ops->submit_cmd)
+    {
+        if ((err = nvme->ops->submit_cmd(queue, cmd)))
+        {
+            /* Don't leak the queue lock on the error path */
+            rt_spin_unlock_irqrestore(&queue->lock, level);
+
+            return err;
+        }
+    }
+
+    if (++tail == queue->depth)
+    {
+        tail = 0;
+    }
+    HWREG32(queue->doorbell) = tail;
+    queue->sq_tail = tail;
+
+    queue->cmd = cmd;
+    queue->err = RT_EOK;
+
+    rt_spin_unlock_irqrestore(&queue->lock, level);
+
+    err = rt_completion_wait(&queue->done,
+            rt_tick_from_millisecond(queue->qid != 0 ? RT_WAITING_FOREVER : 60));
+
+    return err ? err
: queue->err; +} + +static rt_err_t nvme_set_features_simple(struct rt_nvme_controller *nvme, + rt_uint32_t fid, rt_uint32_t dword11) +{ + struct rt_nvme_command cmd; + + rt_memset(&cmd, 0, sizeof(cmd)); + cmd.features.opcode = RT_NVME_ADMIN_OPCODE_SET_FEATURES; + cmd.features.fid = rt_cpu_to_le32(fid); + cmd.features.dword11 = rt_cpu_to_le32(dword11); + + return nvme_submit_cmd(&nvme->admin_queue, &cmd); +} + +static rt_err_t nvme_submit_io_cmd(struct rt_nvme_controller *nvme, + struct rt_nvme_command *cmd) +{ + rt_uint16_t qid; + + qid = rt_atomic_add(&nvme->ioqid[rt_hw_cpu_id()], RT_CPUS_NR); + qid %= nvme->io_queue_max; + + return nvme_submit_cmd(&nvme->io_queues[qid], cmd); +} + +/* + * PRP Mode: + * + * |63 n+1|n 0| + * +----------------------------------------+----------+---+---+ + * | Page Base Address | Offset | 0 | 0 | + * +----------------------------------------+----------+---+---+ + * | + * v + * Host Physical Pages + * +----------------------------+ + * +--------------+----------+ | Page k | + * | PRP Entry1 | Offset +---------->+----------------------------+ + * +--------------+----------+ | Page k + 1 | + * +----------------------------+ + * ... + * +----------------------------+ + * +--------------+----------+ | Page k + m | + * | PRP Entry2 | 0 +---------->+----------------------------+ + * +--------------+----------+ | Page k + m + 1 | + * +----------------------------+ + * PRP List (In PRP Entry2): + * + * |63 n+1|n 0| + * +----------------------------------------+------------------+ + * | Page Base Address k | 0h | + * +----------------------------------------+------------------+ + * | Page Base Address k + 1 | 0h | + * +----------------------------------------+------------------+ + * | ... | + * +----------------------------------------+------------------+ + * | Page Base Address k + m | 0h | + * +----------------------------------------+------------------+ + * | Page Base Address k + m + 1 | 0h | + * +----------------------------------------+------------------+ + * + * SGL Mode: + * +----- Non-transport + * LBA / + * +---------------+---------------+-------/-------+---------------+ + * | 3KB | 4KB | 2KB | 4KB | + * +-------+-------+-------+-------+---------------+--------+------+ + * | +-------------------------+ | + * | | | + * | +--------------------|------+ + * | | | + * +-------v-------+ +-------v-------+ +-------v-------+ + * | A MEM BLOCK | | B MEM BLOCK | | C MEM BLOCK | + * +-------^-------+ +-------^-------+ +-------^-------+ + * | | | + * +----------------+ | | + * | | | + * Segment(0) | | | + * +----------+----------+ | | | + * | Address: A +--+ | | + * +----------+----------+ | | + * | Type: 0h | Len: 3KB | | | + * +----------+----------+ | | + * | Address: Segment(1) +--+ | | + * +----------+----------+ | | | + * | Type: 2h | Len: 48 | | | | + * +----------+----------+ | | | + * | | | + * +------------------------+ | | + * | | | + * v | | + * Segment(1) | | + * +----------+----------+ | | + * | Address: B +------+ | + * +----------+----------+ | + * | Type: 0h | Len: 4KB | | + * +----------+----------+ | + * | Address: | | + * +----------+----------+ | + * | Type: 1h | Len: 2KB | | + * +----------+----------+ | + * | Address: Segment(2) +--+ | + * +----------+----------+ | | + * | Type: 0h | Len: 16 | | | + * +----------+----------+ | | + * | | + * +------------------------+ | + * | | + * v | + * Segment(2) | + * +----------+----------+ | + * | Address: C +---------------------------+ + * +----------+----------+ + * | Type: 0h | Len: 4KB | + * 
+----------+----------+ + */ + +static rt_ssize_t nvme_blk_rw(struct rt_nvme_device *ndev, rt_off_t slba, + rt_ubase_t buffer_dma, rt_size_t lbas, rt_uint8_t opcode) +{ + rt_err_t err; + rt_uint16_t max_lbas; + rt_uint32_t lba_shift; + rt_size_t tlbas; + rt_ssize_t data_length; + struct rt_nvme_command cmd; + struct rt_nvme_controller *nvme = ndev->ctrl; + + rt_memset(&cmd, 0, sizeof(cmd)); + cmd.rw.opcode = opcode; + cmd.rw.flags = nvme->sgl_mode << RT_NVME_CMD_FLAGS_PSDT_SHIFT; + cmd.rw.nsid = rt_cpu_to_le32(ndev->nsid); + + tlbas = lbas; + lba_shift = ndev->lba_shift; + max_lbas = 1 << (nvme->max_transfer_shift - lba_shift); + + if (nvme->sgl_mode) + { + while ((rt_ssize_t)lbas > 0) + { + if (lbas < max_lbas) + { + max_lbas = (rt_uint16_t)lbas; + } + + data_length = max_lbas << lba_shift; + + cmd.rw.sgl.adddress = rt_cpu_to_le64(buffer_dma); + cmd.rw.sgl.length = rt_cpu_to_le32(data_length); + cmd.rw.sgl.sgl_identify = SGL_DESC_TYPE_DATA_BLOCK; + cmd.rw.slba = rt_cpu_to_le16(slba); + cmd.rw.length = rt_cpu_to_le16(max_lbas - 1); + + if ((err = nvme_submit_io_cmd(nvme, &cmd))) + { + tlbas -= lbas; + break; + } + + lbas -= max_lbas; + slba += max_lbas; + buffer_dma += data_length; + } + } + else + { + void *prp_list = RT_NULL; + rt_size_t prp_list_size = 0, page_size; + + page_size = nvme->page_size; + + while ((rt_ssize_t)lbas > 0) + { + rt_uint64_t prp2_addr, dma_addr; + rt_ssize_t remain_length, page_offset; + + if (lbas < max_lbas) + { + max_lbas = (rt_uint16_t)lbas; + } + + /* + * PRP transfer: + * 1. data_length <= 4KB: + * prp1 = buffer_dma + * prp2 = 0 + * + * 2. 4KB < data_length <= 8KB: + * prp1 = buffer_dma + * prp2 = buffer_dma + * + * 3. 8KB < data_length: + * prp1 = buffer_dma(0, 4k) + * prp2 = buffer_dma(4k, ~) + */ + dma_addr = buffer_dma; + page_offset = buffer_dma & (page_size - 1); + data_length = max_lbas << lba_shift; + remain_length = data_length - (page_size - page_offset); + + do { + rt_size_t prps_per_page, prps, pages; + rt_uint64_t *prp_list_ptr, prp_list_dma; + + if (remain_length <= 0) + { + prp2_addr = 0; + break; + } + + if (remain_length) + { + dma_addr += (page_size - page_offset); + } + + if (remain_length <= page_size) + { + prp2_addr = dma_addr; + break; + } + + prps_per_page = page_size / sizeof(rt_uint64_t); + prps = RT_DIV_ROUND_UP(remain_length, page_size); + pages = RT_DIV_ROUND_UP(prps - 1, prps_per_page - 1); + + if (prps > prp_list_size) + { + if (prp_list) + { + rt_free_align(prp_list); + } + + prp_list = rt_malloc_align(pages * page_size, page_size); + + if (!prp_list) + { + LOG_D("No memory to create a PRP List"); + /* Ask user to try again */ + return tlbas - lbas; + } + + prp_list_size = pages * (prps_per_page - 1) + 1; + } + prp_list_ptr = prp_list; + prp_list_dma = (rt_uint64_t)rt_kmem_v2p(prp_list_ptr); + + prp2_addr = prp_list_dma; + + for (int i = 0; prps; --prps, ++i) + { + /* End of the entry, fill the next entry addr if remain */ + if ((i == (prps_per_page - 1)) && prps > 1) + { + prp_list_dma += page_size; + *prp_list_ptr = rt_cpu_to_le64(prp_list_dma); + + /* Start to fill the next PRP */ + i = 0; + } + + *prp_list_ptr = rt_cpu_to_le64(dma_addr); + dma_addr += page_size; + } + + rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, prp_list_ptr, prp_list_size); + } while (0); + + cmd.rw.prp1 = rt_cpu_to_le64(buffer_dma); + cmd.rw.prp2 = rt_cpu_to_le64(prp2_addr); + cmd.rw.slba = rt_cpu_to_le16(slba); + cmd.rw.length = rt_cpu_to_le16(max_lbas - 1); + + if ((err = nvme_submit_io_cmd(nvme, &cmd))) + { + tlbas -= lbas; + break; + } + + lbas -= 
max_lbas; + slba += max_lbas; + buffer_dma += data_length; + } + + if (prp_list) + { + rt_free_align(prp_list); + } + } + + return tlbas; +} + +static rt_ssize_t nvme_blk_read(struct rt_blk_disk *disk, rt_off_t sector, + void *buffer, rt_size_t sector_count) +{ + rt_ssize_t res; + rt_uint32_t page_bits; + rt_size_t buffer_size; + rt_ubase_t buffer_dma; + void *temp_buffer = RT_NULL; + struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk); + struct rt_nvme_controller *nvme = ndev->ctrl; + + buffer_size = (1 << ndev->lba_shift) * sector_count; + buffer_dma = (rt_ubase_t)rt_kmem_v2p(buffer); + + if ((nvme->sgl_mode && (buffer_dma & RT_GENMASK(1, 0))) || + (!nvme->sgl_mode && (buffer_dma & ARCH_PAGE_MASK))) + { + LOG_D("DMA PRP direct %s buffer MUST 4-bytes or page aligned", "read"); + + page_bits = rt_page_bits(buffer_size); + temp_buffer = rt_pages_alloc(page_bits); + + if (!temp_buffer) + { + return -RT_ENOMEM; + } + + buffer_dma = (rt_ubase_t)rt_kmem_v2p(temp_buffer); + } + + res = nvme_blk_rw(ndev, sector, buffer_dma, sector_count, RT_NVME_CMD_READ); + + if (res > 0) + { + if (res != sector_count) + { + /* + * Don't always aim for optimization, checking for equality + * is much faster than multiplication calculation. + */ + buffer_size = res * (1 << ndev->lba_shift); + } + + if (temp_buffer) + { + rt_hw_cpu_dcache_ops(RT_HW_CACHE_INVALIDATE, temp_buffer, buffer_size); + rt_memcpy(buffer, temp_buffer, buffer_size); + } + else + { + rt_hw_cpu_dcache_ops(RT_HW_CACHE_INVALIDATE, buffer, buffer_size); + } + } + + if (temp_buffer) + { + rt_pages_free(temp_buffer, page_bits); + } + + return res; +} + +static rt_ssize_t nvme_blk_write(struct rt_blk_disk *disk, rt_off_t sector, + const void *buffer, rt_size_t sector_count) +{ + rt_ssize_t res; + rt_uint32_t page_bits; + rt_size_t buffer_size; + rt_ubase_t buffer_dma; + void *temp_buffer = RT_NULL; + struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk); + struct rt_nvme_controller *nvme = ndev->ctrl; + + buffer_size = (1 << ndev->lba_shift) * sector_count; + buffer_dma = (rt_ubase_t)rt_kmem_v2p((void *)buffer); + + if ((nvme->sgl_mode && (buffer_dma & RT_GENMASK(1, 0))) || + (!nvme->sgl_mode && (buffer_dma & ARCH_PAGE_MASK))) + { + LOG_D("DMA PRP direct %s buffer MUST 4-bytes or page aligned", "write"); + + page_bits = rt_page_bits(buffer_size); + temp_buffer = rt_pages_alloc(page_bits); + + if (!temp_buffer) + { + return -RT_ENOMEM; + } + + buffer_dma = (rt_ubase_t)rt_kmem_v2p(temp_buffer); + + rt_memcpy(temp_buffer, buffer, buffer_size); + buffer = temp_buffer; + } + + rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, (void *)buffer, buffer_size); + + res = nvme_blk_rw(ndev, sector, buffer_dma, sector_count, RT_NVME_CMD_WRITE); + + if (temp_buffer) + { + rt_pages_free(temp_buffer, page_bits); + } + + return res; +} + +static rt_err_t nvme_blk_getgeome(struct rt_blk_disk *disk, + struct rt_device_blk_geometry *geometry) +{ + struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk); + + geometry->bytes_per_sector = 1 << ndev->lba_shift; + geometry->block_size = 1 << ndev->lba_shift; + geometry->sector_count = rt_le64_to_cpu(ndev->id.nsze); + + return RT_EOK; +} + +static rt_err_t nvme_blk_sync(struct rt_blk_disk *disk) +{ + struct rt_nvme_command cmd; + struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk); + + rt_memset(&cmd, 0, sizeof(cmd)); + cmd.common.opcode = RT_NVME_CMD_FLUSH; + cmd.common.nsid = rt_cpu_to_le32(ndev->nsid); + + return nvme_submit_io_cmd(ndev->ctrl, &cmd); +} + +static rt_err_t nvme_blk_erase(struct rt_blk_disk *disk) 
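+/* Erase is implemented with the optional Write Zeroes command and is only
+ * issued when the controller advertises support (nvme->write_zeroes,
+ * cf. RT_NVME_CTRL_ONCS_WRITE_ZEROES). */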
+{ + rt_err_t err; + rt_ssize_t slba, lbas, max_lbas; + struct rt_nvme_command cmd; + struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk); + struct rt_nvme_controller *nvme = ndev->ctrl; + + if (!nvme->write_zeroes) + { + return -RT_ENOSYS; + } + + rt_memset(&cmd, 0, sizeof(cmd)); + cmd.write_zeroes.opcode = RT_NVME_CMD_WRITE_ZEROES; + cmd.write_zeroes.nsid = rt_cpu_to_le32(ndev->nsid); + + slba = 0; + lbas = rt_le64_to_cpu(ndev->id.nsze); + max_lbas = 1 << (nvme->max_transfer_shift - ndev->lba_shift); + + while ((rt_ssize_t)lbas > 0) + { + if (lbas < max_lbas) + { + max_lbas = (rt_uint16_t)lbas; + } + + cmd.write_zeroes.slba = rt_cpu_to_le16(slba); + cmd.write_zeroes.length = rt_cpu_to_le16(max_lbas - 1); + + if ((err = nvme_submit_io_cmd(nvme, &cmd))) + { + break; + } + + lbas -= max_lbas; + slba += max_lbas; + } + + return err; +} + +static rt_err_t nvme_blk_autorefresh(struct rt_blk_disk *disk, rt_bool_t is_auto) +{ + struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk); + struct rt_nvme_controller *nvme = ndev->ctrl; + + if (nvme->volatile_write_cache & RT_NVME_CTRL_VWC_PRESENT) + { + return nvme_set_features_simple(nvme, RT_NVME_FEAT_VOLATILE_WC, !!is_auto); + } + else if (!is_auto) + { + return RT_EOK; + } + + return -RT_ENOSYS; +} + +static const struct rt_blk_disk_ops nvme_blk_ops = +{ + .read = nvme_blk_read, + .write = nvme_blk_write, + .getgeome = nvme_blk_getgeome, + .sync = nvme_blk_sync, + .erase = nvme_blk_erase, + .autorefresh = nvme_blk_autorefresh, +}; + +static void nvme_queue_isr(int irqno, void *param) +{ + rt_ubase_t level; + rt_uint16_t head, phase, status; + struct rt_nvme_queue *queue = param; + struct rt_nvme_controller *nvme = queue->nvme; + + level = rt_spin_lock_irqsave(&queue->lock); + + head = queue->cq_head; + phase = queue->cq_phase; + status = HWREG16(&queue->cq_entry[head].status); + status = rt_le16_to_cpu(status); + + if ((status & 0x01) == phase) + { + if ((status >> 1)) + { + queue->err = -RT_EIO; + goto _end_cmd; + } + + if (nvme->ops->complete_cmd) + { + nvme->ops->complete_cmd(queue, queue->cmd); + } + + _end_cmd: + if (++head == queue->depth) + { + head = 0; + phase = !phase; + } + + HWREG32(queue->doorbell + nvme->doorbell_stride) = head; + queue->cq_head = head; + queue->cq_phase = phase; + + rt_completion_done(&queue->done); + } + + rt_spin_unlock_irqrestore(&queue->lock, level); +} + +static rt_err_t nvme_identify(struct rt_nvme_controller *nvme, + rt_uint32_t nsid, rt_uint32_t cns, void *data) +{ + rt_err_t err; + rt_uint32_t page_size = nvme->page_size; + rt_ubase_t data_phy = (rt_ubase_t)rt_kmem_v2p(data); + int offset = data_phy & (page_size - 1); + struct rt_nvme_command cmd; + + rt_memset(&cmd, 0, sizeof(cmd)); + cmd.identify.opcode = RT_NVME_ADMIN_OPCODE_IDENTIFY; + cmd.identify.nsid = rt_cpu_to_le32(nsid); + cmd.identify.prp1 = rt_cpu_to_le64(data_phy); + + if (sizeof(struct rt_nvme_id_ctrl) <= page_size - offset) + { + cmd.identify.prp2 = 0; + } + else + { + data_phy += (page_size - offset); + cmd.identify.prp2 = rt_cpu_to_le64(data_phy); + } + cmd.identify.cns = rt_cpu_to_le32(cns); + + rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, data, sizeof(struct rt_nvme_id_ctrl)); + + if (!(err = nvme_submit_cmd(&nvme->admin_queue, &cmd))) + { + rt_hw_cpu_dcache_ops(RT_HW_CACHE_INVALIDATE, data, sizeof(struct rt_nvme_id_ctrl)); + } + + return err; +} + +static rt_err_t nvme_attach_queue(struct rt_nvme_queue *queue, rt_uint8_t opcode) +{ + struct rt_nvme_command cmd; + struct rt_nvme_controller *nvme = queue->nvme; + rt_uint16_t flags = 
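+    /* SQ/CQ memory is allocated with rt_dma_alloc(), so it is physically
+     * contiguous: */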
RT_NVME_QUEUE_PHYS_CONTIG;
+
+    rt_memset(&cmd, 0, sizeof(cmd));
+
+    if (opcode == RT_NVME_ADMIN_OPCODE_CREATE_CQ)
+    {
+        cmd.create_cq.opcode = opcode;
+        cmd.create_cq.prp1 = rt_cpu_to_le64(queue->cq_entry_phy);
+        cmd.create_cq.cqid = rt_cpu_to_le16(queue->qid);
+        cmd.create_cq.qsize = rt_cpu_to_le16(queue->depth - 1);
+        cmd.create_cq.cq_flags = rt_cpu_to_le16(flags | RT_NVME_CQ_IRQ_ENABLED);
+        cmd.create_cq.irq_vector = rt_cpu_to_le16(nvme->irqs_nr > 1 ? queue->qid : 0);
+    }
+    else if (opcode == RT_NVME_ADMIN_OPCODE_CREATE_SQ)
+    {
+        cmd.create_sq.opcode = opcode;
+        cmd.create_sq.prp1 = rt_cpu_to_le64(queue->sq_cmds_phy);
+        cmd.create_sq.sqid = rt_cpu_to_le16(queue->qid);
+        cmd.create_sq.qsize = rt_cpu_to_le16(queue->depth - 1);
+        cmd.create_sq.sq_flags = rt_cpu_to_le16(flags | RT_NVME_SQ_PRIO_MEDIUM);
+        cmd.create_sq.cqid = rt_cpu_to_le16(queue->qid);
+    }
+    else
+    {
+        LOG_E("Unsupported queue create opcode = %x", opcode);
+        RT_ASSERT(0);
+    }
+
+    return nvme_submit_cmd(&nvme->admin_queue, &cmd);
+}
+
+rt_inline rt_err_t nvme_attach_queue_sq(struct rt_nvme_queue *queue)
+{
+    return nvme_attach_queue(queue, RT_NVME_ADMIN_OPCODE_CREATE_SQ);
+}
+
+rt_inline rt_err_t nvme_attach_queue_cq(struct rt_nvme_queue *queue)
+{
+    return nvme_attach_queue(queue, RT_NVME_ADMIN_OPCODE_CREATE_CQ);
+}
+
+static rt_err_t nvme_detach_queue(struct rt_nvme_queue *queue,
+        rt_uint8_t opcode)
+{
+    struct rt_nvme_command cmd;
+    struct rt_nvme_controller *nvme = queue->nvme;
+
+    rt_memset(&cmd, 0, sizeof(cmd));
+    cmd.delete_queue.opcode = opcode;
+    cmd.delete_queue.qid = rt_cpu_to_le16(queue->qid);
+
+    return nvme_submit_cmd(&nvme->admin_queue, &cmd);
+}
+
+rt_inline rt_ubase_t nvme_queue_dma_flags(void)
+{
+    return RT_DMA_F_NOCACHE | RT_DMA_F_LINEAR;
+}
+
+static void nvme_free_queue(struct rt_nvme_queue *queue)
+{
+    rt_ubase_t dma_flags;
+    struct rt_nvme_controller *nvme = queue->nvme;
+
+    if (nvme->ops->cleanup_queue)
+    {
+        rt_err_t err;
+
+        /* Only warn when the controller-specific cleanup actually failed */
+        if ((err = nvme->ops->cleanup_queue(queue)))
+        {
+            LOG_W("Cleanup[%s] queue error = %s", nvme->ops->name, rt_strerror(err));
+        }
+    }
+
+    dma_flags = nvme_queue_dma_flags();
+
+    if (queue->sq_cmds)
+    {
+        rt_dma_free(nvme->dev, sizeof(*queue->sq_cmds) * queue->depth,
+                queue->sq_cmds, queue->sq_cmds_phy, dma_flags);
+    }
+
+    if (queue->cq_entry)
+    {
+        rt_dma_free(nvme->dev, sizeof(*queue->cq_entry) * queue->depth,
+                queue->cq_entry, queue->cq_entry_phy, dma_flags);
+    }
+}
+
+static struct rt_nvme_queue *nvme_alloc_queue(struct rt_nvme_controller *nvme,
+        int qid, int depth)
+{
+    rt_err_t err;
+    rt_ubase_t dma_flags;
+    struct rt_nvme_queue *queue = &nvme->queue[qid];
+
+    rt_memset(queue, 0, sizeof(*queue));
+
+    queue->nvme = nvme;
+    queue->doorbell = &nvme->doorbell_tbl[qid * 2 * nvme->doorbell_stride];
+    queue->qid = qid;
+    queue->depth = depth;
+    queue->cq_head = 0;
+    queue->cq_phase = 1;
+    rt_completion_init(&queue->done);
+    rt_spin_lock_init(&queue->lock);
+
+    dma_flags = nvme_queue_dma_flags();
+
+    /* struct rt_nvme_command */
+    queue->sq_cmds = rt_dma_alloc(nvme->dev,
+            sizeof(*queue->sq_cmds) * depth, &queue->sq_cmds_phy, dma_flags);
+
+    if (!queue->sq_cmds)
+    {
+        err = -RT_ENOMEM;
+        goto _fail;
+    }
+
+    /* struct rt_nvme_completion */
+    queue->cq_entry = rt_dma_alloc(nvme->dev,
+            sizeof(*queue->cq_entry) * depth, &queue->cq_entry_phy, dma_flags);
+
+    if (!queue->cq_entry)
+    {
+        err = -RT_ENOMEM;
+        goto _fail;
+    }
+
+    rt_memset(queue->sq_cmds, 0, sizeof(struct rt_nvme_command) * depth);
+    rt_memset(queue->cq_entry, 0, sizeof(struct rt_nvme_completion) * depth);
+
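+    /* Give the controller-specific driver a chance to program its own queue
+     * state before the queue is used. */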
+
+static rt_err_t nvme_configure_admin_queue(struct rt_nvme_controller *nvme)
+{
+    rt_err_t err;
+    int irq;
+    char name[RT_NAME_MAX];
+    rt_uint32_t aqa;
+    rt_uint32_t page_shift = ARCH_PAGE_SHIFT;
+    rt_uint32_t page_min = RT_NVME_CAP_MPSMIN(nvme->cap) + 12;
+    rt_uint32_t page_max = RT_NVME_CAP_MPSMAX(nvme->cap) + 12;
+    struct rt_nvme_queue *admin_queue;
+
+    if (page_shift < page_min)
+    {
+        LOG_E("Device %s page size (%u) %s than host (%u)",
+                "minimum", 1 << page_min, "larger", 1 << page_shift);
+        return -RT_EINVAL;
+    }
+
+    if (page_shift > page_max)
+    {
+        LOG_W("Device %s page size (%u) %s than host (%u)",
+                "maximum", 1 << page_max, "smaller", 1 << page_shift);
+        page_shift = page_max;
+    }
+
+    if ((err = nvme_disable_ctrl(nvme)))
+    {
+        return err;
+    }
+
+    admin_queue = nvme_alloc_queue(nvme, 0, RT_NVME_AQ_DEPTH);
+
+    if (rt_is_err(admin_queue))
+    {
+        return rt_ptr_err(admin_queue);
+    }
+
+    /* AQA: admin SQ and CQ sizes, both encoded as 0's based values */
+    aqa = admin_queue->depth - 1;
+    aqa |= aqa << 16;
+
+    nvme->page_shift = page_shift;
+    nvme->page_size = 1U << page_shift;
+
+    nvme->ctrl_config = RT_NVME_CC_CSS_NVM;
+    nvme->ctrl_config |= (page_shift - 12) << RT_NVME_CC_MPS_SHIFT;
+    nvme->ctrl_config |= RT_NVME_CC_ARB_RR | RT_NVME_CC_SHN_NONE;
+    nvme->ctrl_config |= RT_NVME_CC_IOSQES | RT_NVME_CC_IOCQES;
+
+    nvme_writel(nvme, RT_NVME_REG_AQA, aqa);
+    nvme_writeq(nvme, RT_NVME_REG_ASQ, admin_queue->sq_cmds_phy);
+    nvme_writeq(nvme, RT_NVME_REG_ACQ, admin_queue->cq_entry_phy);
+
+    if ((err = nvme_enable_ctrl(nvme)))
+    {
+        nvme_free_queue(admin_queue);
+
+        return err;
+    }
+
+    irq = nvme->irqs[0];
+
+    rt_snprintf(name, RT_NAME_MAX, "%s-admin-queue", nvme->name);
+
+    rt_hw_interrupt_install(irq, nvme_queue_isr, &nvme->admin_queue, name);
+    rt_hw_interrupt_umask(irq);
+
+    return RT_EOK;
+}
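The page-size handshake in nvme_configure_admin_queue() works in shifts: CAP.MPSMIN/MPSMAX are encoded relative to 4 KiB, and CC.MPS is written as the chosen host page shift minus 12. Below is a small self-contained sketch of that arithmetic using made-up CAP values; it is not part of the patch.

#include <stdio.h>

/* Illustration of the page-size negotiation in nvme_configure_admin_queue():
 * CAP.MPSMIN/MPSMAX encode powers of two relative to 4 KiB (field + 12), and
 * CC.MPS is programmed as host_page_shift - 12. Hypothetical values below. */
int main(void)
{
    unsigned mpsmin = 0, mpsmax = 4;   /* example CAP fields */
    unsigned host_shift = 12;          /* 4 KiB host pages */
    unsigned min_shift = mpsmin + 12, max_shift = mpsmax + 12;

    printf("device supports %u..%u byte pages\n", 1u << min_shift, 1u << max_shift);

    if (host_shift < min_shift)
        printf("host page too small, cannot attach\n");
    else
        printf("CC.MPS = %u\n", (host_shift > max_shift ? max_shift : host_shift) - 12);
    return 0;
}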
+
+static rt_err_t nvme_setup_io_queues(struct rt_nvme_controller *nvme)
+{
+    rt_err_t err;
+    rt_uint32_t value;
+    int irq, cpuid = 0;
+    char name[RT_NAME_MAX];
+    rt_bool_t affinity_fixup = RT_FALSE;
+    RT_DECLARE_IRQ_AFFINITY(affinity) = { 0 };
+    struct rt_nvme_queue *queue;
+
+    nvme->io_queue_max = nvme->irqs_nr > 1 ? nvme->irqs_nr - 1 : 1;
+    value = (nvme->io_queue_max - 1) | ((nvme->io_queue_max - 1) << 16);
+
+    if ((err = nvme_set_features_simple(nvme, RT_NVME_FEAT_NUM_QUEUES, value)))
+    {
+        return err;
+    }
+
+    for (int i = 0, q_idx = 1; i < nvme->io_queue_max; ++i, ++q_idx)
+    {
+        queue = nvme_alloc_queue(nvme, q_idx, nvme->queue_depth);
+
+        if (rt_is_err(queue))
+        {
+            return rt_ptr_err(queue);
+        }
+
+        if ((err = nvme_attach_queue_cq(queue)) ||
+            (err = nvme_attach_queue_sq(queue)))
+        {
+            return err;
+        }
+    }
+
+    for (int i = 0, irq_idx = 1; i < nvme->io_queue_max; ++i, ++irq_idx)
+    {
+        irq = nvme->irqs[irq_idx % nvme->irqs_nr];
+
+        rt_snprintf(name, RT_NAME_MAX, "%s-io-queue%d", nvme->name, i);
+
+        if (!affinity_fixup)
+        {
+            RT_IRQ_AFFINITY_SET(affinity, cpuid % RT_CPUS_NR);
+            if (rt_pic_irq_set_affinity(irq, affinity))
+            {
+                /* Fixup in secondary CPU startup */
+                affinity_fixup = RT_TRUE;
+            }
+            RT_IRQ_AFFINITY_CLEAR(affinity, cpuid++ % RT_CPUS_NR);
+        }
+
+        rt_hw_interrupt_install(irq, nvme_queue_isr, &nvme->io_queues[i], name);
+        rt_hw_interrupt_umask(irq);
+    }
+
+    return RT_EOK;
+}
+
+static void nvme_remove_io_queues(struct rt_nvme_controller *nvme)
+{
+    int irq;
+    struct rt_nvme_queue *queue;
+
+    for (int i = 0, irq_idx = 1; i < nvme->io_queue_max; ++i, ++irq_idx)
+    {
+        queue = &nvme->io_queues[i];
+
+        nvme_detach_queue(queue, RT_NVME_ADMIN_OPCODE_DELETE_SQ);
+        nvme_detach_queue(queue, RT_NVME_ADMIN_OPCODE_DELETE_CQ);
+        nvme_free_queue(queue);
+
+        irq = nvme->irqs[irq_idx % nvme->irqs_nr];
+
+        rt_hw_interrupt_mask(irq);
+        rt_pic_detach_irq(irq, queue);
+    }
+}
+
+static void nvme_remove_admin_queues(struct rt_nvme_controller *nvme)
+{
+    int irq = nvme->irqs[0];
+
+    rt_hw_interrupt_mask(irq);
+    rt_pic_detach_irq(irq, &nvme->admin_queue);
+
+    nvme_free_queue(&nvme->admin_queue);
+}
+
+static void nvme_remove_devices(struct rt_nvme_controller *nvme)
+{
+    struct rt_nvme_device *ndev, *next_ndev;
+
+    rt_list_for_each_entry_safe(ndev, next_ndev, &nvme->ns_nodes, list)
+    {
+        rt_list_remove(&ndev->list);
+
+        rt_hw_blk_disk_unregister(&ndev->parent);
+        rt_free(ndev);
+    }
+}
+
+static rt_err_t nvme_scan_device(struct rt_nvme_controller *nvme,
+        rt_size_t number_of_ns)
+{
+    rt_err_t err = RT_EOK;
+    rt_uint32_t lbaf;
+    struct rt_nvme_id_ns *id = RT_NULL;
+
+    if (!(id = rt_malloc_align(sizeof(*id), nvme->page_size)))
+    {
+        return -RT_ENOMEM;
+    }
+
+    /* NVMe namespace IDs start from 1 */
+    for (rt_uint32_t nsid = 1; nsid <= number_of_ns; ++nsid)
+    {
+        struct rt_nvme_device *ndev = rt_calloc(1, sizeof(*ndev));
+
+        if (!ndev)
+        {
+            err = -RT_ENOMEM;
+            goto _free_res;
+        }
+
+        rt_memset(id, 0, sizeof(*id));
+        if ((err = nvme_identify(nvme, nsid, 0, id)))
+        {
+            rt_free(ndev);
+            goto _free_res;
+        }
+
+        if (!id->nsze)
+        {
+            /* Inactive namespace, skip it */
+            rt_free(ndev);
+            continue;
+        }
+
+        ndev->ctrl = nvme;
+
+        rt_memcpy(&ndev->id, id, sizeof(ndev->id));
+        lbaf = id->flbas & RT_NVME_NS_FLBAS_LBA_MASK;
+        lbaf |= ((id->flbas & RT_NVME_NS_FLBAS_LBA_UMASK) >> RT_NVME_NS_FLBAS_LBA_SHIFT);
+
+        ndev->nsid = nsid;
+        ndev->lba_shift = id->lbaf[lbaf].ds;
+
+        ndev->parent.ida = &nvme_ida;
+        ndev->parent.parallel_io = RT_TRUE;
+        ndev->parent.ops = &nvme_blk_ops;
+        ndev->parent.max_partitions = RT_BLK_PARTITION_MAX;
+        rt_dm_dev_set_name(&ndev->parent.parent, "%sn%u", nvme->name, nsid);
+
+        if ((err = rt_hw_blk_disk_register(&ndev->parent)))
+        {
+            rt_free(ndev);
+            goto _free_res;
+        }
+
+        rt_list_init(&ndev->list);
+        rt_list_insert_before(&nvme->ns_nodes, &ndev->list);
+    }
+
+_free_res:
+    rt_free_align(id);
+
+    return err;
+}
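nvme_scan_device() keeps the namespace geometry as a shift (lba_shift comes from the chosen LBA format's ds field), and the I/O paths combine it with max_transfer_shift to bound each command. The standalone example below works through that arithmetic with typical values; the numbers are illustrative, not read from any device.

#include <stdio.h>

/* Worked example of the shift arithmetic used by nvme_scan_device() and the
 * block read/write/erase paths: the LBA data size (lbaf[].ds) and the
 * controller's maximum transfer size are both powers of two. */
int main(void)
{
    unsigned lba_shift = 9;            /* lbaf[flbas].ds == 9 -> 512-byte LBAs */
    unsigned max_transfer_shift = 20;  /* 1 MiB: the default used above when
                                          the controller reports no MDTS */

    printf("block size      : %u bytes\n", 1u << lba_shift);
    printf("max LBAs per cmd: %u\n", 1u << (max_transfer_shift - lba_shift));
    return 0;
}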
+
+rt_inline rt_size_t strip_len(const char *str, rt_size_t max_len)
+{
+    rt_size_t size = 0;
+
+    /* Length of the string with trailing spaces stripped */
+    for (int i = 0; *str && i < max_len; ++i, ++str)
+    {
+        if (*str != ' ')
+        {
+            size = i + 1;
+        }
+    }
+
+    return size;
+}
+
+rt_err_t rt_nvme_controller_register(struct rt_nvme_controller *nvme)
+{
+    rt_err_t err;
+    struct rt_nvme_id_ctrl *ctrl = RT_NULL;
+
+    if (!nvme || !nvme->ops)
+    {
+        return -RT_EINVAL;
+    }
+
+    if (nvme_readl(nvme, RT_NVME_REG_CSTS) == (rt_uint32_t)-1)
+    {
+        LOG_E("Controller registers are not accessible");
+
+        return -RT_EINVAL;
+    }
+
+    if ((nvme->nvme_id = rt_dm_ida_alloc(&nvme_controller_ida)) < 0)
+    {
+        return -RT_EFULL;
+    }
+
+    rt_snprintf(nvme->name, RT_NAME_MAX, "nvme%u", nvme->nvme_id);
+
+    nvme->cap = nvme_readq(nvme, RT_NVME_REG_CAP);
+    nvme->queue_depth = RT_NVME_CAP_MQES(nvme->cap) + 1;
+    nvme->doorbell_stride = 1 << RT_NVME_CAP_STRIDE(nvme->cap);
+    nvme->doorbell_tbl = nvme->regs + RT_NVME_REG_DBS;
+
+    if ((err = nvme_configure_admin_queue(nvme)))
+    {
+        LOG_E("Configure admin queue error = %s", rt_strerror(err));
+        goto _free_admin_queue;
+    }
+
+    if ((err = nvme_setup_io_queues(nvme)))
+    {
+        LOG_E("Unable to setup I/O queues error = %s", rt_strerror(err));
+        goto _free_admin_queue;
+    }
+
+    if (!(ctrl = rt_malloc_align(sizeof(*ctrl), nvme->page_size)))
+    {
+        err = -RT_ENOMEM;
+        goto _fail;
+    }
+
+    if ((err = nvme_identify(nvme, 0, 1, ctrl)))
+    {
+        goto _fail;
+    }
+
+    if (ctrl->mdts)
+    {
+        nvme->max_transfer_shift = ctrl->mdts + (RT_NVME_CAP_MPSMIN(nvme->cap) + 12);
+    }
+    else
+    {
+        /* 1MB is recommended. */
+        nvme->max_transfer_shift = 20;
+    }
+    nvme->volatile_write_cache = ctrl->vwc;
+    nvme->write_zeroes = !!(rt_le64_to_cpu(ctrl->oncs) & RT_NVME_CTRL_ONCS_WRITE_ZEROES);
+
+    if ((rt_le32_to_cpu(ctrl->sgls) & RT_NVME_ID_SGL_SUPPORT_MASK))
+    {
+        nvme->sgl_mode = RT_NVME_PSDT_SGL_MPTR_SGL;
+    }
+
+    LOG_I("NVM Express v%d.%d (%s, %-.*s, %-.*s)",
+            nvme_readl(nvme, RT_NVME_REG_VS) >> 16,
+            (nvme_readl(nvme, RT_NVME_REG_VS) >> 8) & 0xff,
+            nvme->ops->name,
+            strip_len(ctrl->mn, sizeof(ctrl->mn)), ctrl->mn,
+            strip_len(ctrl->fr, sizeof(ctrl->fr)), ctrl->fr);
+
+    rt_list_init(&nvme->ns_nodes);
+    if ((err = nvme_scan_device(nvme, rt_le32_to_cpu(ctrl->nn))))
+    {
+        goto _fail;
+    }
+
+    rt_free_align(ctrl);
+
+    rt_spin_lock(&nvme_lock);
+    rt_list_insert_after(&nvme_nodes, &nvme->list);
+    rt_spin_unlock(&nvme_lock);
+
+    return RT_EOK;
+
+_fail:
+    if (ctrl)
+    {
+        rt_free_align(ctrl);
+    }
+    nvme_remove_devices(nvme);
+    nvme_remove_io_queues(nvme);
+_free_admin_queue:
+    nvme_remove_admin_queues(nvme);
+
+    rt_dm_ida_free(&nvme_controller_ida, nvme->nvme_id);
+
+    return err;
+}
+
+rt_err_t rt_nvme_controller_unregister(struct rt_nvme_controller *nvme)
+{
+    rt_err_t err;
+
+    if (!nvme)
+    {
+        return -RT_EINVAL;
+    }
+
+    rt_spin_lock(&nvme_lock);
+    rt_list_remove(&nvme->list);
+    rt_spin_unlock(&nvme_lock);
+
+    nvme_remove_devices(nvme);
+    nvme_remove_io_queues(nvme);
+    nvme_remove_admin_queues(nvme);
+
+    rt_dm_ida_free(&nvme_controller_ida, nvme->nvme_id);
+
+    if (!(err = nvme_shutdown_ctrl(nvme)))
+    {
+        err = nvme_disable_ctrl(nvme);
+    }
+    else
+    {
+        LOG_E("%s: shutdown error = %s", nvme->name, rt_strerror(err));
+    }
+
+    return err;
+}
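For a host-controller glue driver, the expected flow is to map the register window, resolve the interrupt lines, fill in a struct rt_nvme_controller, and hand it to rt_nvme_controller_register(); rt_nvme_controller_unregister() undoes this. The sketch below is hypothetical: example_nvme_probe() and example_nvme_ops are invented names, only the fields this file visibly reads (dev, regs, irqs, irqs_nr, ops) are set, and the exact field types should be checked against drivers/nvme.h.

#include <rtthread.h>
#include <rtdevice.h>
#include <drivers/nvme.h>

/* Minimal ops table: .name is required for logging; setup_queue,
 * cleanup_queue and complete_cmd are optional hooks checked for RT_NULL. */
static const struct rt_nvme_ops example_nvme_ops =
{
    .name = "example",
};

/* Hypothetical probe: the bus code (e.g. PCIe or a platform binding) is
 * assumed to have mapped BAR0 and resolved the interrupt lines already. */
static rt_err_t example_nvme_probe(struct rt_device *dev, void *mmio_base,
                                   int *irqs, rt_size_t irqs_nr)
{
    struct rt_nvme_controller *nvme = rt_calloc(1, sizeof(*nvme));

    if (!nvme)
    {
        return -RT_ENOMEM;
    }

    /* Field names are taken from this patch; verify types in drivers/nvme.h. */
    nvme->dev = dev;
    nvme->regs = mmio_base;
    nvme->irqs = irqs;
    nvme->irqs_nr = irqs_nr;
    nvme->ops = &example_nvme_ops;

    return rt_nvme_controller_register(nvme);
}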
+
+/*
+ * NVMe I/O queues are intended to be per-CPU. Fix up the IRQ affinity after
+ * the secondary CPUs have started, when setting the affinity is most likely
+ * to succeed.
+ */
+static int nvme_queue_affinify_fixup(void)
+{
+    int cpuid = rt_hw_cpu_id();
+    struct rt_nvme_controller *nvme;
+    RT_DECLARE_IRQ_AFFINITY(affinity) = { 0 };
+    RT_DECLARE_IRQ_AFFINITY(current_affinity) = { 0 };
+
+    RT_IRQ_AFFINITY_SET(affinity, cpuid);
+
+    rt_hw_spin_lock(&nvme_lock.lock);
+    rt_list_for_each_entry(nvme, &nvme_nodes, list)
+    {
+        for (int i = cpuid % RT_CPUS_NR; i < nvme->io_queue_max; i += RT_CPUS_NR)
+        {
+            int irq = nvme->irqs[i];
+
+            if (!rt_pic_irq_get_affinity(irq, current_affinity) &&
+                !rt_bitmap_test_bit(current_affinity, cpuid))
+            {
+                rt_ubase_t level = rt_hw_interrupt_disable();
+
+                rt_pic_irq_set_affinity(irq, affinity);
+
+                rt_hw_interrupt_enable(level);
+            }
+        }
+    }
+    rt_hw_spin_unlock(&nvme_lock.lock);
+
+    return 0;
+}
+INIT_SECONDARY_CPU_EXPORT(nvme_queue_affinify_fixup);
diff --git a/include/rtdef.h b/include/rtdef.h
index 244738a0da..05c59f8908 100644
--- a/include/rtdef.h
+++ b/include/rtdef.h
@@ -1378,6 +1378,9 @@ struct rt_device
     void *ofw_node;                     /**< ofw node get from device tree */
 #endif /* RT_USING_OFW */
     void *power_domain_unit;
+#ifdef RT_USING_DMA
+    const void *dma_ops;
+#endif
 #endif /* RT_USING_DM */
 
     enum rt_device_class_type type;     /**< device type */