8628 nvme: use a semaphore to guard submission queue
don't block in nvme_bd_cmd
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Jason King <jason.king@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
*** 42,52 ****
* vector and will post them to a taskq for completion processing.
*
*
* Command Processing:
*
! * NVMe devices can have up to 65536 I/O queue pairs, with each queue holding up
* to 65536 I/O commands. The driver will configure one I/O queue pair per
* available interrupt vector, with the queue length usually much smaller than
* the maximum of 65536. If the hardware doesn't provide enough queues, fewer
* interrupt vectors will be used.
*
--- 42,52 ----
* vector and will post them to a taskq for completion processing.
*
*
* Command Processing:
*
! * NVMe devices can have up to 65535 I/O queue pairs, with each queue holding up
* to 65536 I/O commands. The driver will configure one I/O queue pair per
* available interrupt vector, with the queue length usually much smaller than
* the maximum of 65536. If the hardware doesn't provide enough queues, fewer
* interrupt vectors will be used.
*
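The sizing rule described above, as a hedged sketch (the helper is illustrative, not a function in this driver): the number of I/O queue pairs is the smaller of what the hardware advertises and the number of interrupt vectors obtained.

    /* Illustrative only: queue pairs = min(hardware queues, vectors). */
    static uint_t
    nvme_nioqpairs(uint_t hw_queues, uint_t nvectors)
    {
            return (hw_queues < nvectors ? hw_queues : nvectors);
    }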
*** 67,77 ****
* array index is used as command identifier (CID) in the submission queue
* entry. Some commands may take a very long time to complete, and if the queue
* wraps around in that time a submission may find the next array slot to still
* be used by a long-running command. In this case the array is sequentially
* searched for the next free slot. The length of the command array is the same
! * as the configured queue length.
*
*
* Polled I/O Support:
*
* For kernel core dump support the driver can do polled I/O. As interrupts are
--- 67,78 ----
* array index is used as command identifier (CID) in the submission queue
* entry. Some commands may take a very long time to complete, and if the queue
* wraps around in that time a submission may find the next array slot to still
* be used by a long-running command. In this case the array is sequentially
* searched for the next free slot. The length of the command array is the same
! * as the configured queue length. Queue overrun is prevented by the semaphore,
! * so a command submission may block if the queue is full.
*
*
* Polled I/O Support:
*
* For kernel core dump support the driver can do polled I/O. As interrupts are
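The guard described above follows the classic counting-semaphore pattern, sketched here with the illumos ksema_t calls this change actually uses (the fragments are condensed from the hunks below, not a new API):

    /* One permit per queue entry, set up at queue pair creation. */
    sema_init(&qp->nq_sema, nentry, NULL, SEMA_DRIVER, NULL);

    /* Admin submission: sleep until a slot is free. */
    sema_p(&qp->nq_sema);

    /* I/O submission: never sleep; report a full queue instead. */
    if (sema_tryp(&qp->nq_sema) == 0)
            return (EAGAIN);

    /* Completion: hand the slot back. */
    sema_v(&qp->nq_sema);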
*** 255,265 ****
static nvme_cmd_t *nvme_alloc_cmd(nvme_t *, int);
static void nvme_free_cmd(nvme_cmd_t *);
static nvme_cmd_t *nvme_create_nvm_cmd(nvme_namespace_t *, uint8_t,
bd_xfer_t *);
static int nvme_admin_cmd(nvme_cmd_t *, int);
! static int nvme_submit_cmd(nvme_qpair_t *, nvme_cmd_t *);
static nvme_cmd_t *nvme_retrieve_cmd(nvme_t *, nvme_qpair_t *);
static boolean_t nvme_wait_cmd(nvme_cmd_t *, uint_t);
static void nvme_wakeup_cmd(void *);
static void nvme_async_event_task(void *);
--- 256,268 ----
static nvme_cmd_t *nvme_alloc_cmd(nvme_t *, int);
static void nvme_free_cmd(nvme_cmd_t *);
static nvme_cmd_t *nvme_create_nvm_cmd(nvme_namespace_t *, uint8_t,
bd_xfer_t *);
static int nvme_admin_cmd(nvme_cmd_t *, int);
! static void nvme_submit_admin_cmd(nvme_qpair_t *, nvme_cmd_t *);
! static int nvme_submit_io_cmd(nvme_qpair_t *, nvme_cmd_t *);
! static void nvme_submit_cmd_common(nvme_qpair_t *, nvme_cmd_t *);
static nvme_cmd_t *nvme_retrieve_cmd(nvme_t *, nvme_qpair_t *);
static boolean_t nvme_wait_cmd(nvme_cmd_t *, uint_t);
static void nvme_wakeup_cmd(void *);
static void nvme_async_event_task(void *);
*** 269,279 ****
static int nvme_check_specific_cmd_status(nvme_cmd_t *);
static int nvme_check_generic_cmd_status(nvme_cmd_t *);
static inline int nvme_check_cmd_status(nvme_cmd_t *);
static void nvme_abort_cmd(nvme_cmd_t *);
! static int nvme_async_event(nvme_t *);
static int nvme_format_nvm(nvme_t *, uint32_t, uint8_t, boolean_t, uint8_t,
boolean_t, uint8_t);
static int nvme_get_logpage(nvme_t *, void **, size_t *, uint8_t, ...);
static void *nvme_identify(nvme_t *, uint32_t);
static boolean_t nvme_set_features(nvme_t *, uint32_t, uint8_t, uint32_t,
--- 272,282 ----
static int nvme_check_specific_cmd_status(nvme_cmd_t *);
static int nvme_check_generic_cmd_status(nvme_cmd_t *);
static inline int nvme_check_cmd_status(nvme_cmd_t *);
static void nvme_abort_cmd(nvme_cmd_t *);
! static void nvme_async_event(nvme_t *);
static int nvme_format_nvm(nvme_t *, uint32_t, uint8_t, boolean_t, uint8_t,
boolean_t, uint8_t);
static int nvme_get_logpage(nvme_t *, void **, size_t *, uint8_t, ...);
static void *nvme_identify(nvme_t *, uint32_t);
static boolean_t nvme_set_features(nvme_t *, uint32_t, uint8_t, uint32_t,
*** 719,728 ****
--- 722,732 ----
nvme_free_qpair(nvme_qpair_t *qp)
{
int i;
mutex_destroy(&qp->nq_mutex);
+ sema_destroy(&qp->nq_sema);
if (qp->nq_sqdma != NULL)
nvme_free_dma(qp->nq_sqdma);
if (qp->nq_cqdma != NULL)
nvme_free_dma(qp->nq_cqdma);
*** 744,753 ****
--- 748,758 ----
{
nvme_qpair_t *qp = kmem_zalloc(sizeof (*qp), KM_SLEEP);
mutex_init(&qp->nq_mutex, NULL, MUTEX_DRIVER,
DDI_INTR_PRI(nvme->n_intr_pri));
+ sema_init(&qp->nq_sema, nentry, NULL, SEMA_DRIVER, NULL);
if (nvme_zalloc_queue_dma(nvme, nentry, sizeof (nvme_sqe_t),
DDI_DMA_WRITE, &qp->nq_sqdma) != DDI_SUCCESS)
goto fail;
*** 810,831 ****
mutex_destroy(&cmd->nc_mutex);
kmem_cache_free(nvme_cmd_cache, cmd);
}
static int
! nvme_submit_cmd(nvme_qpair_t *qp, nvme_cmd_t *cmd)
{
! nvme_reg_sqtdbl_t tail = { 0 };
! mutex_enter(&qp->nq_mutex);
! if (qp->nq_active_cmds == qp->nq_nentry) {
! mutex_exit(&qp->nq_mutex);
! return (DDI_FAILURE);
! }
cmd->nc_completed = B_FALSE;
/*
* Try to insert the cmd into the active cmd array at the nq_next_cmd
* slot. If the slot is already occupied advance to the next slot and
--- 815,847 ----
mutex_destroy(&cmd->nc_mutex);
kmem_cache_free(nvme_cmd_cache, cmd);
}
+ static void
+ nvme_submit_admin_cmd(nvme_qpair_t *qp, nvme_cmd_t *cmd)
+ {
+ sema_p(&qp->nq_sema);
+ nvme_submit_cmd_common(qp, cmd);
+ }
+
static int
! nvme_submit_io_cmd(nvme_qpair_t *qp, nvme_cmd_t *cmd)
{
! if (sema_tryp(&qp->nq_sema) == 0)
! return (EAGAIN);
! nvme_submit_cmd_common(qp, cmd);
! return (0);
! }
! static void
! nvme_submit_cmd_common(nvme_qpair_t *qp, nvme_cmd_t *cmd)
! {
! nvme_reg_sqtdbl_t tail = { 0 };
+ mutex_enter(&qp->nq_mutex);
cmd->nc_completed = B_FALSE;
/*
* Try to insert the cmd into the active cmd array at the nq_next_cmd
* slot. If the slot is already occupied advance to the next slot and
*** 847,857 ****
tail.b.sqtdbl_sqt = qp->nq_sqtail = (qp->nq_sqtail + 1) % qp->nq_nentry;
nvme_put32(cmd->nc_nvme, qp->nq_sqtdbl, tail.r);
mutex_exit(&qp->nq_mutex);
- return (DDI_SUCCESS);
}
static nvme_cmd_t *
nvme_retrieve_cmd(nvme_t *nvme, nvme_qpair_t *qp)
{
--- 863,872 ----
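For reference, the slot search that the comment in nvme_submit_cmd_common() describes, as a condensed sketch (nq_cmd and nq_next_cmd follow the driver's naming; the loop is paraphrased, not copied):

    while (qp->nq_cmd[qp->nq_next_cmd] != NULL)
            qp->nq_next_cmd = (qp->nq_next_cmd + 1) % qp->nq_nentry;
    cmd->nc_sqe.sqe_cid = qp->nq_next_cmd;
    qp->nq_cmd[qp->nq_next_cmd] = cmd;
    qp->nq_active_cmds++;

With the semaphore taken before entry, at least one slot is guaranteed to be free, so the search terminates.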
*** 893,902 ****
--- 908,918 ----
if (qp->nq_cqhead == 0)
qp->nq_phase = qp->nq_phase ? 0 : 1;
nvme_put32(cmd->nc_nvme, qp->nq_cqhdbl, head.r);
mutex_exit(&qp->nq_mutex);
+ sema_v(&qp->nq_sema);
return (cmd);
}
static int
*** 1361,1371 ****
nvme_t *nvme = cmd->nc_nvme;
nvme_error_log_entry_t *error_log = NULL;
nvme_health_log_t *health_log = NULL;
size_t logsize = 0;
nvme_async_event_t event;
- int ret;
/*
* Check for errors associated with the async request itself. The only
* command-specific error is "async event limit exceeded", which
* indicates a programming error in the driver and causes a panic in
--- 1377,1386 ----
*** 1395,1413 ****
event.r = cmd->nc_cqe.cqe_dw0;
/* Clear CQE and re-submit the async request. */
bzero(&cmd->nc_cqe, sizeof (nvme_cqe_t));
! ret = nvme_submit_cmd(nvme->n_adminq, cmd);
- if (ret != DDI_SUCCESS) {
- dev_err(nvme->n_dip, CE_WARN,
- "!failed to resubmit async event request");
- atomic_inc_32(&nvme->n_async_resubmit_failed);
- nvme_free_cmd(cmd);
- }
-
switch (event.b.ae_type) {
case NVME_ASYNC_TYPE_ERROR:
if (event.b.ae_logpage == NVME_LOGPAGE_ERROR) {
(void) nvme_get_logpage(nvme, (void **)&error_log,
&logsize, event.b.ae_logpage);
--- 1410,1421 ----
event.r = cmd->nc_cqe.cqe_dw0;
/* Clear CQE and re-submit the async request. */
bzero(&cmd->nc_cqe, sizeof (nvme_cqe_t));
! nvme_submit_admin_cmd(nvme->n_adminq, cmd);
switch (event.b.ae_type) {
case NVME_ASYNC_TYPE_ERROR:
if (event.b.ae_logpage == NVME_LOGPAGE_ERROR) {
(void) nvme_get_logpage(nvme, (void **)&error_log,
&logsize, event.b.ae_logpage);
*** 1515,1538 ****
}
static int
nvme_admin_cmd(nvme_cmd_t *cmd, int sec)
{
- int ret;
-
mutex_enter(&cmd->nc_mutex);
! ret = nvme_submit_cmd(cmd->nc_nvme->n_adminq, cmd);
- if (ret != DDI_SUCCESS) {
- mutex_exit(&cmd->nc_mutex);
- dev_err(cmd->nc_nvme->n_dip, CE_WARN,
- "!nvme_submit_cmd failed");
- atomic_inc_32(&cmd->nc_nvme->n_admin_queue_full);
- nvme_free_cmd(cmd);
- return (DDI_FAILURE);
- }
-
if (nvme_wait_cmd(cmd, sec) == B_FALSE) {
/*
* The command timed out. An abort command was posted that
* will take care of the cleanup.
*/
--- 1523,1535 ----
}
static int
nvme_admin_cmd(nvme_cmd_t *cmd, int sec)
{
mutex_enter(&cmd->nc_mutex);
! nvme_submit_admin_cmd(cmd->nc_nvme->n_adminq, cmd);
if (nvme_wait_cmd(cmd, sec) == B_FALSE) {
/*
* The command timed out. An abort command was posted that
* will take care of the cleanup.
*/
*** 1541,1570 ****
mutex_exit(&cmd->nc_mutex);
return (DDI_SUCCESS);
}
! static int
nvme_async_event(nvme_t *nvme)
{
nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
- int ret;
cmd->nc_sqid = 0;
cmd->nc_sqe.sqe_opc = NVME_OPC_ASYNC_EVENT;
cmd->nc_callback = nvme_async_event_task;
! ret = nvme_submit_cmd(nvme->n_adminq, cmd);
!
! if (ret != DDI_SUCCESS) {
! dev_err(nvme->n_dip, CE_WARN,
! "!nvme_submit_cmd failed for ASYNCHRONOUS EVENT");
! nvme_free_cmd(cmd);
! return (DDI_FAILURE);
! }
!
! return (DDI_SUCCESS);
}
static int
nvme_format_nvm(nvme_t *nvme, uint32_t nsid, uint8_t lbaf, boolean_t ms,
uint8_t pi, boolean_t pil, uint8_t ses)
--- 1538,1557 ----
mutex_exit(&cmd->nc_mutex);
return (DDI_SUCCESS);
}
! static void
nvme_async_event(nvme_t *nvme)
{
nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
cmd->nc_sqid = 0;
cmd->nc_sqe.sqe_opc = NVME_OPC_ASYNC_EVENT;
cmd->nc_callback = nvme_async_event_task;
! nvme_submit_admin_cmd(nvme->n_adminq, cmd);
}
static int
nvme_format_nvm(nvme_t *nvme, uint32_t nsid, uint8_t lbaf, boolean_t ms,
uint8_t pi, boolean_t pil, uint8_t ses)
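Because nvme_submit_admin_cmd() sleeps on the semaphore rather than failing, nvme_async_event() can no longer fail and now returns void; the error handling removed from its callers in the following hunks falls out of that. A condensed sketch of the guarantee (the comment is illustrative):

    /* sema_p() blocks until a slot frees up; submission cannot fail. */
    nvme_submit_admin_cmd(nvme->n_adminq, cmd);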
*** 2377,2391 ****
}
/*
* Post an asynchronous event command to catch errors.
*/
! if (nvme_async_event(nvme) != DDI_SUCCESS) {
! dev_err(nvme->n_dip, CE_WARN,
! "!failed to post async event");
! goto fail;
! }
/*
* Identify Controller
*/
nvme->n_idctl = nvme_identify(nvme, 0);
--- 2364,2374 ----
}
/*
* Post an asynchronous event command to catch errors.
*/
! nvme_async_event(nvme);
/*
* Identify Controller
*/
nvme->n_idctl = nvme_identify(nvme, 0);
*** 2606,2622 ****
/*
* Post more asynchronous event commands to reduce event reporting
* latency as suggested by the spec.
*/
! for (i = 1; i != nvme->n_async_event_limit; i++) {
! if (nvme_async_event(nvme) != DDI_SUCCESS) {
! dev_err(nvme->n_dip, CE_WARN,
! "!failed to post async event %d", i);
! goto fail;
! }
! }
return (DDI_SUCCESS);
fail:
(void) nvme_reset(nvme, B_FALSE);
--- 2589,2600 ----
/*
* Post more asynchronous event commands to reduce event reporting
* latency as suggested by the spec.
*/
! for (i = 1; i != nvme->n_async_event_limit; i++)
! nvme_async_event(nvme);
return (DDI_SUCCESS);
fail:
(void) nvme_reset(nvme, B_FALSE);
*** 3276,3288 ****
static int
nvme_bd_cmd(nvme_namespace_t *ns, bd_xfer_t *xfer, uint8_t opc)
{
nvme_t *nvme = ns->ns_nvme;
! nvme_cmd_t *cmd, *ret;
nvme_qpair_t *ioq;
boolean_t poll;
if (nvme->n_dead)
return (EIO);
cmd = nvme_create_nvm_cmd(ns, opc, xfer);
--- 3254,3267 ----
static int
nvme_bd_cmd(nvme_namespace_t *ns, bd_xfer_t *xfer, uint8_t opc)
{
nvme_t *nvme = ns->ns_nvme;
! nvme_cmd_t *cmd;
nvme_qpair_t *ioq;
boolean_t poll;
+ int ret;
if (nvme->n_dead)
return (EIO);
cmd = nvme_create_nvm_cmd(ns, opc, xfer);
*** 3298,3317 ****
* complete immediately after it was submitted, which means we must
* treat both cmd and xfer as if they have been freed already.
*/
poll = (xfer->x_flags & BD_XFER_POLL) != 0;
! if (nvme_submit_cmd(ioq, cmd) != DDI_SUCCESS)
! return (EAGAIN);
if (!poll)
return (0);
do {
! ret = nvme_retrieve_cmd(nvme, ioq);
! if (ret != NULL)
! nvme_bd_xfer_done(ret);
else
drv_usecwait(10);
} while (ioq->nq_active_cmds != 0);
return (0);
--- 3277,3298 ----
* complete immediately after it was submitted, which means we must
* treat both cmd and xfer as if they have been freed already.
*/
poll = (xfer->x_flags & BD_XFER_POLL) != 0;
! ret = nvme_submit_io_cmd(ioq, cmd);
+ if (ret != 0)
+ return (ret);
+
if (!poll)
return (0);
do {
! cmd = nvme_retrieve_cmd(nvme, ioq);
! if (cmd != NULL)
! nvme_bd_xfer_done(cmd);
else
drv_usecwait(10);
} while (ioq->nq_active_cmds != 0);
return (0);