don't block in nvme_bd_cmd
8628 nvme: use a semaphore to guard submission queue
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Jason King <jason.king@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>

*** 42,52 ****
   * vector and will post them to a taskq for completion processing.
   *
   *
   * Command Processing:
   *
!  * NVMe devices can have up to 65536 I/O queue pairs, with each queue holding up
   * to 65536 I/O commands. The driver will configure one I/O queue pair per
   * available interrupt vector, with the queue length usually much smaller than
   * the maximum of 65536. If the hardware doesn't provide enough queues, fewer
   * interrupt vectors will be used.
   *
--- 42,52 ----
   * vector and will post them to a taskq for completion processing.
   *
   *
   * Command Processing:
   *
!  * NVMe devices can have up to 65535 I/O queue pairs, with each queue holding up
   * to 65536 I/O commands. The driver will configure one I/O queue pair per
   * available interrupt vector, with the queue length usually much smaller than
   * the maximum of 65536. If the hardware doesn't provide enough queues, fewer
   * interrupt vectors will be used.
   *
*** 67,77 ****
   * array index is used as command identifier (CID) in the submission queue
   * entry. Some commands may take a very long time to complete, and if the queue
   * wraps around in that time a submission may find the next array slot to still
   * be used by a long-running command. In this case the array is sequentially
   * searched for the next free slot. The length of the command array is the same
!  * as the configured queue length.
   *
   *
   * Polled I/O Support:
   *
   * For kernel core dump support the driver can do polled I/O. As interrupts are
--- 67,78 ----
   * array index is used as command identifier (CID) in the submission queue
   * entry. Some commands may take a very long time to complete, and if the queue
   * wraps around in that time a submission may find the next array slot to still
   * be used by a long-running command. In this case the array is sequentially
   * searched for the next free slot. The length of the command array is the same
!  * as the configured queue length. Queue overrun is prevented by the semaphore,
!  * so a command submission may block if the queue is full.
   *
   *
   * Polled I/O Support:
   *
   * For kernel core dump support the driver can do polled I/O. As interrupts are
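The guard described in the new comment is the classic counting-semaphore pattern: the semaphore starts at the queue depth, every submission decrements it (blocking, or failing, when no slot is free), and every completion increments it again. Below is a minimal, self-contained sketch of that pattern, using POSIX semaphores as a stand-in for the kernel's sema_p()/sema_tryp()/sema_v(); QDEPTH and all function names are illustrative only, not part of the driver.

/*
 * Standalone analog of the semaphore guard above. POSIX semaphores stand
 * in for the illumos sema_p()/sema_tryp()/sema_v() primitives; QDEPTH and
 * the function names here are illustrative, not part of the driver.
 */
#include <errno.h>
#include <semaphore.h>
#include <stdio.h>

#define	QDEPTH	4			/* stand-in for the queue length */

static sem_t qsema;

/* Blocking submission (admin path): sleeps until a slot is free. */
static void
submit_blocking(int id)
{
	(void) sem_wait(&qsema);
	(void) printf("cmd %d submitted\n", id);
}

/* Non-blocking submission (I/O path): EAGAIN when the queue is full. */
static int
submit_nonblocking(int id)
{
	if (sem_trywait(&qsema) != 0)
		return (EAGAIN);
	(void) printf("cmd %d submitted\n", id);
	return (0);
}

/* Completion: releases the slot, waking one blocked submitter. */
static void
complete_one(void)
{
	(void) sem_post(&qsema);
}

int
main(void)
{
	int i;

	(void) sem_init(&qsema, 0, QDEPTH);

	for (i = 0; i < QDEPTH; i++)
		submit_blocking(i);

	/* The queue is now full; the non-blocking path must refuse. */
	if (submit_nonblocking(QDEPTH) == EAGAIN)
		(void) printf("cmd %d refused: queue full\n", QDEPTH);

	complete_one();				/* retire one command... */
	return (submit_nonblocking(QDEPTH));	/* ...now it fits */
}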
*** 255,265 ****
  static nvme_cmd_t *nvme_alloc_cmd(nvme_t *, int);
  static void nvme_free_cmd(nvme_cmd_t *);
  static nvme_cmd_t *nvme_create_nvm_cmd(nvme_namespace_t *, uint8_t,
      bd_xfer_t *);
  static int nvme_admin_cmd(nvme_cmd_t *, int);
! static int nvme_submit_cmd(nvme_qpair_t *, nvme_cmd_t *);
  static nvme_cmd_t *nvme_retrieve_cmd(nvme_t *, nvme_qpair_t *);
  static boolean_t nvme_wait_cmd(nvme_cmd_t *, uint_t);
  static void nvme_wakeup_cmd(void *);
  static void nvme_async_event_task(void *);
--- 256,268 ----
  static nvme_cmd_t *nvme_alloc_cmd(nvme_t *, int);
  static void nvme_free_cmd(nvme_cmd_t *);
  static nvme_cmd_t *nvme_create_nvm_cmd(nvme_namespace_t *, uint8_t,
      bd_xfer_t *);
  static int nvme_admin_cmd(nvme_cmd_t *, int);
! static void nvme_submit_admin_cmd(nvme_qpair_t *, nvme_cmd_t *);
! static int nvme_submit_io_cmd(nvme_qpair_t *, nvme_cmd_t *);
! static void nvme_submit_cmd_common(nvme_qpair_t *, nvme_cmd_t *);
  static nvme_cmd_t *nvme_retrieve_cmd(nvme_t *, nvme_qpair_t *);
  static boolean_t nvme_wait_cmd(nvme_cmd_t *, uint_t);
  static void nvme_wakeup_cmd(void *);
  static void nvme_async_event_task(void *);
*** 269,279 ****
  static int nvme_check_specific_cmd_status(nvme_cmd_t *);
  static int nvme_check_generic_cmd_status(nvme_cmd_t *);
  static inline int nvme_check_cmd_status(nvme_cmd_t *);
  static void nvme_abort_cmd(nvme_cmd_t *);
! static int nvme_async_event(nvme_t *);
  static int nvme_format_nvm(nvme_t *, uint32_t, uint8_t, boolean_t, uint8_t,
      boolean_t, uint8_t);
  static int nvme_get_logpage(nvme_t *, void **, size_t *, uint8_t, ...);
  static void *nvme_identify(nvme_t *, uint32_t);
  static boolean_t nvme_set_features(nvme_t *, uint32_t, uint8_t, uint32_t,
--- 272,282 ----
  static int nvme_check_specific_cmd_status(nvme_cmd_t *);
  static int nvme_check_generic_cmd_status(nvme_cmd_t *);
  static inline int nvme_check_cmd_status(nvme_cmd_t *);
  static void nvme_abort_cmd(nvme_cmd_t *);
! static void nvme_async_event(nvme_t *);
  static int nvme_format_nvm(nvme_t *, uint32_t, uint8_t, boolean_t, uint8_t,
      boolean_t, uint8_t);
  static int nvme_get_logpage(nvme_t *, void **, size_t *, uint8_t, ...);
  static void *nvme_identify(nvme_t *, uint32_t);
  static boolean_t nvme_set_features(nvme_t *, uint32_t, uint8_t, uint32_t,
*** 719,728 ****
--- 722,732 ----
  nvme_free_qpair(nvme_qpair_t *qp)
  {
  	int i;
  
  	mutex_destroy(&qp->nq_mutex);
+ 	sema_destroy(&qp->nq_sema);
  
  	if (qp->nq_sqdma != NULL)
  		nvme_free_dma(qp->nq_sqdma);
  	if (qp->nq_cqdma != NULL)
  		nvme_free_dma(qp->nq_cqdma);
*** 744,753 ****
--- 748,758 ----
  {
  	nvme_qpair_t *qp = kmem_zalloc(sizeof (*qp), KM_SLEEP);
  
  	mutex_init(&qp->nq_mutex, NULL, MUTEX_DRIVER,
  	    DDI_INTR_PRI(nvme->n_intr_pri));
+ 	sema_init(&qp->nq_sema, nentry, NULL, SEMA_DRIVER, NULL);
  
  	if (nvme_zalloc_queue_dma(nvme, nentry, sizeof (nvme_sqe_t),
  	    DDI_DMA_WRITE, &qp->nq_sqdma) != DDI_SUCCESS)
  		goto fail;
*** 810,831 ****
  	mutex_destroy(&cmd->nc_mutex);
  	kmem_cache_free(nvme_cmd_cache, cmd);
  }
  
  static int
! nvme_submit_cmd(nvme_qpair_t *qp, nvme_cmd_t *cmd)
  {
! 	nvme_reg_sqtdbl_t tail = { 0 };
!
! 	mutex_enter(&qp->nq_mutex);
! 	if (qp->nq_active_cmds == qp->nq_nentry) {
! 		mutex_exit(&qp->nq_mutex);
! 		return (DDI_FAILURE);
! 	}
  
  	cmd->nc_completed = B_FALSE;
  
  	/*
  	 * Try to insert the cmd into the active cmd array at the nq_next_cmd
  	 * slot. If the slot is already occupied advance to the next slot and
--- 815,847 ----
  	mutex_destroy(&cmd->nc_mutex);
  	kmem_cache_free(nvme_cmd_cache, cmd);
  }
  
+ static void
+ nvme_submit_admin_cmd(nvme_qpair_t *qp, nvme_cmd_t *cmd)
+ {
+ 	sema_p(&qp->nq_sema);
+ 	nvme_submit_cmd_common(qp, cmd);
+ }
+
  static int
! nvme_submit_io_cmd(nvme_qpair_t *qp, nvme_cmd_t *cmd)
  {
! 	if (sema_tryp(&qp->nq_sema) == 0)
! 		return (EAGAIN);
!
! 	nvme_submit_cmd_common(qp, cmd);
! 	return (0);
! }
!
! static void
! nvme_submit_cmd_common(nvme_qpair_t *qp, nvme_cmd_t *cmd)
! {
! 	nvme_reg_sqtdbl_t tail = { 0 };
  
+ 	mutex_enter(&qp->nq_mutex);
  	cmd->nc_completed = B_FALSE;
  
  	/*
  	 * Try to insert the cmd into the active cmd array at the nq_next_cmd
  	 * slot. If the slot is already occupied advance to the next slot and
*** 847,857 ****
  	tail.b.sqtdbl_sqt = qp->nq_sqtail = (qp->nq_sqtail + 1) % qp->nq_nentry;
  	nvme_put32(cmd->nc_nvme, qp->nq_sqtdbl, tail.r);
  	mutex_exit(&qp->nq_mutex);
- 	return (DDI_SUCCESS);
  }
  
  static nvme_cmd_t *
  nvme_retrieve_cmd(nvme_t *nvme, nvme_qpair_t *qp)
  {
--- 863,872 ----
*** 893,902 ****
--- 908,918 ----
  	if (qp->nq_cqhead == 0)
  		qp->nq_phase = qp->nq_phase ? 0 : 1;
  
  	nvme_put32(cmd->nc_nvme, qp->nq_cqhdbl, head.r);
  	mutex_exit(&qp->nq_mutex);
+ 	sema_v(&qp->nq_sema);
  
  	return (cmd);
  }
  
  static int
*** 1361,1371 ****
  	nvme_t *nvme = cmd->nc_nvme;
  	nvme_error_log_entry_t *error_log = NULL;
  	nvme_health_log_t *health_log = NULL;
  	size_t logsize = 0;
  	nvme_async_event_t event;
- 	int ret;
  
  	/*
  	 * Check for errors associated with the async request itself. The only
  	 * command-specific error is "async event limit exceeded", which
  	 * indicates a programming error in the driver and causes a panic in
--- 1377,1386 ----
*** 1395,1413 ****
  	event.r = cmd->nc_cqe.cqe_dw0;
  
  	/* Clear CQE and re-submit the async request. */
  	bzero(&cmd->nc_cqe, sizeof (nvme_cqe_t));
! 	ret = nvme_submit_cmd(nvme->n_adminq, cmd);
  
- 	if (ret != DDI_SUCCESS) {
- 		dev_err(nvme->n_dip, CE_WARN,
- 		    "!failed to resubmit async event request");
- 		atomic_inc_32(&nvme->n_async_resubmit_failed);
- 		nvme_free_cmd(cmd);
- 	}
-
  	switch (event.b.ae_type) {
  	case NVME_ASYNC_TYPE_ERROR:
  		if (event.b.ae_logpage == NVME_LOGPAGE_ERROR) {
  			(void) nvme_get_logpage(nvme, (void **)&error_log,
  			    &logsize, event.b.ae_logpage);
--- 1410,1421 ----
  	event.r = cmd->nc_cqe.cqe_dw0;
  
  	/* Clear CQE and re-submit the async request. */
  	bzero(&cmd->nc_cqe, sizeof (nvme_cqe_t));
! 	nvme_submit_admin_cmd(nvme->n_adminq, cmd);
  
  	switch (event.b.ae_type) {
  	case NVME_ASYNC_TYPE_ERROR:
  		if (event.b.ae_logpage == NVME_LOGPAGE_ERROR) {
  			(void) nvme_get_logpage(nvme, (void **)&error_log,
  			    &logsize, event.b.ae_logpage);
*** 1515,1538 ****
  }
  
  static int
  nvme_admin_cmd(nvme_cmd_t *cmd, int sec)
  {
- 	int ret;
-
  	mutex_enter(&cmd->nc_mutex);
! 	ret = nvme_submit_cmd(cmd->nc_nvme->n_adminq, cmd);
  
- 	if (ret != DDI_SUCCESS) {
- 		mutex_exit(&cmd->nc_mutex);
- 		dev_err(cmd->nc_nvme->n_dip, CE_WARN,
- 		    "!nvme_submit_cmd failed");
- 		atomic_inc_32(&cmd->nc_nvme->n_admin_queue_full);
- 		nvme_free_cmd(cmd);
- 		return (DDI_FAILURE);
- 	}
-
  	if (nvme_wait_cmd(cmd, sec) == B_FALSE) {
  		/*
  		 * The command timed out. An abort command was posted that
  		 * will take care of the cleanup.
  		 */
--- 1523,1535 ----
  }
  
  static int
  nvme_admin_cmd(nvme_cmd_t *cmd, int sec)
  {
  	mutex_enter(&cmd->nc_mutex);
! 	nvme_submit_admin_cmd(cmd->nc_nvme->n_adminq, cmd);
  
  	if (nvme_wait_cmd(cmd, sec) == B_FALSE) {
  		/*
  		 * The command timed out. An abort command was posted that
  		 * will take care of the cleanup.
  		 */
*** 1541,1570 ****
  	mutex_exit(&cmd->nc_mutex);
  	return (DDI_SUCCESS);
  }
  
! static int
  nvme_async_event(nvme_t *nvme)
  {
  	nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
- 	int ret;
  
  	cmd->nc_sqid = 0;
  	cmd->nc_sqe.sqe_opc = NVME_OPC_ASYNC_EVENT;
  	cmd->nc_callback = nvme_async_event_task;
  
! 	ret = nvme_submit_cmd(nvme->n_adminq, cmd);
!
! 	if (ret != DDI_SUCCESS) {
! 		dev_err(nvme->n_dip, CE_WARN,
! 		    "!nvme_submit_cmd failed for ASYNCHRONOUS EVENT");
! 		nvme_free_cmd(cmd);
! 		return (DDI_FAILURE);
! 	}
!
! 	return (DDI_SUCCESS);
  }
  
  static int
  nvme_format_nvm(nvme_t *nvme, uint32_t nsid, uint8_t lbaf, boolean_t ms,
      uint8_t pi, boolean_t pil, uint8_t ses)
--- 1538,1557 ----
  	mutex_exit(&cmd->nc_mutex);
  	return (DDI_SUCCESS);
  }
  
! static void
  nvme_async_event(nvme_t *nvme)
  {
  	nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
  
  	cmd->nc_sqid = 0;
  	cmd->nc_sqe.sqe_opc = NVME_OPC_ASYNC_EVENT;
  	cmd->nc_callback = nvme_async_event_task;
  
! 	nvme_submit_admin_cmd(nvme->n_adminq, cmd);
  }
  
  static int
  nvme_format_nvm(nvme_t *nvme, uint32_t nsid, uint8_t lbaf, boolean_t ms,
      uint8_t pi, boolean_t pil, uint8_t ses)
*** 2377,2391 ****
  	}
  
  	/*
  	 * Post an asynchronous event command to catch errors.
  	 */
! 	if (nvme_async_event(nvme) != DDI_SUCCESS) {
! 		dev_err(nvme->n_dip, CE_WARN,
! 		    "!failed to post async event");
! 		goto fail;
! 	}
  
  	/*
  	 * Identify Controller
  	 */
  	nvme->n_idctl = nvme_identify(nvme, 0);
--- 2364,2374 ----
  	}
  
  	/*
  	 * Post an asynchronous event command to catch errors.
  	 */
! 	nvme_async_event(nvme);
  
  	/*
  	 * Identify Controller
  	 */
  	nvme->n_idctl = nvme_identify(nvme, 0);
*** 2606,2622 ****
  	/*
  	 * Post more asynchronous event commands to reduce event reporting
  	 * latency as suggested by the spec.
  	 */
! 	for (i = 1; i != nvme->n_async_event_limit; i++) {
! 		if (nvme_async_event(nvme) != DDI_SUCCESS) {
! 			dev_err(nvme->n_dip, CE_WARN,
! 			    "!failed to post async event %d", i);
! 			goto fail;
! 		}
! 	}
  
  	return (DDI_SUCCESS);
  
  fail:
  	(void) nvme_reset(nvme, B_FALSE);
--- 2589,2600 ----
  	/*
  	 * Post more asynchronous event commands to reduce event reporting
  	 * latency as suggested by the spec.
  	 */
! 	for (i = 1; i != nvme->n_async_event_limit; i++)
! 		nvme_async_event(nvme);
  
  	return (DDI_SUCCESS);
  
  fail:
  	(void) nvme_reset(nvme, B_FALSE);
*** 3276,3288 ****
  static int
  nvme_bd_cmd(nvme_namespace_t *ns, bd_xfer_t *xfer, uint8_t opc)
  {
  	nvme_t *nvme = ns->ns_nvme;
! 	nvme_cmd_t *cmd, *ret;
  	nvme_qpair_t *ioq;
  	boolean_t poll;
  
  	if (nvme->n_dead)
  		return (EIO);
  
  	cmd = nvme_create_nvm_cmd(ns, opc, xfer);
--- 3254,3267 ----
  static int
  nvme_bd_cmd(nvme_namespace_t *ns, bd_xfer_t *xfer, uint8_t opc)
  {
  	nvme_t *nvme = ns->ns_nvme;
! 	nvme_cmd_t *cmd;
  	nvme_qpair_t *ioq;
  	boolean_t poll;
+ 	int ret;
  
  	if (nvme->n_dead)
  		return (EIO);
  
  	cmd = nvme_create_nvm_cmd(ns, opc, xfer);
*** 3298,3317 ****
  	 * complete immediately after it was submitted, which means we must
  	 * treat both cmd and xfer as if they have been freed already.
  	 */
  	poll = (xfer->x_flags & BD_XFER_POLL) != 0;
! 	if (nvme_submit_cmd(ioq, cmd) != DDI_SUCCESS)
! 		return (EAGAIN);
  
  	if (!poll)
  		return (0);
  
  	do {
! 		ret = nvme_retrieve_cmd(nvme, ioq);
! 		if (ret != NULL)
! 			nvme_bd_xfer_done(ret);
  		else
  			drv_usecwait(10);
  	} while (ioq->nq_active_cmds != 0);
  
  	return (0);
--- 3277,3298 ----
  	 * complete immediately after it was submitted, which means we must
  	 * treat both cmd and xfer as if they have been freed already.
  	 */
  	poll = (xfer->x_flags & BD_XFER_POLL) != 0;
! 	ret = nvme_submit_io_cmd(ioq, cmd);
  
+ 	if (ret != 0)
+ 		return (ret);
+
  	if (!poll)
  		return (0);
  
  	do {
! 		cmd = nvme_retrieve_cmd(nvme, ioq);
! 		if (cmd != NULL)
! 			nvme_bd_xfer_done(cmd);
  		else
  			drv_usecwait(10);
  	} while (ioq->nq_active_cmds != 0);
  
  	return (0);
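With this change nvme_bd_cmd() no longer blocks when the I/O queue is full: the EAGAIN from nvme_submit_io_cmd() is returned to the caller instead of putting the submitting thread to sleep. Purely for illustration, a hypothetical caller that wanted the old blocking behaviour could retry on the new return value; the helper below is not part of this change, and backing off with delay(9F) is just one possible policy.

/*
 * Hypothetical helper, NOT part of this change: retries the non-blocking
 * submission until a queue slot frees up, restoring blocking semantics
 * for a caller that wants them.
 */
static int
submit_io_cmd_blocking(nvme_qpair_t *ioq, nvme_cmd_t *cmd)
{
	int ret;

	while ((ret = nvme_submit_io_cmd(ioq, cmd)) == EAGAIN)
		delay(1);	/* back off for one clock tick */

	return (ret);
}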