27 * endian systems without changes to the code accessing registers and data
28 * structures used by the hardware.
29 *
30 *
31 * Interrupt Usage:
32 *
33 * The driver will use a single interrupt while configuring the device as the
34 * specification requires, but contrary to the specification it will try to use
35 * a single-message MSI(-X) or FIXED interrupt. Later in the attach process it
36 * will switch to multiple-message MSI(-X) if supported. The driver wants to
37 * have one interrupt vector per CPU, but it will work correctly if fewer are
38 * available. Interrupts can be shared by queues; the interrupt handler will
39 * iterate through the I/O queue array in steps of n_intr_cnt. Usually only
40 * the admin queue will share an interrupt with one I/O queue. The interrupt
41 * handler will retrieve completed commands from all queues sharing an interrupt
42 * vector and will post them to a taskq for completion processing.
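 * For example, with 4 interrupt vectors and 4 I/O queues, vector 0 would
 * service the admin queue and I/O queue 4, while vectors 1 through 3 would
 * each service a single I/O queue.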
43 *
44 *
45 * Command Processing:
46 *
47 * NVMe devices can have up to 65535 I/O queue pairs, with each queue holding up
48 * to 65536 I/O commands. The driver will configure one I/O queue pair per
49 * available interrupt vector, with the queue length usually much smaller than
50 * the maximum of 65536. If the hardware doesn't provide enough queues, fewer
51 * interrupt vectors will be used.
52 *
53 * Additionally, the hardware provides a single special admin queue pair that can
54 * hold up to 4096 admin commands.
55 *
56 * From the hardware perspective both queues of a queue pair are independent,
57 * but they share some driver state: the command array (holding pointers to
58 * commands currently being processed by the hardware) and the active command
59 * counter. Access to the submission side of a queue pair and the shared state
60 * is protected by nq_mutex. The completion side of a queue pair does not need
61 * that protection apart from its access to the shared state; it is called only
62 * in the interrupt handler which does not run concurrently for the same
63 * interrupt vector.
64 *
65 * When a command is submitted to a queue pair the active command counter is
66 * incremented and a pointer to the command is stored in the command array. The
67 * array index is used as command identifier (CID) in the submission queue
68 * entry. Some commands may take a very long time to complete, and if the queue
69 * wraps around in that time a submission may find the next array slot to still
70 * be used by a long-running command. In this case the array is sequentially
71 * searched for the next free slot. The length of the command array is the same
72 * as the configured queue length.
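 *
 * In outline (the full logic is in nvme_submit_cmd() below), slot allocation
 * looks roughly like this:
 *
 *	while (qp->nq_cmd[qp->nq_next_cmd] != NULL)
 *		qp->nq_next_cmd = (qp->nq_next_cmd + 1) % qp->nq_nentry;
 *	qp->nq_cmd[qp->nq_next_cmd] = cmd;
 *	cmd->nc_sqe.sqe_cid = qp->nq_next_cmd;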
73 *
74 *
75 * Polled I/O Support:
76 *
77 * For kernel core dump support, the driver can do polled I/O. As interrupts are
78 * turned off while dumping, the driver will just submit a command in the regular
79 * way, and then repeatedly attempt a command retrieval until it gets the
80 * command back.
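 *
 * A rough sketch of the polled path (see nvme_bd_cmd() below):
 *
 *	(void) nvme_submit_cmd(ioq, cmd);
 *	do {
 *		if ((cmd = nvme_retrieve_cmd(nvme, ioq)) != NULL)
 *			nvme_bd_xfer_done(cmd);
 *		else
 *			drv_usecwait(10);
 *	} while (ioq->nq_active_cmds != 0);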
81 *
82 *
83 * Namespace Support:
84 *
85 * NVMe devices can have multiple namespaces, each being an independent data
86 * store. The driver supports multiple namespaces and creates a blkdev interface
87 * for each namespace found. Namespaces can have various attributes to support
88 * thin provisioning and protection information. This driver does not support
89 * any of this and ignores namespaces that have these attributes.
90 *
91 * As of NVMe 1.1, namespaces can have a 64-bit Extended Unique Identifier
92 * (EUI64). This driver uses the EUI64 if present to generate the devid and
240
241 /* tunable for FORMAT NVM command timeout in seconds, default is 600s */
242 int nvme_format_cmd_timeout = 600;
243
244 static int nvme_attach(dev_info_t *, ddi_attach_cmd_t);
245 static int nvme_detach(dev_info_t *, ddi_detach_cmd_t);
246 static int nvme_quiesce(dev_info_t *);
247 static int nvme_fm_errcb(dev_info_t *, ddi_fm_error_t *, const void *);
248 static int nvme_setup_interrupts(nvme_t *, int, int);
249 static void nvme_release_interrupts(nvme_t *);
250 static uint_t nvme_intr(caddr_t, caddr_t);
251
252 static void nvme_shutdown(nvme_t *, int, boolean_t);
253 static boolean_t nvme_reset(nvme_t *, boolean_t);
254 static int nvme_init(nvme_t *);
255 static nvme_cmd_t *nvme_alloc_cmd(nvme_t *, int);
256 static void nvme_free_cmd(nvme_cmd_t *);
257 static nvme_cmd_t *nvme_create_nvm_cmd(nvme_namespace_t *, uint8_t,
258 bd_xfer_t *);
259 static int nvme_admin_cmd(nvme_cmd_t *, int);
260 static int nvme_submit_cmd(nvme_qpair_t *, nvme_cmd_t *);
261 static nvme_cmd_t *nvme_retrieve_cmd(nvme_t *, nvme_qpair_t *);
262 static boolean_t nvme_wait_cmd(nvme_cmd_t *, uint_t);
263 static void nvme_wakeup_cmd(void *);
264 static void nvme_async_event_task(void *);
265
266 static int nvme_check_unknown_cmd_status(nvme_cmd_t *);
267 static int nvme_check_vendor_cmd_status(nvme_cmd_t *);
268 static int nvme_check_integrity_cmd_status(nvme_cmd_t *);
269 static int nvme_check_specific_cmd_status(nvme_cmd_t *);
270 static int nvme_check_generic_cmd_status(nvme_cmd_t *);
271 static inline int nvme_check_cmd_status(nvme_cmd_t *);
272
273 static void nvme_abort_cmd(nvme_cmd_t *);
274 static int nvme_async_event(nvme_t *);
275 static int nvme_format_nvm(nvme_t *, uint32_t, uint8_t, boolean_t, uint8_t,
276 boolean_t, uint8_t);
277 static int nvme_get_logpage(nvme_t *, void **, size_t *, uint8_t, ...);
278 static void *nvme_identify(nvme_t *, uint32_t);
279 static boolean_t nvme_set_features(nvme_t *, uint32_t, uint8_t, uint32_t,
280 uint32_t *);
281 static boolean_t nvme_get_features(nvme_t *, uint32_t, uint8_t, uint32_t *,
282 void **, size_t *);
283 static boolean_t nvme_write_cache_set(nvme_t *, boolean_t);
284 static int nvme_set_nqueues(nvme_t *, uint16_t);
285
286 static void nvme_free_dma(nvme_dma_t *);
287 static int nvme_zalloc_dma(nvme_t *, size_t, uint_t, ddi_dma_attr_t *,
288 nvme_dma_t **);
289 static int nvme_zalloc_queue_dma(nvme_t *, uint32_t, uint16_t, uint_t,
290 nvme_dma_t **);
291 static void nvme_free_qpair(nvme_qpair_t *);
292 static int nvme_alloc_qpair(nvme_t *, uint32_t, nvme_qpair_t **, int);
293 static int nvme_create_io_qpair(nvme_t *, nvme_qpair_t *, uint16_t);
294
704 goto fail;
705 }
706
707 return (DDI_SUCCESS);
708
709 fail:
710 if (*dma) {
711 nvme_free_dma(*dma);
712 *dma = NULL;
713 }
714
715 return (DDI_FAILURE);
716 }
717
718 static void
719 nvme_free_qpair(nvme_qpair_t *qp)
720 {
721 int i;
722
723 mutex_destroy(&qp->nq_mutex);
724
725 if (qp->nq_sqdma != NULL)
726 nvme_free_dma(qp->nq_sqdma);
727 if (qp->nq_cqdma != NULL)
728 nvme_free_dma(qp->nq_cqdma);
729
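	/*
	 * Free any commands that are still outstanding on this queue pair.
	 */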
730 if (qp->nq_active_cmds > 0)
731 for (i = 0; i != qp->nq_nentry; i++)
732 if (qp->nq_cmd[i] != NULL)
733 nvme_free_cmd(qp->nq_cmd[i]);
734
735 if (qp->nq_cmd != NULL)
736 kmem_free(qp->nq_cmd, sizeof (nvme_cmd_t *) * qp->nq_nentry);
737
738 kmem_free(qp, sizeof (nvme_qpair_t));
739 }
740
741 static int
742 nvme_alloc_qpair(nvme_t *nvme, uint32_t nentry, nvme_qpair_t **nqp,
743 int idx)
744 {
745 nvme_qpair_t *qp = kmem_zalloc(sizeof (*qp), KM_SLEEP);
746
747 mutex_init(&qp->nq_mutex, NULL, MUTEX_DRIVER,
748 DDI_INTR_PRI(nvme->n_intr_pri));
749
750 if (nvme_zalloc_queue_dma(nvme, nentry, sizeof (nvme_sqe_t),
751 DDI_DMA_WRITE, &qp->nq_sqdma) != DDI_SUCCESS)
752 goto fail;
753
754 if (nvme_zalloc_queue_dma(nvme, nentry, sizeof (nvme_cqe_t),
755 DDI_DMA_READ, &qp->nq_cqdma) != DDI_SUCCESS)
756 goto fail;
757
758 qp->nq_sq = (nvme_sqe_t *)qp->nq_sqdma->nd_memp;
759 qp->nq_cq = (nvme_cqe_t *)qp->nq_cqdma->nd_memp;
760 qp->nq_nentry = nentry;
761
762 qp->nq_sqtdbl = NVME_REG_SQTDBL(nvme, idx);
763 qp->nq_cqhdbl = NVME_REG_CQHDBL(nvme, idx);
764
765 qp->nq_cmd = kmem_zalloc(sizeof (nvme_cmd_t *) * nentry, KM_SLEEP);
766 qp->nq_next_cmd = 0;
767
768 *nqp = qp;
795 }
796
797 static void
798 nvme_free_cmd(nvme_cmd_t *cmd)
799 {
800 if (cmd->nc_dma) {
801 if (cmd->nc_dma->nd_cached)
802 kmem_cache_free(cmd->nc_nvme->n_prp_cache,
803 cmd->nc_dma);
804 else
805 nvme_free_dma(cmd->nc_dma);
806 cmd->nc_dma = NULL;
807 }
808
809 cv_destroy(&cmd->nc_cv);
810 mutex_destroy(&cmd->nc_mutex);
811
812 kmem_cache_free(nvme_cmd_cache, cmd);
813 }
814
815 static int
816 nvme_submit_cmd(nvme_qpair_t *qp, nvme_cmd_t *cmd)
817 {
818 nvme_reg_sqtdbl_t tail = { 0 };
819
820 mutex_enter(&qp->nq_mutex);
821
822 if (qp->nq_active_cmds == qp->nq_nentry) {
823 mutex_exit(&qp->nq_mutex);
824 return (DDI_FAILURE);
825 }
826
827 cmd->nc_completed = B_FALSE;
828
829 /*
830 * Try to insert the cmd into the active cmd array at the nq_next_cmd
831	 * slot. If the slot is already occupied, advance to the next slot and
832	 * try again. This can happen for long-running commands like async event
833 * requests.
834 */
835 while (qp->nq_cmd[qp->nq_next_cmd] != NULL)
836 qp->nq_next_cmd = (qp->nq_next_cmd + 1) % qp->nq_nentry;
837 qp->nq_cmd[qp->nq_next_cmd] = cmd;
838
839 qp->nq_active_cmds++;
840
841 cmd->nc_sqe.sqe_cid = qp->nq_next_cmd;
842 bcopy(&cmd->nc_sqe, &qp->nq_sq[qp->nq_sqtail], sizeof (nvme_sqe_t));
843 (void) ddi_dma_sync(qp->nq_sqdma->nd_dmah,
844 sizeof (nvme_sqe_t) * qp->nq_sqtail,
845 sizeof (nvme_sqe_t), DDI_DMA_SYNC_FORDEV);
846 qp->nq_next_cmd = (qp->nq_next_cmd + 1) % qp->nq_nentry;
847
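	/* Advance the SQ tail and ring the doorbell to notify the hardware. */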
848 tail.b.sqtdbl_sqt = qp->nq_sqtail = (qp->nq_sqtail + 1) % qp->nq_nentry;
849 nvme_put32(cmd->nc_nvme, qp->nq_sqtdbl, tail.r);
850
851 mutex_exit(&qp->nq_mutex);
852 return (DDI_SUCCESS);
853 }
854
855 static nvme_cmd_t *
856 nvme_retrieve_cmd(nvme_t *nvme, nvme_qpair_t *qp)
857 {
858 nvme_reg_cqhdbl_t head = { 0 };
859
860 nvme_cqe_t *cqe;
861 nvme_cmd_t *cmd;
862
863 (void) ddi_dma_sync(qp->nq_cqdma->nd_dmah, 0,
864 sizeof (nvme_cqe_t) * qp->nq_nentry, DDI_DMA_SYNC_FORKERNEL);
865
866 mutex_enter(&qp->nq_mutex);
867 cqe = &qp->nq_cq[qp->nq_cqhead];
868
869 /* Check phase tag of CQE. Hardware inverts it for new entries. */
870 if (cqe->cqe_sf.sf_p == qp->nq_phase) {
871 mutex_exit(&qp->nq_mutex);
872 return (NULL);
878 cmd = qp->nq_cmd[cqe->cqe_cid];
879 qp->nq_cmd[cqe->cqe_cid] = NULL;
880 qp->nq_active_cmds--;
881
882 ASSERT(cmd != NULL);
883 ASSERT(cmd->nc_nvme == nvme);
884 ASSERT(cmd->nc_sqid == cqe->cqe_sqid);
885 ASSERT(cmd->nc_sqe.sqe_cid == cqe->cqe_cid);
886 bcopy(cqe, &cmd->nc_cqe, sizeof (nvme_cqe_t));
887
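	/* The CQE reports the current SQ head pointer for this submission queue. */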
888 qp->nq_sqhead = cqe->cqe_sqhd;
889
890 head.b.cqhdbl_cqh = qp->nq_cqhead = (qp->nq_cqhead + 1) % qp->nq_nentry;
891
892 /* Toggle phase on wrap-around. */
893 if (qp->nq_cqhead == 0)
894 qp->nq_phase = qp->nq_phase ? 0 : 1;
895
896 nvme_put32(cmd->nc_nvme, qp->nq_cqhdbl, head.r);
897 mutex_exit(&qp->nq_mutex);
898
899 return (cmd);
900 }
901
902 static int
903 nvme_check_unknown_cmd_status(nvme_cmd_t *cmd)
904 {
905 nvme_cqe_t *cqe = &cmd->nc_cqe;
906
907 dev_err(cmd->nc_nvme->n_dip, CE_WARN,
908 "!unknown command status received: opc = %x, sqid = %d, cid = %d, "
909 "sc = %x, sct = %x, dnr = %d, m = %d", cmd->nc_sqe.sqe_opc,
910 cqe->cqe_sqid, cqe->cqe_cid, cqe->cqe_sf.sf_sc, cqe->cqe_sf.sf_sct,
911 cqe->cqe_sf.sf_dnr, cqe->cqe_sf.sf_m);
912
913 if (cmd->nc_xfer != NULL)
914 bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
915
916 if (cmd->nc_nvme->n_strict_version) {
917 cmd->nc_nvme->n_dead = B_TRUE;
1346 if (cmd->nc_callback == nvme_abort_cmd_cb) {
1347 mutex_exit(&cmd->nc_mutex);
1348 nvme_abort_cmd_cb(cmd);
1349 return;
1350 }
1351
1352 cmd->nc_completed = B_TRUE;
1353 cv_signal(&cmd->nc_cv);
1354 mutex_exit(&cmd->nc_mutex);
1355 }
1356
1357 static void
1358 nvme_async_event_task(void *arg)
1359 {
1360 nvme_cmd_t *cmd = arg;
1361 nvme_t *nvme = cmd->nc_nvme;
1362 nvme_error_log_entry_t *error_log = NULL;
1363 nvme_health_log_t *health_log = NULL;
1364 size_t logsize = 0;
1365 nvme_async_event_t event;
1366 int ret;
1367
1368 /*
1369 * Check for errors associated with the async request itself. The only
1370 * command-specific error is "async event limit exceeded", which
1371 * indicates a programming error in the driver and causes a panic in
1372 * nvme_check_cmd_status().
1373 *
1374 * Other possible errors are various scenarios where the async request
1375 * was aborted, or internal errors in the device. Internal errors are
1376	 * reported to FMA; the command aborts need no special handling here.
1377 */
1378 if (nvme_check_cmd_status(cmd)) {
1379 dev_err(cmd->nc_nvme->n_dip, CE_WARN,
1380 "!async event request returned failure, sct = %x, "
1381 "sc = %x, dnr = %d, m = %d", cmd->nc_cqe.cqe_sf.sf_sct,
1382 cmd->nc_cqe.cqe_sf.sf_sc, cmd->nc_cqe.cqe_sf.sf_dnr,
1383 cmd->nc_cqe.cqe_sf.sf_m);
1384
1385 if (cmd->nc_cqe.cqe_sf.sf_sct == NVME_CQE_SCT_GENERIC &&
1386 cmd->nc_cqe.cqe_sf.sf_sc == NVME_CQE_SC_GEN_INTERNAL_ERR) {
1387 cmd->nc_nvme->n_dead = B_TRUE;
1388 ddi_fm_service_impact(cmd->nc_nvme->n_dip,
1389 DDI_SERVICE_LOST);
1390 }
1391 nvme_free_cmd(cmd);
1392 return;
1393 }
1394
1395
1396 event.r = cmd->nc_cqe.cqe_dw0;
1397
1398 /* Clear CQE and re-submit the async request. */
1399 bzero(&cmd->nc_cqe, sizeof (nvme_cqe_t));
1400 ret = nvme_submit_cmd(nvme->n_adminq, cmd);
1401
1402 if (ret != DDI_SUCCESS) {
1403 dev_err(nvme->n_dip, CE_WARN,
1404 "!failed to resubmit async event request");
1405 atomic_inc_32(&nvme->n_async_resubmit_failed);
1406 nvme_free_cmd(cmd);
1407 }
1408
1409 switch (event.b.ae_type) {
1410 case NVME_ASYNC_TYPE_ERROR:
1411 if (event.b.ae_logpage == NVME_LOGPAGE_ERROR) {
1412 (void) nvme_get_logpage(nvme, (void **)&error_log,
1413 &logsize, event.b.ae_logpage);
1414 } else {
1415 dev_err(nvme->n_dip, CE_WARN, "!wrong logpage in "
1416 "async event reply: %d", event.b.ae_logpage);
1417 atomic_inc_32(&nvme->n_wrong_logpage);
1418 }
1419
1420 switch (event.b.ae_info) {
1421 case NVME_ASYNC_ERROR_INV_SQ:
1422 dev_err(nvme->n_dip, CE_PANIC, "programming error: "
1423 "invalid submission queue");
1424 return;
1425
1426 case NVME_ASYNC_ERROR_INV_DBL:
1427 dev_err(nvme->n_dip, CE_PANIC, "programming error: "
1428 "invalid doorbell write value");
1500 break;
1501
1502 default:
1503 dev_err(nvme->n_dip, CE_WARN, "!unknown async event received, "
1504 "type = %x, info = %x, logpage = %x", event.b.ae_type,
1505 event.b.ae_info, event.b.ae_logpage);
1506 atomic_inc_32(&nvme->n_unknown_event);
1507 break;
1508 }
1509
1510 if (error_log)
1511 kmem_free(error_log, logsize);
1512
1513 if (health_log)
1514 kmem_free(health_log, logsize);
1515 }
1516
1517 static int
1518 nvme_admin_cmd(nvme_cmd_t *cmd, int sec)
1519 {
1520 int ret;
1521
1522 mutex_enter(&cmd->nc_mutex);
1523 ret = nvme_submit_cmd(cmd->nc_nvme->n_adminq, cmd);
1524
1525 if (ret != DDI_SUCCESS) {
1526 mutex_exit(&cmd->nc_mutex);
1527 dev_err(cmd->nc_nvme->n_dip, CE_WARN,
1528 "!nvme_submit_cmd failed");
1529 atomic_inc_32(&cmd->nc_nvme->n_admin_queue_full);
1530 nvme_free_cmd(cmd);
1531 return (DDI_FAILURE);
1532 }
1533
1534 if (nvme_wait_cmd(cmd, sec) == B_FALSE) {
1535 /*
1536 * The command timed out. An abort command was posted that
1537 * will take care of the cleanup.
1538 */
1539 return (DDI_FAILURE);
1540 }
1541 mutex_exit(&cmd->nc_mutex);
1542
1543 return (DDI_SUCCESS);
1544 }
1545
1546 static int
1547 nvme_async_event(nvme_t *nvme)
1548 {
1549 nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
1550 int ret;
1551
1552 cmd->nc_sqid = 0;
1553 cmd->nc_sqe.sqe_opc = NVME_OPC_ASYNC_EVENT;
1554 cmd->nc_callback = nvme_async_event_task;
1555
1556 ret = nvme_submit_cmd(nvme->n_adminq, cmd);
1557
1558 if (ret != DDI_SUCCESS) {
1559 dev_err(nvme->n_dip, CE_WARN,
1560 "!nvme_submit_cmd failed for ASYNCHRONOUS EVENT");
1561 nvme_free_cmd(cmd);
1562 return (DDI_FAILURE);
1563 }
1564
1565 return (DDI_SUCCESS);
1566 }
1567
1568 static int
1569 nvme_format_nvm(nvme_t *nvme, uint32_t nsid, uint8_t lbaf, boolean_t ms,
1570 uint8_t pi, boolean_t pil, uint8_t ses)
1571 {
1572 nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
1573 nvme_format_nvm_t format_nvm = { 0 };
1574 int ret;
1575
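	/*
	 * Assemble the Format NVM parameters: LBA format, metadata setting,
	 * protection information and location, and secure erase setting, each
	 * masked to its defined field width.
	 */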
1576 format_nvm.b.fm_lbaf = lbaf & 0xf;
1577 format_nvm.b.fm_ms = ms ? 1 : 0;
1578 format_nvm.b.fm_pi = pi & 0x7;
1579 format_nvm.b.fm_pil = pil ? 1 : 0;
1580 format_nvm.b.fm_ses = ses & 0x7;
1581
1582 cmd->nc_sqid = 0;
1583 cmd->nc_callback = nvme_wakeup_cmd;
1584 cmd->nc_sqe.sqe_nsid = nsid;
1585 cmd->nc_sqe.sqe_opc = NVME_OPC_NVM_FORMAT;
2362 */
2363 sema_init(&nvme->n_abort_sema, 1, NULL, SEMA_DRIVER, NULL);
2364
2365 /*
2366 * Setup initial interrupt for admin queue.
2367 */
2368 if ((nvme_setup_interrupts(nvme, DDI_INTR_TYPE_MSIX, 1)
2369 != DDI_SUCCESS) &&
2370 (nvme_setup_interrupts(nvme, DDI_INTR_TYPE_MSI, 1)
2371 != DDI_SUCCESS) &&
2372 (nvme_setup_interrupts(nvme, DDI_INTR_TYPE_FIXED, 1)
2373 != DDI_SUCCESS)) {
2374 dev_err(nvme->n_dip, CE_WARN,
2375 "!failed to setup initial interrupt");
2376 goto fail;
2377 }
2378
2379 /*
2380 * Post an asynchronous event command to catch errors.
2381 */
2382 if (nvme_async_event(nvme) != DDI_SUCCESS) {
2383 dev_err(nvme->n_dip, CE_WARN,
2384 "!failed to post async event");
2385 goto fail;
2386 }
2387
2388 /*
2389 * Identify Controller
2390 */
2391 nvme->n_idctl = nvme_identify(nvme, 0);
2392 if (nvme->n_idctl == NULL) {
2393 dev_err(nvme->n_dip, CE_WARN,
2394 "!failed to identify controller");
2395 goto fail;
2396 }
2397
2398 /*
2399 * Get Vendor & Product ID
2400 */
2401 bcopy(nvme->n_idctl->id_model, model, sizeof (nvme->n_idctl->id_model));
2402 model[sizeof (nvme->n_idctl->id_model)] = '\0';
2403 sata_split_model(model, &vendor, &product);
2404
2405 if (vendor == NULL)
2406 nvme->n_vendor = strdup("NVMe");
2591 for (i = 1; i != nvme->n_ioq_count + 1; i++) {
2592 if (nvme_alloc_qpair(nvme, nvme->n_io_queue_len,
2593 &nvme->n_ioq[i], i) != DDI_SUCCESS) {
2594 dev_err(nvme->n_dip, CE_WARN,
2595 "!unable to allocate I/O qpair %d", i);
2596 goto fail;
2597 }
2598
2599 if (nvme_create_io_qpair(nvme, nvme->n_ioq[i], i)
2600 != DDI_SUCCESS) {
2601 dev_err(nvme->n_dip, CE_WARN,
2602 "!unable to create I/O qpair %d", i);
2603 goto fail;
2604 }
2605 }
2606
2607 /*
2608	 * Post more asynchronous event commands to reduce event reporting
2609 * latency as suggested by the spec.
2610 */
2611 for (i = 1; i != nvme->n_async_event_limit; i++) {
2612 if (nvme_async_event(nvme) != DDI_SUCCESS) {
2613 dev_err(nvme->n_dip, CE_WARN,
2614 "!failed to post async event %d", i);
2615 goto fail;
2616 }
2617 }
2618
2619 return (DDI_SUCCESS);
2620
2621 fail:
2622 (void) nvme_reset(nvme, B_FALSE);
2623 return (DDI_FAILURE);
2624 }
2625
2626 static uint_t
2627 nvme_intr(caddr_t arg1, caddr_t arg2)
2628 {
2629 /*LINTED: E_PTR_BAD_CAST_ALIGN*/
2630 nvme_t *nvme = (nvme_t *)arg1;
2631 int inum = (int)(uintptr_t)arg2;
2632 int ccnt = 0;
2633 int qnum;
2634 nvme_cmd_t *cmd;
2635
2636 if (inum >= nvme->n_intr_cnt)
2637 return (DDI_INTR_UNCLAIMED);
3261
3262 static int
3263 nvme_bd_mediainfo(void *arg, bd_media_t *media)
3264 {
3265 nvme_namespace_t *ns = arg;
3266
3267 media->m_nblks = ns->ns_block_count;
3268 media->m_blksize = ns->ns_block_size;
3269 media->m_readonly = B_FALSE;
3270 media->m_solidstate = B_TRUE;
3271
3272 media->m_pblksize = ns->ns_best_block_size;
3273
3274 return (0);
3275 }
3276
3277 static int
3278 nvme_bd_cmd(nvme_namespace_t *ns, bd_xfer_t *xfer, uint8_t opc)
3279 {
3280 nvme_t *nvme = ns->ns_nvme;
3281 nvme_cmd_t *cmd, *ret;
3282 nvme_qpair_t *ioq;
3283 boolean_t poll;
3284
3285 if (nvme->n_dead)
3286 return (EIO);
3287
3288 cmd = nvme_create_nvm_cmd(ns, opc, xfer);
3289 if (cmd == NULL)
3290 return (ENOMEM);
3291
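	/*
	 * Pick an I/O queue based on the current CPU to spread the load
	 * across all available queue pairs.
	 */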
3292 cmd->nc_sqid = (CPU->cpu_id % nvme->n_ioq_count) + 1;
3293 ASSERT(cmd->nc_sqid <= nvme->n_ioq_count);
3294 ioq = nvme->n_ioq[cmd->nc_sqid];
3295
3296 /*
3297 * Get the polling flag before submitting the command. The command may
3298 * complete immediately after it was submitted, which means we must
3299 * treat both cmd and xfer as if they have been freed already.
3300 */
3301 poll = (xfer->x_flags & BD_XFER_POLL) != 0;
3302
3303 if (nvme_submit_cmd(ioq, cmd) != DDI_SUCCESS)
3304 return (EAGAIN);
3305
3306 if (!poll)
3307 return (0);
3308
3309 do {
3310 ret = nvme_retrieve_cmd(nvme, ioq);
3311 if (ret != NULL)
3312 nvme_bd_xfer_done(ret);
3313 else
3314 drv_usecwait(10);
3315 } while (ioq->nq_active_cmds != 0);
3316
3317 return (0);
3318 }
3319
3320 static int
3321 nvme_bd_read(void *arg, bd_xfer_t *xfer)
3322 {
3323 nvme_namespace_t *ns = arg;
3324
3325 return (nvme_bd_cmd(ns, xfer, NVME_OPC_NVM_READ));
3326 }
3327
3328 static int
3329 nvme_bd_write(void *arg, bd_xfer_t *xfer)
3330 {
3331 nvme_namespace_t *ns = arg;
3332
|
27 * endian systems without changes to the code accessing registers and data
28 * structures used by the hardware.
29 *
30 *
31 * Interrupt Usage:
32 *
33 * The driver will use a single interrupt while configuring the device as the
34 * specification requires, but contrary to the specification it will try to use
35 * a single-message MSI(-X) or FIXED interrupt. Later in the attach process it
36 * will switch to multiple-message MSI(-X) if supported. The driver wants to
37 * have one interrupt vector per CPU, but it will work correctly if fewer are
38 * available. Interrupts can be shared by queues; the interrupt handler will
39 * iterate through the I/O queue array in steps of n_intr_cnt. Usually only
40 * the admin queue will share an interrupt with one I/O queue. The interrupt
41 * handler will retrieve completed commands from all queues sharing an interrupt
42 * vector and will post them to a taskq for completion processing.
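 * For example, with 4 interrupt vectors and 4 I/O queues, vector 0 would
 * service the admin queue and I/O queue 4, while vectors 1 through 3 would
 * each service a single I/O queue.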
43 *
44 *
45 * Command Processing:
46 *
47 * NVMe devices can have up to 65535 I/O queue pairs, with each queue holding up
48 * to 65536 I/O commands. The driver will configure one I/O queue pair per
49 * available interrupt vector, with the queue length usually much smaller than
50 * the maximum of 65536. If the hardware doesn't provide enough queues, fewer
51 * interrupt vectors will be used.
52 *
53 * Additionally, the hardware provides a single special admin queue pair that can
54 * hold up to 4096 admin commands.
55 *
56 * From the hardware perspective both queues of a queue pair are independent,
57 * but they share some driver state: the command array (holding pointers to
58 * commands currently being processed by the hardware) and the active command
59 * counter. Access to the submission side of a queue pair and the shared state
60 * is protected by nq_mutex. The completion side of a queue pair does not need
61 * that protection apart from its access to the shared state; it is called only
62 * in the interrupt handler which does not run concurrently for the same
63 * interrupt vector.
64 *
65 * When a command is submitted to a queue pair the active command counter is
66 * incremented and a pointer to the command is stored in the command array. The
67 * array index is used as command identifier (CID) in the submission queue
68 * entry. Some commands may take a very long time to complete, and if the queue
69 * wraps around in that time a submission may find the next array slot to still
70 * be used by a long-running command. In this case the array is sequentially
71 * searched for the next free slot. The length of the command array is the same
72 * as the configured queue length. Queue overrun is prevented by the semaphore,
73 * so a command submission may block if the queue is full.
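 *
 * In outline (the full logic is in nvme_submit_cmd_common() below), slot
 * allocation looks roughly like this:
 *
 *	while (qp->nq_cmd[qp->nq_next_cmd] != NULL)
 *		qp->nq_next_cmd = (qp->nq_next_cmd + 1) % qp->nq_nentry;
 *	qp->nq_cmd[qp->nq_next_cmd] = cmd;
 *	cmd->nc_sqe.sqe_cid = qp->nq_next_cmd;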
74 *
75 *
76 * Polled I/O Support:
77 *
78 * For kernel core dump support, the driver can do polled I/O. As interrupts are
79 * turned off while dumping, the driver will just submit a command in the regular
80 * way, and then repeatedly attempt a command retrieval until it gets the
81 * command back.
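 *
 * A rough sketch of the polled path (see nvme_bd_cmd() below):
 *
 *	(void) nvme_submit_io_cmd(ioq, cmd);
 *	do {
 *		if ((cmd = nvme_retrieve_cmd(nvme, ioq)) != NULL)
 *			nvme_bd_xfer_done(cmd);
 *		else
 *			drv_usecwait(10);
 *	} while (ioq->nq_active_cmds != 0);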
82 *
83 *
84 * Namespace Support:
85 *
86 * NVMe devices can have multiple namespaces, each being an independent data
87 * store. The driver supports multiple namespaces and creates a blkdev interface
88 * for each namespace found. Namespaces can have various attributes to support
89 * thin provisioning and protection information. This driver does not support
90 * any of this and ignores namespaces that have these attributes.
91 *
92 * As of NVMe 1.1, namespaces can have a 64-bit Extended Unique Identifier
93 * (EUI64). This driver uses the EUI64 if present to generate the devid and
241
242 /* tunable for FORMAT NVM command timeout in seconds, default is 600s */
243 int nvme_format_cmd_timeout = 600;
244
245 static int nvme_attach(dev_info_t *, ddi_attach_cmd_t);
246 static int nvme_detach(dev_info_t *, ddi_detach_cmd_t);
247 static int nvme_quiesce(dev_info_t *);
248 static int nvme_fm_errcb(dev_info_t *, ddi_fm_error_t *, const void *);
249 static int nvme_setup_interrupts(nvme_t *, int, int);
250 static void nvme_release_interrupts(nvme_t *);
251 static uint_t nvme_intr(caddr_t, caddr_t);
252
253 static void nvme_shutdown(nvme_t *, int, boolean_t);
254 static boolean_t nvme_reset(nvme_t *, boolean_t);
255 static int nvme_init(nvme_t *);
256 static nvme_cmd_t *nvme_alloc_cmd(nvme_t *, int);
257 static void nvme_free_cmd(nvme_cmd_t *);
258 static nvme_cmd_t *nvme_create_nvm_cmd(nvme_namespace_t *, uint8_t,
259 bd_xfer_t *);
260 static int nvme_admin_cmd(nvme_cmd_t *, int);
261 static void nvme_submit_admin_cmd(nvme_qpair_t *, nvme_cmd_t *);
262 static int nvme_submit_io_cmd(nvme_qpair_t *, nvme_cmd_t *);
263 static void nvme_submit_cmd_common(nvme_qpair_t *, nvme_cmd_t *);
264 static nvme_cmd_t *nvme_retrieve_cmd(nvme_t *, nvme_qpair_t *);
265 static boolean_t nvme_wait_cmd(nvme_cmd_t *, uint_t);
266 static void nvme_wakeup_cmd(void *);
267 static void nvme_async_event_task(void *);
268
269 static int nvme_check_unknown_cmd_status(nvme_cmd_t *);
270 static int nvme_check_vendor_cmd_status(nvme_cmd_t *);
271 static int nvme_check_integrity_cmd_status(nvme_cmd_t *);
272 static int nvme_check_specific_cmd_status(nvme_cmd_t *);
273 static int nvme_check_generic_cmd_status(nvme_cmd_t *);
274 static inline int nvme_check_cmd_status(nvme_cmd_t *);
275
276 static void nvme_abort_cmd(nvme_cmd_t *);
277 static void nvme_async_event(nvme_t *);
278 static int nvme_format_nvm(nvme_t *, uint32_t, uint8_t, boolean_t, uint8_t,
279 boolean_t, uint8_t);
280 static int nvme_get_logpage(nvme_t *, void **, size_t *, uint8_t, ...);
281 static void *nvme_identify(nvme_t *, uint32_t);
282 static boolean_t nvme_set_features(nvme_t *, uint32_t, uint8_t, uint32_t,
283 uint32_t *);
284 static boolean_t nvme_get_features(nvme_t *, uint32_t, uint8_t, uint32_t *,
285 void **, size_t *);
286 static boolean_t nvme_write_cache_set(nvme_t *, boolean_t);
287 static int nvme_set_nqueues(nvme_t *, uint16_t);
288
289 static void nvme_free_dma(nvme_dma_t *);
290 static int nvme_zalloc_dma(nvme_t *, size_t, uint_t, ddi_dma_attr_t *,
291 nvme_dma_t **);
292 static int nvme_zalloc_queue_dma(nvme_t *, uint32_t, uint16_t, uint_t,
293 nvme_dma_t **);
294 static void nvme_free_qpair(nvme_qpair_t *);
295 static int nvme_alloc_qpair(nvme_t *, uint32_t, nvme_qpair_t **, int);
296 static int nvme_create_io_qpair(nvme_t *, nvme_qpair_t *, uint16_t);
297
707 goto fail;
708 }
709
710 return (DDI_SUCCESS);
711
712 fail:
713 if (*dma) {
714 nvme_free_dma(*dma);
715 *dma = NULL;
716 }
717
718 return (DDI_FAILURE);
719 }
720
721 static void
722 nvme_free_qpair(nvme_qpair_t *qp)
723 {
724 int i;
725
726 mutex_destroy(&qp->nq_mutex);
727 sema_destroy(&qp->nq_sema);
728
729 if (qp->nq_sqdma != NULL)
730 nvme_free_dma(qp->nq_sqdma);
731 if (qp->nq_cqdma != NULL)
732 nvme_free_dma(qp->nq_cqdma);
733
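	/*
	 * Free any commands that are still outstanding on this queue pair.
	 */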
734 if (qp->nq_active_cmds > 0)
735 for (i = 0; i != qp->nq_nentry; i++)
736 if (qp->nq_cmd[i] != NULL)
737 nvme_free_cmd(qp->nq_cmd[i]);
738
739 if (qp->nq_cmd != NULL)
740 kmem_free(qp->nq_cmd, sizeof (nvme_cmd_t *) * qp->nq_nentry);
741
742 kmem_free(qp, sizeof (nvme_qpair_t));
743 }
744
745 static int
746 nvme_alloc_qpair(nvme_t *nvme, uint32_t nentry, nvme_qpair_t **nqp,
747 int idx)
748 {
749 nvme_qpair_t *qp = kmem_zalloc(sizeof (*qp), KM_SLEEP);
750
751 mutex_init(&qp->nq_mutex, NULL, MUTEX_DRIVER,
752 DDI_INTR_PRI(nvme->n_intr_pri));
753 sema_init(&qp->nq_sema, nentry, NULL, SEMA_DRIVER, NULL);
754
755 if (nvme_zalloc_queue_dma(nvme, nentry, sizeof (nvme_sqe_t),
756 DDI_DMA_WRITE, &qp->nq_sqdma) != DDI_SUCCESS)
757 goto fail;
758
759 if (nvme_zalloc_queue_dma(nvme, nentry, sizeof (nvme_cqe_t),
760 DDI_DMA_READ, &qp->nq_cqdma) != DDI_SUCCESS)
761 goto fail;
762
763 qp->nq_sq = (nvme_sqe_t *)qp->nq_sqdma->nd_memp;
764 qp->nq_cq = (nvme_cqe_t *)qp->nq_cqdma->nd_memp;
765 qp->nq_nentry = nentry;
766
767 qp->nq_sqtdbl = NVME_REG_SQTDBL(nvme, idx);
768 qp->nq_cqhdbl = NVME_REG_CQHDBL(nvme, idx);
769
770 qp->nq_cmd = kmem_zalloc(sizeof (nvme_cmd_t *) * nentry, KM_SLEEP);
771 qp->nq_next_cmd = 0;
772
773 *nqp = qp;
800 }
801
802 static void
803 nvme_free_cmd(nvme_cmd_t *cmd)
804 {
805 if (cmd->nc_dma) {
806 if (cmd->nc_dma->nd_cached)
807 kmem_cache_free(cmd->nc_nvme->n_prp_cache,
808 cmd->nc_dma);
809 else
810 nvme_free_dma(cmd->nc_dma);
811 cmd->nc_dma = NULL;
812 }
813
814 cv_destroy(&cmd->nc_cv);
815 mutex_destroy(&cmd->nc_mutex);
816
817 kmem_cache_free(nvme_cmd_cache, cmd);
818 }
819
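/*
 * Submit a command to the given queue pair, blocking on the queue pair
 * semaphore until a slot is available. Used for admin commands.
 */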
820 static void
821 nvme_submit_admin_cmd(nvme_qpair_t *qp, nvme_cmd_t *cmd)
822 {
823 sema_p(&qp->nq_sema);
824 nvme_submit_cmd_common(qp, cmd);
825 }
826
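/*
 * Submit an I/O command without blocking; fails with EAGAIN if no queue slot
 * is currently available.
 */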
827 static int
828 nvme_submit_io_cmd(nvme_qpair_t *qp, nvme_cmd_t *cmd)
829 {
830 if (sema_tryp(&qp->nq_sema) == 0)
831 return (EAGAIN);
832
833 nvme_submit_cmd_common(qp, cmd);
834 return (0);
835 }
836
837 static void
838 nvme_submit_cmd_common(nvme_qpair_t *qp, nvme_cmd_t *cmd)
839 {
840 nvme_reg_sqtdbl_t tail = { 0 };
841
842 mutex_enter(&qp->nq_mutex);
843 cmd->nc_completed = B_FALSE;
844
845 /*
846 * Try to insert the cmd into the active cmd array at the nq_next_cmd
847	 * slot. If the slot is already occupied, advance to the next slot and
848	 * try again. This can happen for long-running commands like async event
849 * requests.
850 */
851 while (qp->nq_cmd[qp->nq_next_cmd] != NULL)
852 qp->nq_next_cmd = (qp->nq_next_cmd + 1) % qp->nq_nentry;
853 qp->nq_cmd[qp->nq_next_cmd] = cmd;
854
855 qp->nq_active_cmds++;
856
857 cmd->nc_sqe.sqe_cid = qp->nq_next_cmd;
858 bcopy(&cmd->nc_sqe, &qp->nq_sq[qp->nq_sqtail], sizeof (nvme_sqe_t));
859 (void) ddi_dma_sync(qp->nq_sqdma->nd_dmah,
860 sizeof (nvme_sqe_t) * qp->nq_sqtail,
861 sizeof (nvme_sqe_t), DDI_DMA_SYNC_FORDEV);
862 qp->nq_next_cmd = (qp->nq_next_cmd + 1) % qp->nq_nentry;
863
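	/* Advance the SQ tail and ring the doorbell to notify the hardware. */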
864 tail.b.sqtdbl_sqt = qp->nq_sqtail = (qp->nq_sqtail + 1) % qp->nq_nentry;
865 nvme_put32(cmd->nc_nvme, qp->nq_sqtdbl, tail.r);
866
867 mutex_exit(&qp->nq_mutex);
868 }
869
870 static nvme_cmd_t *
871 nvme_retrieve_cmd(nvme_t *nvme, nvme_qpair_t *qp)
872 {
873 nvme_reg_cqhdbl_t head = { 0 };
874
875 nvme_cqe_t *cqe;
876 nvme_cmd_t *cmd;
877
878 (void) ddi_dma_sync(qp->nq_cqdma->nd_dmah, 0,
879 sizeof (nvme_cqe_t) * qp->nq_nentry, DDI_DMA_SYNC_FORKERNEL);
880
881 mutex_enter(&qp->nq_mutex);
882 cqe = &qp->nq_cq[qp->nq_cqhead];
883
884 /* Check phase tag of CQE. Hardware inverts it for new entries. */
885 if (cqe->cqe_sf.sf_p == qp->nq_phase) {
886 mutex_exit(&qp->nq_mutex);
887 return (NULL);
893 cmd = qp->nq_cmd[cqe->cqe_cid];
894 qp->nq_cmd[cqe->cqe_cid] = NULL;
895 qp->nq_active_cmds--;
896
897 ASSERT(cmd != NULL);
898 ASSERT(cmd->nc_nvme == nvme);
899 ASSERT(cmd->nc_sqid == cqe->cqe_sqid);
900 ASSERT(cmd->nc_sqe.sqe_cid == cqe->cqe_cid);
901 bcopy(cqe, &cmd->nc_cqe, sizeof (nvme_cqe_t));
902
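	/* The CQE reports the current SQ head pointer for this submission queue. */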
903 qp->nq_sqhead = cqe->cqe_sqhd;
904
905 head.b.cqhdbl_cqh = qp->nq_cqhead = (qp->nq_cqhead + 1) % qp->nq_nentry;
906
907 /* Toggle phase on wrap-around. */
908 if (qp->nq_cqhead == 0)
909 qp->nq_phase = qp->nq_phase ? 0 : 1;
910
911 nvme_put32(cmd->nc_nvme, qp->nq_cqhdbl, head.r);
912 mutex_exit(&qp->nq_mutex);
913 sema_v(&qp->nq_sema);
914
915 return (cmd);
916 }
917
918 static int
919 nvme_check_unknown_cmd_status(nvme_cmd_t *cmd)
920 {
921 nvme_cqe_t *cqe = &cmd->nc_cqe;
922
923 dev_err(cmd->nc_nvme->n_dip, CE_WARN,
924 "!unknown command status received: opc = %x, sqid = %d, cid = %d, "
925 "sc = %x, sct = %x, dnr = %d, m = %d", cmd->nc_sqe.sqe_opc,
926 cqe->cqe_sqid, cqe->cqe_cid, cqe->cqe_sf.sf_sc, cqe->cqe_sf.sf_sct,
927 cqe->cqe_sf.sf_dnr, cqe->cqe_sf.sf_m);
928
929 if (cmd->nc_xfer != NULL)
930 bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
931
932 if (cmd->nc_nvme->n_strict_version) {
933 cmd->nc_nvme->n_dead = B_TRUE;
1362 if (cmd->nc_callback == nvme_abort_cmd_cb) {
1363 mutex_exit(&cmd->nc_mutex);
1364 nvme_abort_cmd_cb(cmd);
1365 return;
1366 }
1367
1368 cmd->nc_completed = B_TRUE;
1369 cv_signal(&cmd->nc_cv);
1370 mutex_exit(&cmd->nc_mutex);
1371 }
1372
1373 static void
1374 nvme_async_event_task(void *arg)
1375 {
1376 nvme_cmd_t *cmd = arg;
1377 nvme_t *nvme = cmd->nc_nvme;
1378 nvme_error_log_entry_t *error_log = NULL;
1379 nvme_health_log_t *health_log = NULL;
1380 size_t logsize = 0;
1381 nvme_async_event_t event;
1382
1383 /*
1384 * Check for errors associated with the async request itself. The only
1385 * command-specific error is "async event limit exceeded", which
1386 * indicates a programming error in the driver and causes a panic in
1387 * nvme_check_cmd_status().
1388 *
1389 * Other possible errors are various scenarios where the async request
1390 * was aborted, or internal errors in the device. Internal errors are
1391	 * reported to FMA; the command aborts need no special handling here.
1392 */
1393 if (nvme_check_cmd_status(cmd)) {
1394 dev_err(cmd->nc_nvme->n_dip, CE_WARN,
1395 "!async event request returned failure, sct = %x, "
1396 "sc = %x, dnr = %d, m = %d", cmd->nc_cqe.cqe_sf.sf_sct,
1397 cmd->nc_cqe.cqe_sf.sf_sc, cmd->nc_cqe.cqe_sf.sf_dnr,
1398 cmd->nc_cqe.cqe_sf.sf_m);
1399
1400 if (cmd->nc_cqe.cqe_sf.sf_sct == NVME_CQE_SCT_GENERIC &&
1401 cmd->nc_cqe.cqe_sf.sf_sc == NVME_CQE_SC_GEN_INTERNAL_ERR) {
1402 cmd->nc_nvme->n_dead = B_TRUE;
1403 ddi_fm_service_impact(cmd->nc_nvme->n_dip,
1404 DDI_SERVICE_LOST);
1405 }
1406 nvme_free_cmd(cmd);
1407 return;
1408 }
1409
1410
1411 event.r = cmd->nc_cqe.cqe_dw0;
1412
1413 /* Clear CQE and re-submit the async request. */
1414 bzero(&cmd->nc_cqe, sizeof (nvme_cqe_t));
1415 nvme_submit_admin_cmd(nvme->n_adminq, cmd);
1416
1417 switch (event.b.ae_type) {
1418 case NVME_ASYNC_TYPE_ERROR:
1419 if (event.b.ae_logpage == NVME_LOGPAGE_ERROR) {
1420 (void) nvme_get_logpage(nvme, (void **)&error_log,
1421 &logsize, event.b.ae_logpage);
1422 } else {
1423 dev_err(nvme->n_dip, CE_WARN, "!wrong logpage in "
1424 "async event reply: %d", event.b.ae_logpage);
1425 atomic_inc_32(&nvme->n_wrong_logpage);
1426 }
1427
1428 switch (event.b.ae_info) {
1429 case NVME_ASYNC_ERROR_INV_SQ:
1430 dev_err(nvme->n_dip, CE_PANIC, "programming error: "
1431 "invalid submission queue");
1432 return;
1433
1434 case NVME_ASYNC_ERROR_INV_DBL:
1435 dev_err(nvme->n_dip, CE_PANIC, "programming error: "
1436 "invalid doorbell write value");
1508 break;
1509
1510 default:
1511 dev_err(nvme->n_dip, CE_WARN, "!unknown async event received, "
1512 "type = %x, info = %x, logpage = %x", event.b.ae_type,
1513 event.b.ae_info, event.b.ae_logpage);
1514 atomic_inc_32(&nvme->n_unknown_event);
1515 break;
1516 }
1517
1518 if (error_log)
1519 kmem_free(error_log, logsize);
1520
1521 if (health_log)
1522 kmem_free(health_log, logsize);
1523 }
1524
1525 static int
1526 nvme_admin_cmd(nvme_cmd_t *cmd, int sec)
1527 {
1528 mutex_enter(&cmd->nc_mutex);
1529 nvme_submit_admin_cmd(cmd->nc_nvme->n_adminq, cmd);
1530
1531 if (nvme_wait_cmd(cmd, sec) == B_FALSE) {
1532 /*
1533 * The command timed out. An abort command was posted that
1534 * will take care of the cleanup.
1535 */
1536 return (DDI_FAILURE);
1537 }
1538 mutex_exit(&cmd->nc_mutex);
1539
1540 return (DDI_SUCCESS);
1541 }
1542
1543 static void
1544 nvme_async_event(nvme_t *nvme)
1545 {
1546 nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
1547
1548 cmd->nc_sqid = 0;
1549 cmd->nc_sqe.sqe_opc = NVME_OPC_ASYNC_EVENT;
1550 cmd->nc_callback = nvme_async_event_task;
1551
1552 nvme_submit_admin_cmd(nvme->n_adminq, cmd);
1553 }
1554
1555 static int
1556 nvme_format_nvm(nvme_t *nvme, uint32_t nsid, uint8_t lbaf, boolean_t ms,
1557 uint8_t pi, boolean_t pil, uint8_t ses)
1558 {
1559 nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
1560 nvme_format_nvm_t format_nvm = { 0 };
1561 int ret;
1562
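	/*
	 * Assemble the Format NVM parameters: LBA format, metadata setting,
	 * protection information and location, and secure erase setting, each
	 * masked to its defined field width.
	 */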
1563 format_nvm.b.fm_lbaf = lbaf & 0xf;
1564 format_nvm.b.fm_ms = ms ? 1 : 0;
1565 format_nvm.b.fm_pi = pi & 0x7;
1566 format_nvm.b.fm_pil = pil ? 1 : 0;
1567 format_nvm.b.fm_ses = ses & 0x7;
1568
1569 cmd->nc_sqid = 0;
1570 cmd->nc_callback = nvme_wakeup_cmd;
1571 cmd->nc_sqe.sqe_nsid = nsid;
1572 cmd->nc_sqe.sqe_opc = NVME_OPC_NVM_FORMAT;
2349 */
2350 sema_init(&nvme->n_abort_sema, 1, NULL, SEMA_DRIVER, NULL);
2351
2352 /*
2353 * Setup initial interrupt for admin queue.
2354 */
2355 if ((nvme_setup_interrupts(nvme, DDI_INTR_TYPE_MSIX, 1)
2356 != DDI_SUCCESS) &&
2357 (nvme_setup_interrupts(nvme, DDI_INTR_TYPE_MSI, 1)
2358 != DDI_SUCCESS) &&
2359 (nvme_setup_interrupts(nvme, DDI_INTR_TYPE_FIXED, 1)
2360 != DDI_SUCCESS)) {
2361 dev_err(nvme->n_dip, CE_WARN,
2362 "!failed to setup initial interrupt");
2363 goto fail;
2364 }
2365
2366 /*
2367 * Post an asynchronous event command to catch errors.
2368 */
2369 nvme_async_event(nvme);
2370
2371 /*
2372 * Identify Controller
2373 */
2374 nvme->n_idctl = nvme_identify(nvme, 0);
2375 if (nvme->n_idctl == NULL) {
2376 dev_err(nvme->n_dip, CE_WARN,
2377 "!failed to identify controller");
2378 goto fail;
2379 }
2380
2381 /*
2382 * Get Vendor & Product ID
2383 */
2384 bcopy(nvme->n_idctl->id_model, model, sizeof (nvme->n_idctl->id_model));
2385 model[sizeof (nvme->n_idctl->id_model)] = '\0';
2386 sata_split_model(model, &vendor, &product);
2387
2388 if (vendor == NULL)
2389 nvme->n_vendor = strdup("NVMe");
2574 for (i = 1; i != nvme->n_ioq_count + 1; i++) {
2575 if (nvme_alloc_qpair(nvme, nvme->n_io_queue_len,
2576 &nvme->n_ioq[i], i) != DDI_SUCCESS) {
2577 dev_err(nvme->n_dip, CE_WARN,
2578 "!unable to allocate I/O qpair %d", i);
2579 goto fail;
2580 }
2581
2582 if (nvme_create_io_qpair(nvme, nvme->n_ioq[i], i)
2583 != DDI_SUCCESS) {
2584 dev_err(nvme->n_dip, CE_WARN,
2585 "!unable to create I/O qpair %d", i);
2586 goto fail;
2587 }
2588 }
2589
2590 /*
2591	 * Post more asynchronous event commands to reduce event reporting
2592 * latency as suggested by the spec.
2593 */
2594 for (i = 1; i != nvme->n_async_event_limit; i++)
2595 nvme_async_event(nvme);
2596
2597 return (DDI_SUCCESS);
2598
2599 fail:
2600 (void) nvme_reset(nvme, B_FALSE);
2601 return (DDI_FAILURE);
2602 }
2603
2604 static uint_t
2605 nvme_intr(caddr_t arg1, caddr_t arg2)
2606 {
2607 /*LINTED: E_PTR_BAD_CAST_ALIGN*/
2608 nvme_t *nvme = (nvme_t *)arg1;
2609 int inum = (int)(uintptr_t)arg2;
2610 int ccnt = 0;
2611 int qnum;
2612 nvme_cmd_t *cmd;
2613
2614 if (inum >= nvme->n_intr_cnt)
2615 return (DDI_INTR_UNCLAIMED);
3239
3240 static int
3241 nvme_bd_mediainfo(void *arg, bd_media_t *media)
3242 {
3243 nvme_namespace_t *ns = arg;
3244
3245 media->m_nblks = ns->ns_block_count;
3246 media->m_blksize = ns->ns_block_size;
3247 media->m_readonly = B_FALSE;
3248 media->m_solidstate = B_TRUE;
3249
3250 media->m_pblksize = ns->ns_best_block_size;
3251
3252 return (0);
3253 }
3254
3255 static int
3256 nvme_bd_cmd(nvme_namespace_t *ns, bd_xfer_t *xfer, uint8_t opc)
3257 {
3258 nvme_t *nvme = ns->ns_nvme;
3259 nvme_cmd_t *cmd;
3260 nvme_qpair_t *ioq;
3261 boolean_t poll;
3262 int ret;
3263
3264 if (nvme->n_dead)
3265 return (EIO);
3266
3267 cmd = nvme_create_nvm_cmd(ns, opc, xfer);
3268 if (cmd == NULL)
3269 return (ENOMEM);
3270
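	/*
	 * Pick an I/O queue based on the current CPU to spread the load
	 * across all available queue pairs.
	 */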
3271 cmd->nc_sqid = (CPU->cpu_id % nvme->n_ioq_count) + 1;
3272 ASSERT(cmd->nc_sqid <= nvme->n_ioq_count);
3273 ioq = nvme->n_ioq[cmd->nc_sqid];
3274
3275 /*
3276 * Get the polling flag before submitting the command. The command may
3277 * complete immediately after it was submitted, which means we must
3278 * treat both cmd and xfer as if they have been freed already.
3279 */
3280 poll = (xfer->x_flags & BD_XFER_POLL) != 0;
3281
3282 ret = nvme_submit_io_cmd(ioq, cmd);
3283
3284 if (ret != 0)
3285 return (ret);
3286
3287 if (!poll)
3288 return (0);
3289
3290 do {
3291 cmd = nvme_retrieve_cmd(nvme, ioq);
3292 if (cmd != NULL)
3293 nvme_bd_xfer_done(cmd);
3294 else
3295 drv_usecwait(10);
3296 } while (ioq->nq_active_cmds != 0);
3297
3298 return (0);
3299 }
3300
3301 static int
3302 nvme_bd_read(void *arg, bd_xfer_t *xfer)
3303 {
3304 nvme_namespace_t *ns = arg;
3305
3306 return (nvme_bd_cmd(ns, xfer, NVME_OPC_NVM_READ));
3307 }
3308
3309 static int
3310 nvme_bd_write(void *arg, bd_xfer_t *xfer)
3311 {
3312 nvme_namespace_t *ns = arg;
3313
|