don't block in nvme_bd_cmd
8629 nvme: rework command abortion
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Jason King <jason.king@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
--- old/usr/src/uts/common/io/nvme/nvme.c
+++ new/usr/src/uts/common/io/nvme/nvme.c
1 1 /*
2 2 * This file and its contents are supplied under the terms of the
3 3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 4 * You may only use this file in accordance with the terms of version
5 5 * 1.0 of the CDDL.
6 6 *
7 7 * A full copy of the text of the CDDL should have accompanied this
8 8 * source. A copy of the CDDL is also available via the Internet at
9 9 * http://www.illumos.org/license/CDDL.
10 10 */
11 11
12 12 /*
13 13 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
14 14 * Copyright 2016 Tegile Systems, Inc. All rights reserved.
15 15 * Copyright (c) 2016 The MathWorks, Inc. All rights reserved.
16 16 * Copyright 2017 Joyent, Inc.
17 17 */
18 18
19 19 /*
20 20 * blkdev driver for NVMe compliant storage devices
21 21 *
22 22 * This driver was written to conform to version 1.2.1 of the NVMe
23 23 * specification. It may work with newer versions, but that is completely
24 24 * untested and disabled by default.
25 25 *
26 26 * The driver has only been tested on x86 systems and will not work on big-
27 27 * endian systems without changes to the code accessing registers and data
28 28 * structures used by the hardware.
29 29 *
30 30 *
31 31 * Interrupt Usage:
32 32 *
33 33 * The driver will use a single interrupt while configuring the device as the
34 34 * specification requires, but contrary to the specification it will try to use
35 35 * a single-message MSI(-X) or FIXED interrupt. Later in the attach process it
36 36 * will switch to multiple-message MSI(-X) if supported. The driver wants to
37 37 * have one interrupt vector per CPU, but it will work correctly if less are
38 38 * available. Interrupts can be shared by queues, the interrupt handler will
39 39 * iterate through the I/O queue array by steps of n_intr_cnt. Usually only
40 40 * the admin queue will share an interrupt with one I/O queue. The interrupt
41 41 * handler will retrieve completed commands from all queues sharing an interrupt
42 42 * vector and will post them to a taskq for completion processing.
43 43 *
44 44 *
45 45 * Command Processing:
46 46 *
47 47 * NVMe devices can have up to 65535 I/O queue pairs, with each queue holding up
48 48 * to 65536 I/O commands. The driver will configure one I/O queue pair per
49 49 * available interrupt vector, with the queue length usually much smaller than
50 50 * the maximum of 65536. If the hardware doesn't provide enough queues, fewer
51 51 * interrupt vectors will be used.
52 52 *
53 53 * Additionally the hardware provides a single special admin queue pair that can
54 54 * hold up to 4096 admin commands.
55 55 *
56 56 * From the hardware perspective both queues of a queue pair are independent,
57 57 * but they share some driver state: the command array (holding pointers to
58 58 * commands currently being processed by the hardware) and the active command
59 - * counter. Access to the submission side of a queue pair and the shared state
60 - * is protected by nq_mutex. The completion side of a queue pair does not need
61 - * that protection apart from its access to the shared state; it is called only
62 - * in the interrupt handler which does not run concurrently for the same
63 - * interrupt vector.
59 + * counter. Access to a queue pair and the shared state is protected by
60 + * nq_mutex.
64 61 *
65 62 * When a command is submitted to a queue pair the active command counter is
66 63 * incremented and a pointer to the command is stored in the command array. The
67 64 * array index is used as command identifier (CID) in the submission queue
68 65 * entry. Some commands may take a very long time to complete, and if the queue
69 66 * wraps around in that time a submission may find the next array slot to still
70 67 * be used by a long-running command. In this case the array is sequentially
71 68 * searched for the next free slot. The length of the command array is the same
72 69 * as the configured queue length. Queue overrun is prevented by the semaphore,
73 70 * so a command submission may block if the queue is full.
74 71 *
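A condensed sketch of that bookkeeping, drawn from nvme_submit_cmd_common() and the semaphore handling shown later in this listing (illustrative only, not part of the diff):

	sema_p(&qp->nq_sema);	/* blocks; sema_tryp() is the non-blocking I/O variant */

	mutex_enter(&qp->nq_mutex);
	/* Skip over slots still occupied by long-running commands. */
	while (qp->nq_cmd[qp->nq_next_cmd] != NULL)
		qp->nq_next_cmd = (qp->nq_next_cmd + 1) % qp->nq_nentry;
	qp->nq_cmd[qp->nq_next_cmd] = cmd;
	qp->nq_active_cmds++;
	cmd->nc_sqe.sqe_cid = qp->nq_next_cmd;	/* array index doubles as CID */
	mutex_exit(&qp->nq_mutex);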
75 72 *
76 73 * Polled I/O Support:
77 74 *
78 75 * For kernel core dump support the driver can do polled I/O. As interrupts are
79 76 * turned off while dumping, the driver will just submit a command in the regular
80 77 * way, and then repeatedly attempt a command retrieval until it gets the
81 78 * command back.
82 79 *
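A minimal sketch of that polling loop, assuming a single outstanding command as in dump context; the busy-wait interval is illustrative and the retrieval call is the nvme_retrieve_cmd() shown later in this listing:

	/* Submit in the regular way, then busy-wait for the completion. */
	while (nvme_retrieve_cmd(nvme, qp) != cmd)
		drv_usecwait(10);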
83 80 *
84 81 * Namespace Support:
85 82 *
86 83 * NVMe devices can have multiple namespaces, each being an independent data
87 84 * store. The driver supports multiple namespaces and creates a blkdev interface
88 85 * for each namespace found. Namespaces can have various attributes to support
89 86 * thin provisioning and protection information. This driver does not support
90 87 * any of this and ignores namespaces that have these attributes.
91 88 *
92 89 * As of NVMe 1.1 namespaces can have a 64bit Extended Unique Identifier
93 90 * (EUI64). This driver uses the EUI64 if present to generate the devid and
94 91 * passes it to blkdev to use it in the device node names. As this is currently
95 92 * untested, namespaces with EUI64 are ignored by default.
96 93 *
97 94 * We currently support only (2 << NVME_MINOR_INST_SHIFT) - 2 namespaces in a
98 95 * single controller. This is an artificial limit imposed by the driver to be
99 96 * able to address a reasonable number of controllers and namespaces using a
100 97 * 32bit minor node number.
101 98 *
102 99 *
103 100 * Minor nodes:
104 101 *
105 102 * For each NVMe device the driver exposes one minor node for the controller and
106 103 * one minor node for each namespace. The only operations supported by those
107 104 * minor nodes are open(9E), close(9E), and ioctl(9E). This serves as the
108 105 * interface for the nvmeadm(1M) utility.
109 106 *
110 107 *
111 108 * Blkdev Interface:
112 109 *
113 110 * This driver uses blkdev to do all the heavy lifting involved with presenting
114 111 * a disk device to the system. As a result, the processing of I/O requests is
115 112 * relatively simple as blkdev takes care of partitioning, boundary checks, DMA
116 113 * setup, and splitting of transfers into manageable chunks.
117 114 *
118 115 * I/O requests coming in from blkdev are turned into NVM commands and posted to
119 116 * an I/O queue. The queue is selected by taking the CPU id modulo the number of
120 117 * queues. There is currently no timeout handling of I/O commands.
121 118 *
122 119 * Blkdev also supports querying device/media information and generating a
123 120 * devid. The driver reports the best block size as determined by the namespace
124 121 * format back to blkdev as physical block size to support partition and block
125 122 * alignment. The devid is either based on the namespace EUI64, if present, or
126 123 * composed using the device vendor ID, model number, serial number, and the
127 124 * namespace ID.
128 125 *
129 126 *
130 127 * Error Handling:
131 128 *
132 129 * Error handling is currently limited to detecting fatal hardware errors,
133 130 * either by asynchronous events, or synchronously through command status or
134 131 * admin command timeouts. In case of severe errors the device is fenced off;
135 132 * all further requests will return EIO. FMA is then called to fault the device.
136 133 *
137 134 * The hardware has a limit for outstanding asynchronous event requests. Before
138 135 * this limit is known the driver assumes it is at least 1 and posts a single
139 136 * asynchronous request. Later when the limit is known more asynchronous event
140 137 * requests are posted to allow quicker reception of error information. When an
141 138 * asynchronous event is posted by the hardware the driver will parse the error
142 139 * status fields and log information or fault the device, depending on the
143 140 * severity of the asynchronous event. The asynchronous event request is then
144 141 * reused and posted to the admin queue again.
145 142 *
146 143 * On command completion the command status is checked for errors. In case of
147 144 * errors indicating a driver bug the driver panics. Almost all other error
148 145 * status values just cause EIO to be returned.
149 146 *
150 147 * Command timeouts are currently detected for all admin commands except
151 148 * asynchronous event requests. If a command times out and the hardware appears
152 - * to be healthy the driver attempts to abort the command. If this fails the
149 + * to be healthy the driver attempts to abort the command. The original command
150 + * timeout is also applied to the abort command. If the abort times out too the
153 151 * driver assumes the device to be dead, fences it off, and calls FMA to retire
154 - * it. In general admin commands are issued at attach time only. No timeout
155 - * handling of normal I/O commands is presently done.
152 + * it. In all other cases the aborted command should return immediately with a
153 + * status indicating it was aborted, and the driver will wait indefinitely for
154 + * that to happen. No timeout handling of normal I/O commands is presently done.
156 155 *
157 - * In some cases it may be possible that the ABORT command times out, too. In
158 - * that case the device is also declared dead and fenced off.
156 + * Any command that times out due to the controller dropping dead will be put on
157 + * the nvme_lost_cmds list if it references DMA memory. This prevents the DMA
158 + * memory from being reused by the system and later written to by a "dead" NVMe
159 + * controller.
159 160 *
160 161 *
162 + * Locking:
163 + *
164 + * Each queue pair has its own nq_mutex, which must be held when accessing the
165 + * associated queue registers or the shared state of the queue pair. Callers of
166 + * nvme_unqueue_cmd() must make sure that nq_mutex is held, while
167 + * nvme_submit_{admin,io}_cmd() and nvme_retrieve_cmd() take care of this
168 + * themselves.
169 + *
170 + * Each command also has its own nc_mutex, which is associated with the
171 + * condition variable nc_cv. It is only used on admin commands which are run
172 + * synchronously. In that case it must be held across calls to
173 + * nvme_submit_{admin,io}_cmd() and nvme_wait_cmd(), which is taken care of by
174 + * nvme_admin_cmd(). It must also be held whenever the completion state of the
175 + * command is changed or while an admin command timeout is handled.
176 + *
177 + * If both nc_mutex and nq_mutex must be held, nc_mutex must be acquired first.
178 + * More than one nc_mutex may only be held when aborting commands. In this case,
179 + * the nc_mutex of the command to be aborted must be held across the call to
180 + * nvme_abort_cmd() to prevent the command from completing while the abort is in
181 + * progress.
182 + *
183 + * Each minor node has its own nm_mutex, which protects the open count nm_ocnt
184 + * and exclusive-open flag nm_oexcl.
185 + *
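A minimal sketch of that ordering as used by the timeout path later in this listing; nc_mutex is taken first, then nq_mutex:

	mutex_enter(&cmd->nc_mutex);	/* per-command state first */
	mutex_enter(&qp->nq_mutex);	/* then the queue pair */
	(void) nvme_unqueue_cmd(nvme, qp, cmd->nc_sqe.sqe_cid);
	mutex_exit(&qp->nq_mutex);
	mutex_exit(&cmd->nc_mutex);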
186 + *
161 187 * Quiesce / Fast Reboot:
162 188 *
163 189 * The driver currently does not support fast reboot. A quiesce(9E) entry point
164 190 * is still provided which is used to send a shutdown notification to the
165 191 * device.
166 192 *
167 193 *
168 194 * Driver Configuration:
169 195 *
170 196 * The following driver properties can be changed to control some aspects of the
171 197 * driver's operation (see the sketch after this list):
172 198 * - strict-version: can be set to 0 to allow devices conforming to newer
173 199 * versions or namespaces with EUI64 to be used
174 200 * - ignore-unknown-vendor-status: can be set to 1 to not handle any vendor
175 201 * specific command status as a fatal error leading to device faulting
176 202 * - admin-queue-len: the maximum length of the admin queue (16-4096)
177 203 * - io-queue-len: the maximum length of the I/O queues (16-65536)
178 204 * - async-event-limit: the maximum number of asynchronous event requests to be
179 205 * posted by the driver
180 206 * - volatile-write-cache-enable: can be set to 0 to disable the volatile write
181 207 * cache
182 208 * - min-phys-block-size: the minimum physical block size to report to blkdev,
183 209 * which is among other things the basis for ZFS vdev ashift
184 210 *
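A sketch of how such a property is consumed at attach time; the field name and default value below are illustrative assumptions, not taken from this diff:

	nvme->n_admin_queue_len = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "admin-queue-len", 256);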
185 211 *
186 212 * TODO:
187 213 * - figure out sane default for I/O queue depth reported to blkdev
188 214 * - FMA handling of media errors
189 215 * - support for devices supporting very large I/O requests using chained PRPs
190 216 * - support for configuring hardware parameters like interrupt coalescing
191 217 * - support for media formatting and hard partitioning into namespaces
192 218 * - support for big-endian systems
193 219 * - support for fast reboot
194 220 * - support for firmware updates
195 221 * - support for NVMe Subsystem Reset (1.1)
196 222 * - support for Scatter/Gather lists (1.1)
197 223 * - support for Reservations (1.1)
198 224 * - support for power management
199 225 */
200 226
201 227 #include <sys/byteorder.h>
202 228 #ifdef _BIG_ENDIAN
203 229 #error nvme driver needs porting for big-endian platforms
204 230 #endif
205 231
206 232 #include <sys/modctl.h>
207 233 #include <sys/conf.h>
208 234 #include <sys/devops.h>
209 235 #include <sys/ddi.h>
210 236 #include <sys/sunddi.h>
211 237 #include <sys/sunndi.h>
212 238 #include <sys/bitmap.h>
213 239 #include <sys/sysmacros.h>
214 240 #include <sys/param.h>
215 241 #include <sys/varargs.h>
216 242 #include <sys/cpuvar.h>
217 243 #include <sys/disp.h>
218 244 #include <sys/blkdev.h>
219 245 #include <sys/atomic.h>
220 246 #include <sys/archsystm.h>
221 247 #include <sys/sata/sata_hba.h>
222 248 #include <sys/stat.h>
223 249 #include <sys/policy.h>
250 +#include <sys/list.h>
224 251
225 252 #include <sys/nvme.h>
226 253
227 254 #ifdef __x86
228 255 #include <sys/x86_archext.h>
229 256 #endif
230 257
231 258 #include "nvme_reg.h"
232 259 #include "nvme_var.h"
233 260
234 261
235 262 /* NVMe spec version supported */
236 263 static const int nvme_version_major = 1;
237 264 static const int nvme_version_minor = 2;
238 265
239 266 /* tunable for admin command timeout in seconds, default is 1s */
240 267 int nvme_admin_cmd_timeout = 1;
241 268
242 269 /* tunable for FORMAT NVM command timeout in seconds, default is 600s */
243 270 int nvme_format_cmd_timeout = 600;
244 271
245 272 static int nvme_attach(dev_info_t *, ddi_attach_cmd_t);
246 273 static int nvme_detach(dev_info_t *, ddi_detach_cmd_t);
247 274 static int nvme_quiesce(dev_info_t *);
248 275 static int nvme_fm_errcb(dev_info_t *, ddi_fm_error_t *, const void *);
249 276 static int nvme_setup_interrupts(nvme_t *, int, int);
250 277 static void nvme_release_interrupts(nvme_t *);
251 278 static uint_t nvme_intr(caddr_t, caddr_t);
252 279
253 280 static void nvme_shutdown(nvme_t *, int, boolean_t);
254 281 static boolean_t nvme_reset(nvme_t *, boolean_t);
255 282 static int nvme_init(nvme_t *);
256 283 static nvme_cmd_t *nvme_alloc_cmd(nvme_t *, int);
257 284 static void nvme_free_cmd(nvme_cmd_t *);
258 285 static nvme_cmd_t *nvme_create_nvm_cmd(nvme_namespace_t *, uint8_t,
259 286 bd_xfer_t *);
260 -static int nvme_admin_cmd(nvme_cmd_t *, int);
287 +static void nvme_admin_cmd(nvme_cmd_t *, int);
261 288 static void nvme_submit_admin_cmd(nvme_qpair_t *, nvme_cmd_t *);
262 289 static int nvme_submit_io_cmd(nvme_qpair_t *, nvme_cmd_t *);
263 290 static void nvme_submit_cmd_common(nvme_qpair_t *, nvme_cmd_t *);
291 +static nvme_cmd_t *nvme_unqueue_cmd(nvme_t *, nvme_qpair_t *, int);
264 292 static nvme_cmd_t *nvme_retrieve_cmd(nvme_t *, nvme_qpair_t *);
265 -static boolean_t nvme_wait_cmd(nvme_cmd_t *, uint_t);
293 +static void nvme_wait_cmd(nvme_cmd_t *, uint_t);
266 294 static void nvme_wakeup_cmd(void *);
267 295 static void nvme_async_event_task(void *);
268 296
269 297 static int nvme_check_unknown_cmd_status(nvme_cmd_t *);
270 298 static int nvme_check_vendor_cmd_status(nvme_cmd_t *);
271 299 static int nvme_check_integrity_cmd_status(nvme_cmd_t *);
272 300 static int nvme_check_specific_cmd_status(nvme_cmd_t *);
273 301 static int nvme_check_generic_cmd_status(nvme_cmd_t *);
274 302 static inline int nvme_check_cmd_status(nvme_cmd_t *);
275 303
276 -static void nvme_abort_cmd(nvme_cmd_t *);
304 +static int nvme_abort_cmd(nvme_cmd_t *, uint_t);
277 305 static void nvme_async_event(nvme_t *);
278 306 static int nvme_format_nvm(nvme_t *, uint32_t, uint8_t, boolean_t, uint8_t,
279 307 boolean_t, uint8_t);
280 308 static int nvme_get_logpage(nvme_t *, void **, size_t *, uint8_t, ...);
281 -static void *nvme_identify(nvme_t *, uint32_t);
282 -static boolean_t nvme_set_features(nvme_t *, uint32_t, uint8_t, uint32_t,
309 +static int nvme_identify(nvme_t *, uint32_t, void **);
310 +static int nvme_set_features(nvme_t *, uint32_t, uint8_t, uint32_t,
283 311 uint32_t *);
284 -static boolean_t nvme_get_features(nvme_t *, uint32_t, uint8_t, uint32_t *,
312 +static int nvme_get_features(nvme_t *, uint32_t, uint8_t, uint32_t *,
285 313 void **, size_t *);
286 -static boolean_t nvme_write_cache_set(nvme_t *, boolean_t);
287 -static int nvme_set_nqueues(nvme_t *, uint16_t);
314 +static int nvme_write_cache_set(nvme_t *, boolean_t);
315 +static int nvme_set_nqueues(nvme_t *, uint16_t *);
288 316
289 317 static void nvme_free_dma(nvme_dma_t *);
290 318 static int nvme_zalloc_dma(nvme_t *, size_t, uint_t, ddi_dma_attr_t *,
291 319 nvme_dma_t **);
292 320 static int nvme_zalloc_queue_dma(nvme_t *, uint32_t, uint16_t, uint_t,
293 321 nvme_dma_t **);
294 322 static void nvme_free_qpair(nvme_qpair_t *);
295 323 static int nvme_alloc_qpair(nvme_t *, uint32_t, nvme_qpair_t **, int);
296 324 static int nvme_create_io_qpair(nvme_t *, nvme_qpair_t *, uint16_t);
297 325
298 326 static inline void nvme_put64(nvme_t *, uintptr_t, uint64_t);
299 327 static inline void nvme_put32(nvme_t *, uintptr_t, uint32_t);
300 328 static inline uint64_t nvme_get64(nvme_t *, uintptr_t);
301 329 static inline uint32_t nvme_get32(nvme_t *, uintptr_t);
302 330
303 331 static boolean_t nvme_check_regs_hdl(nvme_t *);
304 332 static boolean_t nvme_check_dma_hdl(nvme_dma_t *);
305 333
306 334 static int nvme_fill_prp(nvme_cmd_t *, bd_xfer_t *);
307 335
308 336 static void nvme_bd_xfer_done(void *);
309 337 static void nvme_bd_driveinfo(void *, bd_drive_t *);
310 338 static int nvme_bd_mediainfo(void *, bd_media_t *);
311 339 static int nvme_bd_cmd(nvme_namespace_t *, bd_xfer_t *, uint8_t);
312 340 static int nvme_bd_read(void *, bd_xfer_t *);
313 341 static int nvme_bd_write(void *, bd_xfer_t *);
314 342 static int nvme_bd_sync(void *, bd_xfer_t *);
315 343 static int nvme_bd_devid(void *, dev_info_t *, ddi_devid_t *);
316 344
317 345 static int nvme_prp_dma_constructor(void *, void *, int);
318 346 static void nvme_prp_dma_destructor(void *, void *);
319 347
320 348 static void nvme_prepare_devid(nvme_t *, uint32_t);
321 349
322 350 static int nvme_open(dev_t *, int, int, cred_t *);
323 351 static int nvme_close(dev_t, int, int, cred_t *);
324 352 static int nvme_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
325 353
326 354 #define NVME_MINOR_INST_SHIFT 9
327 355 #define NVME_MINOR(inst, nsid) (((inst) << NVME_MINOR_INST_SHIFT) | (nsid))
328 356 #define NVME_MINOR_INST(minor) ((minor) >> NVME_MINOR_INST_SHIFT)
329 357 #define NVME_MINOR_NSID(minor) ((minor) & ((1 << NVME_MINOR_INST_SHIFT) - 1))
330 358 #define NVME_MINOR_MAX (NVME_MINOR(1, 0) - 2)
331 359
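A worked example of the minor number encoding, using the macros above:

/*
 * With NVME_MINOR_INST_SHIFT == 9:
 *	NVME_MINOR(2, 5)	== (2 << 9) | 5		== 1029
 *	NVME_MINOR_INST(1029)	== 1029 >> 9		== 2
 *	NVME_MINOR_NSID(1029)	== 1029 & 0x1ff		== 5
 *	NVME_MINOR_MAX		== NVME_MINOR(1, 0) - 2	== 510
 */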
332 360 static void *nvme_state;
333 361 static kmem_cache_t *nvme_cmd_cache;
334 362
335 363 /*
336 364 * DMA attributes for queue DMA memory
337 365 *
338 366 * Queue DMA memory must be page aligned. The maximum length of a queue is
339 367 * 65536 entries, and an entry can be 64 bytes long.
340 368 */
341 369 static ddi_dma_attr_t nvme_queue_dma_attr = {
342 370 .dma_attr_version = DMA_ATTR_V0,
343 371 .dma_attr_addr_lo = 0,
344 372 .dma_attr_addr_hi = 0xffffffffffffffffULL,
345 373 .dma_attr_count_max = (UINT16_MAX + 1) * sizeof (nvme_sqe_t) - 1,
346 374 .dma_attr_align = 0x1000,
347 375 .dma_attr_burstsizes = 0x7ff,
348 376 .dma_attr_minxfer = 0x1000,
349 377 .dma_attr_maxxfer = (UINT16_MAX + 1) * sizeof (nvme_sqe_t),
350 378 .dma_attr_seg = 0xffffffffffffffffULL,
351 379 .dma_attr_sgllen = 1,
352 380 .dma_attr_granular = 1,
353 381 .dma_attr_flags = 0,
354 382 };
355 383
356 384 /*
357 385 * DMA attributes for transfers using Physical Region Page (PRP) entries
358 386 *
359 387 * A PRP entry describes one page of DMA memory using the page size specified
360 388 * in the controller configuration's memory page size register (CC.MPS). It uses
361 389 * a 64bit base address aligned to this page size. There is no limitation on
362 390 * chaining PRPs together for arbitrarily large DMA transfers.
363 391 */
364 392 static ddi_dma_attr_t nvme_prp_dma_attr = {
365 393 .dma_attr_version = DMA_ATTR_V0,
366 394 .dma_attr_addr_lo = 0,
367 395 .dma_attr_addr_hi = 0xffffffffffffffffULL,
368 396 .dma_attr_count_max = 0xfff,
369 397 .dma_attr_align = 0x1000,
370 398 .dma_attr_burstsizes = 0x7ff,
371 399 .dma_attr_minxfer = 0x1000,
372 400 .dma_attr_maxxfer = 0x1000,
373 401 .dma_attr_seg = 0xfff,
374 402 .dma_attr_sgllen = -1,
375 403 .dma_attr_granular = 1,
376 404 .dma_attr_flags = 0,
377 405 };
378 406
379 407 /*
380 408 * DMA attributes for transfers using scatter/gather lists
381 409 *
382 410 * A SGL entry describes a chunk of DMA memory using a 64bit base address and a
383 411 * 32bit length field. SGL Segment and SGL Last Segment entries require the
384 412 * length to be a multiple of 16 bytes.
385 413 */
386 414 static ddi_dma_attr_t nvme_sgl_dma_attr = {
387 415 .dma_attr_version = DMA_ATTR_V0,
388 416 .dma_attr_addr_lo = 0,
389 417 .dma_attr_addr_hi = 0xffffffffffffffffULL,
390 418 .dma_attr_count_max = 0xffffffffUL,
391 419 .dma_attr_align = 1,
392 420 .dma_attr_burstsizes = 0x7ff,
393 421 .dma_attr_minxfer = 0x10,
394 422 .dma_attr_maxxfer = 0xfffffffffULL,
395 423 .dma_attr_seg = 0xffffffffffffffffULL,
396 424 .dma_attr_sgllen = -1,
397 425 .dma_attr_granular = 0x10,
398 426 .dma_attr_flags = 0
399 427 };
400 428
401 429 static ddi_device_acc_attr_t nvme_reg_acc_attr = {
402 430 .devacc_attr_version = DDI_DEVICE_ATTR_V0,
403 431 .devacc_attr_endian_flags = DDI_STRUCTURE_LE_ACC,
404 432 .devacc_attr_dataorder = DDI_STRICTORDER_ACC
405 433 };
406 434
407 435 static struct cb_ops nvme_cb_ops = {
408 436 .cb_open = nvme_open,
409 437 .cb_close = nvme_close,
410 438 .cb_strategy = nodev,
411 439 .cb_print = nodev,
412 440 .cb_dump = nodev,
413 441 .cb_read = nodev,
414 442 .cb_write = nodev,
415 443 .cb_ioctl = nvme_ioctl,
416 444 .cb_devmap = nodev,
417 445 .cb_mmap = nodev,
418 446 .cb_segmap = nodev,
419 447 .cb_chpoll = nochpoll,
420 448 .cb_prop_op = ddi_prop_op,
421 449 .cb_str = 0,
422 450 .cb_flag = D_NEW | D_MP,
423 451 .cb_rev = CB_REV,
424 452 .cb_aread = nodev,
425 453 .cb_awrite = nodev
426 454 };
427 455
428 456 static struct dev_ops nvme_dev_ops = {
429 457 .devo_rev = DEVO_REV,
430 458 .devo_refcnt = 0,
431 459 .devo_getinfo = ddi_no_info,
432 460 .devo_identify = nulldev,
433 461 .devo_probe = nulldev,
434 462 .devo_attach = nvme_attach,
435 463 .devo_detach = nvme_detach,
436 464 .devo_reset = nodev,
437 465 .devo_cb_ops = &nvme_cb_ops,
438 466 .devo_bus_ops = NULL,
439 467 .devo_power = NULL,
440 468 .devo_quiesce = nvme_quiesce,
441 469 };
442 470
443 471 static struct modldrv nvme_modldrv = {
444 472 .drv_modops = &mod_driverops,
445 473 .drv_linkinfo = "NVMe v1.1b",
446 474 .drv_dev_ops = &nvme_dev_ops
447 475 };
448 476
449 477 static struct modlinkage nvme_modlinkage = {
450 478 .ml_rev = MODREV_1,
451 479 .ml_linkage = { &nvme_modldrv, NULL }
452 480 };
453 481
454 482 static bd_ops_t nvme_bd_ops = {
455 483 .o_version = BD_OPS_VERSION_0,
456 484 .o_drive_info = nvme_bd_driveinfo,
457 485 .o_media_info = nvme_bd_mediainfo,
458 486 .o_devid_init = nvme_bd_devid,
459 487 .o_sync_cache = nvme_bd_sync,
460 488 .o_read = nvme_bd_read,
461 489 .o_write = nvme_bd_write,
462 490 };
463 491
492 +/*
493 + * This list will hold commands that have timed out and couldn't be aborted.
494 + * As we don't know what the hardware may still do with the DMA memory we can't
495 + * free them, so we'll keep them forever on this list where we can easily look
496 + * at them with mdb.
497 + */
498 +static struct list nvme_lost_cmds;
499 +static kmutex_t nvme_lc_mutex;
500 +
464 501 int
465 502 _init(void)
466 503 {
467 504 int error;
468 505
469 506 error = ddi_soft_state_init(&nvme_state, sizeof (nvme_t), 1);
470 507 if (error != DDI_SUCCESS)
471 508 return (error);
472 509
473 510 nvme_cmd_cache = kmem_cache_create("nvme_cmd_cache",
474 511 sizeof (nvme_cmd_t), 64, NULL, NULL, NULL, NULL, NULL, 0);
475 512
513 + mutex_init(&nvme_lc_mutex, NULL, MUTEX_DRIVER, NULL);
514 + list_create(&nvme_lost_cmds, sizeof (nvme_cmd_t),
515 + offsetof(nvme_cmd_t, nc_list));
516 +
476 517 bd_mod_init(&nvme_dev_ops);
477 518
478 519 error = mod_install(&nvme_modlinkage);
479 520 if (error != DDI_SUCCESS) {
480 521 ddi_soft_state_fini(&nvme_state);
522 + mutex_destroy(&nvme_lc_mutex);
523 + list_destroy(&nvme_lost_cmds);
481 524 bd_mod_fini(&nvme_dev_ops);
482 525 }
483 526
484 527 return (error);
485 528 }
486 529
487 530 int
488 531 _fini(void)
489 532 {
490 533 int error;
491 534
535 + if (!list_is_empty(&nvme_lost_cmds))
536 + return (DDI_FAILURE);
537 +
492 538 error = mod_remove(&nvme_modlinkage);
493 539 if (error == DDI_SUCCESS) {
494 540 ddi_soft_state_fini(&nvme_state);
495 541 kmem_cache_destroy(nvme_cmd_cache);
542 + mutex_destroy(&nvme_lc_mutex);
543 + list_destroy(&nvme_lost_cmds);
496 544 bd_mod_fini(&nvme_dev_ops);
497 545 }
498 546
499 547 return (error);
500 548 }
501 549
502 550 int
503 551 _info(struct modinfo *modinfop)
504 552 {
505 553 return (mod_info(&nvme_modlinkage, modinfop));
506 554 }
507 555
508 556 static inline void
509 557 nvme_put64(nvme_t *nvme, uintptr_t reg, uint64_t val)
510 558 {
511 559 ASSERT(((uintptr_t)(nvme->n_regs + reg) & 0x7) == 0);
512 560
513 561 /*LINTED: E_BAD_PTR_CAST_ALIGN*/
514 562 ddi_put64(nvme->n_regh, (uint64_t *)(nvme->n_regs + reg), val);
515 563 }
516 564
517 565 static inline void
518 566 nvme_put32(nvme_t *nvme, uintptr_t reg, uint32_t val)
519 567 {
520 568 ASSERT(((uintptr_t)(nvme->n_regs + reg) & 0x3) == 0);
521 569
522 570 /*LINTED: E_BAD_PTR_CAST_ALIGN*/
523 571 ddi_put32(nvme->n_regh, (uint32_t *)(nvme->n_regs + reg), val);
524 572 }
525 573
526 574 static inline uint64_t
527 575 nvme_get64(nvme_t *nvme, uintptr_t reg)
528 576 {
529 577 uint64_t val;
530 578
531 579 ASSERT(((uintptr_t)(nvme->n_regs + reg) & 0x7) == 0);
532 580
533 581 /*LINTED: E_BAD_PTR_CAST_ALIGN*/
534 582 val = ddi_get64(nvme->n_regh, (uint64_t *)(nvme->n_regs + reg));
535 583
536 584 return (val);
537 585 }
538 586
539 587 static inline uint32_t
540 588 nvme_get32(nvme_t *nvme, uintptr_t reg)
541 589 {
542 590 uint32_t val;
543 591
544 592 ASSERT(((uintptr_t)(nvme->n_regs + reg) & 0x3) == 0);
545 593
546 594 /*LINTED: E_BAD_PTR_CAST_ALIGN*/
547 595 val = ddi_get32(nvme->n_regh, (uint32_t *)(nvme->n_regs + reg));
548 596
549 597 return (val);
550 598 }
551 599
552 600 static boolean_t
553 601 nvme_check_regs_hdl(nvme_t *nvme)
554 602 {
555 603 ddi_fm_error_t error;
556 604
557 605 ddi_fm_acc_err_get(nvme->n_regh, &error, DDI_FME_VERSION);
558 606
559 607 if (error.fme_status != DDI_FM_OK)
560 608 return (B_TRUE);
561 609
562 610 return (B_FALSE);
563 611 }
564 612
565 613 static boolean_t
566 614 nvme_check_dma_hdl(nvme_dma_t *dma)
567 615 {
568 616 ddi_fm_error_t error;
569 617
570 618 if (dma == NULL)
571 619 return (B_FALSE);
572 620
573 621 ddi_fm_dma_err_get(dma->nd_dmah, &error, DDI_FME_VERSION);
574 622
575 623 if (error.fme_status != DDI_FM_OK)
576 624 return (B_TRUE);
577 625
578 626 return (B_FALSE);
579 627 }
580 628
581 629 static void
582 630 nvme_free_dma_common(nvme_dma_t *dma)
583 631 {
584 632 if (dma->nd_dmah != NULL)
585 633 (void) ddi_dma_unbind_handle(dma->nd_dmah);
586 634 if (dma->nd_acch != NULL)
587 635 ddi_dma_mem_free(&dma->nd_acch);
588 636 if (dma->nd_dmah != NULL)
589 637 ddi_dma_free_handle(&dma->nd_dmah);
590 638 }
591 639
592 640 static void
593 641 nvme_free_dma(nvme_dma_t *dma)
594 642 {
595 643 nvme_free_dma_common(dma);
596 644 kmem_free(dma, sizeof (*dma));
597 645 }
598 646
599 647 /* ARGSUSED */
600 648 static void
601 649 nvme_prp_dma_destructor(void *buf, void *private)
602 650 {
603 651 nvme_dma_t *dma = (nvme_dma_t *)buf;
604 652
605 653 nvme_free_dma_common(dma);
606 654 }
607 655
608 656 static int
609 657 nvme_alloc_dma_common(nvme_t *nvme, nvme_dma_t *dma,
610 658 size_t len, uint_t flags, ddi_dma_attr_t *dma_attr)
611 659 {
612 660 if (ddi_dma_alloc_handle(nvme->n_dip, dma_attr, DDI_DMA_SLEEP, NULL,
613 661 &dma->nd_dmah) != DDI_SUCCESS) {
614 662 /*
615 663 * Due to DDI_DMA_SLEEP this can't be DDI_DMA_NORESOURCES, and
616 664 * the only other possible error is DDI_DMA_BADATTR which
617 665 * indicates a driver bug which should cause a panic.
618 666 */
619 667 dev_err(nvme->n_dip, CE_PANIC,
620 668 "!failed to get DMA handle, check DMA attributes");
621 669 return (DDI_FAILURE);
622 670 }
623 671
624 672 /*
625 673 * ddi_dma_mem_alloc() can only fail when DDI_DMA_NOSLEEP is specified
626 674 * or the flags are conflicting, which isn't the case here.
627 675 */
628 676 (void) ddi_dma_mem_alloc(dma->nd_dmah, len, &nvme->n_reg_acc_attr,
629 677 DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &dma->nd_memp,
630 678 &dma->nd_len, &dma->nd_acch);
631 679
632 680 if (ddi_dma_addr_bind_handle(dma->nd_dmah, NULL, dma->nd_memp,
633 681 dma->nd_len, flags | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
634 682 &dma->nd_cookie, &dma->nd_ncookie) != DDI_DMA_MAPPED) {
635 683 dev_err(nvme->n_dip, CE_WARN,
636 684 "!failed to bind DMA memory");
637 685 atomic_inc_32(&nvme->n_dma_bind_err);
638 686 nvme_free_dma_common(dma);
639 687 return (DDI_FAILURE);
640 688 }
641 689
642 690 return (DDI_SUCCESS);
643 691 }
644 692
645 693 static int
646 694 nvme_zalloc_dma(nvme_t *nvme, size_t len, uint_t flags,
647 695 ddi_dma_attr_t *dma_attr, nvme_dma_t **ret)
648 696 {
649 697 nvme_dma_t *dma = kmem_zalloc(sizeof (nvme_dma_t), KM_SLEEP);
650 698
651 699 if (nvme_alloc_dma_common(nvme, dma, len, flags, dma_attr) !=
652 700 DDI_SUCCESS) {
653 701 *ret = NULL;
654 702 kmem_free(dma, sizeof (nvme_dma_t));
655 703 return (DDI_FAILURE);
656 704 }
657 705
658 706 bzero(dma->nd_memp, dma->nd_len);
659 707
660 708 *ret = dma;
661 709 return (DDI_SUCCESS);
662 710 }
663 711
664 712 /* ARGSUSED */
665 713 static int
666 714 nvme_prp_dma_constructor(void *buf, void *private, int flags)
667 715 {
668 716 nvme_dma_t *dma = (nvme_dma_t *)buf;
669 717 nvme_t *nvme = (nvme_t *)private;
670 718
671 719 dma->nd_dmah = NULL;
672 720 dma->nd_acch = NULL;
673 721
674 722 if (nvme_alloc_dma_common(nvme, dma, nvme->n_pagesize,
675 723 DDI_DMA_READ, &nvme->n_prp_dma_attr) != DDI_SUCCESS) {
676 724 return (-1);
677 725 }
678 726
679 727 ASSERT(dma->nd_ncookie == 1);
680 728
681 729 dma->nd_cached = B_TRUE;
682 730
683 731 return (0);
684 732 }
685 733
686 734 static int
687 735 nvme_zalloc_queue_dma(nvme_t *nvme, uint32_t nentry, uint16_t qe_len,
688 736 uint_t flags, nvme_dma_t **dma)
689 737 {
690 738 uint32_t len = nentry * qe_len;
691 739 ddi_dma_attr_t q_dma_attr = nvme->n_queue_dma_attr;
692 740
693 741 len = roundup(len, nvme->n_pagesize);
694 742
695 743 q_dma_attr.dma_attr_minxfer = len;
696 744
697 745 if (nvme_zalloc_dma(nvme, len, flags, &q_dma_attr, dma)
698 746 != DDI_SUCCESS) {
699 747 dev_err(nvme->n_dip, CE_WARN,
700 748 "!failed to get DMA memory for queue");
701 749 goto fail;
702 750 }
703 751
704 752 if ((*dma)->nd_ncookie != 1) {
705 753 dev_err(nvme->n_dip, CE_WARN,
706 754 "!got too many cookies for queue DMA");
707 755 goto fail;
708 756 }
709 757
710 758 return (DDI_SUCCESS);
711 759
712 760 fail:
713 761 if (*dma) {
714 762 nvme_free_dma(*dma);
715 763 *dma = NULL;
716 764 }
717 765
718 766 return (DDI_FAILURE);
719 767 }
720 768
721 769 static void
722 770 nvme_free_qpair(nvme_qpair_t *qp)
723 771 {
724 772 int i;
725 773
726 774 mutex_destroy(&qp->nq_mutex);
727 775 sema_destroy(&qp->nq_sema);
728 776
729 777 if (qp->nq_sqdma != NULL)
730 778 nvme_free_dma(qp->nq_sqdma);
731 779 if (qp->nq_cqdma != NULL)
732 780 nvme_free_dma(qp->nq_cqdma);
733 781
734 782 if (qp->nq_active_cmds > 0)
735 783 for (i = 0; i != qp->nq_nentry; i++)
736 784 if (qp->nq_cmd[i] != NULL)
737 785 nvme_free_cmd(qp->nq_cmd[i]);
738 786
739 787 if (qp->nq_cmd != NULL)
740 788 kmem_free(qp->nq_cmd, sizeof (nvme_cmd_t *) * qp->nq_nentry);
741 789
742 790 kmem_free(qp, sizeof (nvme_qpair_t));
743 791 }
744 792
745 793 static int
746 794 nvme_alloc_qpair(nvme_t *nvme, uint32_t nentry, nvme_qpair_t **nqp,
747 795 int idx)
748 796 {
749 797 nvme_qpair_t *qp = kmem_zalloc(sizeof (*qp), KM_SLEEP);
750 798
751 799 mutex_init(&qp->nq_mutex, NULL, MUTEX_DRIVER,
752 800 DDI_INTR_PRI(nvme->n_intr_pri));
753 801 sema_init(&qp->nq_sema, nentry, NULL, SEMA_DRIVER, NULL);
754 802
755 803 if (nvme_zalloc_queue_dma(nvme, nentry, sizeof (nvme_sqe_t),
756 804 DDI_DMA_WRITE, &qp->nq_sqdma) != DDI_SUCCESS)
757 805 goto fail;
758 806
759 807 if (nvme_zalloc_queue_dma(nvme, nentry, sizeof (nvme_cqe_t),
760 808 DDI_DMA_READ, &qp->nq_cqdma) != DDI_SUCCESS)
761 809 goto fail;
762 810
763 811 qp->nq_sq = (nvme_sqe_t *)qp->nq_sqdma->nd_memp;
764 812 qp->nq_cq = (nvme_cqe_t *)qp->nq_cqdma->nd_memp;
765 813 qp->nq_nentry = nentry;
766 814
767 815 qp->nq_sqtdbl = NVME_REG_SQTDBL(nvme, idx);
768 816 qp->nq_cqhdbl = NVME_REG_CQHDBL(nvme, idx);
769 817
770 818 qp->nq_cmd = kmem_zalloc(sizeof (nvme_cmd_t *) * nentry, KM_SLEEP);
771 819 qp->nq_next_cmd = 0;
772 820
773 821 *nqp = qp;
774 822 return (DDI_SUCCESS);
775 823
776 824 fail:
777 825 nvme_free_qpair(qp);
778 826 *nqp = NULL;
779 827
780 828 return (DDI_FAILURE);
781 829 }
782 830
783 831 static nvme_cmd_t *
784 832 nvme_alloc_cmd(nvme_t *nvme, int kmflag)
785 833 {
786 834 nvme_cmd_t *cmd = kmem_cache_alloc(nvme_cmd_cache, kmflag);
787 835
788 836 if (cmd == NULL)
789 837 return (cmd);
790 838
791 839 bzero(cmd, sizeof (nvme_cmd_t));
792 840
793 841 cmd->nc_nvme = nvme;
794 842
795 843 mutex_init(&cmd->nc_mutex, NULL, MUTEX_DRIVER,
796 844 DDI_INTR_PRI(nvme->n_intr_pri));
797 845 cv_init(&cmd->nc_cv, NULL, CV_DRIVER, NULL);
798 846
799 847 return (cmd);
800 848 }
801 849
802 850 static void
803 851 nvme_free_cmd(nvme_cmd_t *cmd)
804 852 {
853 + /* Don't free commands on the lost commands list. */
854 + if (list_link_active(&cmd->nc_list))
855 + return;
856 +
805 857 if (cmd->nc_dma) {
806 858 if (cmd->nc_dma->nd_cached)
807 859 kmem_cache_free(cmd->nc_nvme->n_prp_cache,
808 860 cmd->nc_dma);
809 861 else
810 862 nvme_free_dma(cmd->nc_dma);
811 863 cmd->nc_dma = NULL;
812 864 }
813 865
814 866 cv_destroy(&cmd->nc_cv);
815 867 mutex_destroy(&cmd->nc_mutex);
816 868
817 869 kmem_cache_free(nvme_cmd_cache, cmd);
818 870 }
819 871
820 872 static void
821 873 nvme_submit_admin_cmd(nvme_qpair_t *qp, nvme_cmd_t *cmd)
822 874 {
823 875 sema_p(&qp->nq_sema);
824 876 nvme_submit_cmd_common(qp, cmd);
825 877 }
826 878
827 879 static int
828 880 nvme_submit_io_cmd(nvme_qpair_t *qp, nvme_cmd_t *cmd)
829 881 {
830 882 if (sema_tryp(&qp->nq_sema) == 0)
831 883 return (EAGAIN);
832 884
833 885 nvme_submit_cmd_common(qp, cmd);
834 886 return (0);
835 887 }
836 888
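An illustrative sketch (not part of this diff) of how an I/O submission path can now propagate EAGAIN instead of blocking; the queue pointer and cleanup shown are hypothetical and depend on the caller:

	if ((ret = nvme_submit_io_cmd(ioq, cmd)) != 0) {
		nvme_free_cmd(cmd);	/* hypothetical cleanup */
		return (ret);		/* EAGAIN when the queue is full */
	}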
837 889 static void
838 890 nvme_submit_cmd_common(nvme_qpair_t *qp, nvme_cmd_t *cmd)
839 891 {
840 892 nvme_reg_sqtdbl_t tail = { 0 };
841 893
842 894 mutex_enter(&qp->nq_mutex);
843 895 cmd->nc_completed = B_FALSE;
844 896
845 897 /*
846 898 * Try to insert the cmd into the active cmd array at the nq_next_cmd
847 899 * slot. If the slot is already occupied advance to the next slot and
848 900 * try again. This can happen for long running commands like async event
849 901 * requests.
850 902 */
851 903 while (qp->nq_cmd[qp->nq_next_cmd] != NULL)
852 904 qp->nq_next_cmd = (qp->nq_next_cmd + 1) % qp->nq_nentry;
853 905 qp->nq_cmd[qp->nq_next_cmd] = cmd;
854 906
855 907 qp->nq_active_cmds++;
856 908
857 909 cmd->nc_sqe.sqe_cid = qp->nq_next_cmd;
858 910 bcopy(&cmd->nc_sqe, &qp->nq_sq[qp->nq_sqtail], sizeof (nvme_sqe_t));
859 911 (void) ddi_dma_sync(qp->nq_sqdma->nd_dmah,
860 912 sizeof (nvme_sqe_t) * qp->nq_sqtail,
861 913 sizeof (nvme_sqe_t), DDI_DMA_SYNC_FORDEV);
862 914 qp->nq_next_cmd = (qp->nq_next_cmd + 1) % qp->nq_nentry;
863 915
864 916 tail.b.sqtdbl_sqt = qp->nq_sqtail = (qp->nq_sqtail + 1) % qp->nq_nentry;
865 917 nvme_put32(cmd->nc_nvme, qp->nq_sqtdbl, tail.r);
866 918
867 919 mutex_exit(&qp->nq_mutex);
868 920 }
869 921
870 922 static nvme_cmd_t *
923 +nvme_unqueue_cmd(nvme_t *nvme, nvme_qpair_t *qp, int cid)
924 +{
925 + nvme_cmd_t *cmd;
926 +
927 + ASSERT(mutex_owned(&qp->nq_mutex));
928 + ASSERT3S(cid, <, qp->nq_nentry);
929 +
930 + cmd = qp->nq_cmd[cid];
931 + qp->nq_cmd[cid] = NULL;
932 + ASSERT3U(qp->nq_active_cmds, >, 0);
933 + qp->nq_active_cmds--;
934 + sema_v(&qp->nq_sema);
935 +
936 + ASSERT3P(cmd, !=, NULL);
937 + ASSERT3P(cmd->nc_nvme, ==, nvme);
938 + ASSERT3S(cmd->nc_sqe.sqe_cid, ==, cid);
939 +
940 + return (cmd);
941 +}
942 +
943 +static nvme_cmd_t *
871 944 nvme_retrieve_cmd(nvme_t *nvme, nvme_qpair_t *qp)
872 945 {
873 946 nvme_reg_cqhdbl_t head = { 0 };
874 947
875 948 nvme_cqe_t *cqe;
876 949 nvme_cmd_t *cmd;
877 950
878 951 (void) ddi_dma_sync(qp->nq_cqdma->nd_dmah, 0,
879 952 sizeof (nvme_cqe_t) * qp->nq_nentry, DDI_DMA_SYNC_FORKERNEL);
880 953
881 954 mutex_enter(&qp->nq_mutex);
882 955 cqe = &qp->nq_cq[qp->nq_cqhead];
883 956
884 957 /* Check phase tag of CQE. Hardware inverts it for new entries. */
885 958 if (cqe->cqe_sf.sf_p == qp->nq_phase) {
886 959 mutex_exit(&qp->nq_mutex);
887 960 return (NULL);
888 961 }
889 962
890 963 ASSERT(nvme->n_ioq[cqe->cqe_sqid] == qp);
891 - ASSERT(cqe->cqe_cid < qp->nq_nentry);
892 964
893 - cmd = qp->nq_cmd[cqe->cqe_cid];
894 - qp->nq_cmd[cqe->cqe_cid] = NULL;
895 - qp->nq_active_cmds--;
965 + cmd = nvme_unqueue_cmd(nvme, qp, cqe->cqe_cid);
896 966
897 - ASSERT(cmd != NULL);
898 - ASSERT(cmd->nc_nvme == nvme);
899 967 ASSERT(cmd->nc_sqid == cqe->cqe_sqid);
900 - ASSERT(cmd->nc_sqe.sqe_cid == cqe->cqe_cid);
901 968 bcopy(cqe, &cmd->nc_cqe, sizeof (nvme_cqe_t));
902 969
903 970 qp->nq_sqhead = cqe->cqe_sqhd;
904 971
905 972 head.b.cqhdbl_cqh = qp->nq_cqhead = (qp->nq_cqhead + 1) % qp->nq_nentry;
906 973
907 974 /* Toggle phase on wrap-around. */
908 975 if (qp->nq_cqhead == 0)
909 976 qp->nq_phase = qp->nq_phase ? 0 : 1;
910 977
911 978 nvme_put32(cmd->nc_nvme, qp->nq_cqhdbl, head.r);
912 979 mutex_exit(&qp->nq_mutex);
913 - sema_v(&qp->nq_sema);
914 980
915 981 return (cmd);
916 982 }
917 983
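An illustrative sketch of how an interrupt handler (outside this hunk) drains a queue pair with nvme_retrieve_cmd(); the taskq and taskq-entry field names are assumptions about members not visible here:

	while ((cmd = nvme_retrieve_cmd(nvme, qp)) != NULL)
		taskq_dispatch_ent(nvme->n_cmd_taskq, cmd->nc_callback,
		    cmd, TQ_NOSLEEP, &cmd->nc_tqent);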
918 984 static int
919 985 nvme_check_unknown_cmd_status(nvme_cmd_t *cmd)
920 986 {
921 987 nvme_cqe_t *cqe = &cmd->nc_cqe;
922 988
923 989 dev_err(cmd->nc_nvme->n_dip, CE_WARN,
924 990 "!unknown command status received: opc = %x, sqid = %d, cid = %d, "
925 991 "sc = %x, sct = %x, dnr = %d, m = %d", cmd->nc_sqe.sqe_opc,
926 992 cqe->cqe_sqid, cqe->cqe_cid, cqe->cqe_sf.sf_sc, cqe->cqe_sf.sf_sct,
927 993 cqe->cqe_sf.sf_dnr, cqe->cqe_sf.sf_m);
928 994
929 995 if (cmd->nc_xfer != NULL)
930 996 bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
931 997
932 998 if (cmd->nc_nvme->n_strict_version) {
933 999 cmd->nc_nvme->n_dead = B_TRUE;
934 1000 ddi_fm_service_impact(cmd->nc_nvme->n_dip, DDI_SERVICE_LOST);
935 1001 }
936 1002
937 1003 return (EIO);
938 1004 }
939 1005
940 1006 static int
941 1007 nvme_check_vendor_cmd_status(nvme_cmd_t *cmd)
942 1008 {
943 1009 nvme_cqe_t *cqe = &cmd->nc_cqe;
944 1010
945 1011 dev_err(cmd->nc_nvme->n_dip, CE_WARN,
946 1012 "!unknown command status received: opc = %x, sqid = %d, cid = %d, "
947 1013 "sc = %x, sct = %x, dnr = %d, m = %d", cmd->nc_sqe.sqe_opc,
948 1014 cqe->cqe_sqid, cqe->cqe_cid, cqe->cqe_sf.sf_sc, cqe->cqe_sf.sf_sct,
949 1015 cqe->cqe_sf.sf_dnr, cqe->cqe_sf.sf_m);
950 1016 if (!cmd->nc_nvme->n_ignore_unknown_vendor_status) {
951 1017 cmd->nc_nvme->n_dead = B_TRUE;
952 1018 ddi_fm_service_impact(cmd->nc_nvme->n_dip, DDI_SERVICE_LOST);
953 1019 }
954 1020
955 1021 return (EIO);
956 1022 }
957 1023
958 1024 static int
959 1025 nvme_check_integrity_cmd_status(nvme_cmd_t *cmd)
960 1026 {
961 1027 nvme_cqe_t *cqe = &cmd->nc_cqe;
962 1028
963 1029 switch (cqe->cqe_sf.sf_sc) {
964 1030 case NVME_CQE_SC_INT_NVM_WRITE:
965 1031 /* write fail */
966 1032 /* TODO: post ereport */
967 1033 if (cmd->nc_xfer != NULL)
968 1034 bd_error(cmd->nc_xfer, BD_ERR_MEDIA);
969 1035 return (EIO);
970 1036
971 1037 case NVME_CQE_SC_INT_NVM_READ:
972 1038 /* read fail */
973 1039 /* TODO: post ereport */
974 1040 if (cmd->nc_xfer != NULL)
975 1041 bd_error(cmd->nc_xfer, BD_ERR_MEDIA);
976 1042 return (EIO);
977 1043
978 1044 default:
979 1045 return (nvme_check_unknown_cmd_status(cmd));
980 1046 }
981 1047 }
982 1048
983 1049 static int
984 1050 nvme_check_generic_cmd_status(nvme_cmd_t *cmd)
985 1051 {
986 1052 nvme_cqe_t *cqe = &cmd->nc_cqe;
987 1053
988 1054 switch (cqe->cqe_sf.sf_sc) {
989 1055 case NVME_CQE_SC_GEN_SUCCESS:
990 1056 return (0);
991 1057
992 1058 /*
993 1059 * Errors indicating a bug in the driver should cause a panic.
994 1060 */
995 1061 case NVME_CQE_SC_GEN_INV_OPC:
996 1062 /* Invalid Command Opcode */
997 1063 dev_err(cmd->nc_nvme->n_dip, CE_PANIC, "programming error: "
998 1064 "invalid opcode in cmd %p", (void *)cmd);
999 1065 return (0);
1000 1066
1001 1067 case NVME_CQE_SC_GEN_INV_FLD:
1002 1068 /* Invalid Field in Command */
1003 1069 if (!cmd->nc_dontpanic)
1004 1070 dev_err(cmd->nc_nvme->n_dip, CE_PANIC,
1005 1071 "programming error: invalid field in cmd %p",
1006 1072 (void *)cmd);
1007 1073 return (EIO);
1008 1074
1009 1075 case NVME_CQE_SC_GEN_ID_CNFL:
1010 1076 /* Command ID Conflict */
1011 1077 dev_err(cmd->nc_nvme->n_dip, CE_PANIC, "programming error: "
1012 1078 "cmd ID conflict in cmd %p", (void *)cmd);
1013 1079 return (0);
1014 1080
1015 1081 case NVME_CQE_SC_GEN_INV_NS:
1016 1082 /* Invalid Namespace or Format */
1017 1083 if (!cmd->nc_dontpanic)
1018 1084 dev_err(cmd->nc_nvme->n_dip, CE_PANIC,
1019 1085 "programming error: " "invalid NS/format in cmd %p",
1020 1086 (void *)cmd);
1021 1087 return (EINVAL);
1022 1088
1023 1089 case NVME_CQE_SC_GEN_NVM_LBA_RANGE:
1024 1090 /* LBA Out Of Range */
1025 1091 dev_err(cmd->nc_nvme->n_dip, CE_PANIC, "programming error: "
1026 1092 "LBA out of range in cmd %p", (void *)cmd);
1027 1093 return (0);
1028 1094
1029 1095 /*
1030 1096 * Non-fatal errors, handle gracefully.
1031 1097 */
1032 1098 case NVME_CQE_SC_GEN_DATA_XFR_ERR:
1033 1099 /* Data Transfer Error (DMA) */
1034 1100 /* TODO: post ereport */
1035 1101 atomic_inc_32(&cmd->nc_nvme->n_data_xfr_err);
1036 1102 if (cmd->nc_xfer != NULL)
1037 1103 bd_error(cmd->nc_xfer, BD_ERR_NTRDY);
1038 1104 return (EIO);
1039 1105
1040 1106 case NVME_CQE_SC_GEN_INTERNAL_ERR:
1041 1107 /*
1042 1108 * Internal Error. The spec (v1.0, section 4.5.1.2) says
1043 1109 * detailed error information is returned as async event,
1044 1110 * so we pretty much ignore the error here and handle it
1045 1111 * in the async event handler.
1046 1112 */
1047 1113 atomic_inc_32(&cmd->nc_nvme->n_internal_err);
1048 1114 if (cmd->nc_xfer != NULL)
1049 1115 bd_error(cmd->nc_xfer, BD_ERR_NTRDY);
1050 1116 return (EIO);
1051 1117
1052 1118 case NVME_CQE_SC_GEN_ABORT_REQUEST:
1053 1119 /*
1054 1120 * Command Abort Requested. This normally happens only when a
1055 1121 * command times out.
1056 1122 */
1057 1123 /* TODO: post ereport or change blkdev to handle this? */
1058 1124 atomic_inc_32(&cmd->nc_nvme->n_abort_rq_err);
1059 1125 return (ECANCELED);
1060 1126
1061 1127 case NVME_CQE_SC_GEN_ABORT_PWRLOSS:
1062 1128 /* Command Aborted due to Power Loss Notification */
1063 1129 ddi_fm_service_impact(cmd->nc_nvme->n_dip, DDI_SERVICE_LOST);
1064 1130 cmd->nc_nvme->n_dead = B_TRUE;
1065 1131 return (EIO);
1066 1132
1067 1133 case NVME_CQE_SC_GEN_ABORT_SQ_DEL:
1068 1134 /* Command Aborted due to SQ Deletion */
1069 1135 atomic_inc_32(&cmd->nc_nvme->n_abort_sq_del);
1070 1136 return (EIO);
1071 1137
1072 1138 case NVME_CQE_SC_GEN_NVM_CAP_EXC:
1073 1139 /* Capacity Exceeded */
1074 1140 atomic_inc_32(&cmd->nc_nvme->n_nvm_cap_exc);
1075 1141 if (cmd->nc_xfer != NULL)
1076 1142 bd_error(cmd->nc_xfer, BD_ERR_MEDIA);
1077 1143 return (EIO);
1078 1144
1079 1145 case NVME_CQE_SC_GEN_NVM_NS_NOTRDY:
1080 1146 /* Namespace Not Ready */
1081 1147 atomic_inc_32(&cmd->nc_nvme->n_nvm_ns_notrdy);
1082 1148 if (cmd->nc_xfer != NULL)
1083 1149 bd_error(cmd->nc_xfer, BD_ERR_NTRDY);
1084 1150 return (EIO);
1085 1151
1086 1152 default:
1087 1153 return (nvme_check_unknown_cmd_status(cmd));
1088 1154 }
1089 1155 }
1090 1156
1091 1157 static int
1092 1158 nvme_check_specific_cmd_status(nvme_cmd_t *cmd)
1093 1159 {
1094 1160 nvme_cqe_t *cqe = &cmd->nc_cqe;
1095 1161
1096 1162 switch (cqe->cqe_sf.sf_sc) {
1097 1163 case NVME_CQE_SC_SPC_INV_CQ:
1098 1164 /* Completion Queue Invalid */
1099 1165 ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_CREATE_SQUEUE);
1100 1166 atomic_inc_32(&cmd->nc_nvme->n_inv_cq_err);
1101 1167 return (EINVAL);
1102 1168
1103 1169 case NVME_CQE_SC_SPC_INV_QID:
1104 1170 /* Invalid Queue Identifier */
1105 1171 ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_CREATE_SQUEUE ||
1106 1172 cmd->nc_sqe.sqe_opc == NVME_OPC_DELETE_SQUEUE ||
1107 1173 cmd->nc_sqe.sqe_opc == NVME_OPC_CREATE_CQUEUE ||
1108 1174 cmd->nc_sqe.sqe_opc == NVME_OPC_DELETE_CQUEUE);
1109 1175 atomic_inc_32(&cmd->nc_nvme->n_inv_qid_err);
1110 1176 return (EINVAL);
1111 1177
1112 1178 case NVME_CQE_SC_SPC_MAX_QSZ_EXC:
1113 1179 /* Max Queue Size Exceeded */
1114 1180 ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_CREATE_SQUEUE ||
1115 1181 cmd->nc_sqe.sqe_opc == NVME_OPC_CREATE_CQUEUE);
1116 1182 atomic_inc_32(&cmd->nc_nvme->n_max_qsz_exc);
1117 1183 return (EINVAL);
1118 1184
1119 1185 case NVME_CQE_SC_SPC_ABRT_CMD_EXC:
1120 1186 /* Abort Command Limit Exceeded */
1121 1187 ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_ABORT);
1122 1188 dev_err(cmd->nc_nvme->n_dip, CE_PANIC, "programming error: "
1123 1189 "abort command limit exceeded in cmd %p", (void *)cmd);
1124 1190 return (0);
1125 1191
1126 1192 case NVME_CQE_SC_SPC_ASYNC_EVREQ_EXC:
1127 1193 /* Async Event Request Limit Exceeded */
1128 1194 ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_ASYNC_EVENT);
1129 1195 dev_err(cmd->nc_nvme->n_dip, CE_PANIC, "programming error: "
1130 1196 "async event request limit exceeded in cmd %p",
1131 1197 (void *)cmd);
1132 1198 return (0);
1133 1199
1134 1200 case NVME_CQE_SC_SPC_INV_INT_VECT:
1135 1201 /* Invalid Interrupt Vector */
1136 1202 ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_CREATE_CQUEUE);
1137 1203 atomic_inc_32(&cmd->nc_nvme->n_inv_int_vect);
1138 1204 return (EINVAL);
1139 1205
1140 1206 case NVME_CQE_SC_SPC_INV_LOG_PAGE:
1141 1207 /* Invalid Log Page */
1142 1208 ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_GET_LOG_PAGE);
1143 1209 atomic_inc_32(&cmd->nc_nvme->n_inv_log_page);
1144 1210 return (EINVAL);
1145 1211
1146 1212 case NVME_CQE_SC_SPC_INV_FORMAT:
1147 1213 /* Invalid Format */
1148 1214 ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_FORMAT);
1149 1215 atomic_inc_32(&cmd->nc_nvme->n_inv_format);
1150 1216 if (cmd->nc_xfer != NULL)
1151 1217 bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
1152 1218 return (EINVAL);
1153 1219
1154 1220 case NVME_CQE_SC_SPC_INV_Q_DEL:
1155 1221 /* Invalid Queue Deletion */
1156 1222 ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_DELETE_CQUEUE);
1157 1223 atomic_inc_32(&cmd->nc_nvme->n_inv_q_del);
1158 1224 return (EINVAL);
1159 1225
1160 1226 case NVME_CQE_SC_SPC_NVM_CNFL_ATTR:
1161 1227 /* Conflicting Attributes */
1162 1228 ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_DSET_MGMT ||
1163 1229 cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_READ ||
1164 1230 cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_WRITE);
1165 1231 atomic_inc_32(&cmd->nc_nvme->n_cnfl_attr);
1166 1232 if (cmd->nc_xfer != NULL)
1167 1233 bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
1168 1234 return (EINVAL);
1169 1235
1170 1236 case NVME_CQE_SC_SPC_NVM_INV_PROT:
1171 1237 /* Invalid Protection Information */
1172 1238 ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_COMPARE ||
1173 1239 cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_READ ||
1174 1240 cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_WRITE);
1175 1241 atomic_inc_32(&cmd->nc_nvme->n_inv_prot);
1176 1242 if (cmd->nc_xfer != NULL)
1177 1243 bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
1178 1244 return (EINVAL);
1179 1245
1180 1246 case NVME_CQE_SC_SPC_NVM_READONLY:
1181 1247 /* Write to Read Only Range */
1182 1248 ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_WRITE);
1183 1249 atomic_inc_32(&cmd->nc_nvme->n_readonly);
1184 1250 if (cmd->nc_xfer != NULL)
1185 1251 bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
1186 1252 return (EROFS);
1187 1253
1188 1254 default:
1189 1255 return (nvme_check_unknown_cmd_status(cmd));
1190 1256 }
1191 1257 }
1192 1258
1193 1259 static inline int
1194 1260 nvme_check_cmd_status(nvme_cmd_t *cmd)
1195 1261 {
1196 1262 nvme_cqe_t *cqe = &cmd->nc_cqe;
1197 1263
1198 - /* take a shortcut if everything is alright */
1264 + /*
1265 + * Take a shortcut if the controller is dead, or if
1266 + * command status indicates no error.
1267 + */
1268 + if (cmd->nc_nvme->n_dead)
1269 + return (EIO);
1270 +
1199 1271 if (cqe->cqe_sf.sf_sct == NVME_CQE_SCT_GENERIC &&
1200 1272 cqe->cqe_sf.sf_sc == NVME_CQE_SC_GEN_SUCCESS)
1201 1273 return (0);
1202 1274
1203 1275 if (cqe->cqe_sf.sf_sct == NVME_CQE_SCT_GENERIC)
1204 1276 return (nvme_check_generic_cmd_status(cmd));
1205 1277 else if (cqe->cqe_sf.sf_sct == NVME_CQE_SCT_SPECIFIC)
1206 1278 return (nvme_check_specific_cmd_status(cmd));
1207 1279 else if (cqe->cqe_sf.sf_sct == NVME_CQE_SCT_INTEGRITY)
1208 1280 return (nvme_check_integrity_cmd_status(cmd));
1209 1281 else if (cqe->cqe_sf.sf_sct == NVME_CQE_SCT_VENDOR)
1210 1282 return (nvme_check_vendor_cmd_status(cmd));
1211 1283
1212 1284 return (nvme_check_unknown_cmd_status(cmd));
1213 1285 }
1214 1286
1215 -/*
1216 - * nvme_abort_cmd_cb -- replaces nc_callback of aborted commands
1217 - *
1218 - * This functions takes care of cleaning up aborted commands. The command
1219 - * status is checked to catch any fatal errors.
1220 - */
1221 -static void
1222 -nvme_abort_cmd_cb(void *arg)
1287 +static int
1288 +nvme_abort_cmd(nvme_cmd_t *abort_cmd, uint_t sec)
1223 1289 {
1224 - nvme_cmd_t *cmd = arg;
1225 -
1226 - /*
1227 - * Grab the command mutex. Once we have it we hold the last reference
1228 - * to the command and can safely free it.
1229 - */
1230 - mutex_enter(&cmd->nc_mutex);
1231 - (void) nvme_check_cmd_status(cmd);
1232 - mutex_exit(&cmd->nc_mutex);
1233 -
1234 - nvme_free_cmd(cmd);
1235 -}
1236 -
1237 -static void
1238 -nvme_abort_cmd(nvme_cmd_t *abort_cmd)
1239 -{
1240 1290 nvme_t *nvme = abort_cmd->nc_nvme;
1241 1291 nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
1242 1292 nvme_abort_cmd_t ac = { 0 };
1293 + int ret = 0;
1243 1294
1244 1295 sema_p(&nvme->n_abort_sema);
1245 1296
1246 1297 ac.b.ac_cid = abort_cmd->nc_sqe.sqe_cid;
1247 1298 ac.b.ac_sqid = abort_cmd->nc_sqid;
1248 1299
1249 - /*
1250 - * Drop the mutex of the aborted command. From this point on
1251 - * we must assume that the abort callback has freed the command.
1252 - */
1253 - mutex_exit(&abort_cmd->nc_mutex);
1254 -
1255 1300 cmd->nc_sqid = 0;
1256 1301 cmd->nc_sqe.sqe_opc = NVME_OPC_ABORT;
1257 1302 cmd->nc_callback = nvme_wakeup_cmd;
1258 1303 cmd->nc_sqe.sqe_cdw10 = ac.r;
1259 1304
1260 1305 /*
1261 1306 * Send the ABORT to the hardware. The ABORT command will return _after_
1262 - * the aborted command has completed (aborted or otherwise).
1307 + * the aborted command has completed (aborted or otherwise), but since
1308 + * we still hold the aborted command's mutex, its callback hasn't been
1309 + * processed yet.
1263 1310 */
1264 - if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) {
1265 - sema_v(&nvme->n_abort_sema);
1266 - dev_err(nvme->n_dip, CE_WARN,
1267 - "!nvme_admin_cmd failed for ABORT");
1268 - atomic_inc_32(&nvme->n_abort_failed);
1269 - return;
1270 - }
1311 + nvme_admin_cmd(cmd, sec);
1271 1312 sema_v(&nvme->n_abort_sema);
1272 1313
1273 - if (nvme_check_cmd_status(cmd)) {
1314 + if ((ret = nvme_check_cmd_status(cmd)) != 0) {
1274 1315 dev_err(nvme->n_dip, CE_WARN,
1275 1316 "!ABORT failed with sct = %x, sc = %x",
1276 1317 cmd->nc_cqe.cqe_sf.sf_sct, cmd->nc_cqe.cqe_sf.sf_sc);
1277 1318 atomic_inc_32(&nvme->n_abort_failed);
1278 1319 } else {
1279 - atomic_inc_32(&nvme->n_cmd_aborted);
1320 + dev_err(nvme->n_dip, CE_WARN,
1321 + "!ABORT of command %d/%d %ssuccessful",
1322 + abort_cmd->nc_sqe.sqe_cid, abort_cmd->nc_sqid,
1323 + cmd->nc_cqe.cqe_dw0 & 1 ? "un" : "");
1324 + if ((cmd->nc_cqe.cqe_dw0 & 1) == 0)
1325 + atomic_inc_32(&nvme->n_cmd_aborted);
1280 1326 }
1281 1327
1282 1328 nvme_free_cmd(cmd);
1329 + return (ret);
1283 1330 }
1284 1331
1285 1332 /*
1286 1333 * nvme_wait_cmd -- wait for command completion or timeout
1287 1334 *
1288 - * Returns B_TRUE if the command completed normally.
1289 - *
1290 - * Returns B_FALSE if the command timed out and an abort was attempted. The
1291 - * command mutex will be dropped and the command must be considered freed. The
1292 - * freeing of the command is normally done by the abort command callback.
1293 - *
1294 1335 * In case of a serious error or a timeout of the abort command the hardware
1295 1336 * will be declared dead and FMA will be notified.
1296 1337 */
1297 -static boolean_t
1338 +static void
1298 1339 nvme_wait_cmd(nvme_cmd_t *cmd, uint_t sec)
1299 1340 {
1300 1341 clock_t timeout = ddi_get_lbolt() + drv_usectohz(sec * MICROSEC);
1301 1342 nvme_t *nvme = cmd->nc_nvme;
1302 1343 nvme_reg_csts_t csts;
1344 + nvme_qpair_t *qp;
1303 1345
1304 1346 ASSERT(mutex_owned(&cmd->nc_mutex));
1305 1347
1306 1348 while (!cmd->nc_completed) {
1307 1349 if (cv_timedwait(&cmd->nc_cv, &cmd->nc_mutex, timeout) == -1)
1308 1350 break;
1309 1351 }
1310 1352
1311 1353 if (cmd->nc_completed)
1312 - return (B_TRUE);
1354 + return;
1313 1355
1314 1356 /*
1315 - * The command timed out. Change the callback to the cleanup function.
1316 - */
1317 - cmd->nc_callback = nvme_abort_cmd_cb;
1318 -
1319 - /*
1357 + * The command timed out.
1358 + *
1320 1359 * Check controller for fatal status, any errors associated with the
1321 1360 * register or DMA handle, or for a double timeout (abort command timed
1322 1361 * out). If necessary log a warning and call FMA.
1323 1362 */
1324 1363 csts.r = nvme_get32(nvme, NVME_REG_CSTS);
1325 - dev_err(nvme->n_dip, CE_WARN, "!command timeout, "
1326 - "OPC = %x, CFS = %d", cmd->nc_sqe.sqe_opc, csts.b.csts_cfs);
1364 + dev_err(nvme->n_dip, CE_WARN, "!command %d/%d timeout, "
1365 + "OPC = %x, CFS = %d", cmd->nc_sqe.sqe_cid, cmd->nc_sqid,
1366 + cmd->nc_sqe.sqe_opc, csts.b.csts_cfs);
1327 1367 atomic_inc_32(&nvme->n_cmd_timeout);
1328 1368
1329 1369 if (csts.b.csts_cfs ||
1330 1370 nvme_check_regs_hdl(nvme) ||
1331 1371 nvme_check_dma_hdl(cmd->nc_dma) ||
1332 1372 cmd->nc_sqe.sqe_opc == NVME_OPC_ABORT) {
1333 1373 ddi_fm_service_impact(nvme->n_dip, DDI_SERVICE_LOST);
1334 1374 nvme->n_dead = B_TRUE;
1335 - mutex_exit(&cmd->nc_mutex);
1336 - } else {
1375 + } else if (nvme_abort_cmd(cmd, sec) == 0) {
1337 1376 /*
1338 - * Try to abort the command. The command mutex is released by
1339 - * nvme_abort_cmd().
1340 - * If the abort succeeds it will have freed the aborted command.
1341 - * If the abort fails for other reasons we must assume that the
1342 - * command may complete at any time, and the callback will free
1343 - * it for us.
1377 + * If the abort succeeded the command should complete
1378 + * immediately with an appropriate status.
1344 1379 */
1345 - nvme_abort_cmd(cmd);
1380 + while (!cmd->nc_completed)
1381 + cv_wait(&cmd->nc_cv, &cmd->nc_mutex);
1382 +
1383 + return;
1346 1384 }
1347 1385
1348 - return (B_FALSE);
1386 + qp = nvme->n_ioq[cmd->nc_sqid];
1387 +
1388 + mutex_enter(&qp->nq_mutex);
1389 + (void) nvme_unqueue_cmd(nvme, qp, cmd->nc_sqe.sqe_cid);
1390 + mutex_exit(&qp->nq_mutex);
1391 +
1392 + /*
1393 + * As we don't know what the presumed dead hardware might still do with
1394 + * the DMA memory, we'll put the command on the lost commands list if it
1395 + * has any DMA memory.
1396 + */
1397 + if (cmd->nc_dma != NULL) {
1398 + mutex_enter(&nvme_lc_mutex);
1399 + list_insert_head(&nvme_lost_cmds, cmd);
1400 + mutex_exit(&nvme_lc_mutex);
1401 + }
1349 1402 }
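The lost-command handling above relies on a global nvme_lc_mutex and nvme_lost_cmds list whose declarations fall outside this hunk. A minimal sketch of how that state could be initialized, assuming nvme_cmd_t carries a list_node_t member (the name nc_list is illustrative, not taken from this diff):

static kmutex_t nvme_lc_mutex;
static list_t nvme_lost_cmds;

static void
nvme_lost_cmds_init(void)
{
	mutex_init(&nvme_lc_mutex, NULL, MUTEX_DRIVER, NULL);
	/* nc_list is an assumed member name for the list linkage */
	list_create(&nvme_lost_cmds, sizeof (nvme_cmd_t),
	    offsetof(nvme_cmd_t, nc_list));
}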
1350 1403
1351 1404 static void
1352 1405 nvme_wakeup_cmd(void *arg)
1353 1406 {
1354 1407 nvme_cmd_t *cmd = arg;
1355 1408
1356 1409 mutex_enter(&cmd->nc_mutex);
1357 - /*
1358 - * There is a slight chance that this command completed shortly after
1359 - * the timeout was hit in nvme_wait_cmd() but before the callback was
1360 - * changed. Catch that case here and clean up accordingly.
1361 - */
1362 - if (cmd->nc_callback == nvme_abort_cmd_cb) {
1363 - mutex_exit(&cmd->nc_mutex);
1364 - nvme_abort_cmd_cb(cmd);
1365 - return;
1366 - }
1367 -
1368 1410 cmd->nc_completed = B_TRUE;
1369 1411 cv_signal(&cmd->nc_cv);
1370 1412 mutex_exit(&cmd->nc_mutex);
1371 1413 }
1372 1414
1373 1415 static void
1374 1416 nvme_async_event_task(void *arg)
1375 1417 {
1376 1418 nvme_cmd_t *cmd = arg;
1377 1419 nvme_t *nvme = cmd->nc_nvme;
1378 1420 nvme_error_log_entry_t *error_log = NULL;
1379 1421 nvme_health_log_t *health_log = NULL;
1380 1422 size_t logsize = 0;
1381 1423 nvme_async_event_t event;
1382 1424
5 lines elided
1383 1425 /*
1384 1426 * Check for errors associated with the async request itself. The only
1385 1427 * command-specific error is "async event limit exceeded", which
1386 1428 * indicates a programming error in the driver and causes a panic in
1387 1429 * nvme_check_cmd_status().
1388 1430 *
1389 1431 * Other possible errors are various scenarios where the async request
1390 1432 * was aborted, or internal errors in the device. Internal errors are
1391 1433 * reported to FMA, the command aborts need no special handling here.
1392 1434 */
1393 - if (nvme_check_cmd_status(cmd)) {
1435 + if (nvme_check_cmd_status(cmd) != 0) {
1394 1436 dev_err(cmd->nc_nvme->n_dip, CE_WARN,
1395 1437 "!async event request returned failure, sct = %x, "
1396 1438 "sc = %x, dnr = %d, m = %d", cmd->nc_cqe.cqe_sf.sf_sct,
1397 1439 cmd->nc_cqe.cqe_sf.sf_sc, cmd->nc_cqe.cqe_sf.sf_dnr,
1398 1440 cmd->nc_cqe.cqe_sf.sf_m);
1399 1441
1400 1442 if (cmd->nc_cqe.cqe_sf.sf_sct == NVME_CQE_SCT_GENERIC &&
1401 1443 cmd->nc_cqe.cqe_sf.sf_sc == NVME_CQE_SC_GEN_INTERNAL_ERR) {
1402 1444 cmd->nc_nvme->n_dead = B_TRUE;
1403 1445 ddi_fm_service_impact(cmd->nc_nvme->n_dip,
1404 1446 DDI_SERVICE_LOST);
1405 1447 }
1406 1448 nvme_free_cmd(cmd);
1407 1449 return;
1408 1450 }
1409 1451
1410 1452
1411 1453 event.r = cmd->nc_cqe.cqe_dw0;
1412 1454
1413 1455 /* Clear CQE and re-submit the async request. */
1414 1456 bzero(&cmd->nc_cqe, sizeof (nvme_cqe_t));
1415 1457 nvme_submit_admin_cmd(nvme->n_adminq, cmd);
1416 1458
1417 1459 switch (event.b.ae_type) {
1418 1460 case NVME_ASYNC_TYPE_ERROR:
1419 1461 if (event.b.ae_logpage == NVME_LOGPAGE_ERROR) {
1420 1462 (void) nvme_get_logpage(nvme, (void **)&error_log,
1421 1463 &logsize, event.b.ae_logpage);
1422 1464 } else {
1423 1465 dev_err(nvme->n_dip, CE_WARN, "!wrong logpage in "
1424 1466 "async event reply: %d", event.b.ae_logpage);
1425 1467 atomic_inc_32(&nvme->n_wrong_logpage);
1426 1468 }
1427 1469
1428 1470 switch (event.b.ae_info) {
1429 1471 case NVME_ASYNC_ERROR_INV_SQ:
1430 1472 dev_err(nvme->n_dip, CE_PANIC, "programming error: "
1431 1473 "invalid submission queue");
1432 1474 return;
1433 1475
1434 1476 case NVME_ASYNC_ERROR_INV_DBL:
1435 1477 dev_err(nvme->n_dip, CE_PANIC, "programming error: "
1436 1478 "invalid doorbell write value");
1437 1479 return;
1438 1480
1439 1481 case NVME_ASYNC_ERROR_DIAGFAIL:
1440 1482 dev_err(nvme->n_dip, CE_WARN, "!diagnostic failure");
1441 1483 ddi_fm_service_impact(nvme->n_dip, DDI_SERVICE_LOST);
1442 1484 nvme->n_dead = B_TRUE;
1443 1485 atomic_inc_32(&nvme->n_diagfail_event);
1444 1486 break;
1445 1487
1446 1488 case NVME_ASYNC_ERROR_PERSISTENT:
1447 1489 dev_err(nvme->n_dip, CE_WARN, "!persistent internal "
1448 1490 "device error");
1449 1491 ddi_fm_service_impact(nvme->n_dip, DDI_SERVICE_LOST);
1450 1492 nvme->n_dead = B_TRUE;
1451 1493 atomic_inc_32(&nvme->n_persistent_event);
1452 1494 break;
1453 1495
1454 1496 case NVME_ASYNC_ERROR_TRANSIENT:
1455 1497 dev_err(nvme->n_dip, CE_WARN, "!transient internal "
1456 1498 "device error");
1457 1499 /* TODO: send ereport */
1458 1500 atomic_inc_32(&nvme->n_transient_event);
1459 1501 break;
1460 1502
1461 1503 case NVME_ASYNC_ERROR_FW_LOAD:
1462 1504 dev_err(nvme->n_dip, CE_WARN,
1463 1505 "!firmware image load error");
1464 1506 atomic_inc_32(&nvme->n_fw_load_event);
1465 1507 break;
1466 1508 }
1467 1509 break;
1468 1510
1469 1511 case NVME_ASYNC_TYPE_HEALTH:
1470 1512 if (event.b.ae_logpage == NVME_LOGPAGE_HEALTH) {
1471 1513 (void) nvme_get_logpage(nvme, (void **)&health_log,
1472 1514 &logsize, event.b.ae_logpage, -1);
1473 1515 } else {
1474 1516 dev_err(nvme->n_dip, CE_WARN, "!wrong logpage in "
1475 1517 "async event reply: %d", event.b.ae_logpage);
1476 1518 atomic_inc_32(&nvme->n_wrong_logpage);
1477 1519 }
1478 1520
1479 1521 switch (event.b.ae_info) {
1480 1522 case NVME_ASYNC_HEALTH_RELIABILITY:
1481 1523 dev_err(nvme->n_dip, CE_WARN,
1482 1524 "!device reliability compromised");
1483 1525 /* TODO: send ereport */
1484 1526 atomic_inc_32(&nvme->n_reliability_event);
1485 1527 break;
1486 1528
1487 1529 case NVME_ASYNC_HEALTH_TEMPERATURE:
1488 1530 dev_err(nvme->n_dip, CE_WARN,
1489 1531 "!temperature above threshold");
1490 1532 /* TODO: send ereport */
1491 1533 atomic_inc_32(&nvme->n_temperature_event);
1492 1534 break;
1493 1535
1494 1536 case NVME_ASYNC_HEALTH_SPARE:
1495 1537 dev_err(nvme->n_dip, CE_WARN,
1496 1538 "!spare space below threshold");
1497 1539 /* TODO: send ereport */
1498 1540 atomic_inc_32(&nvme->n_spare_event);
1499 1541 break;
1500 1542 }
1501 1543 break;
1502 1544
1503 1545 case NVME_ASYNC_TYPE_VENDOR:
1504 1546 dev_err(nvme->n_dip, CE_WARN, "!vendor specific async event "
1505 1547 "received, info = %x, logpage = %x", event.b.ae_info,
1506 1548 event.b.ae_logpage);
1507 1549 atomic_inc_32(&nvme->n_vendor_event);
1508 1550 break;
1509 1551
1510 1552 default:
1511 1553 dev_err(nvme->n_dip, CE_WARN, "!unknown async event received, "
1512 1554 "type = %x, info = %x, logpage = %x", event.b.ae_type,
1513 1555 event.b.ae_info, event.b.ae_logpage);
1514 1556 atomic_inc_32(&nvme->n_unknown_event);
111 lines elided
1515 1557 break;
1516 1558 }
1517 1559
1518 1560 if (error_log)
1519 1561 kmem_free(error_log, logsize);
1520 1562
1521 1563 if (health_log)
1522 1564 kmem_free(health_log, logsize);
1523 1565 }
1524 1566
1525 -static int
1567 +static void
1526 1568 nvme_admin_cmd(nvme_cmd_t *cmd, int sec)
1527 1569 {
1528 1570 mutex_enter(&cmd->nc_mutex);
1529 1571 nvme_submit_admin_cmd(cmd->nc_nvme->n_adminq, cmd);
1530 -
1531 - if (nvme_wait_cmd(cmd, sec) == B_FALSE) {
1532 - /*
1533 - * The command timed out. An abort command was posted that
1534 - * will take care of the cleanup.
1535 - */
1536 - return (DDI_FAILURE);
1537 - }
1572 + nvme_wait_cmd(cmd, sec);
1538 1573 mutex_exit(&cmd->nc_mutex);
1539 -
1540 - return (DDI_SUCCESS);
1541 1574 }
1542 1575
1543 1576 static void
1544 1577 nvme_async_event(nvme_t *nvme)
1545 1578 {
1546 1579 nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
1547 1580
1548 1581 cmd->nc_sqid = 0;
1549 1582 cmd->nc_sqe.sqe_opc = NVME_OPC_ASYNC_EVENT;
1550 1583 cmd->nc_callback = nvme_async_event_task;
1551 1584
1552 1585 nvme_submit_admin_cmd(nvme->n_adminq, cmd);
1553 1586 }
1554 1587
1555 1588 static int
1556 1589 nvme_format_nvm(nvme_t *nvme, uint32_t nsid, uint8_t lbaf, boolean_t ms,
1557 1590 uint8_t pi, boolean_t pil, uint8_t ses)
1558 1591 {
1559 1592 nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
1560 1593 nvme_format_nvm_t format_nvm = { 0 };
1561 1594 int ret;
1562 1595
1563 1596 format_nvm.b.fm_lbaf = lbaf & 0xf;
1564 1597 format_nvm.b.fm_ms = ms ? 1 : 0;
1565 1598 format_nvm.b.fm_pi = pi & 0x7;
1566 1599 format_nvm.b.fm_pil = pil ? 1 : 0;
1567 1600 format_nvm.b.fm_ses = ses & 0x7;
1568 1601
1569 1602 cmd->nc_sqid = 0;
1570 1603 cmd->nc_callback = nvme_wakeup_cmd;
1571 1604 cmd->nc_sqe.sqe_nsid = nsid;
21 lines elided
1572 1605 cmd->nc_sqe.sqe_opc = NVME_OPC_NVM_FORMAT;
1573 1606 cmd->nc_sqe.sqe_cdw10 = format_nvm.r;
1574 1607
1575 1608 /*
1576 1609 * Some devices like Samsung SM951 don't allow formatting of all
1577 1610 * namespaces in one command. Handle that gracefully.
1578 1611 */
1579 1612 if (nsid == (uint32_t)-1)
1580 1613 cmd->nc_dontpanic = B_TRUE;
1581 1614
1582 - if ((ret = nvme_admin_cmd(cmd, nvme_format_cmd_timeout))
1583 - != DDI_SUCCESS) {
1584 - dev_err(nvme->n_dip, CE_WARN,
1585 - "!nvme_admin_cmd failed for FORMAT NVM");
1586 - return (EIO);
1587 - }
1615 + nvme_admin_cmd(cmd, nvme_format_cmd_timeout);
1588 1616
1589 1617 if ((ret = nvme_check_cmd_status(cmd)) != 0) {
1590 1618 dev_err(nvme->n_dip, CE_WARN,
1591 1619 "!FORMAT failed with sct = %x, sc = %x",
1592 1620 cmd->nc_cqe.cqe_sf.sf_sct, cmd->nc_cqe.cqe_sf.sf_sc);
1593 1621 }
1594 1622
1595 1623 nvme_free_cmd(cmd);
1596 1624 return (ret);
1597 1625 }
1598 1626
1599 1627 static int
1600 1628 nvme_get_logpage(nvme_t *nvme, void **buf, size_t *bufsize, uint8_t logpage,
1601 1629 ...)
1602 1630 {
1603 1631 nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
1604 1632 nvme_getlogpage_t getlogpage = { 0 };
1605 1633 va_list ap;
1606 - int ret = DDI_FAILURE;
1634 + int ret;
1607 1635
1608 1636 va_start(ap, logpage);
1609 1637
1610 1638 cmd->nc_sqid = 0;
1611 1639 cmd->nc_callback = nvme_wakeup_cmd;
1612 1640 cmd->nc_sqe.sqe_opc = NVME_OPC_GET_LOG_PAGE;
1613 1641
1614 1642 getlogpage.b.lp_lid = logpage;
1615 1643
1616 1644 switch (logpage) {
1617 1645 case NVME_LOGPAGE_ERROR:
1618 1646 cmd->nc_sqe.sqe_nsid = (uint32_t)-1;
1619 1647 /*
1620 1648 * The GET LOG PAGE command can use at most 2 pages to return
1621 1649 * data, PRP lists are not supported.
1622 1650 */
1623 1651 *bufsize = MIN(2 * nvme->n_pagesize,
1624 1652 nvme->n_error_log_len * sizeof (nvme_error_log_entry_t));
1625 1653 break;
1626 1654
1627 1655 case NVME_LOGPAGE_HEALTH:
1628 1656 cmd->nc_sqe.sqe_nsid = va_arg(ap, uint32_t);
1629 1657 *bufsize = sizeof (nvme_health_log_t);
1630 1658 break;
14 lines elided
1631 1659
1632 1660 case NVME_LOGPAGE_FWSLOT:
1633 1661 cmd->nc_sqe.sqe_nsid = (uint32_t)-1;
1634 1662 *bufsize = sizeof (nvme_fwslot_log_t);
1635 1663 break;
1636 1664
1637 1665 default:
1638 1666 dev_err(nvme->n_dip, CE_WARN, "!unknown log page requested: %d",
1639 1667 logpage);
1640 1668 atomic_inc_32(&nvme->n_unknown_logpage);
1669 + ret = EINVAL;
1641 1670 goto fail;
1642 1671 }
1643 1672
1644 1673 va_end(ap);
1645 1674
1646 1675 getlogpage.b.lp_numd = *bufsize / sizeof (uint32_t) - 1;
1647 1676
1648 1677 cmd->nc_sqe.sqe_cdw10 = getlogpage.r;
1649 1678
1650 1679 if (nvme_zalloc_dma(nvme, getlogpage.b.lp_numd * sizeof (uint32_t),
1651 1680 DDI_DMA_READ, &nvme->n_prp_dma_attr, &cmd->nc_dma) != DDI_SUCCESS) {
1652 1681 dev_err(nvme->n_dip, CE_WARN,
1653 1682 "!nvme_zalloc_dma failed for GET LOG PAGE");
1683 + ret = ENOMEM;
1654 1684 goto fail;
1655 1685 }
1656 1686
1657 1687 if (cmd->nc_dma->nd_ncookie > 2) {
1658 1688 dev_err(nvme->n_dip, CE_WARN,
1659 1689 "!too many DMA cookies for GET LOG PAGE");
1660 1690 atomic_inc_32(&nvme->n_too_many_cookies);
1691 + ret = ENOMEM;
1661 1692 goto fail;
1662 1693 }
1663 1694
1664 1695 cmd->nc_sqe.sqe_dptr.d_prp[0] = cmd->nc_dma->nd_cookie.dmac_laddress;
1665 1696 if (cmd->nc_dma->nd_ncookie > 1) {
1666 1697 ddi_dma_nextcookie(cmd->nc_dma->nd_dmah,
1667 1698 &cmd->nc_dma->nd_cookie);
1668 1699 cmd->nc_sqe.sqe_dptr.d_prp[1] =
1669 1700 cmd->nc_dma->nd_cookie.dmac_laddress;
1670 1701 }
1671 1702
1672 - if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) {
1673 - dev_err(nvme->n_dip, CE_WARN,
1674 - "!nvme_admin_cmd failed for GET LOG PAGE");
1675 - return (ret);
1676 - }
1703 + nvme_admin_cmd(cmd, nvme_admin_cmd_timeout);
1677 1704
1678 - if (nvme_check_cmd_status(cmd)) {
1705 + if ((ret = nvme_check_cmd_status(cmd)) != 0) {
1679 1706 dev_err(nvme->n_dip, CE_WARN,
1680 1707 "!GET LOG PAGE failed with sct = %x, sc = %x",
1681 1708 cmd->nc_cqe.cqe_sf.sf_sct, cmd->nc_cqe.cqe_sf.sf_sc);
1682 1709 goto fail;
1683 1710 }
1684 1711
1685 1712 *buf = kmem_alloc(*bufsize, KM_SLEEP);
1686 1713 bcopy(cmd->nc_dma->nd_memp, *buf, *bufsize);
1687 1714
1688 - ret = DDI_SUCCESS;
1689 -
1690 1715 fail:
1691 1716 nvme_free_cmd(cmd);
1692 1717
1693 1718 return (ret);
1694 1719 }
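A hypothetical caller of the reworked nvme_get_logpage(), showing the errno-style return and the caller-owned buffer; this snippet is illustrative and not part of the diff:

	nvme_health_log_t *hlog = NULL;
	size_t hlen = 0;

	if (nvme_get_logpage(nvme, (void **)&hlog, &hlen,
	    NVME_LOGPAGE_HEALTH, (uint32_t)-1) != 0) {
		/* command failed; no buffer was allocated */
	} else {
		/* ... consume hlog ... */
		kmem_free(hlog, hlen);
	}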
1695 1720
1696 -static void *
1697 -nvme_identify(nvme_t *nvme, uint32_t nsid)
1721 +static int
1722 +nvme_identify(nvme_t *nvme, uint32_t nsid, void **buf)
1698 1723 {
1699 1724 nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
1700 - void *buf = NULL;
1725 + int ret;
1701 1726
1727 + if (buf == NULL)
1728 + return (EINVAL);
1729 +
1702 1730 cmd->nc_sqid = 0;
1703 1731 cmd->nc_callback = nvme_wakeup_cmd;
1704 1732 cmd->nc_sqe.sqe_opc = NVME_OPC_IDENTIFY;
1705 1733 cmd->nc_sqe.sqe_nsid = nsid;
1706 1734 cmd->nc_sqe.sqe_cdw10 = nsid ? NVME_IDENTIFY_NSID : NVME_IDENTIFY_CTRL;
1707 1735
1708 1736 if (nvme_zalloc_dma(nvme, NVME_IDENTIFY_BUFSIZE, DDI_DMA_READ,
1709 1737 &nvme->n_prp_dma_attr, &cmd->nc_dma) != DDI_SUCCESS) {
1710 1738 dev_err(nvme->n_dip, CE_WARN,
1711 1739 "!nvme_zalloc_dma failed for IDENTIFY");
1740 + ret = ENOMEM;
1712 1741 goto fail;
1713 1742 }
1714 1743
1715 1744 if (cmd->nc_dma->nd_ncookie > 2) {
1716 1745 dev_err(nvme->n_dip, CE_WARN,
1717 1746 "!too many DMA cookies for IDENTIFY");
1718 1747 atomic_inc_32(&nvme->n_too_many_cookies);
1748 + ret = ENOMEM;
1719 1749 goto fail;
1720 1750 }
1721 1751
1722 1752 cmd->nc_sqe.sqe_dptr.d_prp[0] = cmd->nc_dma->nd_cookie.dmac_laddress;
1723 1753 if (cmd->nc_dma->nd_ncookie > 1) {
1724 1754 ddi_dma_nextcookie(cmd->nc_dma->nd_dmah,
1725 1755 &cmd->nc_dma->nd_cookie);
1726 1756 cmd->nc_sqe.sqe_dptr.d_prp[1] =
1727 1757 cmd->nc_dma->nd_cookie.dmac_laddress;
1728 1758 }
1729 1759
1730 - if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) {
1731 - dev_err(nvme->n_dip, CE_WARN,
1732 - "!nvme_admin_cmd failed for IDENTIFY");
1733 - return (NULL);
1734 - }
1760 + nvme_admin_cmd(cmd, nvme_admin_cmd_timeout);
1735 1761
1736 - if (nvme_check_cmd_status(cmd)) {
1762 + if ((ret = nvme_check_cmd_status(cmd)) != 0) {
1737 1763 dev_err(nvme->n_dip, CE_WARN,
1738 1764 "!IDENTIFY failed with sct = %x, sc = %x",
1739 1765 cmd->nc_cqe.cqe_sf.sf_sct, cmd->nc_cqe.cqe_sf.sf_sc);
1740 1766 goto fail;
1741 1767 }
1742 1768
1743 - buf = kmem_alloc(NVME_IDENTIFY_BUFSIZE, KM_SLEEP);
1744 - bcopy(cmd->nc_dma->nd_memp, buf, NVME_IDENTIFY_BUFSIZE);
1769 + *buf = kmem_alloc(NVME_IDENTIFY_BUFSIZE, KM_SLEEP);
1770 + bcopy(cmd->nc_dma->nd_memp, *buf, NVME_IDENTIFY_BUFSIZE);
1745 1771
1746 1772 fail:
1747 1773 nvme_free_cmd(cmd);
1748 1774
1749 - return (buf);
1775 + return (ret);
1750 1776 }
1751 1777
1752 -static boolean_t
1778 +static int
1753 1779 nvme_set_features(nvme_t *nvme, uint32_t nsid, uint8_t feature, uint32_t val,
1754 1780 uint32_t *res)
1755 1781 {
1756 1782 _NOTE(ARGUNUSED(nsid));
1757 1783 nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
1758 - boolean_t ret = B_FALSE;
1784 + int ret = EINVAL;
1759 1785
1760 1786 ASSERT(res != NULL);
1761 1787
1762 1788 cmd->nc_sqid = 0;
1763 1789 cmd->nc_callback = nvme_wakeup_cmd;
1764 1790 cmd->nc_sqe.sqe_opc = NVME_OPC_SET_FEATURES;
1765 1791 cmd->nc_sqe.sqe_cdw10 = feature;
1766 1792 cmd->nc_sqe.sqe_cdw11 = val;
1767 1793
1768 1794 switch (feature) {
1769 1795 case NVME_FEAT_WRITE_CACHE:
1770 1796 if (!nvme->n_write_cache_present)
2 lines elided
1771 1797 goto fail;
1772 1798 break;
1773 1799
1774 1800 case NVME_FEAT_NQUEUES:
1775 1801 break;
1776 1802
1777 1803 default:
1778 1804 goto fail;
1779 1805 }
1780 1806
1781 - if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) {
1782 - dev_err(nvme->n_dip, CE_WARN,
1783 - "!nvme_admin_cmd failed for SET FEATURES");
1784 - return (ret);
1785 - }
1807 + nvme_admin_cmd(cmd, nvme_admin_cmd_timeout);
1786 1808
1787 - if (nvme_check_cmd_status(cmd)) {
1809 + if ((ret = nvme_check_cmd_status(cmd)) != 0) {
1788 1810 dev_err(nvme->n_dip, CE_WARN,
1789 1811 "!SET FEATURES %d failed with sct = %x, sc = %x",
1790 1812 feature, cmd->nc_cqe.cqe_sf.sf_sct,
1791 1813 cmd->nc_cqe.cqe_sf.sf_sc);
1792 1814 goto fail;
1793 1815 }
1794 1816
1795 1817 *res = cmd->nc_cqe.cqe_dw0;
1796 - ret = B_TRUE;
1797 1818
1798 1819 fail:
1799 1820 nvme_free_cmd(cmd);
1800 1821 return (ret);
1801 1822 }
1802 1823
1803 -static boolean_t
1824 +static int
1804 1825 nvme_get_features(nvme_t *nvme, uint32_t nsid, uint8_t feature, uint32_t *res,
1805 1826 void **buf, size_t *bufsize)
1806 1827 {
1807 1828 nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
1808 - boolean_t ret = B_FALSE;
1829 + int ret = EINVAL;
1809 1830
1810 1831 ASSERT(res != NULL);
1811 1832
1812 1833 if (bufsize != NULL)
1813 1834 *bufsize = 0;
1814 1835
1815 1836 cmd->nc_sqid = 0;
1816 1837 cmd->nc_callback = nvme_wakeup_cmd;
1817 1838 cmd->nc_sqe.sqe_opc = NVME_OPC_GET_FEATURES;
1818 1839 cmd->nc_sqe.sqe_cdw10 = feature;
1819 1840 cmd->nc_sqe.sqe_cdw11 = *res;
1820 1841
1821 1842 switch (feature) {
1822 1843 case NVME_FEAT_ARBITRATION:
1823 1844 case NVME_FEAT_POWER_MGMT:
1824 1845 case NVME_FEAT_TEMPERATURE:
1825 1846 case NVME_FEAT_ERROR:
1826 1847 case NVME_FEAT_NQUEUES:
1827 1848 case NVME_FEAT_INTR_COAL:
1828 1849 case NVME_FEAT_INTR_VECT:
1829 1850 case NVME_FEAT_WRITE_ATOM:
1830 1851 case NVME_FEAT_ASYNC_EVENT:
1831 1852 case NVME_FEAT_PROGRESS:
1832 1853 break;
1833 1854
1834 1855 case NVME_FEAT_WRITE_CACHE:
1835 1856 if (!nvme->n_write_cache_present)
1836 1857 goto fail;
1837 1858 break;
1838 1859
1839 1860 case NVME_FEAT_LBA_RANGE:
1840 1861 if (!nvme->n_lba_range_supported)
1841 1862 goto fail;
1842 1863
1843 1864 /*
1844 1865 		 * The LBA Range Type feature is optional. There doesn't seem
1845 1866 		 * to be a method of detecting whether it is supported other
1846 1867 		 * than using it. This will cause an "invalid field in command"
1847 1868 		 * error, which is normally considered a programming error and
1848 1869 		 * causes a panic in nvme_check_generic_cmd_status().
1849 1870 */
1850 1871 cmd->nc_dontpanic = B_TRUE;
1851 1872 cmd->nc_sqe.sqe_nsid = nsid;
1852 1873 ASSERT(bufsize != NULL);
1853 1874 *bufsize = NVME_LBA_RANGE_BUFSIZE;
1854 1875
1855 1876 break;
1856 1877
1857 1878 case NVME_FEAT_AUTO_PST:
1858 1879 if (!nvme->n_auto_pst_supported)
1859 1880 goto fail;
1860 1881
1861 1882 ASSERT(bufsize != NULL);
1862 1883 *bufsize = NVME_AUTO_PST_BUFSIZE;
1863 1884 break;
45 lines elided
1864 1885
1865 1886 default:
1866 1887 goto fail;
1867 1888 }
1868 1889
1869 1890 if (bufsize != NULL && *bufsize != 0) {
1870 1891 if (nvme_zalloc_dma(nvme, *bufsize, DDI_DMA_READ,
1871 1892 &nvme->n_prp_dma_attr, &cmd->nc_dma) != DDI_SUCCESS) {
1872 1893 dev_err(nvme->n_dip, CE_WARN,
1873 1894 "!nvme_zalloc_dma failed for GET FEATURES");
1895 + ret = ENOMEM;
1874 1896 goto fail;
1875 1897 }
1876 1898
1877 1899 if (cmd->nc_dma->nd_ncookie > 2) {
1878 1900 dev_err(nvme->n_dip, CE_WARN,
1879 1901 "!too many DMA cookies for GET FEATURES");
1880 1902 atomic_inc_32(&nvme->n_too_many_cookies);
1903 + ret = ENOMEM;
1881 1904 goto fail;
1882 1905 }
1883 1906
1884 1907 cmd->nc_sqe.sqe_dptr.d_prp[0] =
1885 1908 cmd->nc_dma->nd_cookie.dmac_laddress;
1886 1909 if (cmd->nc_dma->nd_ncookie > 1) {
1887 1910 ddi_dma_nextcookie(cmd->nc_dma->nd_dmah,
1888 1911 &cmd->nc_dma->nd_cookie);
1889 1912 cmd->nc_sqe.sqe_dptr.d_prp[1] =
1890 1913 cmd->nc_dma->nd_cookie.dmac_laddress;
1891 1914 }
1892 1915 }
1893 1916
1894 - if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) {
1895 - dev_err(nvme->n_dip, CE_WARN,
1896 - "!nvme_admin_cmd failed for GET FEATURES");
1897 - return (ret);
1898 - }
1917 + nvme_admin_cmd(cmd, nvme_admin_cmd_timeout);
1899 1918
1900 - if (nvme_check_cmd_status(cmd)) {
1919 + if ((ret = nvme_check_cmd_status(cmd)) != 0) {
1901 1920 if (feature == NVME_FEAT_LBA_RANGE &&
1902 1921 cmd->nc_cqe.cqe_sf.sf_sct == NVME_CQE_SCT_GENERIC &&
1903 1922 cmd->nc_cqe.cqe_sf.sf_sc == NVME_CQE_SC_GEN_INV_FLD)
1904 1923 nvme->n_lba_range_supported = B_FALSE;
1905 1924 else
1906 1925 dev_err(nvme->n_dip, CE_WARN,
1907 1926 "!GET FEATURES %d failed with sct = %x, sc = %x",
1908 1927 feature, cmd->nc_cqe.cqe_sf.sf_sct,
1909 1928 cmd->nc_cqe.cqe_sf.sf_sc);
1910 1929 goto fail;
1911 1930 }
1912 1931
1913 1932 if (bufsize != NULL && *bufsize != 0) {
1914 1933 ASSERT(buf != NULL);
1915 1934 *buf = kmem_alloc(*bufsize, KM_SLEEP);
1916 1935 bcopy(cmd->nc_dma->nd_memp, *buf, *bufsize);
1917 1936 }
1918 1937
1919 1938 *res = cmd->nc_cqe.cqe_dw0;
1920 - ret = B_TRUE;
1921 1939
1922 1940 fail:
1923 1941 nvme_free_cmd(cmd);
1924 1942 return (ret);
1925 1943 }
1926 1944
1927 -static boolean_t
1945 +static int
1928 1946 nvme_write_cache_set(nvme_t *nvme, boolean_t enable)
1929 1947 {
1930 1948 nvme_write_cache_t nwc = { 0 };
1931 1949
1932 1950 if (enable)
1933 1951 nwc.b.wc_wce = 1;
1934 1952
1935 - if (!nvme_set_features(nvme, 0, NVME_FEAT_WRITE_CACHE, nwc.r, &nwc.r))
1936 - return (B_FALSE);
1937 -
1938 - return (B_TRUE);
1953 + return (nvme_set_features(nvme, 0, NVME_FEAT_WRITE_CACHE, nwc.r,
1954 + &nwc.r));
1939 1955 }
1940 1956
1941 1957 static int
1942 -nvme_set_nqueues(nvme_t *nvme, uint16_t nqueues)
1958 +nvme_set_nqueues(nvme_t *nvme, uint16_t *nqueues)
1943 1959 {
1944 1960 nvme_nqueues_t nq = { 0 };
1961 + int ret;
1945 1962
1946 - nq.b.nq_nsq = nq.b.nq_ncq = nqueues - 1;
1963 + nq.b.nq_nsq = nq.b.nq_ncq = *nqueues - 1;
1947 1964
1948 - if (!nvme_set_features(nvme, 0, NVME_FEAT_NQUEUES, nq.r, &nq.r)) {
1949 - return (0);
1965 + ret = nvme_set_features(nvme, 0, NVME_FEAT_NQUEUES, nq.r, &nq.r);
1966 +
1967 + if (ret == 0) {
1968 + /*
1969 + * Always use the same number of submission and completion
1970 + * queues, and never use more than the requested number of
1971 + * queues.
1972 + */
1973 + *nqueues = MIN(*nqueues, MIN(nq.b.nq_nsq, nq.b.nq_ncq) + 1);
1950 1974 }
1951 1975
1952 - /*
1953 - * Always use the same number of submission and completion queues, and
1954 - * never use more than the requested number of queues.
1955 - */
1956 - return (MIN(nqueues, MIN(nq.b.nq_nsq, nq.b.nq_ncq) + 1));
1976 + return (ret);
1957 1977 }
1958 1978
1959 1979 static int
1960 1980 nvme_create_io_qpair(nvme_t *nvme, nvme_qpair_t *qp, uint16_t idx)
1961 1981 {
1962 1982 nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
1963 1983 nvme_create_queue_dw10_t dw10 = { 0 };
1964 1984 nvme_create_cq_dw11_t c_dw11 = { 0 };
1965 1985 nvme_create_sq_dw11_t s_dw11 = { 0 };
1986 + int ret;
1966 1987
1967 1988 dw10.b.q_qid = idx;
1968 1989 dw10.b.q_qsize = qp->nq_nentry - 1;
1969 1990
1970 1991 c_dw11.b.cq_pc = 1;
1971 1992 c_dw11.b.cq_ien = 1;
1972 1993 c_dw11.b.cq_iv = idx % nvme->n_intr_cnt;
1973 1994
1974 1995 cmd->nc_sqid = 0;
1975 1996 cmd->nc_callback = nvme_wakeup_cmd;
1976 1997 cmd->nc_sqe.sqe_opc = NVME_OPC_CREATE_CQUEUE;
1977 1998 cmd->nc_sqe.sqe_cdw10 = dw10.r;
1978 1999 cmd->nc_sqe.sqe_cdw11 = c_dw11.r;
1979 2000 cmd->nc_sqe.sqe_dptr.d_prp[0] = qp->nq_cqdma->nd_cookie.dmac_laddress;
1980 2001
1981 - if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) {
1982 - dev_err(nvme->n_dip, CE_WARN,
1983 - "!nvme_admin_cmd failed for CREATE CQUEUE");
1984 - return (DDI_FAILURE);
1985 - }
2002 + nvme_admin_cmd(cmd, nvme_admin_cmd_timeout);
1986 2003
1987 - if (nvme_check_cmd_status(cmd)) {
2004 + if ((ret = nvme_check_cmd_status(cmd)) != 0) {
1988 2005 dev_err(nvme->n_dip, CE_WARN,
1989 2006 "!CREATE CQUEUE failed with sct = %x, sc = %x",
1990 2007 cmd->nc_cqe.cqe_sf.sf_sct, cmd->nc_cqe.cqe_sf.sf_sc);
1991 - nvme_free_cmd(cmd);
1992 - return (DDI_FAILURE);
2008 + goto fail;
1993 2009 }
1994 2010
1995 2011 nvme_free_cmd(cmd);
1996 2012
1997 2013 s_dw11.b.sq_pc = 1;
1998 2014 s_dw11.b.sq_cqid = idx;
1999 2015
2000 2016 cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
2001 2017 cmd->nc_sqid = 0;
2002 2018 cmd->nc_callback = nvme_wakeup_cmd;
2003 2019 cmd->nc_sqe.sqe_opc = NVME_OPC_CREATE_SQUEUE;
2004 2020 cmd->nc_sqe.sqe_cdw10 = dw10.r;
2005 2021 cmd->nc_sqe.sqe_cdw11 = s_dw11.r;
2006 2022 cmd->nc_sqe.sqe_dptr.d_prp[0] = qp->nq_sqdma->nd_cookie.dmac_laddress;
2007 2023
2008 - if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) {
2009 - dev_err(nvme->n_dip, CE_WARN,
2010 - "!nvme_admin_cmd failed for CREATE SQUEUE");
2011 - return (DDI_FAILURE);
2012 - }
2024 + nvme_admin_cmd(cmd, nvme_admin_cmd_timeout);
2013 2025
2014 - if (nvme_check_cmd_status(cmd)) {
2026 + if ((ret = nvme_check_cmd_status(cmd)) != 0) {
2015 2027 dev_err(nvme->n_dip, CE_WARN,
2016 2028 "!CREATE SQUEUE failed with sct = %x, sc = %x",
2017 2029 cmd->nc_cqe.cqe_sf.sf_sct, cmd->nc_cqe.cqe_sf.sf_sc);
2018 - nvme_free_cmd(cmd);
2019 - return (DDI_FAILURE);
2030 + goto fail;
2020 2031 }
2021 2032
2033 +fail:
2022 2034 nvme_free_cmd(cmd);
2023 2035
2024 - return (DDI_SUCCESS);
2036 + return (ret);
2025 2037 }
2026 2038
2027 2039 static boolean_t
2028 2040 nvme_reset(nvme_t *nvme, boolean_t quiesce)
2029 2041 {
2030 2042 nvme_reg_csts_t csts;
2031 2043 int i;
2032 2044
2033 2045 nvme_put32(nvme, NVME_REG_CC, 0);
2034 2046
2035 2047 csts.r = nvme_get32(nvme, NVME_REG_CSTS);
2036 2048 if (csts.b.csts_rdy == 1) {
2037 2049 nvme_put32(nvme, NVME_REG_CC, 0);
2038 2050 for (i = 0; i != nvme->n_timeout * 10; i++) {
2039 2051 csts.r = nvme_get32(nvme, NVME_REG_CSTS);
2040 2052 if (csts.b.csts_rdy == 0)
2041 2053 break;
2042 2054
2043 2055 if (quiesce)
2044 2056 drv_usecwait(50000);
2045 2057 else
2046 2058 delay(drv_usectohz(50000));
2047 2059 }
2048 2060 }
2049 2061
2050 2062 nvme_put32(nvme, NVME_REG_AQA, 0);
2051 2063 nvme_put32(nvme, NVME_REG_ASQ, 0);
2052 2064 nvme_put32(nvme, NVME_REG_ACQ, 0);
2053 2065
2054 2066 csts.r = nvme_get32(nvme, NVME_REG_CSTS);
2055 2067 return (csts.b.csts_rdy == 0 ? B_TRUE : B_FALSE);
2056 2068 }
2057 2069
2058 2070 static void
2059 2071 nvme_shutdown(nvme_t *nvme, int mode, boolean_t quiesce)
2060 2072 {
2061 2073 nvme_reg_cc_t cc;
2062 2074 nvme_reg_csts_t csts;
2063 2075 int i;
2064 2076
2065 2077 ASSERT(mode == NVME_CC_SHN_NORMAL || mode == NVME_CC_SHN_ABRUPT);
2066 2078
2067 2079 cc.r = nvme_get32(nvme, NVME_REG_CC);
2068 2080 cc.b.cc_shn = mode & 0x3;
2069 2081 nvme_put32(nvme, NVME_REG_CC, cc.r);
2070 2082
2071 2083 for (i = 0; i != 10; i++) {
2072 2084 csts.r = nvme_get32(nvme, NVME_REG_CSTS);
2073 2085 if (csts.b.csts_shst == NVME_CSTS_SHN_COMPLETE)
2074 2086 break;
2075 2087
2076 2088 if (quiesce)
2077 2089 drv_usecwait(100000);
2078 2090 else
2079 2091 delay(drv_usectohz(100000));
2080 2092 }
2081 2093 }
2082 2094
2083 2095
2084 2096 static void
2085 2097 nvme_prepare_devid(nvme_t *nvme, uint32_t nsid)
2086 2098 {
2087 2099 /*
2088 2100 * Section 7.7 of the spec describes how to get a unique ID for
2089 2101 * the controller: the vendor ID, the model name and the serial
2090 2102 * number shall be unique when combined.
2091 2103 *
2092 2104 * If a namespace has no EUI64 we use the above and add the hex
2093 2105 * namespace ID to get a unique ID for the namespace.
2094 2106 */
2095 2107 char model[sizeof (nvme->n_idctl->id_model) + 1];
2096 2108 char serial[sizeof (nvme->n_idctl->id_serial) + 1];
2097 2109
2098 2110 bcopy(nvme->n_idctl->id_model, model, sizeof (nvme->n_idctl->id_model));
2099 2111 bcopy(nvme->n_idctl->id_serial, serial,
2100 2112 sizeof (nvme->n_idctl->id_serial));
2101 2113
2102 2114 model[sizeof (nvme->n_idctl->id_model)] = '\0';
2103 2115 serial[sizeof (nvme->n_idctl->id_serial)] = '\0';
2104 2116
2105 2117 nvme->n_ns[nsid - 1].ns_devid = kmem_asprintf("%4X-%s-%s-%X",
2106 2118 nvme->n_idctl->id_vid, model, serial, nsid);
72 lines elided
2107 2119 }
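For illustration only (all values assumed): with id_vid 0x144d, model "SAMSUNG MZVPV256", serial "S1XWNYAG000000" and nsid 1, the format string above yields the devid "144D-SAMSUNG MZVPV256-S1XWNYAG000000-1".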
2108 2120
2109 2121 static int
2110 2122 nvme_init_ns(nvme_t *nvme, int nsid)
2111 2123 {
2112 2124 nvme_namespace_t *ns = &nvme->n_ns[nsid - 1];
2113 2125 nvme_identify_nsid_t *idns;
2114 2126 int last_rp;
2115 2127
2116 2128 ns->ns_nvme = nvme;
2117 - idns = nvme_identify(nvme, nsid);
2118 2129
2119 - if (idns == NULL) {
2130 + if (nvme_identify(nvme, nsid, (void **)&idns) != 0) {
2120 2131 dev_err(nvme->n_dip, CE_WARN,
2121 2132 "!failed to identify namespace %d", nsid);
2122 2133 return (DDI_FAILURE);
2123 2134 }
2124 2135
2125 2136 ns->ns_idns = idns;
2126 2137 ns->ns_id = nsid;
2127 2138 ns->ns_block_count = idns->id_nsize;
2128 2139 ns->ns_block_size =
2129 2140 1 << idns->id_lbaf[idns->id_flbas.lba_format].lbaf_lbads;
2130 2141 ns->ns_best_block_size = ns->ns_block_size;
2131 2142
2132 2143 /*
2133 2144 * Get the EUI64 if present. Use it for devid and device node names.
2134 2145 */
2135 2146 if (NVME_VERSION_ATLEAST(&nvme->n_version, 1, 1))
2136 2147 bcopy(idns->id_eui64, ns->ns_eui64, sizeof (ns->ns_eui64));
2137 2148
2138 2149 /*LINTED: E_BAD_PTR_CAST_ALIGN*/
2139 2150 if (*(uint64_t *)ns->ns_eui64 != 0) {
2140 2151 uint8_t *eui64 = ns->ns_eui64;
2141 2152
2142 2153 (void) snprintf(ns->ns_name, sizeof (ns->ns_name),
2143 2154 "%02x%02x%02x%02x%02x%02x%02x%02x",
2144 2155 eui64[0], eui64[1], eui64[2], eui64[3],
2145 2156 eui64[4], eui64[5], eui64[6], eui64[7]);
2146 2157 } else {
2147 2158 (void) snprintf(ns->ns_name, sizeof (ns->ns_name), "%d",
2148 2159 ns->ns_id);
2149 2160
2150 2161 nvme_prepare_devid(nvme, ns->ns_id);
2151 2162 }
2152 2163
2153 2164 /*
2154 2165 * Find the LBA format with no metadata and the best relative
2155 2166 	 * performance. A value of 3 means "degraded"; 0 is best.
2156 2167 */
2157 2168 last_rp = 3;
2158 2169 for (int j = 0; j <= idns->id_nlbaf; j++) {
2159 2170 if (idns->id_lbaf[j].lbaf_lbads == 0)
2160 2171 break;
2161 2172 if (idns->id_lbaf[j].lbaf_ms != 0)
2162 2173 continue;
2163 2174 if (idns->id_lbaf[j].lbaf_rp >= last_rp)
2164 2175 continue;
2165 2176 last_rp = idns->id_lbaf[j].lbaf_rp;
2166 2177 ns->ns_best_block_size =
2167 2178 1 << idns->id_lbaf[j].lbaf_lbads;
2168 2179 }
2169 2180
2170 2181 if (ns->ns_best_block_size < nvme->n_min_block_size)
2171 2182 ns->ns_best_block_size = nvme->n_min_block_size;
2172 2183
2173 2184 /*
2174 2185 * We currently don't support namespaces that use either:
2175 2186 * - thin provisioning
2176 2187 * - protection information
2177 2188 * - illegal block size (< 512)
2178 2189 */
2179 2190 if (idns->id_nsfeat.f_thin ||
2180 2191 idns->id_dps.dp_pinfo) {
2181 2192 dev_err(nvme->n_dip, CE_WARN,
2182 2193 "!ignoring namespace %d, unsupported features: "
2183 2194 "thin = %d, pinfo = %d", nsid,
2184 2195 idns->id_nsfeat.f_thin, idns->id_dps.dp_pinfo);
2185 2196 ns->ns_ignore = B_TRUE;
2186 2197 } else if (ns->ns_block_size < 512) {
2187 2198 dev_err(nvme->n_dip, CE_WARN,
2188 2199 "!ignoring namespace %d, unsupported block size %"PRIu64,
2189 2200 nsid, (uint64_t)ns->ns_block_size);
2190 2201 ns->ns_ignore = B_TRUE;
2191 2202 } else {
2192 2203 ns->ns_ignore = B_FALSE;
2193 2204 }
2194 2205
2195 2206 return (DDI_SUCCESS);
2196 2207 }
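A worked example of the LBA format selection above (all values assumed): a namespace advertising lbaf[0] = 512 bytes at relative performance 2 and lbaf[1] = 4096 bytes at relative performance 0, both without metadata, ends up with ns_best_block_size == 4096; a configured min-phys-block-size larger than that would then take precedence.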
2197 2208
2198 2209 static int
69 lines elided
2199 2210 nvme_init(nvme_t *nvme)
2200 2211 {
2201 2212 nvme_reg_cc_t cc = { 0 };
2202 2213 nvme_reg_aqa_t aqa = { 0 };
2203 2214 nvme_reg_asq_t asq = { 0 };
2204 2215 nvme_reg_acq_t acq = { 0 };
2205 2216 nvme_reg_cap_t cap;
2206 2217 nvme_reg_vs_t vs;
2207 2218 nvme_reg_csts_t csts;
2208 2219 int i = 0;
2209 - int nqueues;
2220 + uint16_t nqueues;
2210 2221 char model[sizeof (nvme->n_idctl->id_model) + 1];
2211 2222 char *vendor, *product;
2212 2223
2213 2224 /* Check controller version */
2214 2225 vs.r = nvme_get32(nvme, NVME_REG_VS);
2215 2226 nvme->n_version.v_major = vs.b.vs_mjr;
2216 2227 nvme->n_version.v_minor = vs.b.vs_mnr;
2217 2228 dev_err(nvme->n_dip, CE_CONT, "?NVMe spec version %d.%d",
2218 2229 nvme->n_version.v_major, nvme->n_version.v_minor);
2219 2230
2220 2231 if (NVME_VERSION_HIGHER(&nvme->n_version,
2221 2232 nvme_version_major, nvme_version_minor)) {
2222 2233 dev_err(nvme->n_dip, CE_WARN, "!no support for version > %d.%d",
2223 2234 nvme_version_major, nvme_version_minor);
2224 2235 if (nvme->n_strict_version)
2225 2236 goto fail;
2226 2237 }
2227 2238
2228 2239 /* retrieve controller configuration */
2229 2240 cap.r = nvme_get64(nvme, NVME_REG_CAP);
2230 2241
2231 2242 if ((cap.b.cap_css & NVME_CAP_CSS_NVM) == 0) {
2232 2243 dev_err(nvme->n_dip, CE_WARN,
2233 2244 "!NVM command set not supported by hardware");
2234 2245 goto fail;
2235 2246 }
2236 2247
2237 2248 nvme->n_nssr_supported = cap.b.cap_nssrs;
2238 2249 nvme->n_doorbell_stride = 4 << cap.b.cap_dstrd;
2239 2250 nvme->n_timeout = cap.b.cap_to;
2240 2251 nvme->n_arbitration_mechanisms = cap.b.cap_ams;
2241 2252 nvme->n_cont_queues_reqd = cap.b.cap_cqr;
2242 2253 nvme->n_max_queue_entries = cap.b.cap_mqes + 1;
2243 2254
2244 2255 /*
2245 2256 * The MPSMIN and MPSMAX fields in the CAP register use 0 to specify
2246 2257 * the base page size of 4k (1<<12), so add 12 here to get the real
2247 2258 * page size value.
2248 2259 */
2249 2260 nvme->n_pageshift = MIN(MAX(cap.b.cap_mpsmin + 12, PAGESHIFT),
2250 2261 cap.b.cap_mpsmax + 12);
2251 2262 nvme->n_pagesize = 1UL << (nvme->n_pageshift);
2252 2263
2253 2264 /*
2254 2265 * Set up Queue DMA to transfer at least 1 page-aligned page at a time.
2255 2266 */
2256 2267 nvme->n_queue_dma_attr.dma_attr_align = nvme->n_pagesize;
2257 2268 nvme->n_queue_dma_attr.dma_attr_minxfer = nvme->n_pagesize;
2258 2269
2259 2270 /*
2260 2271 * Set up PRP DMA to transfer 1 page-aligned page at a time.
2261 2272 * Maxxfer may be increased after we identified the controller limits.
2262 2273 */
2263 2274 nvme->n_prp_dma_attr.dma_attr_maxxfer = nvme->n_pagesize;
2264 2275 nvme->n_prp_dma_attr.dma_attr_minxfer = nvme->n_pagesize;
2265 2276 nvme->n_prp_dma_attr.dma_attr_align = nvme->n_pagesize;
2266 2277 nvme->n_prp_dma_attr.dma_attr_seg = nvme->n_pagesize - 1;
2267 2278
2268 2279 /*
2269 2280 * Reset controller if it's still in ready state.
2270 2281 */
2271 2282 if (nvme_reset(nvme, B_FALSE) == B_FALSE) {
2272 2283 dev_err(nvme->n_dip, CE_WARN, "!unable to reset controller");
2273 2284 ddi_fm_service_impact(nvme->n_dip, DDI_SERVICE_LOST);
2274 2285 nvme->n_dead = B_TRUE;
2275 2286 goto fail;
2276 2287 }
2277 2288
2278 2289 /*
2279 2290 * Create the admin queue pair.
2280 2291 */
2281 2292 if (nvme_alloc_qpair(nvme, nvme->n_admin_queue_len, &nvme->n_adminq, 0)
2282 2293 != DDI_SUCCESS) {
2283 2294 dev_err(nvme->n_dip, CE_WARN,
2284 2295 "!unable to allocate admin qpair");
2285 2296 goto fail;
2286 2297 }
2287 2298 nvme->n_ioq = kmem_alloc(sizeof (nvme_qpair_t *), KM_SLEEP);
2288 2299 nvme->n_ioq[0] = nvme->n_adminq;
2289 2300
2290 2301 nvme->n_progress |= NVME_ADMIN_QUEUE;
2291 2302
2292 2303 (void) ddi_prop_update_int(DDI_DEV_T_NONE, nvme->n_dip,
2293 2304 "admin-queue-len", nvme->n_admin_queue_len);
2294 2305
2295 2306 aqa.b.aqa_asqs = aqa.b.aqa_acqs = nvme->n_admin_queue_len - 1;
2296 2307 asq = nvme->n_adminq->nq_sqdma->nd_cookie.dmac_laddress;
2297 2308 acq = nvme->n_adminq->nq_cqdma->nd_cookie.dmac_laddress;
2298 2309
2299 2310 ASSERT((asq & (nvme->n_pagesize - 1)) == 0);
2300 2311 ASSERT((acq & (nvme->n_pagesize - 1)) == 0);
2301 2312
2302 2313 nvme_put32(nvme, NVME_REG_AQA, aqa.r);
2303 2314 nvme_put64(nvme, NVME_REG_ASQ, asq);
2304 2315 nvme_put64(nvme, NVME_REG_ACQ, acq);
2305 2316
2306 2317 cc.b.cc_ams = 0; /* use Round-Robin arbitration */
2307 2318 cc.b.cc_css = 0; /* use NVM command set */
2308 2319 cc.b.cc_mps = nvme->n_pageshift - 12;
2309 2320 cc.b.cc_shn = 0; /* no shutdown in progress */
2310 2321 cc.b.cc_en = 1; /* enable controller */
2311 2322 cc.b.cc_iosqes = 6; /* submission queue entry is 2^6 bytes long */
2312 2323 cc.b.cc_iocqes = 4; /* completion queue entry is 2^4 bytes long */
2313 2324
2314 2325 nvme_put32(nvme, NVME_REG_CC, cc.r);
2315 2326
2316 2327 /*
2317 2328 * Wait for the controller to become ready.
2318 2329 */
2319 2330 csts.r = nvme_get32(nvme, NVME_REG_CSTS);
2320 2331 if (csts.b.csts_rdy == 0) {
2321 2332 for (i = 0; i != nvme->n_timeout * 10; i++) {
2322 2333 delay(drv_usectohz(50000));
2323 2334 csts.r = nvme_get32(nvme, NVME_REG_CSTS);
2324 2335
2325 2336 if (csts.b.csts_cfs == 1) {
2326 2337 dev_err(nvme->n_dip, CE_WARN,
2327 2338 "!controller fatal status at init");
2328 2339 ddi_fm_service_impact(nvme->n_dip,
2329 2340 DDI_SERVICE_LOST);
2330 2341 nvme->n_dead = B_TRUE;
2331 2342 goto fail;
2332 2343 }
2333 2344
2334 2345 if (csts.b.csts_rdy == 1)
2335 2346 break;
2336 2347 }
2337 2348 }
2338 2349
2339 2350 if (csts.b.csts_rdy == 0) {
2340 2351 dev_err(nvme->n_dip, CE_WARN, "!controller not ready");
2341 2352 ddi_fm_service_impact(nvme->n_dip, DDI_SERVICE_LOST);
2342 2353 nvme->n_dead = B_TRUE;
2343 2354 goto fail;
2344 2355 }
2345 2356
2346 2357 /*
2347 2358 * Assume an abort command limit of 1. We'll destroy and re-init
2348 2359 * that later when we know the true abort command limit.
2349 2360 */
2350 2361 sema_init(&nvme->n_abort_sema, 1, NULL, SEMA_DRIVER, NULL);
2351 2362
2352 2363 /*
2353 2364 * Setup initial interrupt for admin queue.
2354 2365 */
2355 2366 if ((nvme_setup_interrupts(nvme, DDI_INTR_TYPE_MSIX, 1)
2356 2367 != DDI_SUCCESS) &&
2357 2368 (nvme_setup_interrupts(nvme, DDI_INTR_TYPE_MSI, 1)
2358 2369 != DDI_SUCCESS) &&
2359 2370 (nvme_setup_interrupts(nvme, DDI_INTR_TYPE_FIXED, 1)
2360 2371 != DDI_SUCCESS)) {
2361 2372 dev_err(nvme->n_dip, CE_WARN,
2362 2373 "!failed to setup initial interrupt");
2363 2374 goto fail;
144 lines elided
2364 2375 }
2365 2376
2366 2377 /*
2367 2378 * Post an asynchronous event command to catch errors.
2368 2379 */
2369 2380 nvme_async_event(nvme);
2370 2381
2371 2382 /*
2372 2383 * Identify Controller
2373 2384 */
2374 - nvme->n_idctl = nvme_identify(nvme, 0);
2375 - if (nvme->n_idctl == NULL) {
2385 + if (nvme_identify(nvme, 0, (void **)&nvme->n_idctl) != 0) {
2376 2386 dev_err(nvme->n_dip, CE_WARN,
2377 2387 "!failed to identify controller");
2378 2388 goto fail;
2379 2389 }
2380 2390
2381 2391 /*
2382 2392 * Get Vendor & Product ID
2383 2393 */
2384 2394 bcopy(nvme->n_idctl->id_model, model, sizeof (nvme->n_idctl->id_model));
2385 2395 model[sizeof (nvme->n_idctl->id_model)] = '\0';
2386 2396 sata_split_model(model, &vendor, &product);
2387 2397
2388 2398 if (vendor == NULL)
2389 2399 nvme->n_vendor = strdup("NVMe");
2390 2400 else
2391 2401 nvme->n_vendor = strdup(vendor);
2392 2402
2393 2403 nvme->n_product = strdup(product);
2394 2404
2395 2405 /*
2396 2406 * Get controller limits.
2397 2407 */
2398 2408 nvme->n_async_event_limit = MAX(NVME_MIN_ASYNC_EVENT_LIMIT,
2399 2409 MIN(nvme->n_admin_queue_len / 10,
2400 2410 MIN(nvme->n_idctl->id_aerl + 1, nvme->n_async_event_limit)));
2401 2411
2402 2412 (void) ddi_prop_update_int(DDI_DEV_T_NONE, nvme->n_dip,
2403 2413 "async-event-limit", nvme->n_async_event_limit);
2404 2414
2405 2415 nvme->n_abort_command_limit = nvme->n_idctl->id_acl + 1;
2406 2416
2407 2417 /*
2408 2418 * Reinitialize the semaphore with the true abort command limit
2409 2419 * supported by the hardware. It's not necessary to disable interrupts
2410 2420 * as only command aborts use the semaphore, and no commands are
2411 2421 * executed or aborted while we're here.
2412 2422 */
2413 2423 sema_destroy(&nvme->n_abort_sema);
2414 2424 sema_init(&nvme->n_abort_sema, nvme->n_abort_command_limit - 1, NULL,
2415 2425 SEMA_DRIVER, NULL);
2416 2426
2417 2427 nvme->n_progress |= NVME_CTRL_LIMITS;
2418 2428
2419 2429 if (nvme->n_idctl->id_mdts == 0)
2420 2430 nvme->n_max_data_transfer_size = nvme->n_pagesize * 65536;
2421 2431 else
2422 2432 nvme->n_max_data_transfer_size =
2423 2433 1ull << (nvme->n_pageshift + nvme->n_idctl->id_mdts);
2424 2434
2425 2435 nvme->n_error_log_len = nvme->n_idctl->id_elpe + 1;
2426 2436
2427 2437 /*
2428 2438 * Limit n_max_data_transfer_size to what we can handle in one PRP.
2429 2439 * Chained PRPs are currently unsupported.
2430 2440 *
2431 2441 * This is a no-op on hardware which doesn't support a transfer size
2432 2442 * big enough to require chained PRPs.
2433 2443 */
2434 2444 nvme->n_max_data_transfer_size = MIN(nvme->n_max_data_transfer_size,
2435 2445 (nvme->n_pagesize / sizeof (uint64_t) * nvme->n_pagesize));
2436 2446
2437 2447 nvme->n_prp_dma_attr.dma_attr_maxxfer = nvme->n_max_data_transfer_size;
2438 2448
2439 2449 /*
2440 2450 * Make sure the minimum/maximum queue entry sizes are not
2441 2451 * larger/smaller than the default.
2442 2452 */
2443 2453
2444 2454 if (((1 << nvme->n_idctl->id_sqes.qes_min) > sizeof (nvme_sqe_t)) ||
2445 2455 ((1 << nvme->n_idctl->id_sqes.qes_max) < sizeof (nvme_sqe_t)) ||
2446 2456 ((1 << nvme->n_idctl->id_cqes.qes_min) > sizeof (nvme_cqe_t)) ||
2447 2457 ((1 << nvme->n_idctl->id_cqes.qes_max) < sizeof (nvme_cqe_t)))
2448 2458 goto fail;
2449 2459
2450 2460 /*
2451 2461 * Check for the presence of a Volatile Write Cache. If present,
2452 2462 * enable or disable based on the value of the property
2453 2463 * volatile-write-cache-enable (default is enabled).
68 lines elided
2454 2464 */
2455 2465 nvme->n_write_cache_present =
2456 2466 nvme->n_idctl->id_vwc.vwc_present == 0 ? B_FALSE : B_TRUE;
2457 2467
2458 2468 (void) ddi_prop_update_int(DDI_DEV_T_NONE, nvme->n_dip,
2459 2469 "volatile-write-cache-present",
2460 2470 nvme->n_write_cache_present ? 1 : 0);
2461 2471
2462 2472 if (!nvme->n_write_cache_present) {
2463 2473 nvme->n_write_cache_enabled = B_FALSE;
2464 - } else if (!nvme_write_cache_set(nvme, nvme->n_write_cache_enabled)) {
2474 + } else if (nvme_write_cache_set(nvme, nvme->n_write_cache_enabled)
2475 + != 0) {
2465 2476 dev_err(nvme->n_dip, CE_WARN,
2466 2477 "!failed to %sable volatile write cache",
2467 2478 nvme->n_write_cache_enabled ? "en" : "dis");
2468 2479 /*
2469 2480 * Assume the cache is (still) enabled.
2470 2481 */
2471 2482 nvme->n_write_cache_enabled = B_TRUE;
2472 2483 }
2473 2484
2474 2485 (void) ddi_prop_update_int(DDI_DEV_T_NONE, nvme->n_dip,
2475 2486 "volatile-write-cache-enable",
2476 2487 nvme->n_write_cache_enabled ? 1 : 0);
2477 2488
2478 2489 /*
2479 2490 * Assume LBA Range Type feature is supported. If it isn't this
2480 2491 * will be set to B_FALSE by nvme_get_features().
2481 2492 */
2482 2493 nvme->n_lba_range_supported = B_TRUE;
2483 2494
2484 2495 /*
2485 2496 * Check support for Autonomous Power State Transition.
2486 2497 */
2487 2498 if (NVME_VERSION_ATLEAST(&nvme->n_version, 1, 1))
2488 2499 nvme->n_auto_pst_supported =
2489 2500 nvme->n_idctl->id_apsta.ap_sup == 0 ? B_FALSE : B_TRUE;
2490 2501
2491 2502 /*
2492 2503 * Identify Namespaces
2493 2504 */
2494 2505 nvme->n_namespace_count = nvme->n_idctl->id_nn;
2495 2506 if (nvme->n_namespace_count > NVME_MINOR_MAX) {
2496 2507 dev_err(nvme->n_dip, CE_WARN,
2497 2508 "!too many namespaces: %d, limiting to %d\n",
2498 2509 nvme->n_namespace_count, NVME_MINOR_MAX);
2499 2510 nvme->n_namespace_count = NVME_MINOR_MAX;
2500 2511 }
2501 2512
2502 2513 nvme->n_ns = kmem_zalloc(sizeof (nvme_namespace_t) *
2503 2514 nvme->n_namespace_count, KM_SLEEP);
2504 2515
2505 2516 for (i = 0; i != nvme->n_namespace_count; i++) {
2506 2517 mutex_init(&nvme->n_ns[i].ns_minor.nm_mutex, NULL, MUTEX_DRIVER,
2507 2518 NULL);
2508 2519 if (nvme_init_ns(nvme, i + 1) != DDI_SUCCESS)
2509 2520 goto fail;
2510 2521 }
2511 2522
2512 2523 /*
2513 2524 * Try to set up MSI/MSI-X interrupts.
2514 2525 */
2515 2526 if ((nvme->n_intr_types & (DDI_INTR_TYPE_MSI | DDI_INTR_TYPE_MSIX))
2516 2527 != 0) {
2517 2528 nvme_release_interrupts(nvme);
2518 2529
2519 2530 nqueues = MIN(UINT16_MAX, ncpus);
2520 2531
2521 2532 if ((nvme_setup_interrupts(nvme, DDI_INTR_TYPE_MSIX,
2522 2533 nqueues) != DDI_SUCCESS) &&
2523 2534 (nvme_setup_interrupts(nvme, DDI_INTR_TYPE_MSI,
2524 2535 nqueues) != DDI_SUCCESS)) {
2525 2536 dev_err(nvme->n_dip, CE_WARN,
51 lines elided
2526 2537 "!failed to setup MSI/MSI-X interrupts");
2527 2538 goto fail;
2528 2539 }
2529 2540 }
2530 2541
2531 2542 nqueues = nvme->n_intr_cnt;
2532 2543
2533 2544 /*
2534 2545 * Create I/O queue pairs.
2535 2546 */
2536 - nvme->n_ioq_count = nvme_set_nqueues(nvme, nqueues);
2537 - if (nvme->n_ioq_count == 0) {
2547 +
2548 + if (nvme_set_nqueues(nvme, &nqueues) != 0) {
2538 2549 dev_err(nvme->n_dip, CE_WARN,
2539 - "!failed to set number of I/O queues to %d", nqueues);
2550 + "!failed to set number of I/O queues to %d",
2551 + nvme->n_intr_cnt);
2540 2552 goto fail;
2541 2553 }
2542 2554
2543 2555 /*
2544 2556 * Reallocate I/O queue array
2545 2557 */
2546 2558 kmem_free(nvme->n_ioq, sizeof (nvme_qpair_t *));
2547 2559 nvme->n_ioq = kmem_zalloc(sizeof (nvme_qpair_t *) *
2548 - (nvme->n_ioq_count + 1), KM_SLEEP);
2560 + (nqueues + 1), KM_SLEEP);
2549 2561 nvme->n_ioq[0] = nvme->n_adminq;
2550 2562
2563 + nvme->n_ioq_count = nqueues;
2564 +
2551 2565 /*
2552 2566 	 * If we got fewer queues than we asked for, we might as well give
2553 2567 * some of the interrupt vectors back to the system.
2554 2568 */
2555 - if (nvme->n_ioq_count < nqueues) {
2569 + if (nvme->n_ioq_count < nvme->n_intr_cnt) {
2556 2570 nvme_release_interrupts(nvme);
2557 2571
2558 2572 if (nvme_setup_interrupts(nvme, nvme->n_intr_type,
2559 2573 nvme->n_ioq_count) != DDI_SUCCESS) {
2560 2574 dev_err(nvme->n_dip, CE_WARN,
2561 2575 "!failed to reduce number of interrupts");
2562 2576 goto fail;
2563 2577 }
2564 2578 }
2565 2579
2566 2580 /*
2567 2581 * Alloc & register I/O queue pairs
2568 2582 */
2569 2583 nvme->n_io_queue_len =
2570 2584 MIN(nvme->n_io_queue_len, nvme->n_max_queue_entries);
2571 2585 (void) ddi_prop_update_int(DDI_DEV_T_NONE, nvme->n_dip, "io-queue-len",
6 lines elided
2572 2586 nvme->n_io_queue_len);
2573 2587
2574 2588 for (i = 1; i != nvme->n_ioq_count + 1; i++) {
2575 2589 if (nvme_alloc_qpair(nvme, nvme->n_io_queue_len,
2576 2590 &nvme->n_ioq[i], i) != DDI_SUCCESS) {
2577 2591 dev_err(nvme->n_dip, CE_WARN,
2578 2592 "!unable to allocate I/O qpair %d", i);
2579 2593 goto fail;
2580 2594 }
2581 2595
2582 - if (nvme_create_io_qpair(nvme, nvme->n_ioq[i], i)
2583 - != DDI_SUCCESS) {
2596 + if (nvme_create_io_qpair(nvme, nvme->n_ioq[i], i) != 0) {
2584 2597 dev_err(nvme->n_dip, CE_WARN,
2585 2598 "!unable to create I/O qpair %d", i);
2586 2599 goto fail;
2587 2600 }
2588 2601 }
2589 2602
2590 2603 /*
2591 2604 * Post more asynchronous events commands to reduce event reporting
2592 2605 * latency as suggested by the spec.
2593 2606 */
2594 2607 for (i = 1; i != nvme->n_async_event_limit; i++)
2595 2608 nvme_async_event(nvme);
2596 2609
2597 2610 return (DDI_SUCCESS);
2598 2611
2599 2612 fail:
2600 2613 (void) nvme_reset(nvme, B_FALSE);
2601 2614 return (DDI_FAILURE);
2602 2615 }
2603 2616
2604 2617 static uint_t
2605 2618 nvme_intr(caddr_t arg1, caddr_t arg2)
2606 2619 {
13 lines elided
2607 2620 /*LINTED: E_PTR_BAD_CAST_ALIGN*/
2608 2621 nvme_t *nvme = (nvme_t *)arg1;
2609 2622 int inum = (int)(uintptr_t)arg2;
2610 2623 int ccnt = 0;
2611 2624 int qnum;
2612 2625 nvme_cmd_t *cmd;
2613 2626
2614 2627 if (inum >= nvme->n_intr_cnt)
2615 2628 return (DDI_INTR_UNCLAIMED);
2616 2629
2630 + if (nvme->n_dead)
2631 + return (nvme->n_intr_type == DDI_INTR_TYPE_FIXED ?
2632 + DDI_INTR_UNCLAIMED : DDI_INTR_CLAIMED);
2633 +
2617 2634 /*
2618 2635 * The interrupt vector a queue uses is calculated as queue_idx %
2619 2636 * intr_cnt in nvme_create_io_qpair(). Iterate through the queue array
2620 2637 * in steps of n_intr_cnt to process all queues using this vector.
2621 2638 */
2622 2639 for (qnum = inum;
2623 2640 qnum < nvme->n_ioq_count + 1 && nvme->n_ioq[qnum] != NULL;
2624 2641 qnum += nvme->n_intr_cnt) {
2625 2642 while ((cmd = nvme_retrieve_cmd(nvme, nvme->n_ioq[qnum]))) {
2626 2643 taskq_dispatch_ent((taskq_t *)cmd->nc_nvme->n_cmd_taskq,
2627 2644 cmd->nc_callback, cmd, TQ_NOSLEEP, &cmd->nc_tqent);
2628 2645 ccnt++;
2629 2646 }
2630 2647 }
2631 2648
2632 2649 return (ccnt > 0 ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
2633 2650 }
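To make the vector-to-queue stride concrete, a tiny self-contained sketch with assumed counts (four vectors, eight I/O queues plus the admin queue); vector 0 services queues 0, 4 and 8, vector 1 services queues 1 and 5, and so on:

#include <stdio.h>

int
main(void)
{
	int n_intr_cnt = 4;	/* assumed number of interrupt vectors */
	int n_ioq_count = 8;	/* assumed number of I/O queue pairs */

	for (int inum = 0; inum < n_intr_cnt; inum++)
		for (int qnum = inum; qnum < n_ioq_count + 1;
		    qnum += n_intr_cnt)
			printf("vector %d services queue %d\n", inum, qnum);

	return (0);
}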
2634 2651
2635 2652 static void
2636 2653 nvme_release_interrupts(nvme_t *nvme)
2637 2654 {
2638 2655 int i;
2639 2656
2640 2657 for (i = 0; i < nvme->n_intr_cnt; i++) {
2641 2658 if (nvme->n_inth[i] == NULL)
2642 2659 break;
2643 2660
2644 2661 if (nvme->n_intr_cap & DDI_INTR_FLAG_BLOCK)
2645 2662 (void) ddi_intr_block_disable(&nvme->n_inth[i], 1);
2646 2663 else
2647 2664 (void) ddi_intr_disable(nvme->n_inth[i]);
2648 2665
2649 2666 (void) ddi_intr_remove_handler(nvme->n_inth[i]);
2650 2667 (void) ddi_intr_free(nvme->n_inth[i]);
2651 2668 }
2652 2669
2653 2670 kmem_free(nvme->n_inth, nvme->n_inth_sz);
2654 2671 nvme->n_inth = NULL;
2655 2672 nvme->n_inth_sz = 0;
2656 2673
2657 2674 nvme->n_progress &= ~NVME_INTERRUPTS;
2658 2675 }
2659 2676
2660 2677 static int
2661 2678 nvme_setup_interrupts(nvme_t *nvme, int intr_type, int nqpairs)
2662 2679 {
2663 2680 int nintrs, navail, count;
2664 2681 int ret;
2665 2682 int i;
2666 2683
2667 2684 if (nvme->n_intr_types == 0) {
2668 2685 ret = ddi_intr_get_supported_types(nvme->n_dip,
2669 2686 &nvme->n_intr_types);
2670 2687 if (ret != DDI_SUCCESS) {
2671 2688 dev_err(nvme->n_dip, CE_WARN,
2672 2689 "!%s: ddi_intr_get_supported types failed",
2673 2690 __func__);
2674 2691 return (ret);
2675 2692 }
2676 2693 #ifdef __x86
2677 2694 if (get_hwenv() == HW_VMWARE)
2678 2695 nvme->n_intr_types &= ~DDI_INTR_TYPE_MSIX;
2679 2696 #endif
2680 2697 }
2681 2698
2682 2699 if ((nvme->n_intr_types & intr_type) == 0)
2683 2700 return (DDI_FAILURE);
2684 2701
2685 2702 ret = ddi_intr_get_nintrs(nvme->n_dip, intr_type, &nintrs);
2686 2703 if (ret != DDI_SUCCESS) {
2687 2704 dev_err(nvme->n_dip, CE_WARN, "!%s: ddi_intr_get_nintrs failed",
2688 2705 __func__);
2689 2706 return (ret);
2690 2707 }
2691 2708
2692 2709 ret = ddi_intr_get_navail(nvme->n_dip, intr_type, &navail);
2693 2710 if (ret != DDI_SUCCESS) {
2694 2711 dev_err(nvme->n_dip, CE_WARN, "!%s: ddi_intr_get_navail failed",
2695 2712 __func__);
2696 2713 return (ret);
2697 2714 }
2698 2715
2699 2716 /* We want at most one interrupt per queue pair. */
2700 2717 if (navail > nqpairs)
2701 2718 navail = nqpairs;
2702 2719
2703 2720 nvme->n_inth_sz = sizeof (ddi_intr_handle_t) * navail;
2704 2721 nvme->n_inth = kmem_zalloc(nvme->n_inth_sz, KM_SLEEP);
2705 2722
2706 2723 ret = ddi_intr_alloc(nvme->n_dip, nvme->n_inth, intr_type, 0, navail,
2707 2724 &count, 0);
2708 2725 if (ret != DDI_SUCCESS) {
2709 2726 dev_err(nvme->n_dip, CE_WARN, "!%s: ddi_intr_alloc failed",
2710 2727 __func__);
2711 2728 goto fail;
2712 2729 }
2713 2730
2714 2731 nvme->n_intr_cnt = count;
2715 2732
2716 2733 ret = ddi_intr_get_pri(nvme->n_inth[0], &nvme->n_intr_pri);
2717 2734 if (ret != DDI_SUCCESS) {
2718 2735 dev_err(nvme->n_dip, CE_WARN, "!%s: ddi_intr_get_pri failed",
2719 2736 __func__);
2720 2737 goto fail;
2721 2738 }
2722 2739
2723 2740 for (i = 0; i < count; i++) {
2724 2741 ret = ddi_intr_add_handler(nvme->n_inth[i], nvme_intr,
2725 2742 (void *)nvme, (void *)(uintptr_t)i);
2726 2743 if (ret != DDI_SUCCESS) {
2727 2744 dev_err(nvme->n_dip, CE_WARN,
2728 2745 "!%s: ddi_intr_add_handler failed", __func__);
2729 2746 goto fail;
2730 2747 }
2731 2748 }
2732 2749
2733 2750 (void) ddi_intr_get_cap(nvme->n_inth[0], &nvme->n_intr_cap);
2734 2751
2735 2752 for (i = 0; i < count; i++) {
2736 2753 if (nvme->n_intr_cap & DDI_INTR_FLAG_BLOCK)
2737 2754 ret = ddi_intr_block_enable(&nvme->n_inth[i], 1);
2738 2755 else
2739 2756 ret = ddi_intr_enable(nvme->n_inth[i]);
2740 2757
2741 2758 if (ret != DDI_SUCCESS) {
2742 2759 dev_err(nvme->n_dip, CE_WARN,
2743 2760 "!%s: enabling interrupt %d failed", __func__, i);
2744 2761 goto fail;
2745 2762 }
2746 2763 }
2747 2764
2748 2765 nvme->n_intr_type = intr_type;
2749 2766
2750 2767 nvme->n_progress |= NVME_INTERRUPTS;
2751 2768
2752 2769 return (DDI_SUCCESS);
2753 2770
2754 2771 fail:
2755 2772 nvme_release_interrupts(nvme);
2756 2773
2757 2774 return (ret);
2758 2775 }
2759 2776
2760 2777 static int
2761 2778 nvme_fm_errcb(dev_info_t *dip, ddi_fm_error_t *fm_error, const void *arg)
2762 2779 {
2763 2780 _NOTE(ARGUNUSED(arg));
2764 2781
2765 2782 pci_ereport_post(dip, fm_error, NULL);
2766 2783 return (fm_error->fme_status);
2767 2784 }
2768 2785
2769 2786 static int
2770 2787 nvme_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
2771 2788 {
2772 2789 nvme_t *nvme;
2773 2790 int instance;
2774 2791 int nregs;
2775 2792 off_t regsize;
2776 2793 int i;
2777 2794 char name[32];
2778 2795
2779 2796 if (cmd != DDI_ATTACH)
2780 2797 return (DDI_FAILURE);
2781 2798
2782 2799 instance = ddi_get_instance(dip);
2783 2800
2784 2801 if (ddi_soft_state_zalloc(nvme_state, instance) != DDI_SUCCESS)
2785 2802 return (DDI_FAILURE);
2786 2803
2787 2804 nvme = ddi_get_soft_state(nvme_state, instance);
2788 2805 ddi_set_driver_private(dip, nvme);
2789 2806 nvme->n_dip = dip;
2790 2807
2791 2808 mutex_init(&nvme->n_minor.nm_mutex, NULL, MUTEX_DRIVER, NULL);
2792 2809
2793 2810 nvme->n_strict_version = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
2794 2811 DDI_PROP_DONTPASS, "strict-version", 1) == 1 ? B_TRUE : B_FALSE;
2795 2812 nvme->n_ignore_unknown_vendor_status = ddi_prop_get_int(DDI_DEV_T_ANY,
2796 2813 dip, DDI_PROP_DONTPASS, "ignore-unknown-vendor-status", 0) == 1 ?
2797 2814 B_TRUE : B_FALSE;
2798 2815 nvme->n_admin_queue_len = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
2799 2816 DDI_PROP_DONTPASS, "admin-queue-len", NVME_DEFAULT_ADMIN_QUEUE_LEN);
2800 2817 nvme->n_io_queue_len = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
2801 2818 DDI_PROP_DONTPASS, "io-queue-len", NVME_DEFAULT_IO_QUEUE_LEN);
2802 2819 nvme->n_async_event_limit = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
2803 2820 DDI_PROP_DONTPASS, "async-event-limit",
2804 2821 NVME_DEFAULT_ASYNC_EVENT_LIMIT);
2805 2822 nvme->n_write_cache_enabled = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
2806 2823 DDI_PROP_DONTPASS, "volatile-write-cache-enable", 1) != 0 ?
2807 2824 B_TRUE : B_FALSE;
2808 2825 nvme->n_min_block_size = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
2809 2826 DDI_PROP_DONTPASS, "min-phys-block-size",
2810 2827 NVME_DEFAULT_MIN_BLOCK_SIZE);
2811 2828
2812 2829 if (!ISP2(nvme->n_min_block_size) ||
2813 2830 (nvme->n_min_block_size < NVME_DEFAULT_MIN_BLOCK_SIZE)) {
2814 2831 dev_err(dip, CE_WARN, "!min-phys-block-size %s, "
2815 2832 "using default %d", ISP2(nvme->n_min_block_size) ?
2816 2833 "too low" : "not a power of 2",
2817 2834 NVME_DEFAULT_MIN_BLOCK_SIZE);
2818 2835 nvme->n_min_block_size = NVME_DEFAULT_MIN_BLOCK_SIZE;
2819 2836 }
2820 2837
2821 2838 if (nvme->n_admin_queue_len < NVME_MIN_ADMIN_QUEUE_LEN)
2822 2839 nvme->n_admin_queue_len = NVME_MIN_ADMIN_QUEUE_LEN;
2823 2840 else if (nvme->n_admin_queue_len > NVME_MAX_ADMIN_QUEUE_LEN)
2824 2841 nvme->n_admin_queue_len = NVME_MAX_ADMIN_QUEUE_LEN;
2825 2842
2826 2843 if (nvme->n_io_queue_len < NVME_MIN_IO_QUEUE_LEN)
2827 2844 nvme->n_io_queue_len = NVME_MIN_IO_QUEUE_LEN;
2828 2845
2829 2846 if (nvme->n_async_event_limit < 1)
2830 2847 nvme->n_async_event_limit = NVME_DEFAULT_ASYNC_EVENT_LIMIT;
2831 2848
2832 2849 nvme->n_reg_acc_attr = nvme_reg_acc_attr;
2833 2850 nvme->n_queue_dma_attr = nvme_queue_dma_attr;
2834 2851 nvme->n_prp_dma_attr = nvme_prp_dma_attr;
2835 2852 nvme->n_sgl_dma_attr = nvme_sgl_dma_attr;
2836 2853
2837 2854 /*
2838 2855 * Setup FMA support.
2839 2856 */
2840 2857 nvme->n_fm_cap = ddi_getprop(DDI_DEV_T_ANY, dip,
2841 2858 DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "fm-capable",
2842 2859 DDI_FM_EREPORT_CAPABLE | DDI_FM_ACCCHK_CAPABLE |
2843 2860 DDI_FM_DMACHK_CAPABLE | DDI_FM_ERRCB_CAPABLE);
2844 2861
2845 2862 ddi_fm_init(dip, &nvme->n_fm_cap, &nvme->n_fm_ibc);
2846 2863
2847 2864 if (nvme->n_fm_cap) {
2848 2865 if (nvme->n_fm_cap & DDI_FM_ACCCHK_CAPABLE)
2849 2866 nvme->n_reg_acc_attr.devacc_attr_access =
2850 2867 DDI_FLAGERR_ACC;
2851 2868
2852 2869 if (nvme->n_fm_cap & DDI_FM_DMACHK_CAPABLE) {
2853 2870 nvme->n_prp_dma_attr.dma_attr_flags |= DDI_DMA_FLAGERR;
2854 2871 nvme->n_sgl_dma_attr.dma_attr_flags |= DDI_DMA_FLAGERR;
2855 2872 }
2856 2873
2857 2874 if (DDI_FM_EREPORT_CAP(nvme->n_fm_cap) ||
2858 2875 DDI_FM_ERRCB_CAP(nvme->n_fm_cap))
2859 2876 pci_ereport_setup(dip);
2860 2877
2861 2878 if (DDI_FM_ERRCB_CAP(nvme->n_fm_cap))
2862 2879 ddi_fm_handler_register(dip, nvme_fm_errcb,
2863 2880 (void *)nvme);
2864 2881 }
2865 2882
2866 2883 nvme->n_progress |= NVME_FMA_INIT;
2867 2884
2868 2885 /*
2869 2886 * The spec defines several register sets. Only the controller
2870 2887 * registers (set 1) are currently used.
2871 2888 */
2872 2889 if (ddi_dev_nregs(dip, &nregs) == DDI_FAILURE ||
2873 2890 nregs < 2 ||
2874 2891 	    ddi_dev_regsize(dip, 1, &regsize) == DDI_FAILURE)
2875 2892 goto fail;
2876 2893
2877 2894 if (ddi_regs_map_setup(dip, 1, &nvme->n_regs, 0, regsize,
2878 2895 &nvme->n_reg_acc_attr, &nvme->n_regh) != DDI_SUCCESS) {
2879 2896 dev_err(dip, CE_WARN, "!failed to map regset 1");
2880 2897 goto fail;
2881 2898 }
2882 2899
2883 2900 nvme->n_progress |= NVME_REGS_MAPPED;
2884 2901
2885 2902 /*
2886 2903 * Create taskq for command completion.
2887 2904 */
2888 2905 (void) snprintf(name, sizeof (name), "%s%d_cmd_taskq",
2889 2906 ddi_driver_name(dip), ddi_get_instance(dip));
2890 2907 nvme->n_cmd_taskq = ddi_taskq_create(dip, name, MIN(UINT16_MAX, ncpus),
2891 2908 TASKQ_DEFAULTPRI, 0);
2892 2909 if (nvme->n_cmd_taskq == NULL) {
2893 2910 dev_err(dip, CE_WARN, "!failed to create cmd taskq");
2894 2911 goto fail;
2895 2912 }
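	/*
	 * Sketch only (the dispatching function name is invented): commands
	 * retrieved by the interrupt handler are expected to be posted to
	 * this taskq for completion processing, roughly as
	 *
	 *	(void) ddi_taskq_dispatch(nvme->n_cmd_taskq,
	 *	    nvme_complete_cmd, cmd, DDI_NOSLEEP);
	 *
	 * so that completion callbacks like nvme_bd_xfer_done() run in
	 * taskq context rather than in interrupt context.
	 */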
2896 2913
2897 2914 /*
2898 2915 * Create PRP DMA cache
2899 2916 */
2900 2917 (void) snprintf(name, sizeof (name), "%s%d_prp_cache",
2901 2918 ddi_driver_name(dip), ddi_get_instance(dip));
2902 2919 nvme->n_prp_cache = kmem_cache_create(name, sizeof (nvme_dma_t),
2903 2920 0, nvme_prp_dma_constructor, nvme_prp_dma_destructor,
2904 2921 NULL, (void *)nvme, NULL, 0);
2905 2922
2906 2923 if (nvme_init(nvme) != DDI_SUCCESS)
2907 2924 goto fail;
2908 2925
2909 2926 /*
2910 2927 * Attach the blkdev driver for each namespace.
2911 2928 */
2912 2929 for (i = 0; i != nvme->n_namespace_count; i++) {
2913 2930 if (ddi_create_minor_node(nvme->n_dip, nvme->n_ns[i].ns_name,
2914 2931 S_IFCHR, NVME_MINOR(ddi_get_instance(nvme->n_dip), i + 1),
2915 2932 DDI_NT_NVME_ATTACHMENT_POINT, 0) != DDI_SUCCESS) {
2916 2933 dev_err(dip, CE_WARN,
2917 2934 "!failed to create minor node for namespace %d", i);
2918 2935 goto fail;
2919 2936 }
2920 2937
2921 2938 if (nvme->n_ns[i].ns_ignore)
2922 2939 continue;
2923 2940
2924 2941 nvme->n_ns[i].ns_bd_hdl = bd_alloc_handle(&nvme->n_ns[i],
2925 2942 &nvme_bd_ops, &nvme->n_prp_dma_attr, KM_SLEEP);
2926 2943
2927 2944 if (nvme->n_ns[i].ns_bd_hdl == NULL) {
2928 2945 dev_err(dip, CE_WARN,
2929 2946 "!failed to get blkdev handle for namespace %d", i);
2930 2947 goto fail;
2931 2948 }
2932 2949
2933 2950 if (bd_attach_handle(dip, nvme->n_ns[i].ns_bd_hdl)
2934 2951 != DDI_SUCCESS) {
2935 2952 dev_err(dip, CE_WARN,
2936 2953 "!failed to attach blkdev handle for namespace %d",
2937 2954 i);
2938 2955 goto fail;
2939 2956 }
2940 2957 }
2941 2958
2942 2959 if (ddi_create_minor_node(dip, "devctl", S_IFCHR,
2943 2960 NVME_MINOR(ddi_get_instance(dip), 0), DDI_NT_NVME_NEXUS, 0)
2944 2961 != DDI_SUCCESS) {
2945 2962 dev_err(dip, CE_WARN, "nvme_attach: "
2946 2963 "cannot create devctl minor node");
2947 2964 goto fail;
2948 2965 }
2949 2966
2950 2967 return (DDI_SUCCESS);
2951 2968
2952 2969 fail:
2953 2970 /* attach successful anyway so that FMA can retire the device */
2954 2971 if (nvme->n_dead)
2955 2972 return (DDI_SUCCESS);
2956 2973
2957 2974 (void) nvme_detach(dip, DDI_DETACH);
2958 2975
2959 2976 return (DDI_FAILURE);
2960 2977 }
2961 2978
2962 2979 static int
2963 2980 nvme_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
2964 2981 {
2965 2982 int instance, i;
2966 2983 nvme_t *nvme;
2967 2984
2968 2985 if (cmd != DDI_DETACH)
2969 2986 return (DDI_FAILURE);
2970 2987
2971 2988 instance = ddi_get_instance(dip);
2972 2989
2973 2990 nvme = ddi_get_soft_state(nvme_state, instance);
2974 2991
2975 2992 if (nvme == NULL)
2976 2993 return (DDI_FAILURE);
2977 2994
2978 2995 ddi_remove_minor_node(dip, "devctl");
2979 2996 mutex_destroy(&nvme->n_minor.nm_mutex);
2980 2997
2981 2998 if (nvme->n_ns) {
2982 2999 for (i = 0; i != nvme->n_namespace_count; i++) {
2983 3000 ddi_remove_minor_node(dip, nvme->n_ns[i].ns_name);
2984 3001 mutex_destroy(&nvme->n_ns[i].ns_minor.nm_mutex);
2985 3002
2986 3003 if (nvme->n_ns[i].ns_bd_hdl) {
2987 3004 (void) bd_detach_handle(
2988 3005 nvme->n_ns[i].ns_bd_hdl);
2989 3006 bd_free_handle(nvme->n_ns[i].ns_bd_hdl);
2990 3007 }
2991 3008
2992 3009 if (nvme->n_ns[i].ns_idns)
2993 3010 kmem_free(nvme->n_ns[i].ns_idns,
2994 3011 sizeof (nvme_identify_nsid_t));
2995 3012 if (nvme->n_ns[i].ns_devid)
2996 3013 strfree(nvme->n_ns[i].ns_devid);
2997 3014 }
2998 3015
2999 3016 kmem_free(nvme->n_ns, sizeof (nvme_namespace_t) *
3000 3017 nvme->n_namespace_count);
3001 3018 }
3002 3019
3003 3020 if (nvme->n_progress & NVME_INTERRUPTS)
3004 3021 nvme_release_interrupts(nvme);
3005 3022
3006 3023 if (nvme->n_cmd_taskq)
3007 3024 ddi_taskq_wait(nvme->n_cmd_taskq);
3008 3025
3009 3026 if (nvme->n_ioq_count > 0) {
3010 3027 for (i = 1; i != nvme->n_ioq_count + 1; i++) {
3011 3028 if (nvme->n_ioq[i] != NULL) {
3012 3029 /* TODO: send destroy queue commands */
3013 3030 nvme_free_qpair(nvme->n_ioq[i]);
3014 3031 }
3015 3032 }
3016 3033
3017 3034 kmem_free(nvme->n_ioq, sizeof (nvme_qpair_t *) *
3018 3035 (nvme->n_ioq_count + 1));
3019 3036 }
3020 3037
3021 3038 if (nvme->n_prp_cache != NULL) {
3022 3039 kmem_cache_destroy(nvme->n_prp_cache);
3023 3040 }
3024 3041
3025 3042 if (nvme->n_progress & NVME_REGS_MAPPED) {
3026 3043 nvme_shutdown(nvme, NVME_CC_SHN_NORMAL, B_FALSE);
3027 3044 (void) nvme_reset(nvme, B_FALSE);
3028 3045 }
3029 3046
3030 3047 if (nvme->n_cmd_taskq)
3031 3048 ddi_taskq_destroy(nvme->n_cmd_taskq);
3032 3049
3033 3050 if (nvme->n_progress & NVME_CTRL_LIMITS)
3034 3051 sema_destroy(&nvme->n_abort_sema);
3035 3052
3036 3053 if (nvme->n_progress & NVME_ADMIN_QUEUE)
3037 3054 nvme_free_qpair(nvme->n_adminq);
3038 3055
3039 3056 if (nvme->n_idctl)
3040 3057 kmem_free(nvme->n_idctl, NVME_IDENTIFY_BUFSIZE);
3041 3058
3042 3059 if (nvme->n_progress & NVME_REGS_MAPPED)
3043 3060 ddi_regs_map_free(&nvme->n_regh);
3044 3061
3045 3062 if (nvme->n_progress & NVME_FMA_INIT) {
3046 3063 if (DDI_FM_ERRCB_CAP(nvme->n_fm_cap))
3047 3064 ddi_fm_handler_unregister(nvme->n_dip);
3048 3065
3049 3066 if (DDI_FM_EREPORT_CAP(nvme->n_fm_cap) ||
3050 3067 DDI_FM_ERRCB_CAP(nvme->n_fm_cap))
3051 3068 pci_ereport_teardown(nvme->n_dip);
3052 3069
3053 3070 ddi_fm_fini(nvme->n_dip);
3054 3071 }
3055 3072
3056 3073 if (nvme->n_vendor != NULL)
3057 3074 strfree(nvme->n_vendor);
3058 3075
3059 3076 if (nvme->n_product != NULL)
3060 3077 strfree(nvme->n_product);
3061 3078
3062 3079 ddi_soft_state_free(nvme_state, instance);
3063 3080
3064 3081 return (DDI_SUCCESS);
3065 3082 }
3066 3083
3067 3084 static int
3068 3085 nvme_quiesce(dev_info_t *dip)
3069 3086 {
3070 3087 int instance;
3071 3088 nvme_t *nvme;
3072 3089
3073 3090 instance = ddi_get_instance(dip);
3074 3091
3075 3092 nvme = ddi_get_soft_state(nvme_state, instance);
3076 3093
3077 3094 if (nvme == NULL)
3078 3095 return (DDI_FAILURE);
3079 3096
3080 3097 nvme_shutdown(nvme, NVME_CC_SHN_ABRUPT, B_TRUE);
3081 3098
3082 3099 (void) nvme_reset(nvme, B_TRUE);
3083 3100
3084 3101 return (DDI_FAILURE);
3085 3102 }
3086 3103
3087 3104 static int
3088 3105 nvme_fill_prp(nvme_cmd_t *cmd, bd_xfer_t *xfer)
3089 3106 {
3090 3107 nvme_t *nvme = cmd->nc_nvme;
3091 3108 int nprp_page, nprp;
3092 3109 uint64_t *prp;
3093 3110
3094 3111 if (xfer->x_ndmac == 0)
3095 3112 return (DDI_FAILURE);
3096 3113
3097 3114 cmd->nc_sqe.sqe_dptr.d_prp[0] = xfer->x_dmac.dmac_laddress;
3098 3115 ddi_dma_nextcookie(xfer->x_dmah, &xfer->x_dmac);
3099 3116
3100 3117 if (xfer->x_ndmac == 1) {
3101 3118 cmd->nc_sqe.sqe_dptr.d_prp[1] = 0;
3102 3119 return (DDI_SUCCESS);
3103 3120 } else if (xfer->x_ndmac == 2) {
3104 3121 cmd->nc_sqe.sqe_dptr.d_prp[1] = xfer->x_dmac.dmac_laddress;
3105 3122 return (DDI_SUCCESS);
3106 3123 }
3107 3124
3108 3125 xfer->x_ndmac--;
3109 3126
3110 3127 nprp_page = nvme->n_pagesize / sizeof (uint64_t) - 1;
3111 3128 ASSERT(nprp_page > 0);
3112 3129 nprp = (xfer->x_ndmac + nprp_page - 1) / nprp_page;
3113 3130
3114 3131 /*
3115 3132 * We currently don't support chained PRPs and set up our DMA
3116 3133 * attributes to reflect that. If we still get an I/O request
3117 3134 * that needs a chained PRP something is very wrong.
3118 3135 */
3119 3136 VERIFY(nprp == 1);
3120 3137
3121 3138 cmd->nc_dma = kmem_cache_alloc(nvme->n_prp_cache, KM_SLEEP);
3122 3139 bzero(cmd->nc_dma->nd_memp, cmd->nc_dma->nd_len);
3123 3140
3124 3141 cmd->nc_sqe.sqe_dptr.d_prp[1] = cmd->nc_dma->nd_cookie.dmac_laddress;
3125 3142
3126 3143 /*LINTED: E_PTR_BAD_CAST_ALIGN*/
3127 3144 for (prp = (uint64_t *)cmd->nc_dma->nd_memp;
3128 3145 xfer->x_ndmac > 0;
3129 3146 prp++, xfer->x_ndmac--) {
3130 3147 *prp = xfer->x_dmac.dmac_laddress;
3131 3148 ddi_dma_nextcookie(xfer->x_dmah, &xfer->x_dmac);
3132 3149 }
3133 3150
3134 3151 (void) ddi_dma_sync(cmd->nc_dma->nd_dmah, 0, cmd->nc_dma->nd_len,
3135 3152 DDI_DMA_SYNC_FORDEV);
3136 3153 return (DDI_SUCCESS);
3137 3154 }
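To make the sizing arithmetic above concrete, here is a stand-alone sketch (function name invented, not driver code) of the same calculation. With 4096-byte pages a PRP list page holds 4096 / 8 - 1 = 511 usable entries; the minus one leaves room for the chain pointer a multi-page list would need, and the driver's DMA attributes ensure the result is never more than one page, hence the VERIFY above.

	#include <stdint.h>

	/* remaining: DMA cookies still to be placed once PRP1 is filled in */
	static uint64_t
	prp_list_pages(uint64_t pagesize, uint64_t remaining)
	{
		uint64_t nprp_page = pagesize / sizeof (uint64_t) - 1;

		return ((remaining + nprp_page - 1) / nprp_page);
	}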
3138 3155
3139 3156 static nvme_cmd_t *
3140 3157 nvme_create_nvm_cmd(nvme_namespace_t *ns, uint8_t opc, bd_xfer_t *xfer)
3141 3158 {
3142 3159 nvme_t *nvme = ns->ns_nvme;
3143 3160 nvme_cmd_t *cmd;
3144 3161
3145 3162 /*
3146 3163 * Blkdev only sets BD_XFER_POLL when dumping, so don't sleep.
3147 3164 */
3148 3165 cmd = nvme_alloc_cmd(nvme, (xfer->x_flags & BD_XFER_POLL) ?
3149 3166 KM_NOSLEEP : KM_SLEEP);
3150 3167
3151 3168 if (cmd == NULL)
3152 3169 return (NULL);
3153 3170
3154 3171 cmd->nc_sqe.sqe_opc = opc;
3155 3172 cmd->nc_callback = nvme_bd_xfer_done;
3156 3173 cmd->nc_xfer = xfer;
3157 3174
3158 3175 switch (opc) {
3159 3176 case NVME_OPC_NVM_WRITE:
3160 3177 case NVME_OPC_NVM_READ:
3161 3178 VERIFY(xfer->x_nblks <= 0x10000);
3162 3179
3163 3180 cmd->nc_sqe.sqe_nsid = ns->ns_id;
3164 3181
3165 3182 cmd->nc_sqe.sqe_cdw10 = xfer->x_blkno & 0xffffffffu;
3166 3183 cmd->nc_sqe.sqe_cdw11 = (xfer->x_blkno >> 32);
3167 3184 cmd->nc_sqe.sqe_cdw12 = (uint16_t)(xfer->x_nblks - 1);
3168 3185
3169 3186 if (nvme_fill_prp(cmd, xfer) != DDI_SUCCESS)
3170 3187 goto fail;
3171 3188 break;
3172 3189
3173 3190 case NVME_OPC_NVM_FLUSH:
3174 3191 cmd->nc_sqe.sqe_nsid = ns->ns_id;
3175 3192 break;
3176 3193
3177 3194 default:
3178 3195 goto fail;
3179 3196 }
3180 3197
3181 3198 return (cmd);
3182 3199
3183 3200 fail:
3184 3201 nvme_free_cmd(cmd);
3185 3202 return (NULL);
3186 3203 }
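A stand-alone sketch (helper name invented, not driver code) of the LBA and block-count encoding used above: the starting LBA is split across CDW10 and CDW11, and CDW12 carries a zero-based block count, which is why x_nblks is verified to be at most 0x10000.

	#include <stdint.h>

	static void
	encode_rw_cdws(uint64_t blkno, uint64_t nblks,
	    uint32_t *cdw10, uint32_t *cdw11, uint32_t *cdw12)
	{
		/* e.g. blkno 0x123456789, nblks 8 -> 0x23456789, 0x1, 7 */
		*cdw10 = blkno & 0xffffffffu;		/* SLBA bits 31:0 */
		*cdw11 = (uint32_t)(blkno >> 32);	/* SLBA bits 63:32 */
		*cdw12 = (uint16_t)(nblks - 1);		/* NLB, zero-based */
	}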
3187 3204
3188 3205 static void
3189 3206 nvme_bd_xfer_done(void *arg)
3190 3207 {
3191 3208 nvme_cmd_t *cmd = arg;
3192 3209 bd_xfer_t *xfer = cmd->nc_xfer;
3193 3210 int error = 0;
3194 3211
3195 3212 error = nvme_check_cmd_status(cmd);
3196 3213 nvme_free_cmd(cmd);
3197 3214
3198 3215 bd_xfer_done(xfer, error);
3199 3216 }
3200 3217
3201 3218 static void
3202 3219 nvme_bd_driveinfo(void *arg, bd_drive_t *drive)
3203 3220 {
3204 3221 nvme_namespace_t *ns = arg;
3205 3222 nvme_t *nvme = ns->ns_nvme;
3206 3223
3207 3224 /*
3208 3225 * blkdev maintains one queue size per instance (namespace),
3209 3226 	 * but all namespaces share the I/O queues.
3210 3227 * TODO: need to figure out a sane default, or use per-NS I/O queues,
3211 3228 * or change blkdev to handle EAGAIN
3212 3229 */
3213 3230 drive->d_qsize = nvme->n_ioq_count * nvme->n_io_queue_len
3214 3231 / nvme->n_namespace_count;
3215 3232
3216 3233 /*
3217 3234 * d_maxxfer is not set, which means the value is taken from the DMA
3218 3235 * attributes specified to bd_alloc_handle.
3219 3236 */
3220 3237
3221 3238 drive->d_removable = B_FALSE;
3222 3239 drive->d_hotpluggable = B_FALSE;
3223 3240
3224 3241 bcopy(ns->ns_eui64, drive->d_eui64, sizeof (drive->d_eui64));
3225 3242 drive->d_target = ns->ns_id;
3226 3243 drive->d_lun = 0;
3227 3244
3228 3245 drive->d_model = nvme->n_idctl->id_model;
3229 3246 drive->d_model_len = sizeof (nvme->n_idctl->id_model);
3230 3247 drive->d_vendor = nvme->n_vendor;
3231 3248 drive->d_vendor_len = strlen(nvme->n_vendor);
3232 3249 drive->d_product = nvme->n_product;
3233 3250 drive->d_product_len = strlen(nvme->n_product);
3234 3251 drive->d_serial = nvme->n_idctl->id_serial;
3235 3252 drive->d_serial_len = sizeof (nvme->n_idctl->id_serial);
3236 3253 drive->d_revision = nvme->n_idctl->id_fwrev;
3237 3254 drive->d_revision_len = sizeof (nvme->n_idctl->id_fwrev);
3238 3255 }
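Worked example for the d_qsize heuristic above (numbers invented): with 8 I/O queues of depth 1024 shared by 2 namespaces, each namespace is advertised d_qsize = 8 * 1024 / 2 = 4096 concurrently outstanding transfers.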
3239 3256
3240 3257 static int
3241 3258 nvme_bd_mediainfo(void *arg, bd_media_t *media)
3242 3259 {
3243 3260 nvme_namespace_t *ns = arg;
3244 3261
3245 3262 media->m_nblks = ns->ns_block_count;
3246 3263 media->m_blksize = ns->ns_block_size;
3247 3264 media->m_readonly = B_FALSE;
3248 3265 media->m_solidstate = B_TRUE;
3249 3266
3250 3267 media->m_pblksize = ns->ns_best_block_size;
3251 3268
3252 3269 return (0);
3253 3270 }
3254 3271
3255 3272 static int
3256 3273 nvme_bd_cmd(nvme_namespace_t *ns, bd_xfer_t *xfer, uint8_t opc)
3257 3274 {
3258 3275 nvme_t *nvme = ns->ns_nvme;
3259 3276 nvme_cmd_t *cmd;
3260 3277 nvme_qpair_t *ioq;
3261 3278 boolean_t poll;
3262 3279 int ret;
3263 3280
3264 3281 if (nvme->n_dead)
3265 3282 return (EIO);
3266 3283
3267 3284 cmd = nvme_create_nvm_cmd(ns, opc, xfer);
3268 3285 if (cmd == NULL)
3269 3286 return (ENOMEM);
3270 3287
3271 3288 cmd->nc_sqid = (CPU->cpu_id % nvme->n_ioq_count) + 1;
3272 3289 ASSERT(cmd->nc_sqid <= nvme->n_ioq_count);
3273 3290 ioq = nvme->n_ioq[cmd->nc_sqid];
3274 3291
3275 3292 /*
3276 3293 * Get the polling flag before submitting the command. The command may
3277 3294 * complete immediately after it was submitted, which means we must
3278 3295 * treat both cmd and xfer as if they have been freed already.
3279 3296 */
3280 3297 poll = (xfer->x_flags & BD_XFER_POLL) != 0;
3281 3298
3282 3299 ret = nvme_submit_io_cmd(ioq, cmd);
3283 3300
3284 3301 if (ret != 0)
3285 3302 return (ret);
3286 3303
3287 3304 if (!poll)
3288 3305 return (0);
3289 3306
3290 3307 do {
3291 3308 cmd = nvme_retrieve_cmd(nvme, ioq);
3292 3309 if (cmd != NULL)
3293 3310 nvme_bd_xfer_done(cmd);
3294 3311 else
3295 3312 drv_usecwait(10);
3296 3313 } while (ioq->nq_active_cmds != 0);
3297 3314
3298 3315 return (0);
3299 3316 }
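Worked example for the queue selection above (numbers invented): with n_ioq_count == 4, a request issued on CPU 6 is submitted to I/O queue (6 % 4) + 1 == 3. Queue ids start at 1 because submission queue 0 is the admin queue.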
3300 3317
3301 3318 static int
3302 3319 nvme_bd_read(void *arg, bd_xfer_t *xfer)
3303 3320 {
3304 3321 nvme_namespace_t *ns = arg;
3305 3322
3306 3323 return (nvme_bd_cmd(ns, xfer, NVME_OPC_NVM_READ));
3307 3324 }
3308 3325
3309 3326 static int
3310 3327 nvme_bd_write(void *arg, bd_xfer_t *xfer)
3311 3328 {
3312 3329 nvme_namespace_t *ns = arg;
3313 3330
3314 3331 return (nvme_bd_cmd(ns, xfer, NVME_OPC_NVM_WRITE));
3315 3332 }
3316 3333
3317 3334 static int
3318 3335 nvme_bd_sync(void *arg, bd_xfer_t *xfer)
3319 3336 {
3320 3337 nvme_namespace_t *ns = arg;
3321 3338
3322 3339 if (ns->ns_nvme->n_dead)
3323 3340 return (EIO);
3324 3341
3325 3342 	/*
3326 3343 	 * If the volatile write cache is absent (ENOTSUP) or disabled (no-op),
3327 3344 	 * a FLUSH is pointless, so we can take a shortcut here.
3328 3345 	 */
3329 3346 if (!ns->ns_nvme->n_write_cache_present) {
3330 3347 bd_xfer_done(xfer, ENOTSUP);
3331 3348 return (0);
3332 3349 }
3333 3350
3334 3351 if (!ns->ns_nvme->n_write_cache_enabled) {
3335 3352 bd_xfer_done(xfer, 0);
3336 3353 return (0);
3337 3354 }
3338 3355
3339 3356 return (nvme_bd_cmd(ns, xfer, NVME_OPC_NVM_FLUSH));
3340 3357 }
3341 3358
3342 3359 static int
3343 3360 nvme_bd_devid(void *arg, dev_info_t *devinfo, ddi_devid_t *devid)
3344 3361 {
3345 3362 nvme_namespace_t *ns = arg;
3346 3363
3347 3364 /*LINTED: E_BAD_PTR_CAST_ALIGN*/
3348 3365 if (*(uint64_t *)ns->ns_eui64 != 0) {
3349 3366 return (ddi_devid_init(devinfo, DEVID_SCSI3_WWN,
3350 3367 sizeof (ns->ns_eui64), ns->ns_eui64, devid));
3351 3368 } else {
3352 3369 return (ddi_devid_init(devinfo, DEVID_ENCAP,
3353 3370 strlen(ns->ns_devid), ns->ns_devid, devid));
3354 3371 }
3355 3372 }
3356 3373
3357 3374 static int
3358 3375 nvme_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
3359 3376 {
3360 3377 #ifndef __lock_lint
3361 3378 _NOTE(ARGUNUSED(cred_p));
3362 3379 #endif
3363 3380 minor_t minor = getminor(*devp);
3364 3381 nvme_t *nvme = ddi_get_soft_state(nvme_state, NVME_MINOR_INST(minor));
3365 3382 int nsid = NVME_MINOR_NSID(minor);
3366 3383 nvme_minor_state_t *nm;
3367 3384 int rv = 0;
3368 3385
3369 3386 if (otyp != OTYP_CHR)
3370 3387 return (EINVAL);
3371 3388
3372 3389 if (nvme == NULL)
3373 3390 return (ENXIO);
3374 3391
3375 3392 if (nsid > nvme->n_namespace_count)
3376 3393 return (ENXIO);
3377 3394
3395 + if (nvme->n_dead)
3396 + return (EIO);
3397 +
3378 3398 nm = nsid == 0 ? &nvme->n_minor : &nvme->n_ns[nsid - 1].ns_minor;
3379 3399
3380 3400 mutex_enter(&nm->nm_mutex);
3381 3401 if (nm->nm_oexcl) {
3382 3402 rv = EBUSY;
3383 3403 goto out;
3384 3404 }
3385 3405
3386 3406 if (flag & FEXCL) {
3387 3407 if (nm->nm_ocnt != 0) {
3388 3408 rv = EBUSY;
3389 3409 goto out;
3390 3410 }
3391 3411 nm->nm_oexcl = B_TRUE;
3392 3412 }
3393 3413
3394 3414 nm->nm_ocnt++;
3395 3415
3396 3416 out:
3397 3417 mutex_exit(&nm->nm_mutex);
3398 3418 return (rv);
3399 3419
3400 3420 }
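A user-level sketch of the exclusive-open behaviour implemented above; the program is illustrative only and expects the path of one of the minor nodes created during attach as its argument:

	#include <fcntl.h>
	#include <errno.h>
	#include <stdio.h>

	int
	main(int argc, char **argv)
	{
		int fd1, fd2;

		if (argc < 2)
			return (1);

		fd1 = open(argv[1], O_RDWR | O_EXCL);	/* FEXCL reaches nvme_open() */
		fd2 = open(argv[1], O_RDWR);		/* second open while fd1 is held */

		if (fd1 != -1 && fd2 == -1 && errno == EBUSY)
			(void) printf("exclusive open enforced\n");

		return (0);
	}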
3401 3421
3402 3422 static int
3403 3423 nvme_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
3404 3424 {
3405 3425 #ifndef __lock_lint
3406 3426 _NOTE(ARGUNUSED(cred_p));
3407 3427 _NOTE(ARGUNUSED(flag));
3408 3428 #endif
3409 3429 minor_t minor = getminor(dev);
3410 3430 nvme_t *nvme = ddi_get_soft_state(nvme_state, NVME_MINOR_INST(minor));
3411 3431 int nsid = NVME_MINOR_NSID(minor);
3412 3432 nvme_minor_state_t *nm;
3413 3433
3414 3434 if (otyp != OTYP_CHR)
3415 3435 return (ENXIO);
3416 3436
3417 3437 if (nvme == NULL)
3418 3438 return (ENXIO);
3419 3439
3420 3440 if (nsid > nvme->n_namespace_count)
3421 3441 return (ENXIO);
3422 3442
3423 3443 nm = nsid == 0 ? &nvme->n_minor : &nvme->n_ns[nsid - 1].ns_minor;
3424 3444
3425 3445 mutex_enter(&nm->nm_mutex);
3426 3446 if (nm->nm_oexcl)
3427 3447 nm->nm_oexcl = B_FALSE;
3428 3448
3429 3449 ASSERT(nm->nm_ocnt > 0);
3430 3450 nm->nm_ocnt--;
3431 3451 mutex_exit(&nm->nm_mutex);
3432 3452
3433 3453 return (0);
3434 3454 }
3435 3455
3436 3456 static int
3437 3457 nvme_ioctl_identify(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc, int mode,
3438 3458 cred_t *cred_p)
3439 3459 {
3440 3460 _NOTE(ARGUNUSED(cred_p));
3441 3461 int rv = 0;
3442 3462 void *idctl;
3443 3463
3444 3464 if ((mode & FREAD) == 0)
3445 3465 return (EPERM);
3446 3466
3447 3467 if (nioc->n_len < NVME_IDENTIFY_BUFSIZE)
3448 3468 return (EINVAL);
3449 3469
3450 - idctl = nvme_identify(nvme, nsid);
3451 - if (idctl == NULL)
3452 - return (EIO);
3470 + if ((rv = nvme_identify(nvme, nsid, (void **)&idctl)) != 0)
3471 + return (rv);
3453 3472
3454 3473 if (ddi_copyout(idctl, (void *)nioc->n_buf, NVME_IDENTIFY_BUFSIZE, mode)
3455 3474 != 0)
3456 3475 rv = EFAULT;
3457 3476
3458 3477 kmem_free(idctl, NVME_IDENTIFY_BUFSIZE);
3459 3478
3460 3479 return (rv);
3461 3480 }
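A user-level sketch of driving this ioctl. Assumptions: <sys/nvme.h> exposes nvme_ioctl_t, NVME_IOC_IDENTIFY_CTRL and NVME_IDENTIFY_BUFSIZE to user programs, and fd is an already opened nvme minor node; error reporting is trimmed.

	#include <sys/nvme.h>
	#include <stdint.h>
	#include <stdlib.h>
	#include <unistd.h>

	static void *
	identify_ctrl(int fd)
	{
		nvme_ioctl_t nioc = { 0 };
		void *buf = malloc(NVME_IDENTIFY_BUFSIZE);

		if (buf == NULL)
			return (NULL);

		nioc.n_len = NVME_IDENTIFY_BUFSIZE;
		nioc.n_buf = (uintptr_t)buf;

		if (ioctl(fd, NVME_IOC_IDENTIFY_CTRL, &nioc) != 0) {
			free(buf);
			return (NULL);
		}

		return (buf);	/* identify controller data, as copied out above */
	}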
3462 3481
3463 3482 static int
3464 3483 nvme_ioctl_capabilities(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc,
3465 3484 int mode, cred_t *cred_p)
3466 3485 {
3467 3486 _NOTE(ARGUNUSED(nsid, cred_p));
3468 3487 int rv = 0;
3469 3488 nvme_reg_cap_t cap = { 0 };
3470 3489 nvme_capabilities_t nc;
3471 3490
3472 3491 if ((mode & FREAD) == 0)
3473 3492 return (EPERM);
3474 3493
3475 3494 if (nioc->n_len < sizeof (nc))
3476 3495 return (EINVAL);
3477 3496
3478 3497 cap.r = nvme_get64(nvme, NVME_REG_CAP);
3479 3498
3480 3499 /*
3481 3500 * The MPSMIN and MPSMAX fields in the CAP register use 0 to
3482 3501 * specify the base page size of 4k (1<<12), so add 12 here to
3483 3502 * get the real page size value.
3484 3503 */
3485 3504 nc.mpsmax = 1 << (12 + cap.b.cap_mpsmax);
3486 3505 nc.mpsmin = 1 << (12 + cap.b.cap_mpsmin);
3487 3506
3488 3507 if (ddi_copyout(&nc, (void *)nioc->n_buf, sizeof (nc), mode) != 0)
3489 3508 rv = EFAULT;
3490 3509
3491 3510 return (rv);
3492 3511 }
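Worked example (values invented): a controller reporting cap_mpsmin == 0 and cap_mpsmax == 4 supports memory page sizes from 1 << 12 = 4096 bytes up to 1 << 16 = 65536 bytes, and those are the values returned in nc.mpsmin and nc.mpsmax.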
3493 3512
3494 3513 static int
3495 3514 nvme_ioctl_get_logpage(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc,
3496 3515 int mode, cred_t *cred_p)
3497 3516 {
3498 3517 _NOTE(ARGUNUSED(cred_p));
3499 3518 void *log = NULL;
3500 3519 size_t bufsize = 0;
3501 3520 int rv = 0;
3502 3521
3503 3522 if ((mode & FREAD) == 0)
3504 3523 return (EPERM);
3505 3524
3506 3525 switch (nioc->n_arg) {
3507 3526 case NVME_LOGPAGE_ERROR:
3508 3527 if (nsid != 0)
3509 3528 return (EINVAL);
3510 3529 break;
3511 3530 case NVME_LOGPAGE_HEALTH:
3512 3531 if (nsid != 0 && nvme->n_idctl->id_lpa.lp_smart == 0)
3513 3532 return (EINVAL);
3514 3533
3515 3534 if (nsid == 0)
3516 3535 nsid = (uint32_t)-1;
3517 3536
3518 3537 break;
3519 3538 case NVME_LOGPAGE_FWSLOT:
3520 3539 if (nsid != 0)
3521 3540 return (EINVAL);
3522 3541 break;
3523 3542 default:
3524 3543 return (EINVAL);
3525 3544 }
3526 3545
3527 3546 if (nvme_get_logpage(nvme, &log, &bufsize, nioc->n_arg, nsid)
3528 3547 != DDI_SUCCESS)
3529 3548 return (EIO);
3530 3549
3531 3550 if (nioc->n_len < bufsize) {
3532 3551 kmem_free(log, bufsize);
3533 3552 return (EINVAL);
3534 3553 }
3535 3554
3536 3555 if (ddi_copyout(log, (void *)nioc->n_buf, bufsize, mode) != 0)
3537 3556 rv = EFAULT;
3538 3557
3539 3558 nioc->n_len = bufsize;
3540 3559 kmem_free(log, bufsize);
3541 3560
3542 3561 return (rv);
3543 3562 }
3544 3563
3545 3564 static int
3546 3565 nvme_ioctl_get_features(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc,
3547 3566 int mode, cred_t *cred_p)
3548 3567 {
3549 3568 _NOTE(ARGUNUSED(cred_p));
3550 3569 void *buf = NULL;
3551 3570 size_t bufsize = 0;
3552 3571 uint32_t res = 0;
3553 3572 uint8_t feature;
3554 3573 int rv = 0;
3555 3574
3556 3575 if ((mode & FREAD) == 0)
3557 3576 return (EPERM);
3558 3577
3559 3578 if ((nioc->n_arg >> 32) > 0xff)
3560 3579 return (EINVAL);
3561 3580
3562 3581 feature = (uint8_t)(nioc->n_arg >> 32);
3563 3582
3564 3583 switch (feature) {
3565 3584 case NVME_FEAT_ARBITRATION:
3566 3585 case NVME_FEAT_POWER_MGMT:
3567 3586 case NVME_FEAT_TEMPERATURE:
3568 3587 case NVME_FEAT_ERROR:
3569 3588 case NVME_FEAT_NQUEUES:
3570 3589 case NVME_FEAT_INTR_COAL:
3571 3590 case NVME_FEAT_WRITE_ATOM:
3572 3591 case NVME_FEAT_ASYNC_EVENT:
3573 3592 case NVME_FEAT_PROGRESS:
3574 3593 if (nsid != 0)
3575 3594 return (EINVAL);
3576 3595 break;
3577 3596
3578 3597 case NVME_FEAT_INTR_VECT:
3579 3598 if (nsid != 0)
3580 3599 return (EINVAL);
3581 3600
3582 3601 res = nioc->n_arg & 0xffffffffUL;
3583 3602 if (res >= nvme->n_intr_cnt)
3584 3603 return (EINVAL);
3585 3604 break;
3586 3605
3587 3606 case NVME_FEAT_LBA_RANGE:
3588 3607 if (nvme->n_lba_range_supported == B_FALSE)
3589 3608 return (EINVAL);
3590 3609
3591 3610 if (nsid == 0 ||
3592 3611 nsid > nvme->n_namespace_count)
3593 3612 return (EINVAL);
3594 3613
3595 3614 break;
3596 3615
3597 3616 case NVME_FEAT_WRITE_CACHE:
3598 3617 if (nsid != 0)
3599 3618 return (EINVAL);
3600 3619
3601 3620 if (!nvme->n_write_cache_present)
3602 3621 return (EINVAL);
3603 3622
3604 3623 break;
3605 3624
3606 3625 case NVME_FEAT_AUTO_PST:
3607 3626 if (nsid != 0)
3608 3627 return (EINVAL);
3609 3628
3610 3629 if (!nvme->n_auto_pst_supported)
3611 3630 return (EINVAL);
3612 3631
3613 3632 break;
3614 3633
3615 3634 default:
3616 3635 return (EINVAL);
3617 3636 }
3618 3637
3619 - if (nvme_get_features(nvme, nsid, feature, &res, &buf, &bufsize) ==
3620 - B_FALSE)
3621 - return (EIO);
3638 + rv = nvme_get_features(nvme, nsid, feature, &res, &buf, &bufsize);
3639 + if (rv != 0)
3640 + return (rv);
3622 3641
3623 3642 if (nioc->n_len < bufsize) {
3624 3643 kmem_free(buf, bufsize);
3625 3644 return (EINVAL);
3626 3645 }
3627 3646
3628 3647 if (buf && ddi_copyout(buf, (void*)nioc->n_buf, bufsize, mode) != 0)
3629 3648 rv = EFAULT;
3630 3649
3631 3650 kmem_free(buf, bufsize);
3632 3651 nioc->n_arg = res;
3633 3652 nioc->n_len = bufsize;
3634 3653
3635 3654 return (rv);
3636 3655 }
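A user-level sketch of the argument encoding this handler expects: the feature id travels in the upper 32 bits of n_arg and the selected value comes back in n_arg. The ioctl command name NVME_IOC_GET_FEATURES is an assumption made to match the dispatch table further down; everything else follows the handler above.

	#include <sys/nvme.h>
	#include <stdint.h>
	#include <unistd.h>

	static int
	get_feature(int fd, uint8_t feature, uint32_t *res)
	{
		nvme_ioctl_t nioc = { 0 };

		/* nvme_ioctl_get_features() takes the feature id from n_arg >> 32 */
		nioc.n_arg = (uint64_t)feature << 32;

		if (ioctl(fd, NVME_IOC_GET_FEATURES, &nioc) != 0)	/* assumed name */
			return (-1);

		*res = (uint32_t)nioc.n_arg;	/* value returned by the driver */
		return (0);
	}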
3637 3656
3638 3657 static int
3639 3658 nvme_ioctl_intr_cnt(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc, int mode,
3640 3659 cred_t *cred_p)
3641 3660 {
3642 3661 _NOTE(ARGUNUSED(nsid, mode, cred_p));
3643 3662
3644 3663 if ((mode & FREAD) == 0)
3645 3664 return (EPERM);
3646 3665
3647 3666 nioc->n_arg = nvme->n_intr_cnt;
3648 3667 return (0);
3649 3668 }
3650 3669
3651 3670 static int
3652 3671 nvme_ioctl_version(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc, int mode,
3653 3672 cred_t *cred_p)
3654 3673 {
3655 3674 _NOTE(ARGUNUSED(nsid, cred_p));
3656 3675 int rv = 0;
3657 3676
3658 3677 if ((mode & FREAD) == 0)
3659 3678 return (EPERM);
3660 3679
3661 3680 if (nioc->n_len < sizeof (nvme->n_version))
3662 3681 return (ENOMEM);
3663 3682
3664 3683 if (ddi_copyout(&nvme->n_version, (void *)nioc->n_buf,
3665 3684 sizeof (nvme->n_version), mode) != 0)
3666 3685 rv = EFAULT;
3667 3686
3668 3687 return (rv);
3669 3688 }
3670 3689
3671 3690 static int
3672 3691 nvme_ioctl_format(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc, int mode,
3673 3692 cred_t *cred_p)
3674 3693 {
3675 3694 _NOTE(ARGUNUSED(mode));
3676 3695 nvme_format_nvm_t frmt = { 0 };
3677 3696 int c_nsid = nsid != 0 ? nsid - 1 : 0;
3678 3697
3679 3698 if ((mode & FWRITE) == 0 || secpolicy_sys_config(cred_p, B_FALSE) != 0)
3680 3699 return (EPERM);
3681 3700
3682 3701 frmt.r = nioc->n_arg & 0xffffffff;
3683 3702
3684 3703 /*
3685 3704 * Check whether the FORMAT NVM command is supported.
3686 3705 */
3687 3706 if (nvme->n_idctl->id_oacs.oa_format == 0)
3688 3707 return (EINVAL);
3689 3708
3690 3709 /*
3691 3710 * Don't allow format or secure erase of individual namespace if that
3692 3711 * would cause a format or secure erase of all namespaces.
3693 3712 */
3694 3713 if (nsid != 0 && nvme->n_idctl->id_fna.fn_format != 0)
3695 3714 return (EINVAL);
3696 3715
3697 3716 if (nsid != 0 && frmt.b.fm_ses != NVME_FRMT_SES_NONE &&
3698 3717 nvme->n_idctl->id_fna.fn_sec_erase != 0)
3699 3718 return (EINVAL);
3700 3719
3701 3720 /*
3702 3721 * Don't allow formatting with Protection Information.
3703 3722 */
3704 3723 if (frmt.b.fm_pi != 0 || frmt.b.fm_pil != 0 || frmt.b.fm_ms != 0)
3705 3724 return (EINVAL);
3706 3725
3707 3726 /*
3708 3727 * Don't allow formatting using an illegal LBA format, or any LBA format
3709 3728 * that uses metadata.
3710 3729 */
3711 3730 if (frmt.b.fm_lbaf > nvme->n_ns[c_nsid].ns_idns->id_nlbaf ||
3712 3731 nvme->n_ns[c_nsid].ns_idns->id_lbaf[frmt.b.fm_lbaf].lbaf_ms != 0)
3713 3732 return (EINVAL);
3714 3733
3715 3734 /*
3716 3735 * Don't allow formatting using an illegal Secure Erase setting.
3717 3736 */
3718 3737 if (frmt.b.fm_ses > NVME_FRMT_MAX_SES ||
3719 3738 (frmt.b.fm_ses == NVME_FRMT_SES_CRYPTO &&
3720 3739 nvme->n_idctl->id_fna.fn_crypt_erase == 0))
3721 3740 return (EINVAL);
3722 3741
3723 3742 if (nsid == 0)
3724 3743 nsid = (uint32_t)-1;
3725 3744
3726 3745 return (nvme_format_nvm(nvme, nsid, frmt.b.fm_lbaf, B_FALSE, 0, B_FALSE,
3727 3746 frmt.b.fm_ses));
3728 3747 }
3729 3748
3730 3749 static int
3731 3750 nvme_ioctl_detach(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc, int mode,
3732 3751 cred_t *cred_p)
3733 3752 {
3734 3753 _NOTE(ARGUNUSED(nioc, mode));
3735 3754 int rv = 0;
3736 3755
3737 3756 if ((mode & FWRITE) == 0 || secpolicy_sys_config(cred_p, B_FALSE) != 0)
3738 3757 return (EPERM);
3739 3758
3740 3759 if (nsid == 0)
3741 3760 return (EINVAL);
3742 3761
3743 3762 rv = bd_detach_handle(nvme->n_ns[nsid - 1].ns_bd_hdl);
3744 3763 if (rv != DDI_SUCCESS)
3745 3764 rv = EBUSY;
3746 3765
3747 3766 return (rv);
3748 3767 }
3749 3768
3750 3769 static int
3751 3770 nvme_ioctl_attach(nvme_t *nvme, int nsid, nvme_ioctl_t *nioc, int mode,
3752 3771 cred_t *cred_p)
3753 3772 {
3754 3773 _NOTE(ARGUNUSED(nioc, mode));
3755 3774 nvme_identify_nsid_t *idns;
3756 3775 int rv = 0;
3757 3776
3758 3777 if ((mode & FWRITE) == 0 || secpolicy_sys_config(cred_p, B_FALSE) != 0)
3759 3778 return (EPERM);
3760 3779
3761 3780 if (nsid == 0)
3762 3781 return (EINVAL);
3763 3782
3764 3783 /*
3765 3784 * Identify namespace again, free old identify data.
3766 3785 */
3767 3786 idns = nvme->n_ns[nsid - 1].ns_idns;
3768 3787 if (nvme_init_ns(nvme, nsid) != DDI_SUCCESS)
3769 3788 return (EIO);
3770 3789
3771 3790 kmem_free(idns, sizeof (nvme_identify_nsid_t));
3772 3791
3773 3792 rv = bd_attach_handle(nvme->n_dip, nvme->n_ns[nsid - 1].ns_bd_hdl);
3774 3793 if (rv != DDI_SUCCESS)
3775 3794 rv = EBUSY;
3776 3795
3777 3796 return (rv);
3778 3797 }
3779 3798
3780 3799 static int
3781 3800 nvme_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred_p,
3782 3801 int *rval_p)
3783 3802 {
3784 3803 #ifndef __lock_lint
3785 3804 _NOTE(ARGUNUSED(rval_p));
3786 3805 #endif
3787 3806 minor_t minor = getminor(dev);
3788 3807 nvme_t *nvme = ddi_get_soft_state(nvme_state, NVME_MINOR_INST(minor));
3789 3808 int nsid = NVME_MINOR_NSID(minor);
3790 3809 int rv = 0;
3791 3810 nvme_ioctl_t nioc;
3792 3811
3793 3812 int (*nvme_ioctl[])(nvme_t *, int, nvme_ioctl_t *, int, cred_t *) = {
3794 3813 NULL,
3795 3814 nvme_ioctl_identify,
3796 3815 nvme_ioctl_identify,
3797 3816 nvme_ioctl_capabilities,
3798 3817 nvme_ioctl_get_logpage,
3799 3818 nvme_ioctl_get_features,
3800 3819 nvme_ioctl_intr_cnt,
3801 3820 nvme_ioctl_version,
3802 3821 nvme_ioctl_format,
3803 3822 nvme_ioctl_detach,
3804 3823 nvme_ioctl_attach
3805 3824 };
3806 3825
3807 3826 if (nvme == NULL)
3808 3827 return (ENXIO);
3809 3828
3810 3829 if (nsid > nvme->n_namespace_count)
3811 3830 return (ENXIO);
3812 3831
3813 3832 if (IS_DEVCTL(cmd))
3814 3833 return (ndi_devctl_ioctl(nvme->n_dip, cmd, arg, mode, 0));
3815 3834
3816 3835 #ifdef _MULTI_DATAMODEL
3817 3836 switch (ddi_model_convert_from(mode & FMODELS)) {
3818 3837 case DDI_MODEL_ILP32: {
3819 3838 nvme_ioctl32_t nioc32;
3820 3839 if (ddi_copyin((void*)arg, &nioc32, sizeof (nvme_ioctl32_t),
3821 3840 mode) != 0)
3822 3841 return (EFAULT);
3823 3842 nioc.n_len = nioc32.n_len;
3824 3843 nioc.n_buf = nioc32.n_buf;
3825 3844 nioc.n_arg = nioc32.n_arg;
3826 3845 break;
3827 3846 }
3828 3847 case DDI_MODEL_NONE:
3829 3848 #endif
3830 3849 if (ddi_copyin((void*)arg, &nioc, sizeof (nvme_ioctl_t), mode)
3831 3850 != 0)
3832 3851 return (EFAULT);
3833 3852 #ifdef _MULTI_DATAMODEL
3834 3853 break;
3835 3854 }
3836 3855 #endif
3837 3856
3857 + if (nvme->n_dead && cmd != NVME_IOC_DETACH)
3858 + return (EIO);
3859 +
3860 +
3838 3861 if (cmd == NVME_IOC_IDENTIFY_CTRL) {
3839 3862 /*
3840 3863 * This makes NVME_IOC_IDENTIFY_CTRL work the same on devctl and
3841 3864 * attachment point nodes.
3842 3865 */
3843 3866 nsid = 0;
3844 3867 } else if (cmd == NVME_IOC_IDENTIFY_NSID && nsid == 0) {
3845 3868 /*
3846 3869 * This makes NVME_IOC_IDENTIFY_NSID work on a devctl node, it
3847 3870 * will always return identify data for namespace 1.
3848 3871 */
3849 3872 nsid = 1;
3850 3873 }
3851 3874
3852 3875 if (IS_NVME_IOC(cmd) && nvme_ioctl[NVME_IOC_CMD(cmd)] != NULL)
3853 3876 rv = nvme_ioctl[NVME_IOC_CMD(cmd)](nvme, nsid, &nioc, mode,
3854 3877 cred_p);
3855 3878 else
3856 3879 rv = EINVAL;
3857 3880
3858 3881 #ifdef _MULTI_DATAMODEL
3859 3882 switch (ddi_model_convert_from(mode & FMODELS)) {
3860 3883 case DDI_MODEL_ILP32: {
3861 3884 nvme_ioctl32_t nioc32;
3862 3885
3863 3886 nioc32.n_len = (size32_t)nioc.n_len;
3864 3887 nioc32.n_buf = (uintptr32_t)nioc.n_buf;
3865 3888 nioc32.n_arg = nioc.n_arg;
3866 3889
3867 3890 if (ddi_copyout(&nioc32, (void *)arg, sizeof (nvme_ioctl32_t),
3868 3891 mode) != 0)
3869 3892 return (EFAULT);
3870 3893 break;
3871 3894 }
3872 3895 case DDI_MODEL_NONE:
3873 3896 #endif
3874 3897 if (ddi_copyout(&nioc, (void *)arg, sizeof (nvme_ioctl_t), mode)
3875 3898 != 0)
3876 3899 return (EFAULT);
3877 3900 #ifdef _MULTI_DATAMODEL
3878 3901 break;
3879 3902 }
3880 3903 #endif
3881 3904
3882 3905 return (rv);
3883 3906 }