1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Portions Copyright (c) 2010, Oracle and/or its affiliates.
  23  * All rights reserved.
  24  */
  25 /*
  26  * Copyright (c) 2009, Intel Corporation.
  27  * All rights reserved.
  28  */
  29 /*
  30  * Copyright 2012 Garrett D'Amore <garrett@damore.org>.  All rights reserved.
  31  * Copyright 2017 Joyent, Inc.
  32  */
  33 
  34 /*
  35  * DVMA code
  36  * This file contains Intel IOMMU code that deals with DVMA
  37  * i.e. DMA remapping.
  38  */
  39 
  40 #include <sys/sysmacros.h>
  41 #include <sys/pcie.h>
  42 #include <sys/pci_cfgspace.h>
  43 #include <vm/hat_i86.h>
  44 #include <sys/memlist.h>
  45 #include <sys/acpi/acpi.h>
  46 #include <sys/acpica.h>
  47 #include <sys/modhash.h>
  48 #include <sys/immu.h>
  49 #include <sys/x86_archext.h>
  50 #include <sys/archsystm.h>
  51 
  52 #undef  TEST
  53 
  54 /*
  55  * Macros based on PCI spec
  56  */
  57 #define IMMU_PCI_REV2CLASS(r)   ((r) >> 8)  /* classcode from revid */
  58 #define IMMU_PCI_CLASS2BASE(c)  ((c) >> 16) /* baseclass from classcode */
  59 #define IMMU_PCI_CLASS2SUB(c)   (((c) >> 8) & 0xff); /* classcode */
  60 
  61 #define IMMU_CONTIG_PADDR(d, p) \
  62         ((d).dck_paddr && ((d).dck_paddr + (d).dck_npages * IMMU_PAGESIZE) \
  63             == (p))
  64 
/*
 * Argument block handed to the ancestor-walk callbacks in this file
 * (match_lpc, get_branch_domain). The callbacks read their inputs from it
 * and leave their results in it.
 */
typedef struct dvma_arg {
        /* NOTE(review): not referenced in the part of the file reviewed */
        immu_t *dva_immu;
        /* device the walk was started for */
        dev_info_t *dva_rdip;
        /* result: dip picked by the callback (updated while walking) */
        dev_info_t *dva_ddip;
        /* result: domain found by get_branch_domain, if any */
        domain_t *dva_domain;
        /* NOTE(review): not referenced in the part of the file reviewed */
        int dva_level;
        /* immu flags forwarded to immu_devi_set/immu_dvma_get_immu */
        immu_flags_t dva_flags;
        /* list of immu_devi_t searched by match_lpc */
        list_t *dva_list;
        /* DDI_SUCCESS/DDI_FAILURE status reported by the callback */
        int dva_error;
} dvma_arg_t;
  75 
/*
 * Forward declarations of static helpers that are used before their
 * definitions appear in this file.
 */
static domain_t *domain_create(immu_t *immu, dev_info_t *ddip,
    dev_info_t *rdip, immu_flags_t immu_flags);
static immu_devi_t *create_immu_devi(dev_info_t *rdip, int bus,
    int dev, int func, immu_flags_t immu_flags);
static void destroy_immu_devi(immu_devi_t *immu_devi);
static boolean_t dvma_map(domain_t *domain, uint64_t sdvma,
    uint64_t nvpages, immu_dcookie_t *dcookies, int dcount, dev_info_t *rdip,
    immu_flags_t immu_flags);
  84 
/*
 * Extern globals
 * phys_install is defined outside this file; presumably the list of
 * installed physical memory ranges -- confirm against its definition.
 */
extern struct memlist  *phys_install;
  87 
/*
 * iommulib interface functions.
 * These are the entry points installed in the immulib_ops table below;
 * their definitions are not in the first part of this file.
 */
static int immu_probe(iommulib_handle_t unitp, dev_info_t *dip);
static int immu_allochdl(iommulib_handle_t handle,
    dev_info_t *dip, dev_info_t *rdip, ddi_dma_attr_t *attr,
    int (*waitfp)(caddr_t), caddr_t arg, ddi_dma_handle_t *dma_handlep);
static int immu_freehdl(iommulib_handle_t handle,
    dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t dma_handle);
static int immu_bindhdl(iommulib_handle_t handle, dev_info_t *dip,
    dev_info_t *rdip, ddi_dma_handle_t dma_handle, struct ddi_dma_req *dma_req,
    ddi_dma_cookie_t *cookiep, uint_t *ccountp);
static int immu_unbindhdl(iommulib_handle_t handle,
    dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t dma_handle);
static int immu_sync(iommulib_handle_t handle, dev_info_t *dip,
    dev_info_t *rdip, ddi_dma_handle_t dma_handle, off_t off, size_t len,
    uint_t cachefl);
static int immu_win(iommulib_handle_t handle, dev_info_t *dip,
    dev_info_t *rdip, ddi_dma_handle_t dma_handle, uint_t win,
    off_t *offp, size_t *lenp, ddi_dma_cookie_t *cookiep, uint_t *ccountp);
static int immu_mapobject(iommulib_handle_t handle, dev_info_t *dip,
    dev_info_t *rdip, ddi_dma_handle_t dma_handle,
    struct ddi_dma_req *dmareq, ddi_dma_obj_t *dmao);
static int immu_unmapobject(iommulib_handle_t handle, dev_info_t *dip,
    dev_info_t *rdip, ddi_dma_handle_t dma_handle, ddi_dma_obj_t *dmao);
 113 
 114 /* static Globals */
 115 
/*
 * Used to setup DMA objects (memory regions)
 * for DMA reads by IOMMU units.
 * pgtable_ctor() passes this to ddi_dma_alloc_handle() when it allocates
 * the hardware page of a page table.
 * Field names in the comments below follow ddi_dma_attr(9S).
 */
static ddi_dma_attr_t immu_dma_attr = {
        DMA_ATTR_V0,            /* dma_attr_version */
        0U,                     /* dma_attr_addr_lo */
        0xffffffffffffffffULL,  /* dma_attr_addr_hi */
        0xffffffffU,            /* dma_attr_count_max */
        MMU_PAGESIZE, /* MMU page aligned */
        0x1,                    /* dma_attr_burstsizes */
        0x1,                    /* dma_attr_minxfer */
        0xffffffffU,            /* dma_attr_maxxfer */
        0xffffffffffffffffULL,  /* dma_attr_seg */
        1,                      /* dma_attr_sgllen */
        4,                      /* dma_attr_granular */
        0                       /* dma_attr_flags */
};
 134 
/*
 * Device access attributes handed to ddi_dma_mem_alloc() by pgtable_ctor():
 * no byte swapping and strict ordering, going by the constant names.
 */
static ddi_device_acc_attr_t immu_acc_attr = {
        DDI_DEVICE_ATTR_V0,
        DDI_NEVERSWAP_ACC,
        DDI_STRICTORDER_ACC
};
 140 
/*
 * iommulib ops vector for the Intel IOMMU: version, vendor and an
 * identifying string, followed by the entry points declared above.
 * NOTE(review): the NULL slot is presumably a private-data pointer that
 * is filled in elsewhere -- confirm against the iommulib_ops definition.
 */
struct iommulib_ops immulib_ops = {
        IOMMU_OPS_VERSION,
        INTEL_IOMMU,
        "Intel IOMMU",
        NULL,
        immu_probe,
        immu_allochdl,
        immu_freehdl,
        immu_bindhdl,
        immu_unbindhdl,
        immu_sync,
        immu_win,
        immu_mapobject,
        immu_unmapobject,
};
 156 
/*
 * Fake physical address range used to set up initial prealloc mappings.
 * This memory is never actually accessed. It is mapped read-only,
 * and is overwritten as soon as the first DMA bind operation is
 * performed. Since 0 is a special case, just start at the 2nd
 * physical page (address MMU_PAGESIZE).
 * NOTE(review): the initializer is positional; it presumably sets
 * dck_paddr = MMU_PAGESIZE and dck_npages = IMMU_NPREPTES -- confirm
 * against the immu_dcookie_t field order.
 */

static immu_dcookie_t immu_precookie = { MMU_PAGESIZE, IMMU_NPREPTES };
 166 
/*
 * globals private to this file
 * NOTE(review): none of these is referenced in the part of the file
 * reviewed; going by the names, one lock plus the lists of unity and
 * translating domains -- confirm at the points of use.
 */
static kmutex_t immu_domain_lock;
static list_t immu_unity_domain_list;
static list_t immu_xlate_domain_list;
 171 
/*
 * structure used to store idx into each level of the page tables
 * NOTE(review): field meanings below are read off the names; the code
 * that fills them in is not in the part of the file reviewed.
 */
typedef struct xlate {
        int xlt_level;          /* page table level of this entry */
        uint_t xlt_idx;         /* index into the table at that level */
        pgtable_t *xlt_pgtable; /* page table at that level */
} xlate_t;
 178 
/* Domain-id 0 is reserved by the VT-d spec. Solaris reserves 1 */
#define IMMU_UNITY_DID   1

/*
 * Hash from PCI (seg << 16 | bus << 8 | devfunc) to domain_t *;
 * see bdf_domain_lookup() and bdf_domain_insert().
 */
static mod_hash_t *bdf_domain_hash;

/*
 * Global (non-static) tunables.
 * NOTE(review): not referenced in the part of the file reviewed.
 */
int immu_use_alh;
int immu_use_tm;
 186 
 187 static domain_t *
 188 bdf_domain_lookup(immu_devi_t *immu_devi)
 189 {
 190         domain_t *domain;
 191         int16_t seg = immu_devi->imd_seg;
 192         int16_t bus = immu_devi->imd_bus;
 193         int16_t devfunc = immu_devi->imd_devfunc;
 194         uintptr_t bdf = (seg << 16 | bus << 8 | devfunc);
 195 
 196         if (seg < 0 || bus < 0 || devfunc < 0) {
 197                 return (NULL);
 198         }
 199 
 200         domain = NULL;
 201         if (mod_hash_find(bdf_domain_hash,
 202             (void *)bdf, (void *)&domain) == 0) {
 203                 ASSERT(domain);
 204                 ASSERT(domain->dom_did > 0);
 205                 return (domain);
 206         } else {
 207                 return (NULL);
 208         }
 209 }
 210 
 211 static void
 212 bdf_domain_insert(immu_devi_t *immu_devi, domain_t *domain)
 213 {
 214         int16_t seg = immu_devi->imd_seg;
 215         int16_t bus = immu_devi->imd_bus;
 216         int16_t devfunc = immu_devi->imd_devfunc;
 217         uintptr_t bdf = (seg << 16 | bus << 8 | devfunc);
 218 
 219         if (seg < 0 || bus < 0 || devfunc < 0) {
 220                 return;
 221         }
 222 
 223         (void) mod_hash_insert(bdf_domain_hash, (void *)bdf, (void *)domain);
 224 }
 225 
 226 static int
 227 match_lpc(dev_info_t *pdip, void *arg)
 228 {
 229         immu_devi_t *immu_devi;
 230         dvma_arg_t *dvap = (dvma_arg_t *)arg;
 231 
 232         if (list_is_empty(dvap->dva_list)) {
 233                 return (DDI_WALK_TERMINATE);
 234         }
 235 
 236         immu_devi = list_head(dvap->dva_list);
 237         for (; immu_devi; immu_devi = list_next(dvap->dva_list,
 238             immu_devi)) {
 239                 if (immu_devi->imd_dip == pdip) {
 240                         dvap->dva_ddip = pdip;
 241                         dvap->dva_error = DDI_SUCCESS;
 242                         return (DDI_WALK_TERMINATE);
 243                 }
 244         }
 245 
 246         return (DDI_WALK_CONTINUE);
 247 }
 248 
 249 static void
 250 immu_devi_set_spclist(dev_info_t *dip, immu_t *immu)
 251 {
 252         list_t *spclist = NULL;
 253         immu_devi_t *immu_devi;
 254 
 255         immu_devi = IMMU_DEVI(dip);
 256         if (immu_devi->imd_display == B_TRUE) {
 257                 spclist = &(immu->immu_dvma_gfx_list);
 258         } else if (immu_devi->imd_lpc == B_TRUE) {
 259                 spclist = &(immu->immu_dvma_lpc_list);
 260         }
 261 
 262         if (spclist) {
 263                 mutex_enter(&(immu->immu_lock));
 264                 list_insert_head(spclist, immu_devi);
 265                 mutex_exit(&(immu->immu_lock));
 266         }
 267 }
 268 
 269 /*
 270  * Set the immu_devi struct in the immu_devi field of a devinfo node
 271  */
 272 int
 273 immu_devi_set(dev_info_t *dip, immu_flags_t immu_flags)
 274 {
 275         int bus, dev, func;
 276         immu_devi_t *new_imd;
 277         immu_devi_t *immu_devi;
 278 
 279         immu_devi = immu_devi_get(dip);
 280         if (immu_devi != NULL) {
 281                 return (DDI_SUCCESS);
 282         }
 283 
 284         bus = dev = func = -1;
 285 
 286         /*
 287          * Assume a new immu_devi struct is needed
 288          */
 289         if (!DEVI_IS_PCI(dip) || acpica_get_bdf(dip, &bus, &dev, &func) != 0) {
 290                 /*
 291                  * No BDF. Set bus = -1 to indicate this.
 292                  * We still need to create a immu_devi struct
 293                  * though
 294                  */
 295                 bus = -1;
 296                 dev = 0;
 297                 func = 0;
 298         }
 299 
 300         new_imd = create_immu_devi(dip, bus, dev, func, immu_flags);
 301         if (new_imd  == NULL) {
 302                 ddi_err(DER_WARN, dip, "Failed to create immu_devi "
 303                     "structure");
 304                 return (DDI_FAILURE);
 305         }
 306 
 307         /*
 308          * Check if some other thread allocated a immu_devi while we
 309          * didn't own the lock.
 310          */
 311         mutex_enter(&(DEVI(dip)->devi_lock));
 312         if (IMMU_DEVI(dip) == NULL) {
 313                 IMMU_DEVI_SET(dip, new_imd);
 314         } else {
 315                 destroy_immu_devi(new_imd);
 316         }
 317         mutex_exit(&(DEVI(dip)->devi_lock));
 318 
 319         return (DDI_SUCCESS);
 320 }
 321 
 322 static dev_info_t *
 323 get_lpc_devinfo(immu_t *immu, dev_info_t *rdip, immu_flags_t immu_flags)
 324 {
 325         dvma_arg_t dvarg = {0};
 326         dvarg.dva_list = &(immu->immu_dvma_lpc_list);
 327         dvarg.dva_rdip = rdip;
 328         dvarg.dva_error = DDI_FAILURE;
 329 
 330         if (immu_walk_ancestor(rdip, NULL, match_lpc,
 331             &dvarg, NULL, immu_flags) != DDI_SUCCESS) {
 332                 ddi_err(DER_MODE, rdip, "Could not walk ancestors to "
 333                     "find lpc_devinfo for ISA device");
 334                 return (NULL);
 335         }
 336 
 337         if (dvarg.dva_error != DDI_SUCCESS || dvarg.dva_ddip == NULL) {
 338                 ddi_err(DER_MODE, rdip, "Could not find lpc_devinfo for "
 339                     "ISA device");
 340                 return (NULL);
 341         }
 342 
 343         return (dvarg.dva_ddip);
 344 }
 345 
 346 static dev_info_t *
 347 get_gfx_devinfo(dev_info_t *rdip)
 348 {
 349         immu_t *immu;
 350         immu_devi_t *immu_devi;
 351         list_t *list_gfx;
 352 
 353         /*
 354          * The GFX device may not be on the same iommu unit as "agpgart"
 355          * so search globally
 356          */
 357         immu_devi = NULL;
 358         immu = list_head(&immu_list);
 359         for (; immu; immu = list_next(&immu_list, immu)) {
 360                 list_gfx = &(immu->immu_dvma_gfx_list);
 361                 if (!list_is_empty(list_gfx)) {
 362                         immu_devi = list_head(list_gfx);
 363                         break;
 364                 }
 365         }
 366 
 367         if (immu_devi == NULL) {
 368                 ddi_err(DER_WARN, rdip, "iommu: No GFX device. "
 369                     "Cannot redirect agpgart");
 370                 return (NULL);
 371         }
 372 
 373         ddi_err(DER_LOG, rdip, "iommu: GFX redirect to %s",
 374             ddi_node_name(immu_devi->imd_dip));
 375 
 376         return (immu_devi->imd_dip);
 377 }
 378 
 379 static immu_flags_t
 380 dma_to_immu_flags(struct ddi_dma_req *dmareq)
 381 {
 382         immu_flags_t flags = 0;
 383 
 384         if (dmareq->dmar_fp == DDI_DMA_SLEEP) {
 385                 flags |= IMMU_FLAGS_SLEEP;
 386         } else {
 387                 flags |= IMMU_FLAGS_NOSLEEP;
 388         }
 389 
 390 #ifdef BUGGY_DRIVERS
 391 
 392         flags |= (IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
 393 
 394 #else
 395         /*
 396          * Read and write flags need to be reversed.
 397          * DMA_READ means read from device and write
 398          * to memory. So DMA read means DVMA write.
 399          */
 400         if (dmareq->dmar_flags & DDI_DMA_READ)
 401                 flags |= IMMU_FLAGS_WRITE;
 402 
 403         if (dmareq->dmar_flags & DDI_DMA_WRITE)
 404                 flags |= IMMU_FLAGS_READ;
 405 
 406         /*
 407          * Some buggy drivers specify neither READ or WRITE
 408          * For such drivers set both read and write permissions
 409          */
 410         if ((dmareq->dmar_flags & (DDI_DMA_READ | DDI_DMA_WRITE)) == 0) {
 411                 flags |= (IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
 412         }
 413 #endif
 414 
 415         return (flags);
 416 }
 417 
 418 /*ARGSUSED*/
 419 int
 420 pgtable_ctor(void *buf, void *arg, int kmflag)
 421 {
 422         size_t actual_size = 0;
 423         pgtable_t *pgtable;
 424         int (*dmafp)(caddr_t);
 425         caddr_t vaddr;
 426         void *next;
 427         uint_t flags;
 428         immu_t *immu = arg;
 429 
 430         pgtable = (pgtable_t *)buf;
 431 
 432         dmafp = (kmflag & KM_NOSLEEP) ? DDI_DMA_DONTWAIT : DDI_DMA_SLEEP;
 433 
 434         next = kmem_zalloc(IMMU_PAGESIZE, kmflag);
 435         if (next == NULL) {
 436                 return (-1);
 437         }
 438 
 439         if (ddi_dma_alloc_handle(root_devinfo, &immu_dma_attr,
 440             dmafp, NULL, &pgtable->hwpg_dmahdl) != DDI_SUCCESS) {
 441                 kmem_free(next, IMMU_PAGESIZE);
 442                 return (-1);
 443         }
 444 
 445         flags = DDI_DMA_CONSISTENT;
 446         if (!immu->immu_dvma_coherent)
 447                 flags |= IOMEM_DATA_UC_WR_COMBINE;
 448 
 449         if (ddi_dma_mem_alloc(pgtable->hwpg_dmahdl, IMMU_PAGESIZE,
 450             &immu_acc_attr, flags,
 451             dmafp, NULL, &vaddr, &actual_size,
 452             &pgtable->hwpg_memhdl) != DDI_SUCCESS) {
 453                 ddi_dma_free_handle(&pgtable->hwpg_dmahdl);
 454                 kmem_free(next, IMMU_PAGESIZE);
 455                 return (-1);
 456         }
 457 
 458         /*
 459          * Memory allocation failure. Maybe a temporary condition
 460          * so return error rather than panic, so we can try again
 461          */
 462         if (actual_size < IMMU_PAGESIZE) {
 463                 ddi_dma_mem_free(&pgtable->hwpg_memhdl);
 464                 ddi_dma_free_handle(&pgtable->hwpg_dmahdl);
 465                 kmem_free(next, IMMU_PAGESIZE);
 466                 return (-1);
 467         }
 468 
 469         pgtable->hwpg_paddr = pfn_to_pa(hat_getpfnum(kas.a_hat, vaddr));
 470         pgtable->hwpg_vaddr = vaddr;
 471         pgtable->swpg_next_array = next;
 472 
 473         rw_init(&(pgtable->swpg_rwlock), NULL, RW_DEFAULT, NULL);
 474 
 475         return (0);
 476 }
 477 
 478 /*ARGSUSED*/
 479 void
 480 pgtable_dtor(void *buf, void *arg)
 481 {
 482         pgtable_t *pgtable;
 483 
 484         pgtable = (pgtable_t *)buf;
 485 
 486         /* destroy will panic if lock is held. */
 487         rw_destroy(&(pgtable->swpg_rwlock));
 488 
 489         ddi_dma_mem_free(&pgtable->hwpg_memhdl);
 490         ddi_dma_free_handle(&pgtable->hwpg_dmahdl);
 491         kmem_free(pgtable->swpg_next_array, IMMU_PAGESIZE);
 492 }
 493 
 494 /*
 495  * pgtable_alloc()
 496  *      alloc a IOMMU pgtable structure.
 497  *      This same struct is used for root and context tables as well.
 498  *      This routine allocs the f/ollowing:
 499  *      - a pgtable_t struct
 500  *      - a HW page which holds PTEs/entries which is accesssed by HW
 501  *        so we set up DMA for this page
 502  *      - a SW page which is only for our bookeeping
 503  *        (for example to  hold pointers to the next level pgtable).
 504  *        So a simple kmem_alloc suffices
 505  */
 506 static pgtable_t *
 507 pgtable_alloc(immu_t *immu, immu_flags_t immu_flags)
 508 {
 509         pgtable_t *pgtable;
 510         int kmflags;
 511 
 512         kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
 513 
 514         pgtable = kmem_cache_alloc(immu->immu_pgtable_cache, kmflags);
 515         if (pgtable == NULL) {
 516                 return (NULL);
 517         }
 518         return (pgtable);
 519 }
 520 
 521 static void
 522 pgtable_zero(pgtable_t *pgtable)
 523 {
 524         bzero(pgtable->hwpg_vaddr, IMMU_PAGESIZE);
 525         bzero(pgtable->swpg_next_array, IMMU_PAGESIZE);
 526 }
 527 
/*
 * pgtable_free()
 *      Return pgtable to the pgtable cache of immu. The counterpart of
 *      pgtable_alloc().
 */
static void
pgtable_free(immu_t *immu, pgtable_t *pgtable)
{
        kmem_cache_free(immu->immu_pgtable_cache, pgtable);
}
 533 
 534 /*
 535  * Function to identify a display device from the PCI class code
 536  */
 537 static boolean_t
 538 device_is_display(uint_t classcode)
 539 {
 540         static uint_t disp_classes[] = {
 541                 0x000100,
 542                 0x030000,
 543                 0x030001
 544         };
 545         int i, nclasses = sizeof (disp_classes) / sizeof (uint_t);
 546 
 547         for (i = 0; i < nclasses; i++) {
 548                 if (classcode == disp_classes[i])
 549                         return (B_TRUE);
 550         }
 551         return (B_FALSE);
 552 }
 553 
 554 /*
 555  * Function that determines if device is PCIEX and/or PCIEX bridge
 556  */
 557 static boolean_t
 558 device_is_pciex(
 559         uchar_t bus, uchar_t dev, uchar_t func, boolean_t *is_pcib)
 560 {
 561         ushort_t cap;
 562         ushort_t capsp;
 563         ushort_t cap_count = PCI_CAP_MAX_PTR;
 564         ushort_t status;
 565         boolean_t is_pciex = B_FALSE;
 566 
 567         *is_pcib = B_FALSE;
 568 
 569         status = pci_getw_func(bus, dev, func, PCI_CONF_STAT);
 570         if (!(status & PCI_STAT_CAP))
 571                 return (B_FALSE);
 572 
 573         capsp = pci_getb_func(bus, dev, func, PCI_CONF_CAP_PTR);
 574         while (cap_count-- && capsp >= PCI_CAP_PTR_OFF) {
 575                 capsp &= PCI_CAP_PTR_MASK;
 576                 cap = pci_getb_func(bus, dev, func, capsp);
 577 
 578                 if (cap == PCI_CAP_ID_PCI_E) {
 579                         status = pci_getw_func(bus, dev, func, capsp + 2);
 580                         /*
 581                          * See section 7.8.2 of PCI-Express Base Spec v1.0a
 582                          * for Device/Port Type.
 583                          * PCIE_PCIECAP_DEV_TYPE_PCIE2PCI implies that the
 584                          * device is a PCIE2PCI bridge
 585                          */
 586                         *is_pcib =
 587                             ((status & PCIE_PCIECAP_DEV_TYPE_MASK) ==
 588                             PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) ? B_TRUE : B_FALSE;
 589                         is_pciex = B_TRUE;
 590                 }
 591 
 592                 capsp = (*pci_getb_func)(bus, dev, func,
 593                     capsp + PCI_CAP_NEXT_PTR);
 594         }
 595 
 596         return (is_pciex);
 597 }
 598 
 599 static boolean_t
 600 device_use_premap(uint_t classcode)
 601 {
 602         if (IMMU_PCI_CLASS2BASE(classcode) == PCI_CLASS_NET)
 603                 return (B_TRUE);
 604         return (B_FALSE);
 605 }
 606 
 607 
/*
 * immu_dvma_get_immu()
 *   get the immu unit structure for a dev_info node
 *
 *   The unit is cached in the immu_devi of dip (imd_immu). On the first
 *   call for a dip it is obtained from immu_dmar_get_immu(), stored in
 *   imd_immu, and the device is put on the unit's special-device list
 *   if applicable (immu_devi_set_spclist()).
 *
 *   Every failure in here is reported with ddi_err(DER_PANIC, ...); the
 *   NOTREACHED annotations indicate those calls are not expected to return.
 */
immu_t *
immu_dvma_get_immu(dev_info_t *dip, immu_flags_t immu_flags)
{
        immu_devi_t *immu_devi;
        immu_t *immu;

        /*
         * check if immu unit was already found earlier.
         * If yes, then it will be stashed in immu_devi struct.
         */
        immu_devi = immu_devi_get(dip);
        if (immu_devi == NULL) {
                if (immu_devi_set(dip, immu_flags) != DDI_SUCCESS) {
                        /*
                         * May fail because of low memory.
                         * NOTE(review): an earlier comment here said the
                         * error is returned so the driver can retry, but
                         * the code below panics (DER_PANIC).
                         */
                        ddi_err(DER_PANIC, dip, "immu_dvma_get_immu: "
                            "No immu_devi structure");
                        /*NOTREACHED*/
                }
                immu_devi = immu_devi_get(dip);
        }

        /* fast path: the unit was cached by an earlier call */
        mutex_enter(&(DEVI(dip)->devi_lock));
        if (immu_devi->imd_immu) {
                immu = immu_devi->imd_immu;
                mutex_exit(&(DEVI(dip)->devi_lock));
                return (immu);
        }
        mutex_exit(&(DEVI(dip)->devi_lock));

        /* the lookup is done without holding devi_lock */
        immu = immu_dmar_get_immu(dip);
        if (immu == NULL) {
                ddi_err(DER_PANIC, dip, "immu_dvma_get_immu: "
                    "Cannot find immu_t for device");
                /*NOTREACHED*/
        }

        /*
         * Check if some other thread found immu
         * while lock was not held
         */
        immu_devi = immu_devi_get(dip);
        /* immu_devi should be present as we found it earlier */
        if (immu_devi == NULL) {
                ddi_err(DER_PANIC, dip,
                    "immu_dvma_get_immu: No immu_devi structure");
                /*NOTREACHED*/
        }

        mutex_enter(&(DEVI(dip)->devi_lock));
        if (immu_devi->imd_immu == NULL) {
                /*
                 * nobody else set it, so we should do it.
                 * Note that immu_devi_set_spclist() takes immu->immu_lock
                 * while devi_lock is still held here.
                 */
                immu_devi->imd_immu = immu;
                immu_devi_set_spclist(dip, immu);
        } else {
                /*
                 * if some other thread got immu before
                 * us, it should get the same results
                 */
                if (immu_devi->imd_immu != immu) {
                        ddi_err(DER_PANIC, dip, "Multiple "
                            "immu units found for device. Expected (%p), "
                            "actual (%p)", (void *)immu,
                            (void *)immu_devi->imd_immu);
                        mutex_exit(&(DEVI(dip)->devi_lock));
                        /*NOTREACHED*/
                }
        }
        mutex_exit(&(DEVI(dip)->devi_lock));

        return (immu);
}
 686 
 687 
 688 /* ############################# IMMU_DEVI code ############################ */
 689 
 690 /*
 691  * Allocate a immu_devi structure and initialize it
 692  */
 693 static immu_devi_t *
 694 create_immu_devi(dev_info_t *rdip, int bus, int dev, int func,
 695     immu_flags_t immu_flags)
 696 {
 697         uchar_t baseclass, subclass;
 698         uint_t classcode, revclass;
 699         immu_devi_t *immu_devi;
 700         boolean_t pciex = B_FALSE;
 701         int kmflags;
 702         boolean_t is_pcib = B_FALSE;
 703 
 704         /* bus ==  -1 indicate non-PCI device (no BDF) */
 705         ASSERT(bus == -1 || bus >= 0);
 706         ASSERT(dev >= 0);
 707         ASSERT(func >= 0);
 708 
 709         kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
 710         immu_devi = kmem_zalloc(sizeof (immu_devi_t), kmflags);
 711         if (immu_devi == NULL) {
 712                 ddi_err(DER_WARN, rdip, "Failed to allocate memory for "
 713                     "Intel IOMMU immu_devi structure");
 714                 return (NULL);
 715         }
 716         immu_devi->imd_dip = rdip;
 717         immu_devi->imd_seg = 0; /* Currently seg can only be 0 */
 718         immu_devi->imd_bus = bus;
 719         immu_devi->imd_pcib_type = IMMU_PCIB_BAD;
 720 
 721         if (bus == -1) {
 722                 immu_devi->imd_pcib_type = IMMU_PCIB_NOBDF;
 723                 return (immu_devi);
 724         }
 725 
 726         immu_devi->imd_devfunc = IMMU_PCI_DEVFUNC(dev, func);
 727         immu_devi->imd_sec = 0;
 728         immu_devi->imd_sub = 0;
 729 
 730         revclass = pci_getl_func(bus, dev, func, PCI_CONF_REVID);
 731 
 732         classcode = IMMU_PCI_REV2CLASS(revclass);
 733         baseclass = IMMU_PCI_CLASS2BASE(classcode);
 734         subclass = IMMU_PCI_CLASS2SUB(classcode);
 735 
 736         if (baseclass == PCI_CLASS_BRIDGE && subclass == PCI_BRIDGE_PCI) {
 737 
 738                 immu_devi->imd_sec = pci_getb_func(bus, dev, func,
 739                     PCI_BCNF_SECBUS);
 740                 immu_devi->imd_sub = pci_getb_func(bus, dev, func,
 741                     PCI_BCNF_SUBBUS);
 742 
 743                 pciex = device_is_pciex(bus, dev, func, &is_pcib);
 744                 if (pciex  == B_TRUE && is_pcib == B_TRUE) {
 745                         immu_devi->imd_pcib_type = IMMU_PCIB_PCIE_PCI;
 746                 } else if (pciex == B_TRUE) {
 747                         immu_devi->imd_pcib_type = IMMU_PCIB_PCIE_PCIE;
 748                 } else {
 749                         immu_devi->imd_pcib_type = IMMU_PCIB_PCI_PCI;
 750                 }
 751         } else {
 752                 immu_devi->imd_pcib_type = IMMU_PCIB_ENDPOINT;
 753         }
 754 
 755         /* check for certain special devices */
 756         immu_devi->imd_display = device_is_display(classcode);
 757         immu_devi->imd_lpc = ((baseclass == PCI_CLASS_BRIDGE) &&
 758             (subclass == PCI_BRIDGE_ISA)) ? B_TRUE : B_FALSE;
 759         immu_devi->imd_use_premap = device_use_premap(classcode);
 760 
 761         immu_devi->imd_domain = NULL;
 762 
 763         immu_devi->imd_dvma_flags = immu_global_dvma_flags;
 764 
 765         return (immu_devi);
 766 }
 767 
/*
 * destroy_immu_devi()
 *      Free an immu_devi structure allocated by create_immu_devi().
 *      Only the structure itself is freed.
 */
static void
destroy_immu_devi(immu_devi_t *immu_devi)
{
        kmem_free(immu_devi, sizeof (immu_devi_t));
}
 773 
 774 static domain_t *
 775 immu_devi_domain(dev_info_t *rdip, dev_info_t **ddipp)
 776 {
 777         immu_devi_t *immu_devi;
 778         domain_t *domain;
 779         dev_info_t *ddip;
 780 
 781         *ddipp = NULL;
 782 
 783         immu_devi = immu_devi_get(rdip);
 784         if (immu_devi == NULL) {
 785                 return (NULL);
 786         }
 787 
 788         mutex_enter(&(DEVI(rdip)->devi_lock));
 789         domain = immu_devi->imd_domain;
 790         ddip = immu_devi->imd_ddip;
 791         mutex_exit(&(DEVI(rdip)->devi_lock));
 792 
 793         if (domain)
 794                 *ddipp = ddip;
 795 
 796         return (domain);
 797 
 798 }
 799 
 800 /* ############################# END IMMU_DEVI code ######################## */
 801 /* ############################# DOMAIN code ############################### */
 802 
 803 /*
 804  * This routine always succeeds
 805  */
 806 static int
 807 did_alloc(immu_t *immu, dev_info_t *rdip,
 808     dev_info_t *ddip, immu_flags_t immu_flags)
 809 {
 810         int did;
 811 
 812         did = (uintptr_t)vmem_alloc(immu->immu_did_arena, 1,
 813             (immu_flags & IMMU_FLAGS_NOSLEEP) ? VM_NOSLEEP : VM_SLEEP);
 814 
 815         if (did == 0) {
 816                 ddi_err(DER_WARN, rdip, "device domain-id alloc error"
 817                     " domain-device: %s%d. immu unit is %s. Using "
 818                     "unity domain with domain-id (%d)",
 819                     ddi_driver_name(ddip), ddi_get_instance(ddip),
 820                     immu->immu_name, immu->immu_unity_domain->dom_did);
 821                 did = immu->immu_unity_domain->dom_did;
 822         }
 823 
 824         return (did);
 825 }
 826 
/*
 * get_branch_domain()
 *      Ancestor-walk callback; arg is a dvma_arg_t. Called for each node
 *      pdip on the walk, it accumulates in the dvma_arg_t the domain
 *      (dva_domain) and the domain-dip (dva_ddip) for the device the walk
 *      was started for (dva_rdip).
 *
 *      Returns DDI_WALK_TERMINATE when an immu_devi cannot be set up
 *      (dva_error = DDI_FAILURE), when a PCIE-PCIE bridge ancestor is
 *      reached, or when pdip already has both a domain and a ddip
 *      recorded. Returns DDI_WALK_CONTINUE otherwise.
 */
static int
get_branch_domain(dev_info_t *pdip, void *arg)
{
        immu_devi_t *immu_devi;
        domain_t *domain;
        dev_info_t *ddip;
        immu_t *immu;
        dvma_arg_t *dvp = (dvma_arg_t *)arg;

        /*
         * The field dvp->dva_ddip is a work-in-progress
         * and gets updated as we walk up the ancestor
         * tree. The final ddip is set only when we reach
         * the top of the tree. So the dvp->dva_ddip field cannot
         * be relied on until we reach the top of the tree.
         */

        /* immu_devi may not be set. */
        immu_devi = immu_devi_get(pdip);
        if (immu_devi == NULL) {
                if (immu_devi_set(pdip, dvp->dva_flags) != DDI_SUCCESS) {
                        dvp->dva_error = DDI_FAILURE;
                        return (DDI_WALK_TERMINATE);
                }
        }

        /* re-fetch: immu_devi_set() may just have attached it */
        immu_devi = immu_devi_get(pdip);
        immu = immu_devi->imd_immu;
        if (immu == NULL)
                immu = immu_dvma_get_immu(pdip, dvp->dva_flags);

        /*
         * If we encounter a PCIE_PCIE bridge *ANCESTOR* we need to
         * terminate the walk (since the device under the PCIE bridge
         * is a PCIE device and has an independent entry in the
         * root/context table)
         */
        if (dvp->dva_rdip != pdip &&
            immu_devi->imd_pcib_type == IMMU_PCIB_PCIE_PCIE) {
                return (DDI_WALK_TERMINATE);
        }

        /*
         * In order to be a domain-dip, it must be a PCI device i.e.
         * must have valid BDF. This also eliminates the root complex.
         */
        if (immu_devi->imd_pcib_type != IMMU_PCIB_BAD &&
            immu_devi->imd_pcib_type != IMMU_PCIB_NOBDF) {
                ASSERT(immu_devi->imd_bus >= 0);
                ASSERT(immu_devi->imd_devfunc >= 0);
                dvp->dva_ddip = pdip;
        }

        /* display devices and unity requests use the unity domain */
        if (immu_devi->imd_display == B_TRUE ||
            (dvp->dva_flags & IMMU_FLAGS_UNITY)) {
                dvp->dva_domain = immu->immu_unity_domain;
                /* continue walking to find ddip */
                return (DDI_WALK_CONTINUE);
        }

        mutex_enter(&(DEVI(pdip)->devi_lock));
        domain = immu_devi->imd_domain;
        ddip = immu_devi->imd_ddip;
        mutex_exit(&(DEVI(pdip)->devi_lock));

        if (domain && ddip) {
                /* if domain is set, it must be the same */
                if (dvp->dva_domain) {
                        ASSERT(domain == dvp->dva_domain);
                }
                dvp->dva_domain = domain;
                dvp->dva_ddip = ddip;
                return (DDI_WALK_TERMINATE);
        }

        /* Domain may already be set, continue walking so that ddip gets set */
        if (dvp->dva_domain) {
                return (DDI_WALK_CONTINUE);
        }

        /* domain is not set in either immu_devi or dvp */
        domain = bdf_domain_lookup(immu_devi);
        if (domain == NULL) {
                return (DDI_WALK_CONTINUE);
        }

        /* ok, the BDF hash had a domain for this BDF. */

        /*
         * Grab lock again to check if something else set immu_devi fields.
         * NOTE(review): both arms of the if/else below are identical, so
         * the check currently has no effect; the domain from the BDF hash
         * is used either way. Confirm whether the first arm was meant to
         * use immu_devi->imd_domain.
         */
        mutex_enter(&(DEVI(pdip)->devi_lock));
        if (immu_devi->imd_domain != NULL) {
                dvp->dva_domain = domain;
        } else {
                dvp->dva_domain = domain;
        }
        mutex_exit(&(DEVI(pdip)->devi_lock));

        /*
         * walk upwards until the topmost PCI bridge is found
         */
        return (DDI_WALK_CONTINUE);

}
 930 
 931 static void
 932 map_unity_domain(domain_t *domain)
 933 {
 934         struct memlist *mp;
 935         uint64_t start;
 936         uint64_t npages;
 937         immu_dcookie_t dcookies[1] = {0};
 938         int dcount = 0;
 939 
 940         /*
 941          * UNITY arenas are a mirror of the physical memory
 942          * installed on the system.
 943          */
 944 
 945 #ifdef BUGGY_DRIVERS
 946         /*
 947          * Dont skip page0. Some broken HW/FW access it.
 948          */
 949         dcookies[0].dck_paddr = 0;
 950         dcookies[0].dck_npages = 1;
 951         dcount = 1;
 952         (void) dvma_map(domain, 0, 1, dcookies, dcount, NULL,
 953             IMMU_FLAGS_READ | IMMU_FLAGS_WRITE | IMMU_FLAGS_PAGE1);
 954 #endif
 955 
 956         memlist_read_lock();
 957 
 958         mp = phys_install;
 959 
 960         if (mp->ml_address == 0) {
 961                 /* since we already mapped page1 above */
 962                 start = IMMU_PAGESIZE;
 963         } else {
 964                 start = mp->ml_address;
 965         }
 966         npages = mp->ml_size/IMMU_PAGESIZE + 1;
 967 
 968         dcookies[0].dck_paddr = start;
 969         dcookies[0].dck_npages = npages;
 970         dcount = 1;
 971         (void) dvma_map(domain, start, npages, dcookies,
 972             dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
 973 
 974         ddi_err(DER_LOG, domain->dom_dip, "iommu: mapping PHYS span [0x%" PRIx64
 975             " - 0x%" PRIx64 "]", start, start + mp->ml_size);
 976 
 977         mp = mp->ml_next;
 978         while (mp) {
 979                 ddi_err(DER_LOG, domain->dom_dip,
 980                     "iommu: mapping PHYS span [0x%" PRIx64 " - 0x%" PRIx64 "]",
 981                     mp->ml_address, mp->ml_address + mp->ml_size);
 982 
 983                 start = mp->ml_address;
 984                 npages = mp->ml_size/IMMU_PAGESIZE + 1;
 985 
 986                 dcookies[0].dck_paddr = start;
 987                 dcookies[0].dck_npages = npages;
 988                 dcount = 1;
 989                 (void) dvma_map(domain, start, npages,
 990                     dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
 991                 mp = mp->ml_next;
 992         }
 993 
 994         mp = bios_rsvd;
 995         while (mp) {
 996                 ddi_err(DER_LOG, domain->dom_dip,
 997                     "iommu: mapping PHYS span [0x%" PRIx64 " - 0x%" PRIx64 "]",
 998                     mp->ml_address, mp->ml_address + mp->ml_size);
 999 
1000                 start = mp->ml_address;
1001                 npages = mp->ml_size/IMMU_PAGESIZE + 1;
1002 
1003                 dcookies[0].dck_paddr = start;
1004                 dcookies[0].dck_npages = npages;
1005                 dcount = 1;
1006                 (void) dvma_map(domain, start, npages,
1007                     dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
1008 
1009                 mp = mp->ml_next;
1010         }
1011 
1012         memlist_read_unlock();
1013 }
1014 
1015 /*
1016  * create_xlate_arena()
1017  *      Create the dvma arena for a domain with translation
1018  *      mapping
1019  */
1020 static void
1021 create_xlate_arena(immu_t *immu, domain_t *domain,
1022     dev_info_t *rdip, immu_flags_t immu_flags)
1023 {
1024         char *arena_name;
1025         struct memlist *mp;
1026         int vmem_flags;
1027         uint64_t start;
1028         uint_t mgaw;
1029         uint64_t size;
1030         uint64_t maxaddr;
1031         void *vmem_ret;
1032 
1033         arena_name = domain->dom_dvma_arena_name;
1034 
1035         /* Note, don't do sizeof (arena_name) - it is just a pointer */
1036         (void) snprintf(arena_name,
1037             sizeof (domain->dom_dvma_arena_name),
1038             "%s-domain-%d-xlate-DVMA-arena", immu->immu_name,
1039             domain->dom_did);
1040 
1041         vmem_flags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? VM_NOSLEEP : VM_SLEEP;
1042 
1043         /* Restrict mgaddr (max guest addr) to MGAW */
1044         mgaw = IMMU_CAP_MGAW(immu->immu_regs_cap);
1045 
1046         /*
1047          * To ensure we avoid ioapic and PCI MMIO ranges we just
1048          * use the physical memory address range of the system as the
1049          * range
1050          */
1051         maxaddr = ((uint64_t)1 << mgaw);
1052 
1053         memlist_read_lock();
1054 
1055         mp = phys_install;
1056 
1057         if (mp->ml_address == 0)
1058                 start = MMU_PAGESIZE;
1059         else
1060                 start = mp->ml_address;
1061 
1062         if (start + mp->ml_size > maxaddr)
1063                 size = maxaddr - start;
1064         else
1065                 size = mp->ml_size;
1066 
1067         ddi_err(DER_VERB, rdip,
1068             "iommu: %s: Creating dvma vmem arena [0x%" PRIx64
1069             " - 0x%" PRIx64 "]", arena_name, start, start + size);
1070 
1071         /*
1072          * We always allocate in quanta of IMMU_PAGESIZE
1073          */
1074         domain->dom_dvma_arena = vmem_create(arena_name,
1075             (void *)(uintptr_t)start,   /* start addr */
1076             size,                       /* size */
1077             IMMU_PAGESIZE,              /* quantum */
1078             NULL,                       /* afunc */
1079             NULL,                       /* ffunc */
1080             NULL,                       /* source */
1081             0,                          /* qcache_max */
1082             vmem_flags);
1083 
1084         if (domain->dom_dvma_arena == NULL) {
1085                 ddi_err(DER_PANIC, rdip,
1086                     "Failed to allocate DVMA arena(%s) "
1087                     "for domain ID (%d)", arena_name, domain->dom_did);
1088                 /*NOTREACHED*/
1089         }
1090 
1091         mp = mp->ml_next;
1092         while (mp) {
1093 
1094                 if (mp->ml_address == 0)
1095                         start = MMU_PAGESIZE;
1096                 else
1097                         start = mp->ml_address;
1098 
1099                 if (start + mp->ml_size > maxaddr)
1100                         size = maxaddr - start;
1101                 else
1102                         size = mp->ml_size;
1103 
1104                 ddi_err(DER_VERB, rdip,
1105                     "iommu: %s: Adding dvma vmem span [0x%" PRIx64
1106                     " - 0x%" PRIx64 "]", arena_name, start,
1107                     start + size);
1108 
1109                 vmem_ret = vmem_add(domain->dom_dvma_arena,
1110                     (void *)(uintptr_t)start, size,  vmem_flags);
1111 
1112                 if (vmem_ret == NULL) {
1113                         ddi_err(DER_PANIC, rdip,
1114                             "Failed to allocate DVMA arena(%s) "
1115                             "for domain ID (%d)",
1116                             arena_name, domain->dom_did);
1117                         /*NOTREACHED*/
1118                 }
1119                 mp = mp->ml_next;
1120         }
1121         memlist_read_unlock();
1122 }
1123 
1124 /* ################################### DOMAIN CODE ######################### */
1125 
1126 /*
1127  * Set the domain and domain-dip for a dip
1128  */
1129 static void
1130 set_domain(
1131         dev_info_t *dip,
1132         dev_info_t *ddip,
1133         domain_t *domain)
1134 {
1135         immu_devi_t *immu_devi;
1136         domain_t *fdomain;
1137         dev_info_t *fddip;
1138 
1139         immu_devi = immu_devi_get(dip);
1140 
1141         mutex_enter(&(DEVI(dip)->devi_lock));
1142         fddip = immu_devi->imd_ddip;
1143         fdomain = immu_devi->imd_domain;
1144 
1145         if (fddip) {
1146                 ASSERT(fddip == ddip);
1147         } else {
1148                 immu_devi->imd_ddip = ddip;
1149         }
1150 
1151         if (fdomain) {
1152                 ASSERT(fdomain == domain);
1153         } else {
1154                 immu_devi->imd_domain = domain;
1155         }
1156         mutex_exit(&(DEVI(dip)->devi_lock));
1157 }
1158 
1159 /*
1160  * device_domain()
1161  *      Get domain for a device. The domain may be global in which case it
1162  *      is shared between all IOMMU units. Due to potential AGAW differences
1163  *      between IOMMU units, such global domains *have to be* UNITY mapping
1164  *      domains. Alternatively, the domain may be local to a IOMMU unit.
1165  *      Local domains may be shared or immu_devi, although the
1166  *      scope of sharing
1167  *      is restricted to devices controlled by the IOMMU unit to
1168  *      which the domain
1169  *      belongs. If shared, they (currently) have to be UNITY domains. If
1170  *      immu_devi a domain may be either UNITY or translation (XLATE) domain.
1171  */
1172 static domain_t *
1173 device_domain(dev_info_t *rdip, dev_info_t **ddipp, immu_flags_t immu_flags)
1174 {
1175         dev_info_t *ddip; /* topmost dip in domain i.e. domain owner */
1176         immu_t *immu;
1177         domain_t *domain;
1178         dvma_arg_t dvarg = {0};
1179         int level;
1180 
1181         *ddipp = NULL;
1182 
1183         /*
1184          * Check if the domain is already set. This is usually true
1185          * if this is not the first DVMA transaction.
1186          */
1187         ddip = NULL;
1188         domain = immu_devi_domain(rdip, &ddip);
1189         if (domain) {
1190                 *ddipp = ddip;
1191                 return (domain);
1192         }
1193 
1194         immu = immu_dvma_get_immu(rdip, immu_flags);
1195         if (immu == NULL) {
1196                 /*
1197                  * possible that there is no IOMMU unit for this device
1198                  * - BIOS bugs are one example.
1199                  */
1200                 ddi_err(DER_WARN, rdip, "No iommu unit found for device");
1201                 return (NULL);
1202         }
1203 
1204         immu_flags |= immu_devi_get(rdip)->imd_dvma_flags;
1205 
1206         dvarg.dva_rdip = rdip;
1207         dvarg.dva_ddip = NULL;
1208         dvarg.dva_domain = NULL;
1209         dvarg.dva_flags = immu_flags;
1210         level = 0;
1211         if (immu_walk_ancestor(rdip, NULL, get_branch_domain,
1212             &dvarg, &level, immu_flags) != DDI_SUCCESS) {
1213                 /*
1214                  * maybe low memory. return error,
1215                  * so driver tries again later
1216                  */
1217                 return (NULL);
1218         }
1219 
1220         /* should have walked at least 1 dip (i.e. edip) */
1221         ASSERT(level > 0);
1222 
1223         ddip = dvarg.dva_ddip;  /* must be present */
1224         domain = dvarg.dva_domain;      /* may be NULL */
1225 
1226         /*
1227          * We may find the domain during our ancestor walk on any one of our
1228          * ancestor dips, If the domain is found then the domain-dip
1229          * (i.e. ddip) will also be found in the same immu_devi struct.
1230          * The domain-dip is the highest ancestor dip which shares the
1231          * same domain with edip.
1232          * The domain may or may not be found, but the domain dip must
1233          * be found.
1234          */
1235         if (ddip == NULL) {
1236                 ddi_err(DER_MODE, rdip, "Cannot find domain dip for device.");
1237                 return (NULL);
1238         }
1239 
1240         /*
1241          * Did we find a domain ?
1242          */
1243         if (domain) {
1244                 goto found;
1245         }
1246 
1247         /* nope, so allocate */
1248         domain = domain_create(immu, ddip, rdip, immu_flags);
1249         if (domain == NULL) {
1250                 return (NULL);
1251         }
1252 
1253         /*FALLTHROUGH*/
1254 found:
1255         /*
1256          * We know *domain *is* the right domain, so panic if
1257          * another domain is set for either the request-dip or
1258          * effective dip.
1259          */
1260         set_domain(ddip, ddip, domain);
1261         set_domain(rdip, ddip, domain);
1262 
1263         *ddipp = ddip;
1264         return (domain);
1265 }
1266 
1267 static void
1268 create_unity_domain(immu_t *immu)
1269 {
1270         domain_t *domain;
1271 
1272         /* domain created during boot and always use sleep flag */
1273         domain = kmem_zalloc(sizeof (domain_t), KM_SLEEP);
1274 
1275         rw_init(&(domain->dom_pgtable_rwlock), NULL, RW_DEFAULT, NULL);
1276 
1277         domain->dom_did = IMMU_UNITY_DID;
1278         domain->dom_maptype = IMMU_MAPTYPE_UNITY;
1279 
1280         domain->dom_immu = immu;
1281         immu->immu_unity_domain = domain;
1282 
1283         /*
1284          * Setup the domain's initial page table
1285          * should never fail.
1286          */
1287         domain->dom_pgtable_root = pgtable_alloc(immu, IMMU_FLAGS_SLEEP);
1288         pgtable_zero(domain->dom_pgtable_root);
1289 
1290         /*
1291          * Only map all physical memory in to the unity domain
1292          * if passthrough is not supported. If it is supported,
1293          * passthrough is set in the context entry instead.
1294          */
1295         if (!IMMU_ECAP_GET_PT(immu->immu_regs_excap))
1296                 map_unity_domain(domain);
1297 
1298 
1299         /*
1300          * put it on the system-wide UNITY domain list
1301          */
1302         mutex_enter(&(immu_domain_lock));
1303         list_insert_tail(&immu_unity_domain_list, domain);
1304         mutex_exit(&(immu_domain_lock));
1305 }
1306 
1307 /*
1308  * ddip is the domain-dip - the topmost dip in a domain
1309  * rdip is the requesting-dip - the device which is
1310  * requesting DVMA setup
1311  * if domain is a non-shared domain rdip == ddip
1312  */
1313 static domain_t *
1314 domain_create(immu_t *immu, dev_info_t *ddip, dev_info_t *rdip,
1315     immu_flags_t immu_flags)
1316 {
1317         int kmflags;
1318         domain_t *domain;
1319         char mod_hash_name[128];
1320         immu_devi_t *immu_devi;
1321         int did;
1322         immu_dcookie_t dcookies[1] = {0};
1323         int dcount = 0;
1324 
1325         immu_devi = immu_devi_get(rdip);
1326 
1327         /*
1328          * First allocate a domainid.
1329          * This routine will never fail, since if we run out
1330          * of domains the unity domain will be allocated.
1331          */
1332         did = did_alloc(immu, rdip, ddip, immu_flags);
1333         if (did == IMMU_UNITY_DID) {
1334                 /* domain overflow */
1335                 ASSERT(immu->immu_unity_domain);
1336                 return (immu->immu_unity_domain);
1337         }
1338 
1339         kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
1340         domain = kmem_zalloc(sizeof (domain_t), kmflags);
1341         if (domain == NULL) {
1342                 ddi_err(DER_PANIC, rdip, "Failed to alloc DVMA domain "
1343                     "structure for device. IOMMU unit: %s", immu->immu_name);
1344                 /*NOTREACHED*/
1345         }
1346 
1347         rw_init(&(domain->dom_pgtable_rwlock), NULL, RW_DEFAULT, NULL);
1348 
1349         (void) snprintf(mod_hash_name, sizeof (mod_hash_name),
1350             "immu%s-domain%d-pava-hash", immu->immu_name, did);
1351 
1352         domain->dom_did = did;
1353         domain->dom_immu = immu;
1354         domain->dom_maptype = IMMU_MAPTYPE_XLATE;
1355         domain->dom_dip = ddip;
1356 
1357         /*
1358          * Create xlate DVMA arena for this domain.
1359          */
1360         create_xlate_arena(immu, domain, rdip, immu_flags);
1361 
1362         /*
1363          * Setup the domain's initial page table
1364          */
1365         domain->dom_pgtable_root = pgtable_alloc(immu, immu_flags);
1366         if (domain->dom_pgtable_root == NULL) {
1367                 ddi_err(DER_PANIC, rdip, "Failed to alloc root "
1368                     "pgtable for domain (%d). IOMMU unit: %s",
1369                     domain->dom_did, immu->immu_name);
1370                 /*NOTREACHED*/
1371         }
1372         pgtable_zero(domain->dom_pgtable_root);
1373 
1374         /*
1375          * Since this is a immu unit-specific domain, put it on
1376          * the per-immu domain list.
1377          */
1378         mutex_enter(&(immu->immu_lock));
1379         list_insert_head(&immu->immu_domain_list, domain);
1380         mutex_exit(&(immu->immu_lock));
1381 
1382         /*
1383          * Also put it on the system-wide xlate domain list
1384          */
1385         mutex_enter(&(immu_domain_lock));
1386         list_insert_head(&immu_xlate_domain_list, domain);
1387         mutex_exit(&(immu_domain_lock));
1388 
1389         bdf_domain_insert(immu_devi, domain);
1390 
1391 #ifdef BUGGY_DRIVERS
1392         /*
1393          * Map page0. Some broken HW/FW access it.
1394          */
1395         dcookies[0].dck_paddr = 0;
1396         dcookies[0].dck_npages = 1;
1397         dcount = 1;
1398         (void) dvma_map(domain, 0, 1, dcookies, dcount, NULL,
1399             IMMU_FLAGS_READ | IMMU_FLAGS_WRITE | IMMU_FLAGS_PAGE1);
1400 #endif
1401         return (domain);
1402 }
1403 
1404 /*
1405  * Create domainid arena.
1406  * Domainid 0 is reserved by Vt-d spec and cannot be used by
1407  * system software.
1408  * Domainid 1 is reserved by solaris and used for *all* of the following:
1409  *      as the "uninitialized" domain - For devices not yet controlled
1410  *      by Solaris
1411  *      as the "unity" domain - For devices that will always belong
1412  *      to the unity domain
1413  *      as the "overflow" domain - Used for any new device after we
1414  *      run out of domains
1415  * All of the above domains map into a single domain with
1416  * domainid 1 and UNITY DVMA mapping
1417  * Each IMMU unity has its own unity/uninit/overflow domain
1418  */
1419 static void
1420 did_init(immu_t *immu)
1421 {
1422         (void) snprintf(immu->immu_did_arena_name,
1423             sizeof (immu->immu_did_arena_name),
1424             "%s_domainid_arena", immu->immu_name);
1425 
1426         ddi_err(DER_VERB, immu->immu_dip, "creating domainid arena %s",
1427             immu->immu_did_arena_name);
1428 
1429         immu->immu_did_arena = vmem_create(
1430             immu->immu_did_arena_name,
1431             (void *)(uintptr_t)(IMMU_UNITY_DID + 1),   /* start addr */
1432             immu->immu_max_domains - IMMU_UNITY_DID,
1433             1,                          /* quantum */
1434             NULL,                       /* afunc */
1435             NULL,                       /* ffunc */
1436             NULL,                       /* source */
1437             0,                          /* qcache_max */
1438             VM_SLEEP);
1439 
1440         /* Even with SLEEP flag, vmem_create() can fail */
1441         if (immu->immu_did_arena == NULL) {
1442                 ddi_err(DER_PANIC, NULL, "%s: Failed to create Intel "
1443                     "IOMMU domainid allocator: %s", immu->immu_name,
1444                     immu->immu_did_arena_name);
1445         }
1446 }
1447 
1448 /* #########################  CONTEXT CODE ################################# */
1449 
1450 static void
1451 context_set(immu_t *immu, domain_t *domain, pgtable_t *root_table,
1452     int bus, int devfunc)
1453 {
1454         pgtable_t *context;
1455         pgtable_t *pgtable_root;
1456         hw_rce_t *hw_rent;
1457         hw_rce_t *hw_cent;
1458         hw_rce_t *ctxp;
1459         int sid;
1460         krw_t rwtype;
1461         boolean_t fill_root;
1462         boolean_t fill_ctx;
1463 
1464         pgtable_root = domain->dom_pgtable_root;
1465 
1466         ctxp = (hw_rce_t *)(root_table->swpg_next_array);
1467         context = *(pgtable_t **)(ctxp + bus);
1468         hw_rent = (hw_rce_t *)(root_table->hwpg_vaddr) + bus;
1469 
1470         fill_root = B_FALSE;
1471         fill_ctx = B_FALSE;
1472 
1473         /* Check the most common case first with reader lock */
1474         rw_enter(&(immu->immu_ctx_rwlock), RW_READER);
1475         rwtype = RW_READER;
1476 again:
1477         if (ROOT_GET_P(hw_rent)) {
1478                 hw_cent = (hw_rce_t *)(context->hwpg_vaddr) + devfunc;
1479                 if (CONT_GET_AVAIL(hw_cent) == IMMU_CONT_INITED) {
1480                         rw_exit(&(immu->immu_ctx_rwlock));
1481                         return;
1482                 } else {
1483                         fill_ctx = B_TRUE;
1484                 }
1485         } else {
1486                 fill_root = B_TRUE;
1487                 fill_ctx = B_TRUE;
1488         }
1489 
1490         if (rwtype == RW_READER &&
1491             rw_tryupgrade(&(immu->immu_ctx_rwlock)) == 0) {
1492                 rw_exit(&(immu->immu_ctx_rwlock));
1493                 rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
1494                 rwtype = RW_WRITER;
1495                 goto again;
1496         }
1497         rwtype = RW_WRITER;
1498 
1499         if (fill_root == B_TRUE) {
1500                 ROOT_SET_CONT(hw_rent, context->hwpg_paddr);
1501                 ROOT_SET_P(hw_rent);
1502                 immu_regs_cpu_flush(immu, (caddr_t)hw_rent, sizeof (hw_rce_t));
1503         }
1504 
1505         if (fill_ctx == B_TRUE) {
1506                 hw_cent = (hw_rce_t *)(context->hwpg_vaddr) + devfunc;
1507                 /* need to disable context entry before reprogramming it */
1508                 bzero(hw_cent, sizeof (hw_rce_t));
1509 
1510                 /* flush caches */
1511                 immu_regs_cpu_flush(immu, (caddr_t)hw_cent, sizeof (hw_rce_t));
1512 
1513                 sid = ((bus << 8) | devfunc);
1514                 immu_flush_context_fsi(immu, 0, sid, domain->dom_did,
1515                     &immu->immu_ctx_inv_wait);
1516 
1517                 CONT_SET_AVAIL(hw_cent, IMMU_CONT_INITED);
1518                 CONT_SET_DID(hw_cent, domain->dom_did);
1519                 CONT_SET_AW(hw_cent, immu->immu_dvma_agaw);
1520                 CONT_SET_ASR(hw_cent, pgtable_root->hwpg_paddr);
1521                 if (domain->dom_did == IMMU_UNITY_DID &&
1522                     IMMU_ECAP_GET_PT(immu->immu_regs_excap))
1523                         CONT_SET_TTYPE(hw_cent, TTYPE_PASSTHRU);
1524                 else
1525                         /*LINTED*/
1526                         CONT_SET_TTYPE(hw_cent, TTYPE_XLATE_ONLY);
1527                 CONT_SET_P(hw_cent);
1528                 if (IMMU_ECAP_GET_CH(immu->immu_regs_excap)) {
1529                         CONT_SET_EH(hw_cent);
1530                         if (immu_use_alh)
1531                                 CONT_SET_ALH(hw_cent);
1532                 }
1533                 immu_regs_cpu_flush(immu, (caddr_t)hw_cent, sizeof (hw_rce_t));
1534         }
1535         rw_exit(&(immu->immu_ctx_rwlock));
1536 }
1537 
1538 static pgtable_t *
1539 context_create(immu_t *immu)
1540 {
1541         int     bus;
1542         int     devfunc;
1543         pgtable_t *root_table;
1544         pgtable_t *context;
1545         pgtable_t *pgtable_root;
1546         hw_rce_t *ctxp;
1547         hw_rce_t *hw_rent;
1548         hw_rce_t *hw_cent;
1549 
1550         /* Allocate a zeroed root table (4K 256b entries) */
1551         root_table = pgtable_alloc(immu, IMMU_FLAGS_SLEEP);
1552         pgtable_zero(root_table);
1553 
1554         /*
1555          * Setup context tables for all possible root table entries.
1556          * Start out with unity domains for all entries.
1557          */
1558         ctxp = (hw_rce_t *)(root_table->swpg_next_array);
1559         hw_rent = (hw_rce_t *)(root_table->hwpg_vaddr);
1560         for (bus = 0; bus < IMMU_ROOT_NUM; bus++, ctxp++, hw_rent++) {
1561                 context = pgtable_alloc(immu, IMMU_FLAGS_SLEEP);
1562                 pgtable_zero(context);
1563                 ROOT_SET_P(hw_rent);
1564                 ROOT_SET_CONT(hw_rent, context->hwpg_paddr);
1565                 hw_cent = (hw_rce_t *)(context->hwpg_vaddr);
1566                 for (devfunc = 0; devfunc < IMMU_CONT_NUM;
1567                     devfunc++, hw_cent++) {
1568                         pgtable_root =
1569                             immu->immu_unity_domain->dom_pgtable_root;
1570                         CONT_SET_DID(hw_cent,
1571                             immu->immu_unity_domain->dom_did);
1572                         CONT_SET_AW(hw_cent, immu->immu_dvma_agaw);
1573                         CONT_SET_ASR(hw_cent, pgtable_root->hwpg_paddr);
1574                         if (IMMU_ECAP_GET_PT(immu->immu_regs_excap))
1575                                 CONT_SET_TTYPE(hw_cent, TTYPE_PASSTHRU);
1576                         else
1577                                 /*LINTED*/
1578                                 CONT_SET_TTYPE(hw_cent, TTYPE_XLATE_ONLY);
1579                         CONT_SET_AVAIL(hw_cent, IMMU_CONT_UNINITED);
1580                         CONT_SET_P(hw_cent);
1581                 }
1582                 immu_regs_cpu_flush(immu, context->hwpg_vaddr, IMMU_PAGESIZE);
1583                 *((pgtable_t **)ctxp) = context;
1584         }
1585 
1586         return (root_table);
1587 }
1588 
1589 /*
1590  * Called during rootnex attach, so no locks needed
1591  */
1592 static void
1593 context_init(immu_t *immu)
1594 {
1595         rw_init(&(immu->immu_ctx_rwlock), NULL, RW_DEFAULT, NULL);
1596 
1597         immu_init_inv_wait(&immu->immu_ctx_inv_wait, "ctxglobal", B_TRUE);
1598 
1599         immu_regs_wbf_flush(immu);
1600 
1601         immu->immu_ctx_root = context_create(immu);
1602 
1603         immu_regs_set_root_table(immu);
1604 
1605         rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
1606         immu_flush_context_gbl(immu, &immu->immu_ctx_inv_wait);
1607         immu_flush_iotlb_gbl(immu, &immu->immu_ctx_inv_wait);
1608         rw_exit(&(immu->immu_ctx_rwlock));
1609 }
1610 
1611 
1612 /*
1613  * Find top pcib
1614  */
1615 static int
1616 find_top_pcib(dev_info_t *dip, void *arg)
1617 {
1618         immu_devi_t *immu_devi;
1619         dev_info_t **pcibdipp = (dev_info_t **)arg;
1620 
1621         immu_devi = immu_devi_get(dip);
1622 
1623         if (immu_devi->imd_pcib_type == IMMU_PCIB_PCI_PCI) {
1624                 *pcibdipp = dip;
1625         }
1626 
1627         return (DDI_WALK_CONTINUE);
1628 }
1629 
1630 static int
1631 immu_context_update(immu_t *immu, domain_t *domain, dev_info_t *ddip,
1632     dev_info_t *rdip, immu_flags_t immu_flags)
1633 {
1634         immu_devi_t *r_immu_devi;
1635         immu_devi_t *d_immu_devi;
1636         int r_bus;
1637         int d_bus;
1638         int r_devfunc;
1639         int d_devfunc;
1640         immu_pcib_t d_pcib_type;
1641         dev_info_t *pcibdip;
1642 
1643         if (ddip == NULL || rdip == NULL ||
1644             ddip == root_devinfo || rdip == root_devinfo) {
1645                 ddi_err(DER_MODE, rdip, "immu_contexts_update: domain-dip or "
1646                     "request-dip are NULL or are root devinfo");
1647                 return (DDI_FAILURE);
1648         }
1649 
1650         /*
1651          * We need to set the context fields
1652          * based on what type of device rdip and ddip are.
1653          * To do that we need the immu_devi field.
1654          * Set the immu_devi field (if not already set)
1655          */
1656         if (immu_devi_set(ddip, immu_flags) == DDI_FAILURE) {
1657                 ddi_err(DER_MODE, rdip,
1658                     "immu_context_update: failed to set immu_devi for ddip");
1659                 return (DDI_FAILURE);
1660         }
1661 
1662         if (immu_devi_set(rdip, immu_flags) == DDI_FAILURE) {
1663                 ddi_err(DER_MODE, rdip,
1664                     "immu_context_update: failed to set immu_devi for rdip");
1665                 return (DDI_FAILURE);
1666         }
1667 
1668         d_immu_devi = immu_devi_get(ddip);
1669         r_immu_devi = immu_devi_get(rdip);
1670 
1671         d_bus = d_immu_devi->imd_bus;
1672         d_devfunc = d_immu_devi->imd_devfunc;
1673         d_pcib_type = d_immu_devi->imd_pcib_type;
1674         r_bus = r_immu_devi->imd_bus;
1675         r_devfunc = r_immu_devi->imd_devfunc;
1676 
1677         if (rdip == ddip) {
1678                 /* rdip is a PCIE device. set context for it only */
1679                 context_set(immu, domain, immu->immu_ctx_root, r_bus,
1680                     r_devfunc);
1681 #ifdef BUGGY_DRIVERS
1682         } else if (r_immu_devi == d_immu_devi) {
1683 #ifdef TEST
1684                 ddi_err(DER_WARN, rdip, "Driver bug: Devices 0x%lx and "
1685                     "0x%lx are identical", rdip, ddip);
1686 #endif
1687                 /* rdip is a PCIE device. set context for it only */
1688                 context_set(immu, domain, immu->immu_ctx_root, r_bus,
1689                     r_devfunc);
1690 #endif
1691         } else if (d_pcib_type == IMMU_PCIB_PCIE_PCI) {
1692                 /*
1693                  * ddip is a PCIE_PCI bridge. Set context for ddip's
1694                  * secondary bus. If rdip is on ddip's secondary
1695                  * bus, set context for rdip. Else, set context
1696                  * for rdip's PCI bridge on ddip's secondary bus.
1697                  */
1698                 context_set(immu, domain, immu->immu_ctx_root,
1699                     d_immu_devi->imd_sec, 0);
1700                 if (d_immu_devi->imd_sec == r_bus) {
1701                         context_set(immu, domain, immu->immu_ctx_root,
1702                             r_bus, r_devfunc);
1703                 } else {
1704                         pcibdip = NULL;
1705                         if (immu_walk_ancestor(rdip, ddip, find_top_pcib,
1706                             &pcibdip, NULL, immu_flags) == DDI_SUCCESS &&
1707                             pcibdip != NULL) {
1708                                 r_immu_devi = immu_devi_get(pcibdip);
1709                                 r_bus = r_immu_devi->imd_bus;
1710                                 r_devfunc = r_immu_devi->imd_devfunc;
1711                                 context_set(immu, domain, immu->immu_ctx_root,
1712                                     r_bus, r_devfunc);
1713                         } else {
1714                                 ddi_err(DER_PANIC, rdip, "Failed to find PCI "
1715                                     " bridge for PCI device");
1716                                 /*NOTREACHED*/
1717                         }
1718                 }
1719         } else if (d_pcib_type == IMMU_PCIB_PCI_PCI) {
1720                 context_set(immu, domain, immu->immu_ctx_root, d_bus,
1721                     d_devfunc);
1722         } else if (d_pcib_type == IMMU_PCIB_ENDPOINT) {
1723                 /*
1724                  * ddip is a PCIE device which has a non-PCI device under it
1725                  * i.e. it is a PCI-nonPCI bridge. Example: pciicde-ata
1726                  */
1727                 context_set(immu, domain, immu->immu_ctx_root, d_bus,
1728                     d_devfunc);
1729         } else {
1730                 ddi_err(DER_PANIC, rdip, "unknown device type. Cannot "
1731                     "set iommu context.");
1732                 /*NOTREACHED*/
1733         }
1734 
1735         /* XXX do we need a membar_producer() here */
1736         return (DDI_SUCCESS);
1737 }
1738 
1739 /* ##################### END CONTEXT CODE ################################## */
1740 /* ##################### MAPPING CODE ################################## */
1741 
1742 
1743 #ifdef DEBUG
1744 static boolean_t
1745 PDTE_check(immu_t *immu, hw_pdte_t pdte, pgtable_t *next, paddr_t paddr,
1746     dev_info_t *rdip, immu_flags_t immu_flags)
1747 {
1748         /* The PDTE must be set i.e. present bit is set */
1749         if (!PDTE_P(pdte)) {
1750                 ddi_err(DER_MODE, rdip, "No present flag");
1751                 return (B_FALSE);
1752         }
1753 
1754         /*
1755          * Just assert to check most significant system software field
1756          * (PDTE_SW4) as it is same as present bit and we
1757          * checked that above
1758          */
1759         ASSERT(PDTE_SW4(pdte));
1760 
1761         /*
1762          * TM field should be clear if not reserved.
1763          * non-leaf is always reserved
1764          */
1765         if (next == NULL && immu->immu_TM_reserved == B_FALSE) {
1766                 if (PDTE_TM(pdte)) {
1767                         ddi_err(DER_MODE, rdip, "TM flag set");
1768                         return (B_FALSE);
1769                 }
1770         }
1771 
1772         /*
1773          * The SW3 field is not used and must be clear
1774          */
1775         if (PDTE_SW3(pdte)) {
1776                 ddi_err(DER_MODE, rdip, "SW3 set");
1777                 return (B_FALSE);
1778         }
1779 
1780         /*
1781          * PFN (for PTE) or next level pgtable-paddr (for PDE) must be set
1782          */
1783         if (next == NULL) {
1784                 ASSERT(paddr % IMMU_PAGESIZE == 0);
1785                 if (PDTE_PADDR(pdte) != paddr) {
1786                         ddi_err(DER_MODE, rdip,
1787                             "PTE paddr mismatch: %lx != %lx",
1788                             PDTE_PADDR(pdte), paddr);
1789                         return (B_FALSE);
1790                 }
1791         } else {
1792                 if (PDTE_PADDR(pdte) != next->hwpg_paddr) {
1793                         ddi_err(DER_MODE, rdip,
1794                             "PDE paddr mismatch: %lx != %lx",
1795                             PDTE_PADDR(pdte), next->hwpg_paddr);
1796                         return (B_FALSE);
1797                 }
1798         }
1799 
1800         /*
1801          * SNP field should be clear if not reserved.
1802          * non-leaf is always reserved
1803          */
1804         if (next == NULL && immu->immu_SNP_reserved == B_FALSE) {
1805                 if (PDTE_SNP(pdte)) {
1806                         ddi_err(DER_MODE, rdip, "SNP set");
1807                         return (B_FALSE);
1808                 }
1809         }
1810 
1811         /* second field available for system software should be clear */
1812         if (PDTE_SW2(pdte)) {
1813                 ddi_err(DER_MODE, rdip, "SW2 set");
1814                 return (B_FALSE);
1815         }
1816 
1817         /* Super pages field should be clear */
1818         if (PDTE_SP(pdte)) {
1819                 ddi_err(DER_MODE, rdip, "SP set");
1820                 return (B_FALSE);
1821         }
1822 
1823         /*
1824          * least significant field available for
1825          * system software should be clear
1826          */
1827         if (PDTE_SW1(pdte)) {
1828                 ddi_err(DER_MODE, rdip, "SW1 set");
1829                 return (B_FALSE);
1830         }
1831 
1832         if ((immu_flags & IMMU_FLAGS_READ) && !PDTE_READ(pdte)) {
1833                 ddi_err(DER_MODE, rdip, "READ not set");
1834                 return (B_FALSE);
1835         }
1836 
1837         if ((immu_flags & IMMU_FLAGS_WRITE) && !PDTE_WRITE(pdte)) {
1838                 ddi_err(DER_MODE, rdip, "WRITE not set");
1839                 return (B_FALSE);
1840         }
1841 
1842         return (B_TRUE);
1843 }
1844 #endif
1845 
1846 /*ARGSUSED*/
1847 static void
1848 PTE_clear_all(immu_t *immu, domain_t *domain, xlate_t *xlate,
1849     uint64_t *dvma_ptr, uint64_t *npages_ptr, dev_info_t *rdip)
1850 {
1851         uint64_t npages;
1852         uint64_t dvma;
1853         pgtable_t *pgtable;
1854         hw_pdte_t *hwp;
1855         hw_pdte_t *shwp;
1856         int idx;
1857 
1858         pgtable = xlate->xlt_pgtable;
1859         idx = xlate->xlt_idx;
1860 
1861         dvma = *dvma_ptr;
1862         npages = *npages_ptr;
1863 
1864         /*
1865          * since a caller gets a unique dvma for a physical address,
1866          * no other concurrent thread will be writing to the same
1867          * PTE even if it has the same paddr. So no locks needed.
1868          */
1869         shwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx;
1870 
1871         hwp = shwp;
1872         for (; npages > 0 && idx <= IMMU_PGTABLE_MAXIDX; idx++, hwp++) {
1873                 PDTE_CLEAR_P(*hwp);
1874                 dvma += IMMU_PAGESIZE;
1875                 npages--;
1876         }
1877 
1878         *dvma_ptr = dvma;
1879         *npages_ptr = npages;
1880 
1881         xlate->xlt_idx = idx;
1882 }
1883 
1884 static void
1885 xlate_setup(uint64_t dvma, xlate_t *xlate, int nlevels)
1886 {
1887         int level;
1888         uint64_t offbits;
1889 
1890         /*
1891          * Skip the first 12 bits which is the offset into
1892          * 4K PFN (phys page frame based on IMMU_PAGESIZE)
1893          */
1894         offbits = dvma >> IMMU_PAGESHIFT;
1895 
1896         /* skip to level 1 i.e. leaf PTE */
1897         for (level = 1, xlate++; level <= nlevels; level++, xlate++) {
1898                 xlate->xlt_level = level;
1899                 xlate->xlt_idx = (offbits & IMMU_PGTABLE_LEVEL_MASK);
1900                 ASSERT(xlate->xlt_idx <= IMMU_PGTABLE_MAXIDX);
1901                 xlate->xlt_pgtable = NULL;
1902                 offbits >>= IMMU_PGTABLE_LEVEL_STRIDE;
1903         }
1904 }
1905 
1906 /*
1907  * Read the pgtables
1908  */
1909 static boolean_t
1910 PDE_lookup(domain_t *domain, xlate_t *xlate, int nlevels)
1911 {
1912         pgtable_t *pgtable;
1913         pgtable_t *next;
1914         uint_t idx;
1915 
1916         /* start with highest level pgtable i.e. root */
1917         xlate += nlevels;
1918 
1919         if (xlate->xlt_pgtable == NULL) {
1920                 xlate->xlt_pgtable = domain->dom_pgtable_root;
1921         }
1922 
1923         for (; xlate->xlt_level > 1; xlate--) {
1924                 idx = xlate->xlt_idx;
1925                 pgtable = xlate->xlt_pgtable;
1926 
1927                 if ((xlate - 1)->xlt_pgtable) {
1928                         continue;
1929                 }
1930 
1931                 /* Lock the pgtable in read mode */
1932                 rw_enter(&(pgtable->swpg_rwlock), RW_READER);
1933 
1934                 /*
1935                  * since we are unmapping, the pgtable should
1936                  * already point to a leafier pgtable.
1937                  */
1938                 next = *(pgtable->swpg_next_array + idx);
1939                 (xlate - 1)->xlt_pgtable = next;
1940                 rw_exit(&(pgtable->swpg_rwlock));
1941                 if (next == NULL)
1942                         return (B_FALSE);
1943         }
1944 
1945         return (B_TRUE);
1946 }
1947 
1948 static void
1949 immu_fault_walk(void *arg, void *base, size_t len)
1950 {
1951         uint64_t dvma, start;
1952 
1953         dvma = *(uint64_t *)arg;
1954         start = (uint64_t)(uintptr_t)base;
1955 
1956         if (dvma >= start && dvma < (start + len)) {
1957                 ddi_err(DER_WARN, NULL,
1958                     "faulting DVMA address is in vmem arena "
1959                     "(%" PRIx64 "-%" PRIx64 ")",
1960                     start, start + len);
1961                 *(uint64_t *)arg = ~0ULL;
1962         }
1963 }
1964 
1965 void
1966 immu_print_fault_info(uint_t sid, uint64_t dvma)
1967 {
1968         int nlevels;
1969         xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0};
1970         xlate_t *xlatep;
1971         hw_pdte_t pte;
1972         domain_t *domain;
1973         immu_t *immu;
1974         uint64_t dvma_arg;
1975 
1976         if (mod_hash_find(bdf_domain_hash,
1977             (void *)(uintptr_t)sid, (void *)&domain) != 0) {
1978                 ddi_err(DER_WARN, NULL,
1979                     "no domain for faulting SID %08x", sid);
1980                 return;
1981         }
1982 
1983         immu = domain->dom_immu;
1984 
1985         dvma_arg = dvma;
1986         vmem_walk(domain->dom_dvma_arena, VMEM_ALLOC, immu_fault_walk,
1987             (void *)&dvma_arg);
1988         if (dvma_arg != ~0ULL)
1989                 ddi_err(DER_WARN, domain->dom_dip,
1990                     "faulting DVMA address is not in vmem arena");
1991 
1992         nlevels = immu->immu_dvma_nlevels;
1993         xlate_setup(dvma, xlate, nlevels);
1994 
1995         if (!PDE_lookup(domain, xlate, nlevels)) {
1996                 ddi_err(DER_WARN, domain->dom_dip,
1997                     "pte not found in domid %d for faulting addr %" PRIx64,
1998                     domain->dom_did, dvma);
1999                 return;
2000         }
2001 
2002         xlatep = &xlate[1];
2003         pte = *((hw_pdte_t *)
2004             (xlatep->xlt_pgtable->hwpg_vaddr) + xlatep->xlt_idx);
2005 
2006         ddi_err(DER_WARN, domain->dom_dip,
2007             "domid %d pte: %" PRIx64 "(paddr %" PRIx64 ")", domain->dom_did,
2008             (unsigned long long)pte, (unsigned long long)PDTE_PADDR(pte));
2009 }
2010 
2011 /*ARGSUSED*/
2012 static void
2013 PTE_set_one(immu_t *immu, hw_pdte_t *hwp, paddr_t paddr,
2014     dev_info_t *rdip, immu_flags_t immu_flags)
2015 {
2016         hw_pdte_t pte;
2017 
2018 #ifndef DEBUG
2019         pte = immu->immu_ptemask;
2020         PDTE_SET_PADDR(pte, paddr);
2021 #else
2022         pte = *hwp;
2023 
2024         if (PDTE_P(pte)) {
2025                 if (PDTE_PADDR(pte) != paddr) {
2026                         ddi_err(DER_MODE, rdip, "PTE paddr %lx != paddr %lx",
2027                             PDTE_PADDR(pte), paddr);
2028                 }
2029 #ifdef BUGGY_DRIVERS
2030                 return;
2031 #else
2032                 goto out;
2033 #endif
2034         }
2035 
2036         /* clear TM field if not reserved */
2037         if (immu->immu_TM_reserved == B_FALSE) {
2038                 PDTE_CLEAR_TM(pte);
2039         }
2040 
2041         /* Clear 3rd field for system software  - not used */
2042         PDTE_CLEAR_SW3(pte);
2043 
2044         /* Set paddr */
2045         ASSERT(paddr % IMMU_PAGESIZE == 0);
2046         PDTE_CLEAR_PADDR(pte);
2047         PDTE_SET_PADDR(pte, paddr);
2048 
2049         /*  clear SNP field if not reserved. */
2050         if (immu->immu_SNP_reserved == B_FALSE) {
2051                 PDTE_CLEAR_SNP(pte);
2052         }
2053 
2054         /* Clear SW2 field available for software */
2055         PDTE_CLEAR_SW2(pte);
2056 
2057 
2058         /* SP is don't care for PTEs. Clear it for cleanliness */
2059         PDTE_CLEAR_SP(pte);
2060 
2061         /* Clear SW1 field available for software */
2062         PDTE_CLEAR_SW1(pte);
2063 
2064         /*
2065          * Now that we are done writing the PTE
2066          * set the "present" flag. Note this present
2067          * flag is a bit in the PDE/PTE that the
2068          * spec says is available for system software.
2069          * This is an implementation detail of Solaris
2070          * bare-metal Intel IOMMU.
2071          * The present field in a PDE/PTE is not defined
2072          * by the Vt-d spec
2073          */
2074 
2075         PDTE_SET_P(pte);
2076 
2077         pte |= immu->immu_ptemask;
2078 
2079 out:
2080 #endif /* DEBUG */
2081 #ifdef BUGGY_DRIVERS
2082         PDTE_SET_READ(pte);
2083         PDTE_SET_WRITE(pte);
2084 #else
2085         if (immu_flags & IMMU_FLAGS_READ)
2086                 PDTE_SET_READ(pte);
2087         if (immu_flags & IMMU_FLAGS_WRITE)
2088                 PDTE_SET_WRITE(pte);
2089 #endif /* BUGGY_DRIVERS */
2090 
2091         *hwp = pte;
2092 }
2093 
2094 /*ARGSUSED*/
2095 static void
2096 PTE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate,
2097     uint64_t *dvma_ptr, uint64_t *nvpages_ptr, immu_dcookie_t *dcookies,
2098     int dcount, dev_info_t *rdip, immu_flags_t immu_flags)
2099 {
2100         paddr_t paddr;
2101         uint64_t nvpages;
2102         uint64_t nppages;
2103         uint64_t dvma;
2104         pgtable_t *pgtable;
2105         hw_pdte_t *hwp;
2106         hw_pdte_t *shwp;
2107         int idx, nset;
2108         int j;
2109 
2110         pgtable = xlate->xlt_pgtable;
2111         idx = xlate->xlt_idx;
2112 
2113         dvma = *dvma_ptr;
2114         nvpages = *nvpages_ptr;
2115 
2116         /*
2117          * since a caller gets a unique dvma for a physical address,
2118          * no other concurrent thread will be writing to the same
2119          * PTE even if it has the same paddr. So no locks needed.
2120          */
2121         shwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx;
2122 
2123         hwp = shwp;
2124         for (j = dcount - 1; j >= 0; j--) {
2125                 if (nvpages <= dcookies[j].dck_npages)
2126                         break;
2127                 nvpages -= dcookies[j].dck_npages;
2128         }
2129 
2130         VERIFY(j >= 0);
2131         nppages = nvpages;
2132         paddr = dcookies[j].dck_paddr +
2133             (dcookies[j].dck_npages - nppages) * IMMU_PAGESIZE;
2134 
2135         nvpages = *nvpages_ptr;
2136         nset = 0;
2137         for (; nvpages > 0 && idx <= IMMU_PGTABLE_MAXIDX; idx++, hwp++) {
2138                 PTE_set_one(immu, hwp, paddr, rdip, immu_flags);
2139                 nset++;
2140 
2141                 ASSERT(PDTE_check(immu, *hwp, NULL, paddr, rdip, immu_flags)
2142                     == B_TRUE);
2143                 nppages--;
2144                 nvpages--;
2145                 paddr += IMMU_PAGESIZE;
2146                 dvma += IMMU_PAGESIZE;
2147 
2148                 if (nppages == 0) {
2149                         j++;
2150                 }
2151 
2152                 if (j == dcount)
2153                         break;
2154 
2155                 if (nppages == 0) {
2156                         nppages = dcookies[j].dck_npages;
2157                         paddr = dcookies[j].dck_paddr;
2158                 }
2159         }
2160 
2161         if (nvpages) {
2162                 *dvma_ptr = dvma;
2163                 *nvpages_ptr = nvpages;
2164         } else {
2165                 *dvma_ptr = 0;
2166                 *nvpages_ptr = 0;
2167         }
2168 
2169         xlate->xlt_idx = idx;
2170 }
2171 
2172 /*ARGSUSED*/
2173 static void
2174 PDE_set_one(immu_t *immu, hw_pdte_t *hwp, pgtable_t *next,
2175     dev_info_t *rdip, immu_flags_t immu_flags)
2176 {
2177         hw_pdte_t pde;
2178 
2179         pde = *hwp;
2180 
2181         /* if PDE is already set, make sure it is correct */
2182         if (PDTE_P(pde)) {
2183                 ASSERT(PDTE_PADDR(pde) == next->hwpg_paddr);
2184 #ifdef BUGGY_DRIVERS
2185                 return;
2186 #else
2187                 goto out;
2188 #endif
2189         }
2190 
2191         /* Dont touch SW4, it is the present bit */
2192 
2193         /* don't touch TM field it is reserved for PDEs */
2194 
2195         /* 3rd field available for system software is not used */
2196         PDTE_CLEAR_SW3(pde);
2197 
2198         /* Set next level pgtable-paddr for PDE */
2199         PDTE_CLEAR_PADDR(pde);
2200         PDTE_SET_PADDR(pde, next->hwpg_paddr);
2201 
2202         /* don't touch SNP field it is reserved for PDEs */
2203 
2204         /* Clear second field available for system software */
2205         PDTE_CLEAR_SW2(pde);
2206 
2207         /* No super pages for PDEs */
2208         PDTE_CLEAR_SP(pde);
2209 
2210         /* Clear SW1 for software */
2211         PDTE_CLEAR_SW1(pde);
2212 
2213         /*
2214          * Now that we are done writing the PDE
2215          * set the "present" flag. Note this present
2216          * flag is a bit in the PDE/PTE that the
2217          * spec says is available for system software.
2218          * This is an implementation detail of Solaris
2219          * base-metal Intel IOMMU.
2220          * The present field in a PDE/PTE is not defined
2221          * by the Vt-d spec
2222          */
2223 
2224 out:
2225 #ifdef  BUGGY_DRIVERS
2226         PDTE_SET_READ(pde);
2227         PDTE_SET_WRITE(pde);
2228 #else
2229         if (immu_flags & IMMU_FLAGS_READ)
2230                 PDTE_SET_READ(pde);
2231         if (immu_flags & IMMU_FLAGS_WRITE)
2232                 PDTE_SET_WRITE(pde);
2233 #endif
2234 
2235         PDTE_SET_P(pde);
2236 
2237         *hwp = pde;
2238 }
2239 
2240 /*
2241  * Used to set PDEs
2242  */
2243 static boolean_t
2244 PDE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate, int nlevels,
2245     dev_info_t *rdip, immu_flags_t immu_flags)
2246 {
2247         pgtable_t *pgtable;
2248         pgtable_t *new;
2249         pgtable_t *next;
2250         hw_pdte_t *hwp;
2251         int level;
2252         uint_t idx;
2253         krw_t rwtype;
2254         boolean_t set = B_FALSE;
2255 
2256         /* start with highest level pgtable i.e. root */
2257         xlate += nlevels;
2258 
2259         new = NULL;
2260         xlate->xlt_pgtable = domain->dom_pgtable_root;
2261         for (level = nlevels; level > 1; level--, xlate--) {
2262                 idx = xlate->xlt_idx;
2263                 pgtable = xlate->xlt_pgtable;
2264 
2265                 /* Lock the pgtable in READ mode first */
2266                 rw_enter(&(pgtable->swpg_rwlock), RW_READER);
2267                 rwtype = RW_READER;
2268 again:
2269                 hwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx;
2270                 next = (pgtable->swpg_next_array)[idx];
2271 
2272                 /*
2273                  * check if leafier level already has a pgtable
2274                  * if yes, verify
2275                  */
2276                 if (next == NULL) {
2277                         if (new == NULL) {
2278 
2279                                 IMMU_DPROBE2(immu__pdp__alloc, dev_info_t *,
2280                                     rdip, int, level);
2281 
2282                                 new = pgtable_alloc(immu, immu_flags);
2283                                 if (new == NULL) {
2284                                         ddi_err(DER_PANIC, rdip,
2285                                             "pgtable alloc err");
2286                                 }
2287                                 pgtable_zero(new);
2288                         }
2289 
2290                         /* Change to a write lock */
2291                         if (rwtype == RW_READER &&
2292                             rw_tryupgrade(&(pgtable->swpg_rwlock)) == 0) {
2293                                 rw_exit(&(pgtable->swpg_rwlock));
2294                                 rw_enter(&(pgtable->swpg_rwlock), RW_WRITER);
2295                                 rwtype = RW_WRITER;
2296                                 goto again;
2297                         }
2298                         rwtype = RW_WRITER;
2299                         next = new;
2300                         (pgtable->swpg_next_array)[idx] = next;
2301                         new = NULL;
2302                         PDE_set_one(immu, hwp, next, rdip, immu_flags);
2303                         set = B_TRUE;
2304                         rw_downgrade(&(pgtable->swpg_rwlock));
2305                         rwtype = RW_READER;
2306                 }
2307 #ifndef  BUGGY_DRIVERS
2308                 else {
2309                         hw_pdte_t pde = *hwp;
2310 
2311                         /*
2312                          * If buggy driver we already set permission
2313                          * READ+WRITE so nothing to do for that case
2314                          * XXX Check that read writer perms change before
2315                          * actually setting perms. Also need to hold lock
2316                          */
2317                         if (immu_flags & IMMU_FLAGS_READ)
2318                                 PDTE_SET_READ(pde);
2319                         if (immu_flags & IMMU_FLAGS_WRITE)
2320                                 PDTE_SET_WRITE(pde);
2321 
2322                         *hwp = pde;
2323                 }
2324 #endif
2325 
2326                 ASSERT(PDTE_check(immu, *hwp, next, 0, rdip, immu_flags)
2327                     == B_TRUE);
2328 
2329                 (xlate - 1)->xlt_pgtable = next;
2330                 rw_exit(&(pgtable->swpg_rwlock));
2331         }
2332 
2333         if (new) {
2334                 pgtable_free(immu, new);
2335         }
2336 
2337         return (set);
2338 }
2339 
2340 /*
2341  * dvma_map()
2342  *     map a contiguous range of DVMA pages
2343  *
2344  *     immu: IOMMU unit for which we are generating DVMA cookies
2345  *   domain: domain
2346  *    sdvma: Starting dvma
2347  *   spaddr: Starting paddr
2348  *   npages: Number of pages
2349  *     rdip: requesting device
2350  *     immu_flags: flags
2351  */
2352 static boolean_t
2353 dvma_map(domain_t *domain, uint64_t sdvma, uint64_t snvpages,
2354     immu_dcookie_t *dcookies, int dcount, dev_info_t *rdip,
2355     immu_flags_t immu_flags)
2356 {
2357         uint64_t dvma;
2358         uint64_t n;
2359         immu_t *immu = domain->dom_immu;
2360         int nlevels = immu->immu_dvma_nlevels;
2361         xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0};
2362         boolean_t pde_set = B_FALSE;
2363 
2364         n = snvpages;
2365         dvma = sdvma;
2366 
2367         while (n > 0) {
2368                 xlate_setup(dvma, xlate, nlevels);
2369 
2370                 /* Lookup or allocate PGDIRs and PGTABLEs if necessary */
2371                 if (PDE_set_all(immu, domain, xlate, nlevels, rdip, immu_flags)
2372                     == B_TRUE) {
2373                         pde_set = B_TRUE;
2374                 }
2375 
2376                 /* set all matching ptes that fit into this leaf pgtable */
2377                 PTE_set_all(immu, domain, &xlate[1], &dvma, &n, dcookies,
2378                     dcount, rdip, immu_flags);
2379         }
2380 
2381         return (pde_set);
2382 }
2383 
2384 /*
2385  * dvma_unmap()
2386  *   unmap a range of DVMAs
2387  *
2388  * immu: IOMMU unit state
2389  * domain: domain for requesting device
2390  * ddip: domain-dip
2391  * dvma: starting DVMA
2392  * npages: Number of IMMU pages to be unmapped
2393  * rdip: requesting device
2394  */
2395 static void
2396 dvma_unmap(domain_t *domain, uint64_t sdvma, uint64_t snpages,
2397     dev_info_t *rdip)
2398 {
2399         immu_t *immu = domain->dom_immu;
2400         int nlevels = immu->immu_dvma_nlevels;
2401         xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0};
2402         uint64_t n;
2403         uint64_t dvma;
2404 
2405         dvma = sdvma;
2406         n = snpages;
2407 
2408         while (n > 0) {
2409                 /* setup the xlate array */
2410                 xlate_setup(dvma, xlate, nlevels);
2411 
2412                 /* just lookup existing pgtables. Should never fail */
2413                 if (!PDE_lookup(domain, xlate, nlevels))
2414                         ddi_err(DER_PANIC, rdip,
2415                             "PTE not found for addr %" PRIx64,
2416                             (unsigned long long)dvma);
2417 
2418                 /* clear all matching ptes that fit into this leaf pgtable */
2419                 PTE_clear_all(immu, domain, &xlate[1], &dvma, &n, rdip);
2420         }
2421 
2422         /* No need to flush IOTLB after unmap */
2423 }
2424 
2425 static uint64_t
2426 dvma_alloc(domain_t *domain, ddi_dma_attr_t *dma_attr, uint_t npages, int kmf)
2427 {
2428         uint64_t dvma;
2429         size_t xsize, align;
2430         uint64_t minaddr, maxaddr;
2431 
2432         /* parameters */
2433         xsize = npages * IMMU_PAGESIZE;
2434         align = MAX((size_t)(dma_attr->dma_attr_align), IMMU_PAGESIZE);
2435         minaddr = dma_attr->dma_attr_addr_lo;
2436         maxaddr = dma_attr->dma_attr_addr_hi + 1;
2437 
2438         /* handle the rollover cases */
2439         if (maxaddr < dma_attr->dma_attr_addr_hi) {
2440                 maxaddr = dma_attr->dma_attr_addr_hi;
2441         }
2442 
2443         /*
2444          * allocate from vmem arena.
2445          */
2446         dvma = (uint64_t)(uintptr_t)vmem_xalloc(domain->dom_dvma_arena,
2447             xsize, align, 0, 0, (void *)(uintptr_t)minaddr,
2448             (void *)(uintptr_t)maxaddr, kmf);
2449 
2450         return (dvma);
2451 }
2452 
2453 static void
2454 dvma_prealloc(dev_info_t *rdip, immu_hdl_priv_t *ihp, ddi_dma_attr_t *dma_attr)
2455 {
2456         int nlevels;
2457         xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0}, *xlp;
2458         uint64_t dvma, n;
2459         size_t xsize, align;
2460         uint64_t minaddr, maxaddr, dmamax;
2461         int on, npte, pindex;
2462         hw_pdte_t *shwp;
2463         immu_t *immu;
2464         domain_t *domain;
2465 
2466         /* parameters */
2467         domain = IMMU_DEVI(rdip)->imd_domain;
2468         immu = domain->dom_immu;
2469         nlevels = immu->immu_dvma_nlevels;
2470         xsize = IMMU_NPREPTES * IMMU_PAGESIZE;
2471         align = MAX((size_t)(dma_attr->dma_attr_align), IMMU_PAGESIZE);
2472         minaddr = dma_attr->dma_attr_addr_lo;
2473         if (dma_attr->dma_attr_flags & _DDI_DMA_BOUNCE_ON_SEG)
2474                 dmamax = dma_attr->dma_attr_seg;
2475         else
2476                 dmamax = dma_attr->dma_attr_addr_hi;
2477         maxaddr = dmamax + 1;
2478 
2479         if (maxaddr < dmamax)
2480                 maxaddr = dmamax;
2481 
2482         dvma = (uint64_t)(uintptr_t)vmem_xalloc(domain->dom_dvma_arena,
2483             xsize, align, 0, dma_attr->dma_attr_seg + 1,
2484             (void *)(uintptr_t)minaddr, (void *)(uintptr_t)maxaddr, VM_NOSLEEP);
2485 
2486         ihp->ihp_predvma = dvma;
2487         ihp->ihp_npremapped = 0;
2488         if (dvma == 0)
2489                 return;
2490 
2491         n = IMMU_NPREPTES;
2492         pindex = 0;
2493 
2494         /*
2495          * Set up a mapping at address 0, just so that all PDPs get allocated
2496          * now. Although this initial mapping should never be used,
2497          * explicitly set it to read-only, just to be safe.
2498          */
2499         while (n > 0) {
2500                 xlate_setup(dvma, xlate, nlevels);
2501 
2502                 (void) PDE_set_all(immu, domain, xlate, nlevels, rdip,
2503                     IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
2504 
2505                 xlp = &xlate[1];
2506                 shwp = (hw_pdte_t *)(xlp->xlt_pgtable->hwpg_vaddr)
2507                     + xlp->xlt_idx;
2508                 on = n;
2509 
2510                 PTE_set_all(immu, domain, xlp, &dvma, &n, &immu_precookie,
2511                     1, rdip, IMMU_FLAGS_READ);
2512 
2513                 npte = on - n;
2514 
2515                 while (npte > 0) {
2516                         ihp->ihp_preptes[pindex++] = shwp;
2517 #ifdef BUGGY_DRIVERS
2518                         PDTE_CLEAR_WRITE(*shwp);
2519 #endif
2520                         shwp++;
2521                         npte--;
2522                 }
2523         }
2524 }
2525 
2526 static void
2527 dvma_prefree(dev_info_t *rdip, immu_hdl_priv_t *ihp)
2528 {
2529         domain_t *domain;
2530 
2531         domain = IMMU_DEVI(rdip)->imd_domain;
2532 
2533         if (ihp->ihp_predvma != 0) {
2534                 dvma_unmap(domain, ihp->ihp_predvma, IMMU_NPREPTES, rdip);
2535                 vmem_free(domain->dom_dvma_arena,
2536                     (void *)(uintptr_t)ihp->ihp_predvma,
2537                     IMMU_NPREPTES * IMMU_PAGESIZE);
2538         }
2539 }
2540 
2541 static void
2542 dvma_free(domain_t *domain, uint64_t dvma, uint64_t npages)
2543 {
2544         uint64_t size = npages * IMMU_PAGESIZE;
2545 
2546         if (domain->dom_maptype != IMMU_MAPTYPE_XLATE)
2547                 return;
2548 
2549         vmem_free(domain->dom_dvma_arena, (void *)(uintptr_t)dvma, size);
2550 }
2551 
2552 static int
2553 immu_map_dvmaseg(dev_info_t *rdip, ddi_dma_handle_t handle,
2554     immu_hdl_priv_t *ihp, struct ddi_dma_req *dmareq,
2555     ddi_dma_obj_t *dma_out)
2556 {
2557         domain_t *domain;
2558         immu_t *immu;
2559         immu_flags_t immu_flags;
2560         ddi_dma_atyp_t buftype;
2561         ddi_dma_obj_t *dmar_object;
2562         ddi_dma_attr_t *attrp;
2563         uint64_t offset, paddr, dvma, sdvma, rwmask;
2564         size_t npages, npgalloc;
2565         uint_t psize, size, pcnt, dmax;
2566         page_t **pparray;
2567         caddr_t vaddr;
2568         page_t *page;
2569         struct as *vas;
2570         immu_dcookie_t *dcookies;
2571         int pde_set;
2572 
2573         domain = IMMU_DEVI(rdip)->imd_domain;
2574         immu = domain->dom_immu;
2575         immu_flags = dma_to_immu_flags(dmareq);
2576 
2577         attrp = &((ddi_dma_impl_t *)handle)->dmai_attr;
2578 
2579         dmar_object = &dmareq->dmar_object;
2580         pparray = dmar_object->dmao_obj.virt_obj.v_priv;
2581         vaddr = dmar_object->dmao_obj.virt_obj.v_addr;
2582         buftype = dmar_object->dmao_type;
2583         size = dmar_object->dmao_size;
2584 
2585         IMMU_DPROBE3(immu__map__dvma, dev_info_t *, rdip, ddi_dma_atyp_t,
2586             buftype, uint_t, size);
2587 
2588         dcookies = &ihp->ihp_dcookies[0];
2589 
2590         pcnt = dmax = 0;
2591 
2592         /* retrieve paddr, psize, offset from dmareq */
2593         if (buftype == DMA_OTYP_PAGES) {
2594                 page = dmar_object->dmao_obj.pp_obj.pp_pp;
2595                 offset =  dmar_object->dmao_obj.pp_obj.pp_offset &
2596                     MMU_PAGEOFFSET;
2597                 paddr = pfn_to_pa(page->p_pagenum) + offset;
2598                 psize = MIN((MMU_PAGESIZE - offset), size);
2599                 page = page->p_next;
2600                 vas = dmar_object->dmao_obj.virt_obj.v_as;
2601         } else {
2602                 if (vas == NULL) {
2603                         vas = &kas;
2604                 }
2605                 offset = (uintptr_t)vaddr & MMU_PAGEOFFSET;
2606                 if (pparray != NULL) {
2607                         paddr = pfn_to_pa(pparray[pcnt]->p_pagenum) + offset;
2608                         psize = MIN((MMU_PAGESIZE - offset), size);
2609                         pcnt++;
2610                 } else {
2611                         paddr = pfn_to_pa(hat_getpfnum(vas->a_hat,
2612                             vaddr)) + offset;
2613                         psize = MIN(size, (MMU_PAGESIZE - offset));
2614                         vaddr += psize;
2615                 }
2616         }
2617 
2618         npgalloc = IMMU_BTOPR(size + offset);
2619 
2620         if (npgalloc <= IMMU_NPREPTES && ihp->ihp_predvma != 0) {
2621 #ifdef BUGGY_DRIVERS
2622                 rwmask = PDTE_MASK_R | PDTE_MASK_W | immu->immu_ptemask;
2623 #else
2624                 rwmask = immu->immu_ptemask;
2625                 if (immu_flags & IMMU_FLAGS_READ)
2626                         rwmask |= PDTE_MASK_R;
2627                 if (immu_flags & IMMU_FLAGS_WRITE)
2628                         rwmask |= PDTE_MASK_W;
2629 #endif
2630 #ifdef DEBUG
2631                 rwmask |= PDTE_MASK_P;
2632 #endif
2633                 sdvma = ihp->ihp_predvma;
2634                 ihp->ihp_npremapped = npgalloc;
2635                 *ihp->ihp_preptes[0] =
2636                     PDTE_PADDR(paddr & ~MMU_PAGEOFFSET) | rwmask;
2637         } else {
2638                 ihp->ihp_npremapped = 0;
2639                 sdvma = dvma_alloc(domain, attrp, npgalloc,
2640                     dmareq->dmar_fp == DDI_DMA_SLEEP ? VM_SLEEP : VM_NOSLEEP);
2641                 if (sdvma == 0)
2642                         return (DDI_DMA_NORESOURCES);
2643 
2644                 dcookies[0].dck_paddr = (paddr & ~MMU_PAGEOFFSET);
2645                 dcookies[0].dck_npages = 1;
2646         }
2647 
2648         IMMU_DPROBE3(immu__dvma__alloc, dev_info_t *, rdip, uint64_t, npgalloc,
2649             uint64_t, sdvma);
2650 
2651         dvma = sdvma;
2652         pde_set = 0;
2653         npages = 1;
2654         size -= psize;
2655         while (size > 0) {
2656                 /* get the size for this page (i.e. partial or full page) */
2657                 psize = MIN(size, MMU_PAGESIZE);
2658                 if (buftype == DMA_OTYP_PAGES) {
2659                         /* get the paddr from the page_t */
2660                         paddr = pfn_to_pa(page->p_pagenum);
2661                         page = page->p_next;
2662                 } else if (pparray != NULL) {
2663                         /* index into the array of page_t's to get the paddr */
2664                         paddr = pfn_to_pa(pparray[pcnt]->p_pagenum);
2665                         pcnt++;
2666                 } else {
2667                         /* call into the VM to get the paddr */
2668                         paddr = pfn_to_pa(hat_getpfnum(vas->a_hat, vaddr));
2669                         vaddr += psize;
2670                 }
2671 
2672                 if (ihp->ihp_npremapped > 0) {
2673                         *ihp->ihp_preptes[npages] =
2674                             PDTE_PADDR(paddr) | rwmask;
2675                 } else if (IMMU_CONTIG_PADDR(dcookies[dmax], paddr)) {
2676                         dcookies[dmax].dck_npages++;
2677                 } else {
2678                         /* No, we need a new dcookie */
2679                         if (dmax == (IMMU_NDCK - 1)) {
2680                                 /*
2681                                  * Ran out of dcookies. Map them now.
2682                                  */
2683                                 if (dvma_map(domain, dvma,
2684                                     npages, dcookies, dmax + 1, rdip,
2685                                     immu_flags))
2686                                         pde_set++;
2687 
2688                                 IMMU_DPROBE4(immu__dvmamap__early,
2689                                     dev_info_t *, rdip, uint64_t, dvma,
2690                                     uint_t, npages, uint_t, dmax+1);
2691 
2692                                 dvma += (npages << IMMU_PAGESHIFT);
2693                                 npages = 0;
2694                                 dmax = 0;
2695                         } else {
2696                                 dmax++;
2697                         }
2698                         dcookies[dmax].dck_paddr = paddr;
2699                         dcookies[dmax].dck_npages = 1;
2700                 }
2701                 size -= psize;
2702                 if (npages != 0)
2703                         npages++;
2704         }
2705 
2706         /*
2707          * Finish up, mapping all, or all of the remaining,
2708          * physical memory ranges.
2709          */
2710         if (ihp->ihp_npremapped == 0 && npages > 0) {
2711                 IMMU_DPROBE4(immu__dvmamap__late, dev_info_t *, rdip, \
2712                     uint64_t, dvma, uint_t, npages, uint_t, dmax+1);
2713 
2714                 if (dvma_map(domain, dvma, npages, dcookies,
2715                     dmax + 1, rdip, immu_flags))
2716                         pde_set++;
2717         }
2718 
2719         /* Invalidate the IOTLB */
2720         immu_flush_iotlb_psi(immu, domain->dom_did, sdvma, npgalloc,
2721             pde_set > 0 ? TLB_IVA_WHOLE : TLB_IVA_LEAF,
2722             &ihp->ihp_inv_wait);
2723 
2724         ihp->ihp_ndvseg = 1;
2725         ihp->ihp_dvseg[0].dvs_start = sdvma;
2726         ihp->ihp_dvseg[0].dvs_len = dmar_object->dmao_size;
2727 
2728         dma_out->dmao_size = dmar_object->dmao_size;
2729         dma_out->dmao_obj.dvma_obj.dv_off = offset & IMMU_PAGEOFFSET;
2730         dma_out->dmao_obj.dvma_obj.dv_nseg = 1;
2731         dma_out->dmao_obj.dvma_obj.dv_seg = &ihp->ihp_dvseg[0];
2732         dma_out->dmao_type = DMA_OTYP_DVADDR;
2733 
2734         return (DDI_DMA_MAPPED);
2735 }
2736 
2737 static int
2738 immu_unmap_dvmaseg(dev_info_t *rdip, ddi_dma_obj_t *dmao)
2739 {
2740         uint64_t dvma, npages;
2741         domain_t *domain;
2742         struct dvmaseg *dvs;
2743 
2744         domain = IMMU_DEVI(rdip)->imd_domain;
2745         dvs = dmao->dmao_obj.dvma_obj.dv_seg;
2746 
2747         dvma = dvs[0].dvs_start;
2748         npages = IMMU_BTOPR(dvs[0].dvs_len + dmao->dmao_obj.dvma_obj.dv_off);
2749 
2750 #ifdef DEBUG
2751         /* Unmap only in DEBUG mode */
2752         dvma_unmap(domain, dvma, npages, rdip);
2753 #endif
2754         dvma_free(domain, dvma, npages);
2755 
2756         IMMU_DPROBE3(immu__dvma__free, dev_info_t *, rdip, uint_t, npages,
2757             uint64_t, dvma);
2758 
2759 #ifdef DEBUG
2760         /*
2761          * In the DEBUG case, the unmap was actually done,
2762          * but an IOTLB flush was not done. So, an explicit
2763          * write back flush is needed.
2764          */
2765         immu_regs_wbf_flush(domain->dom_immu);
2766 #endif
2767 
2768         return (DDI_SUCCESS);
2769 }
2770 
2771 /* ############################# Functions exported ######################## */
2772 
2773 /*
2774  * setup the DVMA subsystem
2775  * this code runs only for the first IOMMU unit
2776  */
2777 void
2778 immu_dvma_setup(list_t *listp)
2779 {
2780         immu_t *immu;
2781         uint_t kval;
2782         size_t nchains;
2783 
2784         /* locks */
2785         mutex_init(&immu_domain_lock, NULL, MUTEX_DEFAULT, NULL);
2786 
2787         /* Create lists */
2788         list_create(&immu_unity_domain_list, sizeof (domain_t),
2789             offsetof(domain_t, dom_maptype_node));
2790         list_create(&immu_xlate_domain_list, sizeof (domain_t),
2791             offsetof(domain_t, dom_maptype_node));
2792 
2793         /* Setup BDF domain hash */
2794         nchains = 0xff;
2795         kval = mod_hash_iddata_gen(nchains);
2796 
2797         bdf_domain_hash = mod_hash_create_extended("BDF-DOMAIN_HASH",
2798             nchains, mod_hash_null_keydtor, mod_hash_null_valdtor,
2799             mod_hash_byid, (void *)(uintptr_t)kval, mod_hash_idkey_cmp,
2800             KM_NOSLEEP);
2801 
2802         immu = list_head(listp);
2803         for (; immu; immu = list_next(listp, immu)) {
2804                 create_unity_domain(immu);
2805                 did_init(immu);
2806                 context_init(immu);
2807                 immu->immu_dvma_setup = B_TRUE;
2808         }
2809 }
2810 
2811 /*
2812  * Startup up one DVMA unit
2813  */
2814 void
2815 immu_dvma_startup(immu_t *immu)
2816 {
2817         if (immu_gfxdvma_enable == B_FALSE &&
2818             immu->immu_dvma_gfx_only == B_TRUE) {
2819                 return;
2820         }
2821 
2822         /*
2823          * DVMA will start once IOMMU is "running"
2824          */
2825         immu->immu_dvma_running = B_TRUE;
2826 }
2827 
2828 /*
2829  * immu_dvma_physmem_update()
2830  *       called when the installed memory on a
2831  *       system increases, to expand domain DVMA
2832  *       for domains with UNITY mapping
2833  */
2834 void
2835 immu_dvma_physmem_update(uint64_t addr, uint64_t size)
2836 {
2837         uint64_t start;
2838         uint64_t npages;
2839         int dcount;
2840         immu_dcookie_t dcookies[1] = {0};
2841         domain_t *domain;
2842 
2843         /*
2844          * Just walk the system-wide list of domains with
2845          * UNITY mapping. Both the list of *all* domains
2846          * and *UNITY* domains is protected by the same
2847          * single lock
2848          */
2849         mutex_enter(&immu_domain_lock);
2850         domain = list_head(&immu_unity_domain_list);
2851         for (; domain; domain = list_next(&immu_unity_domain_list, domain)) {
2852                 /*
2853                  * Nothing to do if the IOMMU supports passthrough.
2854                  */
2855                 if (IMMU_ECAP_GET_PT(domain->dom_immu->immu_regs_excap))
2856                         continue;
2857 
2858                 /* There is no vmem_arena for unity domains. Just map it */
2859                 ddi_err(DER_LOG, domain->dom_dip,
2860                     "iommu: unity-domain: Adding map "
2861                     "[0x%" PRIx64 " - 0x%" PRIx64 "]", addr, addr + size);
2862 
2863                 start = IMMU_ROUNDOWN(addr);
2864                 npages = (IMMU_ROUNDUP(size) / IMMU_PAGESIZE) + 1;
2865 
2866                 dcookies[0].dck_paddr = start;
2867                 dcookies[0].dck_npages = npages;
2868                 dcount = 1;
2869                 (void) dvma_map(domain, start, npages,
2870                     dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
2871 
2872         }
2873         mutex_exit(&immu_domain_lock);
2874 }
2875 
2876 int
2877 immu_dvma_device_setup(dev_info_t *rdip, immu_flags_t immu_flags)
2878 {
2879         dev_info_t *ddip, *odip;
2880         immu_t *immu;
2881         domain_t *domain;
2882 
2883         odip = rdip;
2884 
2885         immu = immu_dvma_get_immu(rdip, immu_flags);
2886         if (immu == NULL) {
2887                 /*
2888                  * possible that there is no IOMMU unit for this device
2889                  * - BIOS bugs are one example.
2890                  */
2891                 ddi_err(DER_WARN, rdip, "No iommu unit found for device");
2892                 return (DDI_DMA_NORESOURCES);
2893         }
2894 
2895         /*
2896          * redirect isa devices attached under lpc to lpc dip
2897          */
2898         if (strcmp(ddi_node_name(ddi_get_parent(rdip)), "isa") == 0) {
2899                 rdip = get_lpc_devinfo(immu, rdip, immu_flags);
2900                 if (rdip == NULL) {
2901                         ddi_err(DER_PANIC, rdip, "iommu redirect failed");
2902                         /*NOTREACHED*/
2903                 }
2904         }
2905 
2906         /* Reset immu, as redirection can change IMMU */
2907         immu = NULL;
2908 
2909         /*
2910          * for gart, redirect to the real graphic devinfo
2911          */
2912         if (strcmp(ddi_node_name(rdip), "agpgart") == 0) {
2913                 rdip = get_gfx_devinfo(rdip);
2914                 if (rdip == NULL) {
2915                         ddi_err(DER_PANIC, rdip, "iommu redirect failed");
2916                         /*NOTREACHED*/
2917                 }
2918         }
2919 
2920         /*
2921          * Setup DVMA domain for the device. This does
2922          * work only the first time we do DVMA for a
2923          * device.
2924          */
2925         ddip = NULL;
2926         domain = device_domain(rdip, &ddip, immu_flags);
2927         if (domain == NULL) {
2928                 ddi_err(DER_MODE, rdip, "Intel IOMMU setup failed for device");
2929                 return (DDI_DMA_NORESOURCES);
2930         }
2931 
2932         immu = domain->dom_immu;
2933 
2934         /*
2935          * If a domain is found, we must also have a domain dip
2936          * which is the topmost ancestor dip of rdip that shares
2937          * the same domain with rdip.
2938          */
2939         if (domain->dom_did == 0 || ddip == NULL) {
2940                 ddi_err(DER_MODE, rdip, "domain did 0(%d) or ddip NULL(%p)",
2941                     domain->dom_did, ddip);
2942                 return (DDI_DMA_NORESOURCES);
2943         }
2944 
2945         if (odip != rdip)
2946                 set_domain(odip, ddip, domain);
2947 
2948         /*
2949          * Update the root and context entries
2950          */
2951         if (immu_context_update(immu, domain, ddip, rdip, immu_flags)
2952             != DDI_SUCCESS) {
2953                 ddi_err(DER_MODE, rdip, "DVMA map: context update failed");
2954                 return (DDI_DMA_NORESOURCES);
2955         }
2956 
2957         return (DDI_SUCCESS);
2958 }
2959 
2960 int
2961 immu_map_memrange(dev_info_t *rdip, memrng_t *mrng)
2962 {
2963         immu_dcookie_t dcookies[1] = {0};
2964         boolean_t pde_set;
2965         immu_t *immu;
2966         domain_t *domain;
2967         immu_inv_wait_t iw;
2968 
2969         dcookies[0].dck_paddr = mrng->mrng_start;
2970         dcookies[0].dck_npages = mrng->mrng_npages;
2971 
2972         domain = IMMU_DEVI(rdip)->imd_domain;
2973         immu = domain->dom_immu;
2974 
2975         pde_set = dvma_map(domain, mrng->mrng_start,
2976             mrng->mrng_npages, dcookies, 1, rdip,
2977             IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
2978 
2979         immu_init_inv_wait(&iw, "memrange", B_TRUE);
2980 
2981         immu_flush_iotlb_psi(immu, domain->dom_did, mrng->mrng_start,
2982             mrng->mrng_npages, pde_set == B_TRUE ?
2983             TLB_IVA_WHOLE : TLB_IVA_LEAF, &iw);
2984 
2985         return (DDI_SUCCESS);
2986 }
2987 
2988 immu_devi_t *
2989 immu_devi_get(dev_info_t *rdip)
2990 {
2991         immu_devi_t *immu_devi;
2992         volatile uintptr_t *vptr = (uintptr_t *)&(DEVI(rdip)->devi_iommu);
2993 
2994         /* Just want atomic reads. No need for lock */
2995         immu_devi = (immu_devi_t *)(uintptr_t)atomic_or_64_nv((uint64_t *)vptr,
2996             0);
2997         return (immu_devi);
2998 }
2999 
3000 /*ARGSUSED*/
3001 int
3002 immu_hdl_priv_ctor(void *buf, void *arg, int kmf)
3003 {
3004         immu_hdl_priv_t *ihp;
3005 
3006         ihp = buf;
3007         immu_init_inv_wait(&ihp->ihp_inv_wait, "dmahandle", B_FALSE);
3008 
3009         return (0);
3010 }
3011 
3012 /*
3013  * iommulib interface functions
3014  */
3015 static int
3016 immu_probe(iommulib_handle_t handle, dev_info_t *dip)
3017 {
3018         immu_devi_t *immu_devi;
3019         int ret;
3020 
3021         if (!immu_enable)
3022                 return (DDI_FAILURE);
3023 
3024         /*
3025          * Make sure the device has all the IOMMU structures
3026          * initialized. If this device goes through an IOMMU
3027          * unit (e.g. this probe function returns success),
3028          * this will be called at most N times, with N being
3029          * the number of IOMMUs in the system.
3030          *
3031          * After that, when iommulib_nex_open succeeds,
3032          * we can always assume that this device has all
3033          * the structures initialized. IOMMU_USED(dip) will
3034          * be true. There is no need to find the controlling
3035          * IOMMU/domain again.
3036          */
3037         ret = immu_dvma_device_setup(dip, IMMU_FLAGS_NOSLEEP);
3038         if (ret != DDI_SUCCESS)
3039                 return (ret);
3040 
3041         immu_devi = IMMU_DEVI(dip);
3042 
3043         /*
3044          * For unity domains, there is no need to call in to
3045          * the IOMMU code.
3046          */
3047         if (immu_devi->imd_domain->dom_did == IMMU_UNITY_DID)
3048                 return (DDI_FAILURE);
3049 
3050         if (immu_devi->imd_immu->immu_dip == iommulib_iommu_getdip(handle))
3051                 return (DDI_SUCCESS);
3052 
3053         return (DDI_FAILURE);
3054 }
3055 
3056 /*ARGSUSED*/
3057 static int
3058 immu_allochdl(iommulib_handle_t handle,
3059     dev_info_t *dip, dev_info_t *rdip, ddi_dma_attr_t *attr,
3060     int (*waitfp)(caddr_t), caddr_t arg, ddi_dma_handle_t *dma_handlep)
3061 {
3062         int ret;
3063         immu_hdl_priv_t *ihp;
3064         immu_t *immu;
3065 
3066         ret = iommulib_iommu_dma_allochdl(dip, rdip, attr, waitfp,
3067             arg, dma_handlep);
3068         if (ret == DDI_SUCCESS) {
3069                 immu = IMMU_DEVI(rdip)->imd_immu;
3070 
3071                 ihp = kmem_cache_alloc(immu->immu_hdl_cache,
3072                     waitfp == DDI_DMA_SLEEP ? KM_SLEEP : KM_NOSLEEP);
3073                 if (ihp == NULL) {
3074                         (void) iommulib_iommu_dma_freehdl(dip, rdip,
3075                             *dma_handlep);
3076                         return (DDI_DMA_NORESOURCES);
3077                 }
3078 
3079                 if (IMMU_DEVI(rdip)->imd_use_premap)
3080                         dvma_prealloc(rdip, ihp, attr);
3081                 else {
3082                         ihp->ihp_npremapped = 0;
3083                         ihp->ihp_predvma = 0;
3084                 }
3085                 ret = iommulib_iommu_dmahdl_setprivate(dip, rdip, *dma_handlep,
3086                     ihp);
3087         }
3088         return (ret);
3089 }
3090 
3091 /*ARGSUSED*/
3092 static int
3093 immu_freehdl(iommulib_handle_t handle,
3094     dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t dma_handle)
3095 {
3096         immu_hdl_priv_t *ihp;
3097 
3098         ihp = iommulib_iommu_dmahdl_getprivate(dip, rdip, dma_handle);
3099         if (ihp != NULL) {
3100                 if (IMMU_DEVI(rdip)->imd_use_premap)
3101                         dvma_prefree(rdip, ihp);
3102                 kmem_cache_free(IMMU_DEVI(rdip)->imd_immu->immu_hdl_cache, ihp);
3103         }
3104 
3105         return (iommulib_iommu_dma_freehdl(dip, rdip, dma_handle));
3106 }
3107 
3108 
3109 /*ARGSUSED*/
3110 static int
3111 immu_bindhdl(iommulib_handle_t handle, dev_info_t *dip,
3112     dev_info_t *rdip, ddi_dma_handle_t dma_handle,
3113     struct ddi_dma_req *dma_req, ddi_dma_cookie_t *cookiep,
3114     uint_t *ccountp)
3115 {
3116         int ret;
3117         immu_hdl_priv_t *ihp;
3118 
3119         ret = iommulib_iommu_dma_bindhdl(dip, rdip, dma_handle,
3120             dma_req, cookiep, ccountp);
3121 
3122         if (ret == DDI_DMA_MAPPED) {
3123                 ihp = iommulib_iommu_dmahdl_getprivate(dip, rdip, dma_handle);
3124                 immu_flush_wait(IMMU_DEVI(rdip)->imd_immu, &ihp->ihp_inv_wait);
3125         }
3126 
3127         return (ret);
3128 }
3129 
3130 /*ARGSUSED*/
3131 static int
3132 immu_unbindhdl(iommulib_handle_t handle,
3133     dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t dma_handle)
3134 {
3135         return (iommulib_iommu_dma_unbindhdl(dip, rdip, dma_handle));
3136 }
3137 
3138 /*ARGSUSED*/
3139 static int
3140 immu_sync(iommulib_handle_t handle, dev_info_t *dip,
3141     dev_info_t *rdip, ddi_dma_handle_t dma_handle, off_t off,
3142     size_t len, uint_t cachefl)
3143 {
3144         return (iommulib_iommu_dma_sync(dip, rdip, dma_handle, off, len,
3145             cachefl));
3146 }
3147 
3148 /*ARGSUSED*/
3149 static int
3150 immu_win(iommulib_handle_t handle, dev_info_t *dip,
3151     dev_info_t *rdip, ddi_dma_handle_t dma_handle, uint_t win,
3152     off_t *offp, size_t *lenp, ddi_dma_cookie_t *cookiep,
3153     uint_t *ccountp)
3154 {
3155         return (iommulib_iommu_dma_win(dip, rdip, dma_handle, win, offp,
3156             lenp, cookiep, ccountp));
3157 }
3158 
3159 /*ARGSUSED*/
3160 static int
3161 immu_mapobject(iommulib_handle_t handle, dev_info_t *dip,
3162     dev_info_t *rdip, ddi_dma_handle_t dma_handle,
3163     struct ddi_dma_req *dmareq, ddi_dma_obj_t *dmao)
3164 {
3165         immu_hdl_priv_t *ihp;
3166 
3167         ihp = iommulib_iommu_dmahdl_getprivate(dip, rdip, dma_handle);
3168 
3169         return (immu_map_dvmaseg(rdip, dma_handle, ihp, dmareq, dmao));
3170 }
3171 
3172 /*ARGSUSED*/
3173 static int
3174 immu_unmapobject(iommulib_handle_t handle, dev_info_t *dip,
3175     dev_info_t *rdip, ddi_dma_handle_t dma_handle, ddi_dma_obj_t *dmao)
3176 {
3177         immu_hdl_priv_t *ihp;
3178 
3179         ihp = iommulib_iommu_dmahdl_getprivate(dip, rdip, dma_handle);
3180         if (ihp->ihp_npremapped > 0)
3181                 return (DDI_SUCCESS);
3182         return (immu_unmap_dvmaseg(rdip, dmao));
3183 }