Print this page
    
8622 panic in PTE_set_all()
8623 IMMU_CONTIG_PADDR is broken for cookies with more than one page
8625 nvme causes bad free panic in IOMMU
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
    
      
        | Split | Close | 
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/i86pc/io/immu_dvma.c
          +++ new/usr/src/uts/i86pc/io/immu_dvma.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  
    | ↓ open down ↓ | 20 lines elided | ↑ open up ↑ | 
  21   21  /*
  22   22   * Portions Copyright (c) 2010, Oracle and/or its affiliates.
  23   23   * All rights reserved.
  24   24   */
  25   25  /*
  26   26   * Copyright (c) 2009, Intel Corporation.
  27   27   * All rights reserved.
  28   28   */
  29   29  /*
  30   30   * Copyright 2012 Garrett D'Amore <garrett@damore.org>.  All rights reserved.
       31 + * Copyright 2017 Joyent, Inc.
  31   32   */
  32   33  
  33   34  /*
  34   35   * DVMA code
  35   36   * This file contains Intel IOMMU code that deals with DVMA
  36   37   * i.e. DMA remapping.
  37   38   */
  38   39  
  39   40  #include <sys/sysmacros.h>
  40   41  #include <sys/pcie.h>
  41   42  #include <sys/pci_cfgspace.h>
  42   43  #include <vm/hat_i86.h>
  43   44  #include <sys/memlist.h>
  44   45  #include <sys/acpi/acpi.h>
  45   46  #include <sys/acpica.h>
  46   47  #include <sys/modhash.h>
  47   48  #include <sys/immu.h>
  48   49  #include <sys/x86_archext.h>
  49   50  #include <sys/archsystm.h>
  50   51  
  
    | ↓ open down ↓ | 10 lines elided | ↑ open up ↑ | 
  51   52  #undef  TEST
  52   53  
  53   54  /*
  54   55   * Macros based on PCI spec
  55   56   */
  56   57  #define IMMU_PCI_REV2CLASS(r)   ((r) >> 8)  /* classcode from revid */
  57   58  #define IMMU_PCI_CLASS2BASE(c)  ((c) >> 16) /* baseclass from classcode */
  58   59  #define IMMU_PCI_CLASS2SUB(c)   (((c) >> 8) & 0xff) /* subclass from classcode */
  59   60  
  60   61  #define IMMU_CONTIG_PADDR(d, p) \
  61      -        ((d).dck_paddr && ((d).dck_paddr + IMMU_PAGESIZE) == (p))
       62 +        ((d).dck_paddr && ((d).dck_paddr + (d).dck_npages * IMMU_PAGESIZE) \
       63 +            == (p))
  62   64  
  63   65  typedef struct dvma_arg {
  64   66          immu_t *dva_immu;
  65   67          dev_info_t *dva_rdip;
  66   68          dev_info_t *dva_ddip;
  67   69          domain_t *dva_domain;
  68   70          int dva_level;
  69   71          immu_flags_t dva_flags;
  70   72          list_t *dva_list;
  71   73          int dva_error;
  72   74  } dvma_arg_t;
  73   75  
  74   76  static domain_t *domain_create(immu_t *immu, dev_info_t *ddip,
  75   77      dev_info_t *rdip, immu_flags_t immu_flags);
  76   78  static immu_devi_t *create_immu_devi(dev_info_t *rdip, int bus,
  77   79      int dev, int func, immu_flags_t immu_flags);
  78   80  static void destroy_immu_devi(immu_devi_t *immu_devi);
  79   81  static boolean_t dvma_map(domain_t *domain, uint64_t sdvma,
  80   82      uint64_t nvpages, immu_dcookie_t *dcookies, int dcount, dev_info_t *rdip,
  81   83      immu_flags_t immu_flags);
  82   84  
  83   85  /* Extern globals */
  84   86  extern struct memlist  *phys_install;
  85   87  
  86   88  /*
  87   89   * iommulib interface functions.
  88   90   */
  89   91  static int immu_probe(iommulib_handle_t unitp, dev_info_t *dip);
  90   92  static int immu_allochdl(iommulib_handle_t handle,
  91   93      dev_info_t *dip, dev_info_t *rdip, ddi_dma_attr_t *attr,
  92   94      int (*waitfp)(caddr_t), caddr_t arg, ddi_dma_handle_t *dma_handlep);
  93   95  static int immu_freehdl(iommulib_handle_t handle,
  94   96      dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t dma_handle);
  95   97  static int immu_bindhdl(iommulib_handle_t handle, dev_info_t *dip,
  96   98      dev_info_t *rdip, ddi_dma_handle_t dma_handle, struct ddi_dma_req *dma_req,
  97   99      ddi_dma_cookie_t *cookiep, uint_t *ccountp);
  98  100  static int immu_unbindhdl(iommulib_handle_t handle,
  99  101      dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t dma_handle);
 100  102  static int immu_sync(iommulib_handle_t handle, dev_info_t *dip,
 101  103      dev_info_t *rdip, ddi_dma_handle_t dma_handle, off_t off, size_t len,
 102  104      uint_t cachefl);
 103  105  static int immu_win(iommulib_handle_t handle, dev_info_t *dip,
 104  106      dev_info_t *rdip, ddi_dma_handle_t dma_handle, uint_t win,
 105  107      off_t *offp, size_t *lenp, ddi_dma_cookie_t *cookiep, uint_t *ccountp);
 106  108  static int immu_mapobject(iommulib_handle_t handle, dev_info_t *dip,
 107  109      dev_info_t *rdip, ddi_dma_handle_t dma_handle,
 108  110      struct ddi_dma_req *dmareq, ddi_dma_obj_t *dmao);
 109  111  static int immu_unmapobject(iommulib_handle_t handle, dev_info_t *dip,
 110  112      dev_info_t *rdip, ddi_dma_handle_t dma_handle, ddi_dma_obj_t *dmao);
 111  113  
 112  114  /* static Globals */
 113  115  
 114  116  /*
 115  117   * Used to setup DMA objects (memory regions)
 116  118   * for DMA reads by IOMMU units
 117  119   */
 118  120  static ddi_dma_attr_t immu_dma_attr = {
 119  121          DMA_ATTR_V0,
 120  122          0U,
 121  123          0xffffffffffffffffULL,
 122  124          0xffffffffU,
 123  125          MMU_PAGESIZE, /* MMU page aligned */
 124  126          0x1,
 125  127          0x1,
 126  128          0xffffffffU,
 127  129          0xffffffffffffffffULL,
 128  130          1,
 129  131          4,
 130  132          0
 131  133  };
 132  134  
 133  135  static ddi_device_acc_attr_t immu_acc_attr = {
 134  136          DDI_DEVICE_ATTR_V0,
 135  137          DDI_NEVERSWAP_ACC,
 136  138          DDI_STRICTORDER_ACC
 137  139  };
 138  140  
 139  141  struct iommulib_ops immulib_ops = {
 140  142          IOMMU_OPS_VERSION,
 141  143          INTEL_IOMMU,
 142  144          "Intel IOMMU",
 143  145          NULL,
 144  146          immu_probe,
 145  147          immu_allochdl,
 146  148          immu_freehdl,
 147  149          immu_bindhdl,
 148  150          immu_unbindhdl,
 149  151          immu_sync,
 150  152          immu_win,
 151  153          immu_mapobject,
 152  154          immu_unmapobject,
 153  155  };
 154  156  
 155  157  /*
 156  158   * Fake physical address range used to set up initial prealloc mappings.
 157  159   * This memory is never actually accessed. It is mapped read-only,
 158  160   * and is overwritten as soon as the first DMA bind operation is
 159  161   * performed. Since 0 is a special case, just start at the 2nd
 160  162   * physical page.
 161  163   */
 162  164  
 163  165  static immu_dcookie_t immu_precookie = { MMU_PAGESIZE, IMMU_NPREPTES };
 164  166  
 165  167  /* globals private to this file */
 166  168  static kmutex_t immu_domain_lock;
 167  169  static list_t immu_unity_domain_list;
 168  170  static list_t immu_xlate_domain_list;
 169  171  
 170  172  /* structure used to store idx into each level of the page tables */
 171  173  typedef struct xlate {
 172  174          int xlt_level;
 173  175          uint_t xlt_idx;
 174  176          pgtable_t *xlt_pgtable;
 175  177  } xlate_t;
 176  178  
  177  179  /* 0 is reserved by the VT-d spec. Solaris reserves 1 */
 178  180  #define IMMU_UNITY_DID   1
 179  181  
 180  182  static mod_hash_t *bdf_domain_hash;
 181  183  
 182  184  int immu_use_alh;
 183  185  int immu_use_tm;
 184  186  
 185  187  static domain_t *
 186  188  bdf_domain_lookup(immu_devi_t *immu_devi)
 187  189  {
 188  190          domain_t *domain;
 189  191          int16_t seg = immu_devi->imd_seg;
 190  192          int16_t bus = immu_devi->imd_bus;
 191  193          int16_t devfunc = immu_devi->imd_devfunc;
 192  194          uintptr_t bdf = (seg << 16 | bus << 8 | devfunc);
 193  195  
 194  196          if (seg < 0 || bus < 0 || devfunc < 0) {
 195  197                  return (NULL);
 196  198          }
 197  199  
 198  200          domain = NULL;
 199  201          if (mod_hash_find(bdf_domain_hash,
 200  202              (void *)bdf, (void *)&domain) == 0) {
 201  203                  ASSERT(domain);
 202  204                  ASSERT(domain->dom_did > 0);
 203  205                  return (domain);
 204  206          } else {
 205  207                  return (NULL);
 206  208          }
 207  209  }
 208  210  
 209  211  static void
 210  212  bdf_domain_insert(immu_devi_t *immu_devi, domain_t *domain)
 211  213  {
 212  214          int16_t seg = immu_devi->imd_seg;
 213  215          int16_t bus = immu_devi->imd_bus;
 214  216          int16_t devfunc = immu_devi->imd_devfunc;
 215  217          uintptr_t bdf = (seg << 16 | bus << 8 | devfunc);
 216  218  
 217  219          if (seg < 0 || bus < 0 || devfunc < 0) {
 218  220                  return;
 219  221          }
 220  222  
 221  223          (void) mod_hash_insert(bdf_domain_hash, (void *)bdf, (void *)domain);
 222  224  }
 223  225  
 224  226  static int
 225  227  match_lpc(dev_info_t *pdip, void *arg)
 226  228  {
 227  229          immu_devi_t *immu_devi;
 228  230          dvma_arg_t *dvap = (dvma_arg_t *)arg;
 229  231  
 230  232          if (list_is_empty(dvap->dva_list)) {
 231  233                  return (DDI_WALK_TERMINATE);
 232  234          }
 233  235  
 234  236          immu_devi = list_head(dvap->dva_list);
 235  237          for (; immu_devi; immu_devi = list_next(dvap->dva_list,
 236  238              immu_devi)) {
 237  239                  if (immu_devi->imd_dip == pdip) {
 238  240                          dvap->dva_ddip = pdip;
 239  241                          dvap->dva_error = DDI_SUCCESS;
 240  242                          return (DDI_WALK_TERMINATE);
 241  243                  }
 242  244          }
 243  245  
 244  246          return (DDI_WALK_CONTINUE);
 245  247  }
 246  248  
 247  249  static void
 248  250  immu_devi_set_spclist(dev_info_t *dip, immu_t *immu)
 249  251  {
 250  252          list_t *spclist = NULL;
 251  253          immu_devi_t *immu_devi;
 252  254  
 253  255          immu_devi = IMMU_DEVI(dip);
 254  256          if (immu_devi->imd_display == B_TRUE) {
 255  257                  spclist = &(immu->immu_dvma_gfx_list);
 256  258          } else if (immu_devi->imd_lpc == B_TRUE) {
 257  259                  spclist = &(immu->immu_dvma_lpc_list);
 258  260          }
 259  261  
 260  262          if (spclist) {
 261  263                  mutex_enter(&(immu->immu_lock));
 262  264                  list_insert_head(spclist, immu_devi);
 263  265                  mutex_exit(&(immu->immu_lock));
 264  266          }
 265  267  }
 266  268  
 267  269  /*
 268  270   * Set the immu_devi struct in the immu_devi field of a devinfo node
 269  271   */
 270  272  int
 271  273  immu_devi_set(dev_info_t *dip, immu_flags_t immu_flags)
 272  274  {
 273  275          int bus, dev, func;
 274  276          immu_devi_t *new_imd;
 275  277          immu_devi_t *immu_devi;
 276  278  
 277  279          immu_devi = immu_devi_get(dip);
 278  280          if (immu_devi != NULL) {
 279  281                  return (DDI_SUCCESS);
 280  282          }
 281  283  
 282  284          bus = dev = func = -1;
 283  285  
 284  286          /*
 285  287           * Assume a new immu_devi struct is needed
 286  288           */
 287  289          if (!DEVI_IS_PCI(dip) || acpica_get_bdf(dip, &bus, &dev, &func) != 0) {
 288  290                  /*
 289  291                   * No BDF. Set bus = -1 to indicate this.
 290  292                   * We still need to create a immu_devi struct
 291  293                   * though
 292  294                   */
 293  295                  bus = -1;
 294  296                  dev = 0;
 295  297                  func = 0;
 296  298          }
 297  299  
 298  300          new_imd = create_immu_devi(dip, bus, dev, func, immu_flags);
 299  301          if (new_imd  == NULL) {
 300  302                  ddi_err(DER_WARN, dip, "Failed to create immu_devi "
 301  303                      "structure");
 302  304                  return (DDI_FAILURE);
 303  305          }
 304  306  
 305  307          /*
 306  308           * Check if some other thread allocated a immu_devi while we
 307  309           * didn't own the lock.
 308  310           */
 309  311          mutex_enter(&(DEVI(dip)->devi_lock));
 310  312          if (IMMU_DEVI(dip) == NULL) {
 311  313                  IMMU_DEVI_SET(dip, new_imd);
 312  314          } else {
 313  315                  destroy_immu_devi(new_imd);
 314  316          }
 315  317          mutex_exit(&(DEVI(dip)->devi_lock));
 316  318  
 317  319          return (DDI_SUCCESS);
 318  320  }
 319  321  
 320  322  static dev_info_t *
 321  323  get_lpc_devinfo(immu_t *immu, dev_info_t *rdip, immu_flags_t immu_flags)
 322  324  {
 323  325          dvma_arg_t dvarg = {0};
 324  326          dvarg.dva_list = &(immu->immu_dvma_lpc_list);
 325  327          dvarg.dva_rdip = rdip;
 326  328          dvarg.dva_error = DDI_FAILURE;
 327  329  
 328  330          if (immu_walk_ancestor(rdip, NULL, match_lpc,
 329  331              &dvarg, NULL, immu_flags) != DDI_SUCCESS) {
 330  332                  ddi_err(DER_MODE, rdip, "Could not walk ancestors to "
 331  333                      "find lpc_devinfo for ISA device");
 332  334                  return (NULL);
 333  335          }
 334  336  
 335  337          if (dvarg.dva_error != DDI_SUCCESS || dvarg.dva_ddip == NULL) {
 336  338                  ddi_err(DER_MODE, rdip, "Could not find lpc_devinfo for "
 337  339                      "ISA device");
 338  340                  return (NULL);
 339  341          }
 340  342  
 341  343          return (dvarg.dva_ddip);
 342  344  }
 343  345  
 344  346  static dev_info_t *
 345  347  get_gfx_devinfo(dev_info_t *rdip)
 346  348  {
 347  349          immu_t *immu;
 348  350          immu_devi_t *immu_devi;
 349  351          list_t *list_gfx;
 350  352  
 351  353          /*
 352  354           * The GFX device may not be on the same iommu unit as "agpgart"
 353  355           * so search globally
 354  356           */
 355  357          immu_devi = NULL;
 356  358          immu = list_head(&immu_list);
 357  359          for (; immu; immu = list_next(&immu_list, immu)) {
 358  360                  list_gfx = &(immu->immu_dvma_gfx_list);
 359  361                  if (!list_is_empty(list_gfx)) {
 360  362                          immu_devi = list_head(list_gfx);
 361  363                          break;
 362  364                  }
 363  365          }
 364  366  
 365  367          if (immu_devi == NULL) {
 366  368                  ddi_err(DER_WARN, rdip, "iommu: No GFX device. "
 367  369                      "Cannot redirect agpgart");
 368  370                  return (NULL);
 369  371          }
 370  372  
 371  373          ddi_err(DER_LOG, rdip, "iommu: GFX redirect to %s",
 372  374              ddi_node_name(immu_devi->imd_dip));
 373  375  
 374  376          return (immu_devi->imd_dip);
 375  377  }
 376  378  
 377  379  static immu_flags_t
 378  380  dma_to_immu_flags(struct ddi_dma_req *dmareq)
 379  381  {
 380  382          immu_flags_t flags = 0;
 381  383  
 382  384          if (dmareq->dmar_fp == DDI_DMA_SLEEP) {
 383  385                  flags |= IMMU_FLAGS_SLEEP;
 384  386          } else {
 385  387                  flags |= IMMU_FLAGS_NOSLEEP;
 386  388          }
 387  389  
 388  390  #ifdef BUGGY_DRIVERS
 389  391  
 390  392          flags |= (IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
 391  393  
 392  394  #else
 393  395          /*
 394  396           * Read and write flags need to be reversed.
 395  397           * DMA_READ means read from device and write
 396  398           * to memory. So DMA read means DVMA write.
 397  399           */
 398  400          if (dmareq->dmar_flags & DDI_DMA_READ)
 399  401                  flags |= IMMU_FLAGS_WRITE;
 400  402  
 401  403          if (dmareq->dmar_flags & DDI_DMA_WRITE)
 402  404                  flags |= IMMU_FLAGS_READ;
 403  405  
 404  406          /*
  405  407           * Some buggy drivers specify neither READ nor WRITE.
  406  408           * For such drivers set both read and write permissions.
 407  409           */
 408  410          if ((dmareq->dmar_flags & (DDI_DMA_READ | DDI_DMA_WRITE)) == 0) {
 409  411                  flags |= (IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
 410  412          }
 411  413  #endif
 412  414  
 413  415          return (flags);
 414  416  }
 415  417  
 416  418  /*ARGSUSED*/
 417  419  int
 418  420  pgtable_ctor(void *buf, void *arg, int kmflag)
 419  421  {
 420  422          size_t actual_size = 0;
 421  423          pgtable_t *pgtable;
 422  424          int (*dmafp)(caddr_t);
 423  425          caddr_t vaddr;
 424  426          void *next;
 425  427          uint_t flags;
 426  428          immu_t *immu = arg;
 427  429  
 428  430          pgtable = (pgtable_t *)buf;
 429  431  
 430  432          dmafp = (kmflag & KM_NOSLEEP) ? DDI_DMA_DONTWAIT : DDI_DMA_SLEEP;
 431  433  
 432  434          next = kmem_zalloc(IMMU_PAGESIZE, kmflag);
 433  435          if (next == NULL) {
 434  436                  return (-1);
 435  437          }
 436  438  
 437  439          if (ddi_dma_alloc_handle(root_devinfo, &immu_dma_attr,
 438  440              dmafp, NULL, &pgtable->hwpg_dmahdl) != DDI_SUCCESS) {
 439  441                  kmem_free(next, IMMU_PAGESIZE);
 440  442                  return (-1);
 441  443          }
 442  444  
 443  445          flags = DDI_DMA_CONSISTENT;
 444  446          if (!immu->immu_dvma_coherent)
 445  447                  flags |= IOMEM_DATA_UC_WR_COMBINE;
 446  448  
 447  449          if (ddi_dma_mem_alloc(pgtable->hwpg_dmahdl, IMMU_PAGESIZE,
 448  450              &immu_acc_attr, flags,
 449  451              dmafp, NULL, &vaddr, &actual_size,
 450  452              &pgtable->hwpg_memhdl) != DDI_SUCCESS) {
 451  453                  ddi_dma_free_handle(&pgtable->hwpg_dmahdl);
 452  454                  kmem_free(next, IMMU_PAGESIZE);
 453  455                  return (-1);
 454  456          }
 455  457  
 456  458          /*
 457  459           * Memory allocation failure. Maybe a temporary condition
 458  460           * so return error rather than panic, so we can try again
 459  461           */
 460  462          if (actual_size < IMMU_PAGESIZE) {
 461  463                  ddi_dma_mem_free(&pgtable->hwpg_memhdl);
 462  464                  ddi_dma_free_handle(&pgtable->hwpg_dmahdl);
 463  465                  kmem_free(next, IMMU_PAGESIZE);
 464  466                  return (-1);
 465  467          }
 466  468  
 467  469          pgtable->hwpg_paddr = pfn_to_pa(hat_getpfnum(kas.a_hat, vaddr));
 468  470          pgtable->hwpg_vaddr = vaddr;
 469  471          pgtable->swpg_next_array = next;
 470  472  
 471  473          rw_init(&(pgtable->swpg_rwlock), NULL, RW_DEFAULT, NULL);
 472  474  
 473  475          return (0);
 474  476  }
 475  477  
 476  478  /*ARGSUSED*/
 477  479  void
 478  480  pgtable_dtor(void *buf, void *arg)
 479  481  {
 480  482          pgtable_t *pgtable;
 481  483  
 482  484          pgtable = (pgtable_t *)buf;
 483  485  
 484  486          /* destroy will panic if lock is held. */
 485  487          rw_destroy(&(pgtable->swpg_rwlock));
 486  488  
 487  489          ddi_dma_mem_free(&pgtable->hwpg_memhdl);
 488  490          ddi_dma_free_handle(&pgtable->hwpg_dmahdl);
 489  491          kmem_free(pgtable->swpg_next_array, IMMU_PAGESIZE);
 490  492  }
 491  493  
 492  494  /*
 493  495   * pgtable_alloc()
  494  496   *      Allocate an IOMMU pgtable structure.
  495  497   *      This same struct is used for root and context tables as well.
  496  498   *      This routine allocs the following:
  497  499   *      - a pgtable_t struct
  498  500   *      - a HW page which holds PTEs/entries which is accessed by HW
  499  501   *        so we set up DMA for this page
  500  502   *      - a SW page which is only for our bookkeeping
  501  503   *        (for example to hold pointers to the next level pgtable).
  502  504   *        So a simple kmem_alloc suffices
 503  505   */
 504  506  static pgtable_t *
 505  507  pgtable_alloc(immu_t *immu, immu_flags_t immu_flags)
 506  508  {
 507  509          pgtable_t *pgtable;
 508  510          int kmflags;
 509  511  
 510  512          kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
 511  513  
 512  514          pgtable = kmem_cache_alloc(immu->immu_pgtable_cache, kmflags);
 513  515          if (pgtable == NULL) {
 514  516                  return (NULL);
 515  517          }
 516  518          return (pgtable);
 517  519  }
 518  520  
 519  521  static void
 520  522  pgtable_zero(pgtable_t *pgtable)
 521  523  {
 522  524          bzero(pgtable->hwpg_vaddr, IMMU_PAGESIZE);
 523  525          bzero(pgtable->swpg_next_array, IMMU_PAGESIZE);
 524  526  }
 525  527  
 526  528  static void
 527  529  pgtable_free(immu_t *immu, pgtable_t *pgtable)
 528  530  {
 529  531          kmem_cache_free(immu->immu_pgtable_cache, pgtable);
 530  532  }
 531  533  
 532  534  /*
 533  535   * Function to identify a display device from the PCI class code
 534  536   */
 535  537  static boolean_t
 536  538  device_is_display(uint_t classcode)
 537  539  {
 538  540          static uint_t disp_classes[] = {
 539  541                  0x000100,
 540  542                  0x030000,
 541  543                  0x030001
 542  544          };
 543  545          int i, nclasses = sizeof (disp_classes) / sizeof (uint_t);
 544  546  
 545  547          for (i = 0; i < nclasses; i++) {
 546  548                  if (classcode == disp_classes[i])
 547  549                          return (B_TRUE);
 548  550          }
 549  551          return (B_FALSE);
 550  552  }
 551  553  
 552  554  /*
 553  555   * Function that determines if device is PCIEX and/or PCIEX bridge
 554  556   */
 555  557  static boolean_t
 556  558  device_is_pciex(
 557  559          uchar_t bus, uchar_t dev, uchar_t func, boolean_t *is_pcib)
 558  560  {
 559  561          ushort_t cap;
 560  562          ushort_t capsp;
 561  563          ushort_t cap_count = PCI_CAP_MAX_PTR;
 562  564          ushort_t status;
 563  565          boolean_t is_pciex = B_FALSE;
 564  566  
 565  567          *is_pcib = B_FALSE;
 566  568  
 567  569          status = pci_getw_func(bus, dev, func, PCI_CONF_STAT);
 568  570          if (!(status & PCI_STAT_CAP))
 569  571                  return (B_FALSE);
 570  572  
 571  573          capsp = pci_getb_func(bus, dev, func, PCI_CONF_CAP_PTR);
 572  574          while (cap_count-- && capsp >= PCI_CAP_PTR_OFF) {
 573  575                  capsp &= PCI_CAP_PTR_MASK;
 574  576                  cap = pci_getb_func(bus, dev, func, capsp);
 575  577  
 576  578                  if (cap == PCI_CAP_ID_PCI_E) {
 577  579                          status = pci_getw_func(bus, dev, func, capsp + 2);
 578  580                          /*
 579  581                           * See section 7.8.2 of PCI-Express Base Spec v1.0a
 580  582                           * for Device/Port Type.
 581  583                           * PCIE_PCIECAP_DEV_TYPE_PCIE2PCI implies that the
 582  584                           * device is a PCIE2PCI bridge
 583  585                           */
 584  586                          *is_pcib =
 585  587                              ((status & PCIE_PCIECAP_DEV_TYPE_MASK) ==
 586  588                              PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) ? B_TRUE : B_FALSE;
 587  589                          is_pciex = B_TRUE;
 588  590                  }
 589  591  
 590  592                  capsp = (*pci_getb_func)(bus, dev, func,
 591  593                      capsp + PCI_CAP_NEXT_PTR);
 592  594          }
 593  595  
 594  596          return (is_pciex);
 595  597  }
 596  598  
 597  599  static boolean_t
 598  600  device_use_premap(uint_t classcode)
 599  601  {
 600  602          if (IMMU_PCI_CLASS2BASE(classcode) == PCI_CLASS_NET)
 601  603                  return (B_TRUE);
 602  604          return (B_FALSE);
 603  605  }
 604  606  
 605  607  
 606  608  /*
 607  609   * immu_dvma_get_immu()
 608  610   *   get the immu unit structure for a dev_info node
 609  611   */
 610  612  immu_t *
 611  613  immu_dvma_get_immu(dev_info_t *dip, immu_flags_t immu_flags)
 612  614  {
 613  615          immu_devi_t *immu_devi;
 614  616          immu_t *immu;
 615  617  
 616  618          /*
 617  619           * check if immu unit was already found earlier.
 618  620           * If yes, then it will be stashed in immu_devi struct.
 619  621           */
 620  622          immu_devi = immu_devi_get(dip);
 621  623          if (immu_devi == NULL) {
 622  624                  if (immu_devi_set(dip, immu_flags) != DDI_SUCCESS) {
 623  625                          /*
  624  626                           * May fail because of low memory. NOTE: this path
  625  627                           * panics (DER_PANIC) instead of letting the driver retry.
 626  628                           */
 627  629                          ddi_err(DER_PANIC, dip, "immu_dvma_get_immu: "
 628  630                              "No immu_devi structure");
 629  631                          /*NOTREACHED*/
 630  632                  }
 631  633                  immu_devi = immu_devi_get(dip);
 632  634          }
 633  635  
 634  636          mutex_enter(&(DEVI(dip)->devi_lock));
 635  637          if (immu_devi->imd_immu) {
 636  638                  immu = immu_devi->imd_immu;
 637  639                  mutex_exit(&(DEVI(dip)->devi_lock));
 638  640                  return (immu);
 639  641          }
 640  642          mutex_exit(&(DEVI(dip)->devi_lock));
 641  643  
 642  644          immu = immu_dmar_get_immu(dip);
 643  645          if (immu == NULL) {
 644  646                  ddi_err(DER_PANIC, dip, "immu_dvma_get_immu: "
 645  647                      "Cannot find immu_t for device");
 646  648                  /*NOTREACHED*/
 647  649          }
 648  650  
 649  651          /*
 650  652           * Check if some other thread found immu
 651  653           * while lock was not held
 652  654           */
 653  655          immu_devi = immu_devi_get(dip);
 654  656          /* immu_devi should be present as we found it earlier */
 655  657          if (immu_devi == NULL) {
 656  658                  ddi_err(DER_PANIC, dip,
 657  659                      "immu_dvma_get_immu: No immu_devi structure");
 658  660                  /*NOTREACHED*/
 659  661          }
 660  662  
 661  663          mutex_enter(&(DEVI(dip)->devi_lock));
 662  664          if (immu_devi->imd_immu == NULL) {
 663  665                  /* nobody else set it, so we should do it */
 664  666                  immu_devi->imd_immu = immu;
 665  667                  immu_devi_set_spclist(dip, immu);
 666  668          } else {
 667  669                  /*
 668  670                   * if some other thread got immu before
 669  671                   * us, it should get the same results
 670  672                   */
 671  673                  if (immu_devi->imd_immu != immu) {
 672  674                          ddi_err(DER_PANIC, dip, "Multiple "
 673  675                              "immu units found for device. Expected (%p), "
 674  676                              "actual (%p)", (void *)immu,
 675  677                              (void *)immu_devi->imd_immu);
 676  678                          mutex_exit(&(DEVI(dip)->devi_lock));
 677  679                          /*NOTREACHED*/
 678  680                  }
 679  681          }
 680  682          mutex_exit(&(DEVI(dip)->devi_lock));
 681  683  
 682  684          return (immu);
 683  685  }
 684  686  
 685  687  
 686  688  /* ############################# IMMU_DEVI code ############################ */
 687  689  
 688  690  /*
 689  691   * Allocate a immu_devi structure and initialize it
 690  692   */
 691  693  static immu_devi_t *
 692  694  create_immu_devi(dev_info_t *rdip, int bus, int dev, int func,
 693  695      immu_flags_t immu_flags)
 694  696  {
 695  697          uchar_t baseclass, subclass;
 696  698          uint_t classcode, revclass;
 697  699          immu_devi_t *immu_devi;
 698  700          boolean_t pciex = B_FALSE;
 699  701          int kmflags;
 700  702          boolean_t is_pcib = B_FALSE;
 701  703  
  702  704          /* bus == -1 indicates a non-PCI device (no BDF) */
 703  705          ASSERT(bus == -1 || bus >= 0);
 704  706          ASSERT(dev >= 0);
 705  707          ASSERT(func >= 0);
 706  708  
 707  709          kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
 708  710          immu_devi = kmem_zalloc(sizeof (immu_devi_t), kmflags);
 709  711          if (immu_devi == NULL) {
 710  712                  ddi_err(DER_WARN, rdip, "Failed to allocate memory for "
 711  713                      "Intel IOMMU immu_devi structure");
 712  714                  return (NULL);
 713  715          }
 714  716          immu_devi->imd_dip = rdip;
 715  717          immu_devi->imd_seg = 0; /* Currently seg can only be 0 */
 716  718          immu_devi->imd_bus = bus;
 717  719          immu_devi->imd_pcib_type = IMMU_PCIB_BAD;
 718  720  
 719  721          if (bus == -1) {
 720  722                  immu_devi->imd_pcib_type = IMMU_PCIB_NOBDF;
 721  723                  return (immu_devi);
 722  724          }
 723  725  
 724  726          immu_devi->imd_devfunc = IMMU_PCI_DEVFUNC(dev, func);
 725  727          immu_devi->imd_sec = 0;
 726  728          immu_devi->imd_sub = 0;
 727  729  
 728  730          revclass = pci_getl_func(bus, dev, func, PCI_CONF_REVID);
 729  731  
 730  732          classcode = IMMU_PCI_REV2CLASS(revclass);
 731  733          baseclass = IMMU_PCI_CLASS2BASE(classcode);
 732  734          subclass = IMMU_PCI_CLASS2SUB(classcode);
 733  735  
 734  736          if (baseclass == PCI_CLASS_BRIDGE && subclass == PCI_BRIDGE_PCI) {
 735  737  
 736  738                  immu_devi->imd_sec = pci_getb_func(bus, dev, func,
 737  739                      PCI_BCNF_SECBUS);
 738  740                  immu_devi->imd_sub = pci_getb_func(bus, dev, func,
 739  741                      PCI_BCNF_SUBBUS);
 740  742  
 741  743                  pciex = device_is_pciex(bus, dev, func, &is_pcib);
 742  744                  if (pciex  == B_TRUE && is_pcib == B_TRUE) {
 743  745                          immu_devi->imd_pcib_type = IMMU_PCIB_PCIE_PCI;
 744  746                  } else if (pciex == B_TRUE) {
 745  747                          immu_devi->imd_pcib_type = IMMU_PCIB_PCIE_PCIE;
 746  748                  } else {
 747  749                          immu_devi->imd_pcib_type = IMMU_PCIB_PCI_PCI;
 748  750                  }
 749  751          } else {
 750  752                  immu_devi->imd_pcib_type = IMMU_PCIB_ENDPOINT;
 751  753          }
 752  754  
 753  755          /* check for certain special devices */
 754  756          immu_devi->imd_display = device_is_display(classcode);
 755  757          immu_devi->imd_lpc = ((baseclass == PCI_CLASS_BRIDGE) &&
 756  758              (subclass == PCI_BRIDGE_ISA)) ? B_TRUE : B_FALSE;
 757  759          immu_devi->imd_use_premap = device_use_premap(classcode);
 758  760  
 759  761          immu_devi->imd_domain = NULL;
 760  762  
 761  763          immu_devi->imd_dvma_flags = immu_global_dvma_flags;
 762  764  
 763  765          return (immu_devi);
 764  766  }
 765  767  
 766  768  static void
 767  769  destroy_immu_devi(immu_devi_t *immu_devi)
 768  770  {
 769  771          kmem_free(immu_devi, sizeof (immu_devi_t));
 770  772  }
 771  773  
 772  774  static domain_t *
 773  775  immu_devi_domain(dev_info_t *rdip, dev_info_t **ddipp)
 774  776  {
 775  777          immu_devi_t *immu_devi;
 776  778          domain_t *domain;
 777  779          dev_info_t *ddip;
 778  780  
 779  781          *ddipp = NULL;
 780  782  
 781  783          immu_devi = immu_devi_get(rdip);
 782  784          if (immu_devi == NULL) {
 783  785                  return (NULL);
 784  786          }
 785  787  
 786  788          mutex_enter(&(DEVI(rdip)->devi_lock));
 787  789          domain = immu_devi->imd_domain;
 788  790          ddip = immu_devi->imd_ddip;
 789  791          mutex_exit(&(DEVI(rdip)->devi_lock));
 790  792  
 791  793          if (domain)
 792  794                  *ddipp = ddip;
 793  795  
 794  796          return (domain);
 795  797  
 796  798  }
 797  799  
 798  800  /* ############################# END IMMU_DEVI code ######################## */
 799  801  /* ############################# DOMAIN code ############################### */
 800  802  
 801  803  /*
 802  804   * This routine always succeeds
 803  805   */
 804  806  static int
 805  807  did_alloc(immu_t *immu, dev_info_t *rdip,
 806  808      dev_info_t *ddip, immu_flags_t immu_flags)
 807  809  {
 808  810          int did;
 809  811  
 810  812          did = (uintptr_t)vmem_alloc(immu->immu_did_arena, 1,
 811  813              (immu_flags & IMMU_FLAGS_NOSLEEP) ? VM_NOSLEEP : VM_SLEEP);
 812  814  
 813  815          if (did == 0) {
 814  816                  ddi_err(DER_WARN, rdip, "device domain-id alloc error"
 815  817                      " domain-device: %s%d. immu unit is %s. Using "
 816  818                      "unity domain with domain-id (%d)",
 817  819                      ddi_driver_name(ddip), ddi_get_instance(ddip),
 818  820                      immu->immu_name, immu->immu_unity_domain->dom_did);
 819  821                  did = immu->immu_unity_domain->dom_did;
 820  822          }
 821  823  
 822  824          return (did);
 823  825  }
 824  826  
 825  827  static int
 826  828  get_branch_domain(dev_info_t *pdip, void *arg)
 827  829  {
 828  830          immu_devi_t *immu_devi;
 829  831          domain_t *domain;
 830  832          dev_info_t *ddip;
 831  833          immu_t *immu;
 832  834          dvma_arg_t *dvp = (dvma_arg_t *)arg;
 833  835  
 834  836          /*
 835  837           * The field dvp->dva_rdip is a work-in-progress
 836  838           * and gets updated as we walk up the ancestor
 837  839           * tree. The final ddip is set only when we reach
 838  840           * the top of the tree. So the dvp->dva_ddip field cannot
 839  841           * be relied on until we reach the top of the field.
 840  842           */
 841  843  
 842  844          /* immu_devi may not be set. */
 843  845          immu_devi = immu_devi_get(pdip);
 844  846          if (immu_devi == NULL) {
 845  847                  if (immu_devi_set(pdip, dvp->dva_flags) != DDI_SUCCESS) {
 846  848                          dvp->dva_error = DDI_FAILURE;
 847  849                          return (DDI_WALK_TERMINATE);
 848  850                  }
 849  851          }
 850  852  
 851  853          immu_devi = immu_devi_get(pdip);
 852  854          immu = immu_devi->imd_immu;
 853  855          if (immu == NULL)
 854  856                  immu = immu_dvma_get_immu(pdip, dvp->dva_flags);
 855  857  
 856  858          /*
 857  859           * If we encounter a PCIE_PCIE bridge *ANCESTOR* we need to
 858  860           * terminate the walk (since the device under the PCIE bridge
 859  861           * is a PCIE device and has an independent entry in the
 860  862           * root/context table)
 861  863           */
 862  864          if (dvp->dva_rdip != pdip &&
 863  865              immu_devi->imd_pcib_type == IMMU_PCIB_PCIE_PCIE) {
 864  866                  return (DDI_WALK_TERMINATE);
 865  867          }
 866  868  
 867  869          /*
 868  870           * In order to be a domain-dim, it must be a PCI device i.e.
 869  871           * must have valid BDF. This also eliminates the root complex.
 870  872           */
 871  873          if (immu_devi->imd_pcib_type != IMMU_PCIB_BAD &&
 872  874              immu_devi->imd_pcib_type != IMMU_PCIB_NOBDF) {
 873  875                  ASSERT(immu_devi->imd_bus >= 0);
 874  876                  ASSERT(immu_devi->imd_devfunc >= 0);
 875  877                  dvp->dva_ddip = pdip;
 876  878          }
 877  879  
 878  880          if (immu_devi->imd_display == B_TRUE ||
 879  881              (dvp->dva_flags & IMMU_FLAGS_UNITY)) {
 880  882                  dvp->dva_domain = immu->immu_unity_domain;
 881  883                  /* continue walking to find ddip */
 882  884                  return (DDI_WALK_CONTINUE);
 883  885          }
 884  886  
 885  887          mutex_enter(&(DEVI(pdip)->devi_lock));
 886  888          domain = immu_devi->imd_domain;
 887  889          ddip = immu_devi->imd_ddip;
 888  890          mutex_exit(&(DEVI(pdip)->devi_lock));
 889  891  
 890  892          if (domain && ddip) {
 891  893                  /* if domain is set, it must be the same */
 892  894                  if (dvp->dva_domain) {
 893  895                          ASSERT(domain == dvp->dva_domain);
 894  896                  }
 895  897                  dvp->dva_domain = domain;
 896  898                  dvp->dva_ddip = ddip;
 897  899                  return (DDI_WALK_TERMINATE);
 898  900          }
 899  901  
 900  902          /* Domain may already be set, continue walking so that ddip gets set */
 901  903          if (dvp->dva_domain) {
 902  904                  return (DDI_WALK_CONTINUE);
 903  905          }
 904  906  
 905  907          /* domain is not set in either immu_devi or dvp */
 906  908          domain = bdf_domain_lookup(immu_devi);
 907  909          if (domain == NULL) {
 908  910                  return (DDI_WALK_CONTINUE);
 909  911          }
 910  912  
 911  913          /* ok, the BDF hash had a domain for this BDF. */
 912  914  
 913  915          /* Grab lock again to check if something else set immu_devi fields */
 914  916          mutex_enter(&(DEVI(pdip)->devi_lock));
 915  917          if (immu_devi->imd_domain != NULL) {
 916  918                  dvp->dva_domain = domain;
 917  919          } else {
 918  920                  dvp->dva_domain = domain;
 919  921          }
 920  922          mutex_exit(&(DEVI(pdip)->devi_lock));
 921  923  
 922  924          /*
 923  925           * walk upwards until the topmost PCI bridge is found
 924  926           */
 925  927          return (DDI_WALK_CONTINUE);
 926  928  
 927  929  }
 928  930  
 929  931  static void
 930  932  map_unity_domain(domain_t *domain)
 931  933  {
 932  934          struct memlist *mp;
 933  935          uint64_t start;
 934  936          uint64_t npages;
 935  937          immu_dcookie_t dcookies[1] = {0};
 936  938          int dcount = 0;
 937  939  
 938  940          /*
 939  941           * UNITY arenas are a mirror of the physical memory
 940  942           * installed on the system.
 941  943           */
 942  944  
 943  945  #ifdef BUGGY_DRIVERS
 944  946          /*
 945  947           * Dont skip page0. Some broken HW/FW access it.
 946  948           */
 947  949          dcookies[0].dck_paddr = 0;
 948  950          dcookies[0].dck_npages = 1;
 949  951          dcount = 1;
 950  952          (void) dvma_map(domain, 0, 1, dcookies, dcount, NULL,
 951  953              IMMU_FLAGS_READ | IMMU_FLAGS_WRITE | IMMU_FLAGS_PAGE1);
 952  954  #endif
 953  955  
 954  956          memlist_read_lock();
 955  957  
 956  958          mp = phys_install;
 957  959  
 958  960          if (mp->ml_address == 0) {
 959  961                  /* since we already mapped page1 above */
 960  962                  start = IMMU_PAGESIZE;
 961  963          } else {
 962  964                  start = mp->ml_address;
 963  965          }
 964  966          npages = mp->ml_size/IMMU_PAGESIZE + 1;
 965  967  
 966  968          dcookies[0].dck_paddr = start;
 967  969          dcookies[0].dck_npages = npages;
 968  970          dcount = 1;
 969  971          (void) dvma_map(domain, start, npages, dcookies,
 970  972              dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
 971  973  
 972  974          ddi_err(DER_LOG, domain->dom_dip, "iommu: mapping PHYS span [0x%" PRIx64
 973  975              " - 0x%" PRIx64 "]", start, start + mp->ml_size);
 974  976  
 975  977          mp = mp->ml_next;
 976  978          while (mp) {
 977  979                  ddi_err(DER_LOG, domain->dom_dip,
 978  980                      "iommu: mapping PHYS span [0x%" PRIx64 " - 0x%" PRIx64 "]",
 979  981                      mp->ml_address, mp->ml_address + mp->ml_size);
 980  982  
 981  983                  start = mp->ml_address;
 982  984                  npages = mp->ml_size/IMMU_PAGESIZE + 1;
 983  985  
 984  986                  dcookies[0].dck_paddr = start;
 985  987                  dcookies[0].dck_npages = npages;
 986  988                  dcount = 1;
 987  989                  (void) dvma_map(domain, start, npages,
 988  990                      dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
 989  991                  mp = mp->ml_next;
 990  992          }
 991  993  
 992  994          mp = bios_rsvd;
 993  995          while (mp) {
 994  996                  ddi_err(DER_LOG, domain->dom_dip,
 995  997                      "iommu: mapping PHYS span [0x%" PRIx64 " - 0x%" PRIx64 "]",
 996  998                      mp->ml_address, mp->ml_address + mp->ml_size);
 997  999  
 998 1000                  start = mp->ml_address;
 999 1001                  npages = mp->ml_size/IMMU_PAGESIZE + 1;
1000 1002  
1001 1003                  dcookies[0].dck_paddr = start;
1002 1004                  dcookies[0].dck_npages = npages;
1003 1005                  dcount = 1;
1004 1006                  (void) dvma_map(domain, start, npages,
1005 1007                      dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
1006 1008  
1007 1009                  mp = mp->ml_next;
1008 1010          }
1009 1011  
1010 1012          memlist_read_unlock();
1011 1013  }
1012 1014  
1013 1015  /*
1014 1016   * create_xlate_arena()
1015 1017   *      Create the dvma arena for a domain with translation
1016 1018   *      mapping
1017 1019   */
1018 1020  static void
1019 1021  create_xlate_arena(immu_t *immu, domain_t *domain,
1020 1022      dev_info_t *rdip, immu_flags_t immu_flags)
1021 1023  {
1022 1024          char *arena_name;
1023 1025          struct memlist *mp;
1024 1026          int vmem_flags;
1025 1027          uint64_t start;
1026 1028          uint_t mgaw;
1027 1029          uint64_t size;
1028 1030          uint64_t maxaddr;
1029 1031          void *vmem_ret;
1030 1032  
1031 1033          arena_name = domain->dom_dvma_arena_name;
1032 1034  
1033 1035          /* Note, don't do sizeof (arena_name) - it is just a pointer */
1034 1036          (void) snprintf(arena_name,
1035 1037              sizeof (domain->dom_dvma_arena_name),
1036 1038              "%s-domain-%d-xlate-DVMA-arena", immu->immu_name,
1037 1039              domain->dom_did);
1038 1040  
1039 1041          vmem_flags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? VM_NOSLEEP : VM_SLEEP;
1040 1042  
1041 1043          /* Restrict mgaddr (max guest addr) to MGAW */
1042 1044          mgaw = IMMU_CAP_MGAW(immu->immu_regs_cap);
1043 1045  
1044 1046          /*
1045 1047           * To ensure we avoid ioapic and PCI MMIO ranges we just
1046 1048           * use the physical memory address range of the system as the
1047 1049           * range
1048 1050           */
1049 1051          maxaddr = ((uint64_t)1 << mgaw);
1050 1052  
1051 1053          memlist_read_lock();
1052 1054  
1053 1055          mp = phys_install;
1054 1056  
1055 1057          if (mp->ml_address == 0)
1056 1058                  start = MMU_PAGESIZE;
1057 1059          else
1058 1060                  start = mp->ml_address;
1059 1061  
1060 1062          if (start + mp->ml_size > maxaddr)
1061 1063                  size = maxaddr - start;
1062 1064          else
1063 1065                  size = mp->ml_size;
1064 1066  
1065 1067          ddi_err(DER_VERB, rdip,
1066 1068              "iommu: %s: Creating dvma vmem arena [0x%" PRIx64
1067 1069              " - 0x%" PRIx64 "]", arena_name, start, start + size);
1068 1070  
1069 1071          /*
1070 1072           * We always allocate in quanta of IMMU_PAGESIZE
1071 1073           */
1072 1074          domain->dom_dvma_arena = vmem_create(arena_name,
1073 1075              (void *)(uintptr_t)start,   /* start addr */
1074 1076              size,                       /* size */
1075 1077              IMMU_PAGESIZE,              /* quantum */
1076 1078              NULL,                       /* afunc */
1077 1079              NULL,                       /* ffunc */
1078 1080              NULL,                       /* source */
1079 1081              0,                          /* qcache_max */
1080 1082              vmem_flags);
1081 1083  
1082 1084          if (domain->dom_dvma_arena == NULL) {
1083 1085                  ddi_err(DER_PANIC, rdip,
1084 1086                      "Failed to allocate DVMA arena(%s) "
1085 1087                      "for domain ID (%d)", arena_name, domain->dom_did);
1086 1088                  /*NOTREACHED*/
1087 1089          }
1088 1090  
1089 1091          mp = mp->ml_next;
1090 1092          while (mp) {
1091 1093  
1092 1094                  if (mp->ml_address == 0)
1093 1095                          start = MMU_PAGESIZE;
1094 1096                  else
1095 1097                          start = mp->ml_address;
1096 1098  
1097 1099                  if (start + mp->ml_size > maxaddr)
1098 1100                          size = maxaddr - start;
1099 1101                  else
1100 1102                          size = mp->ml_size;
1101 1103  
1102 1104                  ddi_err(DER_VERB, rdip,
1103 1105                      "iommu: %s: Adding dvma vmem span [0x%" PRIx64
1104 1106                      " - 0x%" PRIx64 "]", arena_name, start,
1105 1107                      start + size);
1106 1108  
1107 1109                  vmem_ret = vmem_add(domain->dom_dvma_arena,
1108 1110                      (void *)(uintptr_t)start, size,  vmem_flags);
1109 1111  
1110 1112                  if (vmem_ret == NULL) {
1111 1113                          ddi_err(DER_PANIC, rdip,
1112 1114                              "Failed to allocate DVMA arena(%s) "
1113 1115                              "for domain ID (%d)",
1114 1116                              arena_name, domain->dom_did);
1115 1117                          /*NOTREACHED*/
1116 1118                  }
1117 1119                  mp = mp->ml_next;
1118 1120          }
1119 1121          memlist_read_unlock();
1120 1122  }
1121 1123  
1122 1124  /* ################################### DOMAIN CODE ######################### */
1123 1125  
1124 1126  /*
1125 1127   * Set the domain and domain-dip for a dip
1126 1128   */
1127 1129  static void
1128 1130  set_domain(
1129 1131          dev_info_t *dip,
1130 1132          dev_info_t *ddip,
1131 1133          domain_t *domain)
1132 1134  {
1133 1135          immu_devi_t *immu_devi;
1134 1136          domain_t *fdomain;
1135 1137          dev_info_t *fddip;
1136 1138  
1137 1139          immu_devi = immu_devi_get(dip);
1138 1140  
1139 1141          mutex_enter(&(DEVI(dip)->devi_lock));
1140 1142          fddip = immu_devi->imd_ddip;
1141 1143          fdomain = immu_devi->imd_domain;
1142 1144  
1143 1145          if (fddip) {
1144 1146                  ASSERT(fddip == ddip);
1145 1147          } else {
1146 1148                  immu_devi->imd_ddip = ddip;
1147 1149          }
1148 1150  
1149 1151          if (fdomain) {
1150 1152                  ASSERT(fdomain == domain);
1151 1153          } else {
1152 1154                  immu_devi->imd_domain = domain;
1153 1155          }
1154 1156          mutex_exit(&(DEVI(dip)->devi_lock));
1155 1157  }
1156 1158  
1157 1159  /*
1158 1160   * device_domain()
1159 1161   *      Get domain for a device. The domain may be global in which case it
1160 1162   *      is shared between all IOMMU units. Due to potential AGAW differences
1161 1163   *      between IOMMU units, such global domains *have to be* UNITY mapping
1162 1164   *      domains. Alternatively, the domain may be local to a IOMMU unit.
1163 1165   *      Local domains may be shared or immu_devi, although the
1164 1166   *      scope of sharing
1165 1167   *      is restricted to devices controlled by the IOMMU unit to
1166 1168   *      which the domain
1167 1169   *      belongs. If shared, they (currently) have to be UNITY domains. If
1168 1170   *      immu_devi a domain may be either UNITY or translation (XLATE) domain.
1169 1171   */
1170 1172  static domain_t *
1171 1173  device_domain(dev_info_t *rdip, dev_info_t **ddipp, immu_flags_t immu_flags)
1172 1174  {
1173 1175          dev_info_t *ddip; /* topmost dip in domain i.e. domain owner */
1174 1176          immu_t *immu;
1175 1177          domain_t *domain;
1176 1178          dvma_arg_t dvarg = {0};
1177 1179          int level;
1178 1180  
1179 1181          *ddipp = NULL;
1180 1182  
1181 1183          /*
1182 1184           * Check if the domain is already set. This is usually true
1183 1185           * if this is not the first DVMA transaction.
1184 1186           */
1185 1187          ddip = NULL;
1186 1188          domain = immu_devi_domain(rdip, &ddip);
1187 1189          if (domain) {
1188 1190                  *ddipp = ddip;
1189 1191                  return (domain);
1190 1192          }
1191 1193  
1192 1194          immu = immu_dvma_get_immu(rdip, immu_flags);
1193 1195          if (immu == NULL) {
1194 1196                  /*
1195 1197                   * possible that there is no IOMMU unit for this device
1196 1198                   * - BIOS bugs are one example.
1197 1199                   */
1198 1200                  ddi_err(DER_WARN, rdip, "No iommu unit found for device");
1199 1201                  return (NULL);
1200 1202          }
1201 1203  
1202 1204          immu_flags |= immu_devi_get(rdip)->imd_dvma_flags;
1203 1205  
1204 1206          dvarg.dva_rdip = rdip;
1205 1207          dvarg.dva_ddip = NULL;
1206 1208          dvarg.dva_domain = NULL;
1207 1209          dvarg.dva_flags = immu_flags;
1208 1210          level = 0;
1209 1211          if (immu_walk_ancestor(rdip, NULL, get_branch_domain,
1210 1212              &dvarg, &level, immu_flags) != DDI_SUCCESS) {
1211 1213                  /*
1212 1214                   * maybe low memory. return error,
1213 1215                   * so driver tries again later
1214 1216                   */
1215 1217                  return (NULL);
1216 1218          }
1217 1219  
1218 1220          /* should have walked at least 1 dip (i.e. edip) */
1219 1221          ASSERT(level > 0);
1220 1222  
1221 1223          ddip = dvarg.dva_ddip;  /* must be present */
1222 1224          domain = dvarg.dva_domain;      /* may be NULL */
1223 1225  
1224 1226          /*
1225 1227           * We may find the domain during our ancestor walk on any one of our
1226 1228           * ancestor dips, If the domain is found then the domain-dip
1227 1229           * (i.e. ddip) will also be found in the same immu_devi struct.
1228 1230           * The domain-dip is the highest ancestor dip which shares the
1229 1231           * same domain with edip.
1230 1232           * The domain may or may not be found, but the domain dip must
1231 1233           * be found.
1232 1234           */
1233 1235          if (ddip == NULL) {
1234 1236                  ddi_err(DER_MODE, rdip, "Cannot find domain dip for device.");
1235 1237                  return (NULL);
1236 1238          }
1237 1239  
1238 1240          /*
1239 1241           * Did we find a domain ?
1240 1242           */
1241 1243          if (domain) {
1242 1244                  goto found;
1243 1245          }
1244 1246  
1245 1247          /* nope, so allocate */
1246 1248          domain = domain_create(immu, ddip, rdip, immu_flags);
1247 1249          if (domain == NULL) {
1248 1250                  return (NULL);
1249 1251          }
1250 1252  
1251 1253          /*FALLTHROUGH*/
1252 1254  found:
1253 1255          /*
1254 1256           * We know *domain *is* the right domain, so panic if
1255 1257           * another domain is set for either the request-dip or
1256 1258           * effective dip.
1257 1259           */
1258 1260          set_domain(ddip, ddip, domain);
1259 1261          set_domain(rdip, ddip, domain);
1260 1262  
1261 1263          *ddipp = ddip;
1262 1264          return (domain);
1263 1265  }
1264 1266  
1265 1267  static void
1266 1268  create_unity_domain(immu_t *immu)
1267 1269  {
1268 1270          domain_t *domain;
1269 1271  
1270 1272          /* domain created during boot and always use sleep flag */
1271 1273          domain = kmem_zalloc(sizeof (domain_t), KM_SLEEP);
1272 1274  
1273 1275          rw_init(&(domain->dom_pgtable_rwlock), NULL, RW_DEFAULT, NULL);
1274 1276  
1275 1277          domain->dom_did = IMMU_UNITY_DID;
1276 1278          domain->dom_maptype = IMMU_MAPTYPE_UNITY;
1277 1279  
1278 1280          domain->dom_immu = immu;
1279 1281          immu->immu_unity_domain = domain;
1280 1282  
1281 1283          /*
1282 1284           * Setup the domain's initial page table
1283 1285           * should never fail.
1284 1286           */
1285 1287          domain->dom_pgtable_root = pgtable_alloc(immu, IMMU_FLAGS_SLEEP);
1286 1288          pgtable_zero(domain->dom_pgtable_root);
1287 1289  
1288 1290          /*
1289 1291           * Only map all physical memory in to the unity domain
1290 1292           * if passthrough is not supported. If it is supported,
1291 1293           * passthrough is set in the context entry instead.
1292 1294           */
1293 1295          if (!IMMU_ECAP_GET_PT(immu->immu_regs_excap))
1294 1296                  map_unity_domain(domain);
1295 1297  
1296 1298  
1297 1299          /*
1298 1300           * put it on the system-wide UNITY domain list
1299 1301           */
1300 1302          mutex_enter(&(immu_domain_lock));
1301 1303          list_insert_tail(&immu_unity_domain_list, domain);
1302 1304          mutex_exit(&(immu_domain_lock));
1303 1305  }
1304 1306  
1305 1307  /*
1306 1308   * ddip is the domain-dip - the topmost dip in a domain
1307 1309   * rdip is the requesting-dip - the device which is
1308 1310   * requesting DVMA setup
1309 1311   * if domain is a non-shared domain rdip == ddip
1310 1312   */
1311 1313  static domain_t *
1312 1314  domain_create(immu_t *immu, dev_info_t *ddip, dev_info_t *rdip,
1313 1315      immu_flags_t immu_flags)
1314 1316  {
1315 1317          int kmflags;
1316 1318          domain_t *domain;
1317 1319          char mod_hash_name[128];
1318 1320          immu_devi_t *immu_devi;
1319 1321          int did;
1320 1322          immu_dcookie_t dcookies[1] = {0};
1321 1323          int dcount = 0;
1322 1324  
1323 1325          immu_devi = immu_devi_get(rdip);
1324 1326  
1325 1327          /*
1326 1328           * First allocate a domainid.
1327 1329           * This routine will never fail, since if we run out
1328 1330           * of domains the unity domain will be allocated.
1329 1331           */
1330 1332          did = did_alloc(immu, rdip, ddip, immu_flags);
1331 1333          if (did == IMMU_UNITY_DID) {
1332 1334                  /* domain overflow */
1333 1335                  ASSERT(immu->immu_unity_domain);
1334 1336                  return (immu->immu_unity_domain);
1335 1337          }
1336 1338  
1337 1339          kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
1338 1340          domain = kmem_zalloc(sizeof (domain_t), kmflags);
1339 1341          if (domain == NULL) {
1340 1342                  ddi_err(DER_PANIC, rdip, "Failed to alloc DVMA domain "
1341 1343                      "structure for device. IOMMU unit: %s", immu->immu_name);
1342 1344                  /*NOTREACHED*/
1343 1345          }
1344 1346  
1345 1347          rw_init(&(domain->dom_pgtable_rwlock), NULL, RW_DEFAULT, NULL);
1346 1348  
1347 1349          (void) snprintf(mod_hash_name, sizeof (mod_hash_name),
1348 1350              "immu%s-domain%d-pava-hash", immu->immu_name, did);
1349 1351  
1350 1352          domain->dom_did = did;
1351 1353          domain->dom_immu = immu;
1352 1354          domain->dom_maptype = IMMU_MAPTYPE_XLATE;
1353 1355          domain->dom_dip = ddip;
1354 1356  
1355 1357          /*
1356 1358           * Create xlate DVMA arena for this domain.
1357 1359           */
1358 1360          create_xlate_arena(immu, domain, rdip, immu_flags);
1359 1361  
1360 1362          /*
1361 1363           * Setup the domain's initial page table
1362 1364           */
1363 1365          domain->dom_pgtable_root = pgtable_alloc(immu, immu_flags);
1364 1366          if (domain->dom_pgtable_root == NULL) {
1365 1367                  ddi_err(DER_PANIC, rdip, "Failed to alloc root "
1366 1368                      "pgtable for domain (%d). IOMMU unit: %s",
1367 1369                      domain->dom_did, immu->immu_name);
1368 1370                  /*NOTREACHED*/
1369 1371          }
1370 1372          pgtable_zero(domain->dom_pgtable_root);
1371 1373  
1372 1374          /*
1373 1375           * Since this is a immu unit-specific domain, put it on
1374 1376           * the per-immu domain list.
1375 1377           */
1376 1378          mutex_enter(&(immu->immu_lock));
1377 1379          list_insert_head(&immu->immu_domain_list, domain);
1378 1380          mutex_exit(&(immu->immu_lock));
1379 1381  
1380 1382          /*
1381 1383           * Also put it on the system-wide xlate domain list
1382 1384           */
1383 1385          mutex_enter(&(immu_domain_lock));
1384 1386          list_insert_head(&immu_xlate_domain_list, domain);
1385 1387          mutex_exit(&(immu_domain_lock));
1386 1388  
1387 1389          bdf_domain_insert(immu_devi, domain);
1388 1390  
1389 1391  #ifdef BUGGY_DRIVERS
1390 1392          /*
1391 1393           * Map page0. Some broken HW/FW access it.
1392 1394           */
1393 1395          dcookies[0].dck_paddr = 0;
1394 1396          dcookies[0].dck_npages = 1;
1395 1397          dcount = 1;
1396 1398          (void) dvma_map(domain, 0, 1, dcookies, dcount, NULL,
1397 1399              IMMU_FLAGS_READ | IMMU_FLAGS_WRITE | IMMU_FLAGS_PAGE1);
1398 1400  #endif
1399 1401          return (domain);
1400 1402  }
1401 1403  
1402 1404  /*
1403 1405   * Create domainid arena.
1404 1406   * Domainid 0 is reserved by Vt-d spec and cannot be used by
1405 1407   * system software.
1406 1408   * Domainid 1 is reserved by solaris and used for *all* of the following:
1407 1409   *      as the "uninitialized" domain - For devices not yet controlled
1408 1410   *      by Solaris
1409 1411   *      as the "unity" domain - For devices that will always belong
1410 1412   *      to the unity domain
1411 1413   *      as the "overflow" domain - Used for any new device after we
1412 1414   *      run out of domains
1413 1415   * All of the above domains map into a single domain with
1414 1416   * domainid 1 and UNITY DVMA mapping
1415 1417   * Each IMMU unity has its own unity/uninit/overflow domain
1416 1418   */
1417 1419  static void
1418 1420  did_init(immu_t *immu)
1419 1421  {
1420 1422          (void) snprintf(immu->immu_did_arena_name,
1421 1423              sizeof (immu->immu_did_arena_name),
1422 1424              "%s_domainid_arena", immu->immu_name);
1423 1425  
1424 1426          ddi_err(DER_VERB, immu->immu_dip, "creating domainid arena %s",
1425 1427              immu->immu_did_arena_name);
1426 1428  
1427 1429          immu->immu_did_arena = vmem_create(
1428 1430              immu->immu_did_arena_name,
1429 1431              (void *)(uintptr_t)(IMMU_UNITY_DID + 1),   /* start addr */
1430 1432              immu->immu_max_domains - IMMU_UNITY_DID,
1431 1433              1,                          /* quantum */
1432 1434              NULL,                       /* afunc */
1433 1435              NULL,                       /* ffunc */
1434 1436              NULL,                       /* source */
1435 1437              0,                          /* qcache_max */
1436 1438              VM_SLEEP);
1437 1439  
1438 1440          /* Even with SLEEP flag, vmem_create() can fail */
1439 1441          if (immu->immu_did_arena == NULL) {
1440 1442                  ddi_err(DER_PANIC, NULL, "%s: Failed to create Intel "
1441 1443                      "IOMMU domainid allocator: %s", immu->immu_name,
1442 1444                      immu->immu_did_arena_name);
1443 1445          }
1444 1446  }
1445 1447  
1446 1448  /* #########################  CONTEXT CODE ################################# */
1447 1449  
1448 1450  static void
1449 1451  context_set(immu_t *immu, domain_t *domain, pgtable_t *root_table,
1450 1452      int bus, int devfunc)
1451 1453  {
1452 1454          pgtable_t *context;
1453 1455          pgtable_t *pgtable_root;
1454 1456          hw_rce_t *hw_rent;
1455 1457          hw_rce_t *hw_cent;
1456 1458          hw_rce_t *ctxp;
1457 1459          int sid;
1458 1460          krw_t rwtype;
1459 1461          boolean_t fill_root;
1460 1462          boolean_t fill_ctx;
1461 1463  
1462 1464          pgtable_root = domain->dom_pgtable_root;
1463 1465  
1464 1466          ctxp = (hw_rce_t *)(root_table->swpg_next_array);
1465 1467          context = *(pgtable_t **)(ctxp + bus);
1466 1468          hw_rent = (hw_rce_t *)(root_table->hwpg_vaddr) + bus;
1467 1469  
1468 1470          fill_root = B_FALSE;
1469 1471          fill_ctx = B_FALSE;
1470 1472  
1471 1473          /* Check the most common case first with reader lock */
1472 1474          rw_enter(&(immu->immu_ctx_rwlock), RW_READER);
1473 1475          rwtype = RW_READER;
1474 1476  again:
1475 1477          if (ROOT_GET_P(hw_rent)) {
1476 1478                  hw_cent = (hw_rce_t *)(context->hwpg_vaddr) + devfunc;
1477 1479                  if (CONT_GET_AVAIL(hw_cent) == IMMU_CONT_INITED) {
1478 1480                          rw_exit(&(immu->immu_ctx_rwlock));
1479 1481                          return;
1480 1482                  } else {
1481 1483                          fill_ctx = B_TRUE;
1482 1484                  }
1483 1485          } else {
1484 1486                  fill_root = B_TRUE;
1485 1487                  fill_ctx = B_TRUE;
1486 1488          }
1487 1489  
1488 1490          if (rwtype == RW_READER &&
1489 1491              rw_tryupgrade(&(immu->immu_ctx_rwlock)) == 0) {
1490 1492                  rw_exit(&(immu->immu_ctx_rwlock));
1491 1493                  rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
1492 1494                  rwtype = RW_WRITER;
1493 1495                  goto again;
1494 1496          }
1495 1497          rwtype = RW_WRITER;
1496 1498  
1497 1499          if (fill_root == B_TRUE) {
1498 1500                  ROOT_SET_CONT(hw_rent, context->hwpg_paddr);
1499 1501                  ROOT_SET_P(hw_rent);
1500 1502                  immu_regs_cpu_flush(immu, (caddr_t)hw_rent, sizeof (hw_rce_t));
1501 1503          }
1502 1504  
1503 1505          if (fill_ctx == B_TRUE) {
1504 1506                  hw_cent = (hw_rce_t *)(context->hwpg_vaddr) + devfunc;
1505 1507                  /* need to disable context entry before reprogramming it */
1506 1508                  bzero(hw_cent, sizeof (hw_rce_t));
1507 1509  
1508 1510                  /* flush caches */
1509 1511                  immu_regs_cpu_flush(immu, (caddr_t)hw_cent, sizeof (hw_rce_t));
1510 1512  
1511 1513                  sid = ((bus << 8) | devfunc);
1512 1514                  immu_flush_context_fsi(immu, 0, sid, domain->dom_did,
1513 1515                      &immu->immu_ctx_inv_wait);
1514 1516  
1515 1517                  CONT_SET_AVAIL(hw_cent, IMMU_CONT_INITED);
1516 1518                  CONT_SET_DID(hw_cent, domain->dom_did);
1517 1519                  CONT_SET_AW(hw_cent, immu->immu_dvma_agaw);
1518 1520                  CONT_SET_ASR(hw_cent, pgtable_root->hwpg_paddr);
1519 1521                  if (domain->dom_did == IMMU_UNITY_DID &&
1520 1522                      IMMU_ECAP_GET_PT(immu->immu_regs_excap))
1521 1523                          CONT_SET_TTYPE(hw_cent, TTYPE_PASSTHRU);
1522 1524                  else
1523 1525                          /*LINTED*/
1524 1526                          CONT_SET_TTYPE(hw_cent, TTYPE_XLATE_ONLY);
1525 1527                  CONT_SET_P(hw_cent);
1526 1528                  if (IMMU_ECAP_GET_CH(immu->immu_regs_excap)) {
1527 1529                          CONT_SET_EH(hw_cent);
1528 1530                          if (immu_use_alh)
1529 1531                                  CONT_SET_ALH(hw_cent);
1530 1532                  }
1531 1533                  immu_regs_cpu_flush(immu, (caddr_t)hw_cent, sizeof (hw_rce_t));
1532 1534          }
1533 1535          rw_exit(&(immu->immu_ctx_rwlock));
1534 1536  }
1535 1537  
1536 1538  static pgtable_t *
1537 1539  context_create(immu_t *immu)
1538 1540  {
1539 1541          int     bus;
1540 1542          int     devfunc;
1541 1543          pgtable_t *root_table;
1542 1544          pgtable_t *context;
1543 1545          pgtable_t *pgtable_root;
1544 1546          hw_rce_t *ctxp;
1545 1547          hw_rce_t *hw_rent;
1546 1548          hw_rce_t *hw_cent;
1547 1549  
1548 1550          /* Allocate a zeroed root table (4K 256b entries) */
1549 1551          root_table = pgtable_alloc(immu, IMMU_FLAGS_SLEEP);
1550 1552          pgtable_zero(root_table);
1551 1553  
1552 1554          /*
1553 1555           * Setup context tables for all possible root table entries.
1554 1556           * Start out with unity domains for all entries.
1555 1557           */
1556 1558          ctxp = (hw_rce_t *)(root_table->swpg_next_array);
1557 1559          hw_rent = (hw_rce_t *)(root_table->hwpg_vaddr);
1558 1560          for (bus = 0; bus < IMMU_ROOT_NUM; bus++, ctxp++, hw_rent++) {
1559 1561                  context = pgtable_alloc(immu, IMMU_FLAGS_SLEEP);
1560 1562                  pgtable_zero(context);
1561 1563                  ROOT_SET_P(hw_rent);
1562 1564                  ROOT_SET_CONT(hw_rent, context->hwpg_paddr);
1563 1565                  hw_cent = (hw_rce_t *)(context->hwpg_vaddr);
1564 1566                  for (devfunc = 0; devfunc < IMMU_CONT_NUM;
1565 1567                      devfunc++, hw_cent++) {
1566 1568                          pgtable_root =
1567 1569                              immu->immu_unity_domain->dom_pgtable_root;
1568 1570                          CONT_SET_DID(hw_cent,
1569 1571                              immu->immu_unity_domain->dom_did);
1570 1572                          CONT_SET_AW(hw_cent, immu->immu_dvma_agaw);
1571 1573                          CONT_SET_ASR(hw_cent, pgtable_root->hwpg_paddr);
1572 1574                          if (IMMU_ECAP_GET_PT(immu->immu_regs_excap))
1573 1575                                  CONT_SET_TTYPE(hw_cent, TTYPE_PASSTHRU);
1574 1576                          else
1575 1577                                  /*LINTED*/
1576 1578                                  CONT_SET_TTYPE(hw_cent, TTYPE_XLATE_ONLY);
1577 1579                          CONT_SET_AVAIL(hw_cent, IMMU_CONT_UNINITED);
1578 1580                          CONT_SET_P(hw_cent);
1579 1581                  }
1580 1582                  immu_regs_cpu_flush(immu, context->hwpg_vaddr, IMMU_PAGESIZE);
1581 1583                  *((pgtable_t **)ctxp) = context;
1582 1584          }
1583 1585  
1584 1586          return (root_table);
1585 1587  }
1586 1588  
1587 1589  /*
1588 1590   * Called during rootnex attach, so no locks needed
1589 1591   */
1590 1592  static void
1591 1593  context_init(immu_t *immu)
1592 1594  {
1593 1595          rw_init(&(immu->immu_ctx_rwlock), NULL, RW_DEFAULT, NULL);
1594 1596  
1595 1597          immu_init_inv_wait(&immu->immu_ctx_inv_wait, "ctxglobal", B_TRUE);
1596 1598  
1597 1599          immu_regs_wbf_flush(immu);
1598 1600  
1599 1601          immu->immu_ctx_root = context_create(immu);
1600 1602  
1601 1603          immu_regs_set_root_table(immu);
1602 1604  
1603 1605          rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
1604 1606          immu_flush_context_gbl(immu, &immu->immu_ctx_inv_wait);
1605 1607          immu_flush_iotlb_gbl(immu, &immu->immu_ctx_inv_wait);
1606 1608          rw_exit(&(immu->immu_ctx_rwlock));
1607 1609  }
1608 1610  
1609 1611  
1610 1612  /*
1611 1613   * Find top pcib
1612 1614   */
1613 1615  static int
1614 1616  find_top_pcib(dev_info_t *dip, void *arg)
1615 1617  {
1616 1618          immu_devi_t *immu_devi;
1617 1619          dev_info_t **pcibdipp = (dev_info_t **)arg;
1618 1620  
1619 1621          immu_devi = immu_devi_get(dip);
1620 1622  
1621 1623          if (immu_devi->imd_pcib_type == IMMU_PCIB_PCI_PCI) {
1622 1624                  *pcibdipp = dip;
1623 1625          }
1624 1626  
1625 1627          return (DDI_WALK_CONTINUE);
1626 1628  }
1627 1629  
1628 1630  static int
1629 1631  immu_context_update(immu_t *immu, domain_t *domain, dev_info_t *ddip,
1630 1632      dev_info_t *rdip, immu_flags_t immu_flags)
1631 1633  {
1632 1634          immu_devi_t *r_immu_devi;
1633 1635          immu_devi_t *d_immu_devi;
1634 1636          int r_bus;
1635 1637          int d_bus;
1636 1638          int r_devfunc;
1637 1639          int d_devfunc;
1638 1640          immu_pcib_t d_pcib_type;
1639 1641          dev_info_t *pcibdip;
1640 1642  
1641 1643          if (ddip == NULL || rdip == NULL ||
1642 1644              ddip == root_devinfo || rdip == root_devinfo) {
1643 1645                  ddi_err(DER_MODE, rdip, "immu_contexts_update: domain-dip or "
1644 1646                      "request-dip are NULL or are root devinfo");
1645 1647                  return (DDI_FAILURE);
1646 1648          }
1647 1649  
1648 1650          /*
1649 1651           * We need to set the context fields
1650 1652           * based on what type of device rdip and ddip are.
1651 1653           * To do that we need the immu_devi field.
1652 1654           * Set the immu_devi field (if not already set)
1653 1655           */
1654 1656          if (immu_devi_set(ddip, immu_flags) == DDI_FAILURE) {
1655 1657                  ddi_err(DER_MODE, rdip,
1656 1658                      "immu_context_update: failed to set immu_devi for ddip");
1657 1659                  return (DDI_FAILURE);
1658 1660          }
1659 1661  
1660 1662          if (immu_devi_set(rdip, immu_flags) == DDI_FAILURE) {
1661 1663                  ddi_err(DER_MODE, rdip,
1662 1664                      "immu_context_update: failed to set immu_devi for rdip");
1663 1665                  return (DDI_FAILURE);
1664 1666          }
1665 1667  
1666 1668          d_immu_devi = immu_devi_get(ddip);
1667 1669          r_immu_devi = immu_devi_get(rdip);
1668 1670  
1669 1671          d_bus = d_immu_devi->imd_bus;
1670 1672          d_devfunc = d_immu_devi->imd_devfunc;
1671 1673          d_pcib_type = d_immu_devi->imd_pcib_type;
1672 1674          r_bus = r_immu_devi->imd_bus;
1673 1675          r_devfunc = r_immu_devi->imd_devfunc;
1674 1676  
1675 1677          if (rdip == ddip) {
1676 1678                  /* rdip is a PCIE device. set context for it only */
1677 1679                  context_set(immu, domain, immu->immu_ctx_root, r_bus,
1678 1680                      r_devfunc);
1679 1681  #ifdef BUGGY_DRIVERS
1680 1682          } else if (r_immu_devi == d_immu_devi) {
1681 1683  #ifdef TEST
1682 1684                  ddi_err(DER_WARN, rdip, "Driver bug: Devices 0x%lx and "
1683 1685                      "0x%lx are identical", rdip, ddip);
1684 1686  #endif
1685 1687                  /* rdip is a PCIE device. set context for it only */
1686 1688                  context_set(immu, domain, immu->immu_ctx_root, r_bus,
1687 1689                      r_devfunc);
1688 1690  #endif
1689 1691          } else if (d_pcib_type == IMMU_PCIB_PCIE_PCI) {
1690 1692                  /*
1691 1693                   * ddip is a PCIE_PCI bridge. Set context for ddip's
1692 1694                   * secondary bus. If rdip is on ddip's secondary
1693 1695                   * bus, set context for rdip. Else, set context
1694 1696                   * for rdip's PCI bridge on ddip's secondary bus.
1695 1697                   */
1696 1698                  context_set(immu, domain, immu->immu_ctx_root,
1697 1699                      d_immu_devi->imd_sec, 0);
1698 1700                  if (d_immu_devi->imd_sec == r_bus) {
1699 1701                          context_set(immu, domain, immu->immu_ctx_root,
1700 1702                              r_bus, r_devfunc);
1701 1703                  } else {
1702 1704                          pcibdip = NULL;
1703 1705                          if (immu_walk_ancestor(rdip, ddip, find_top_pcib,
1704 1706                              &pcibdip, NULL, immu_flags) == DDI_SUCCESS &&
1705 1707                              pcibdip != NULL) {
1706 1708                                  r_immu_devi = immu_devi_get(pcibdip);
1707 1709                                  r_bus = r_immu_devi->imd_bus;
1708 1710                                  r_devfunc = r_immu_devi->imd_devfunc;
1709 1711                                  context_set(immu, domain, immu->immu_ctx_root,
1710 1712                                      r_bus, r_devfunc);
1711 1713                          } else {
1712 1714                                  ddi_err(DER_PANIC, rdip, "Failed to find PCI "
1713 1715                                      " bridge for PCI device");
1714 1716                                  /*NOTREACHED*/
1715 1717                          }
1716 1718                  }
1717 1719          } else if (d_pcib_type == IMMU_PCIB_PCI_PCI) {
1718 1720                  context_set(immu, domain, immu->immu_ctx_root, d_bus,
1719 1721                      d_devfunc);
1720 1722          } else if (d_pcib_type == IMMU_PCIB_ENDPOINT) {
1721 1723                  /*
1722 1724                   * ddip is a PCIE device which has a non-PCI device under it
1723 1725                   * i.e. it is a PCI-nonPCI bridge. Example: pciicde-ata
1724 1726                   */
1725 1727                  context_set(immu, domain, immu->immu_ctx_root, d_bus,
1726 1728                      d_devfunc);
1727 1729          } else {
1728 1730                  ddi_err(DER_PANIC, rdip, "unknown device type. Cannot "
1729 1731                      "set iommu context.");
1730 1732                  /*NOTREACHED*/
1731 1733          }
1732 1734  
1733 1735          /* XXX do we need a membar_producer() here */
1734 1736          return (DDI_SUCCESS);
1735 1737  }
1736 1738  
1737 1739  /* ##################### END CONTEXT CODE ################################## */
1738 1740  /* ##################### MAPPING CODE ################################## */
1739 1741  
1740 1742  
#ifdef DEBUG
/*
 * DEBUG-only sanity check of a page directory/table entry.
 *
 * next == NULL means pdte is a leaf PTE that must map paddr; otherwise
 * pdte is a PDE that must point at next's hardware page.  The first
 * violated rule is reported through ddi_err(DER_MODE, ...) and B_FALSE is
 * returned; B_TRUE means every rule held.
 */
static boolean_t
PDTE_check(immu_t *immu, hw_pdte_t pdte, pgtable_t *next, paddr_t paddr,
    dev_info_t *rdip, immu_flags_t immu_flags)
{
        const boolean_t is_leaf = (next == NULL) ? B_TRUE : B_FALSE;

        /* An entry that is in use always carries the present bit. */
        if (!PDTE_P(pdte)) {
                ddi_err(DER_MODE, rdip, "No present flag");
                return (B_FALSE);
        }

        /*
         * PDTE_SW4 (the most significant software field) is the same bit
         * as the present flag tested above, so asserting is enough.
         */
        ASSERT(PDTE_SW4(pdte));

        /* TM is reserved in non-leaf entries; in a PTE it must be clear. */
        if (is_leaf && immu->immu_TM_reserved == B_FALSE && PDTE_TM(pdte)) {
                ddi_err(DER_MODE, rdip, "TM flag set");
                return (B_FALSE);
        }

        /* SW3 is unused and has to stay clear. */
        if (PDTE_SW3(pdte)) {
                ddi_err(DER_MODE, rdip, "SW3 set");
                return (B_FALSE);
        }

        /* The address field holds the next-level table or the PFN. */
        if (!is_leaf) {
                if (PDTE_PADDR(pdte) != next->hwpg_paddr) {
                        ddi_err(DER_MODE, rdip,
                            "PDE paddr mismatch: %lx != %lx",
                            PDTE_PADDR(pdte), next->hwpg_paddr);
                        return (B_FALSE);
                }
        } else {
                ASSERT(paddr % IMMU_PAGESIZE == 0);
                if (PDTE_PADDR(pdte) != paddr) {
                        ddi_err(DER_MODE, rdip,
                            "PTE paddr mismatch: %lx != %lx",
                            PDTE_PADDR(pdte), paddr);
                        return (B_FALSE);
                }
        }

        /* SNP is reserved in non-leaf entries; in a PTE it must be clear. */
        if (is_leaf && immu->immu_SNP_reserved == B_FALSE && PDTE_SNP(pdte)) {
                ddi_err(DER_MODE, rdip, "SNP set");
                return (B_FALSE);
        }

        /* The remaining software-available and super-page bits are unused. */
        if (PDTE_SW2(pdte)) {
                ddi_err(DER_MODE, rdip, "SW2 set");
                return (B_FALSE);
        }

        if (PDTE_SP(pdte)) {
                ddi_err(DER_MODE, rdip, "SP set");
                return (B_FALSE);
        }

        if (PDTE_SW1(pdte)) {
                ddi_err(DER_MODE, rdip, "SW1 set");
                return (B_FALSE);
        }

        /* Every permission the caller asked for must have been granted. */
        if ((immu_flags & IMMU_FLAGS_READ) && !PDTE_READ(pdte)) {
                ddi_err(DER_MODE, rdip, "READ not set");
                return (B_FALSE);
        }

        if ((immu_flags & IMMU_FLAGS_WRITE) && !PDTE_WRITE(pdte)) {
                ddi_err(DER_MODE, rdip, "WRITE not set");
                return (B_FALSE);
        }

        return (B_TRUE);
}
#endif
1843 1845  
1844 1846  /*ARGSUSED*/
1845 1847  static void
1846 1848  PTE_clear_all(immu_t *immu, domain_t *domain, xlate_t *xlate,
1847 1849      uint64_t *dvma_ptr, uint64_t *npages_ptr, dev_info_t *rdip)
1848 1850  {
1849 1851          uint64_t npages;
1850 1852          uint64_t dvma;
1851 1853          pgtable_t *pgtable;
1852 1854          hw_pdte_t *hwp;
1853 1855          hw_pdte_t *shwp;
1854 1856          int idx;
1855 1857  
1856 1858          pgtable = xlate->xlt_pgtable;
1857 1859          idx = xlate->xlt_idx;
1858 1860  
1859 1861          dvma = *dvma_ptr;
1860 1862          npages = *npages_ptr;
1861 1863  
1862 1864          /*
1863 1865           * since a caller gets a unique dvma for a physical address,
1864 1866           * no other concurrent thread will be writing to the same
1865 1867           * PTE even if it has the same paddr. So no locks needed.
1866 1868           */
1867 1869          shwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx;
1868 1870  
1869 1871          hwp = shwp;
1870 1872          for (; npages > 0 && idx <= IMMU_PGTABLE_MAXIDX; idx++, hwp++) {
1871 1873                  PDTE_CLEAR_P(*hwp);
1872 1874                  dvma += IMMU_PAGESIZE;
1873 1875                  npages--;
1874 1876          }
1875 1877  
1876 1878          *dvma_ptr = dvma;
1877 1879          *npages_ptr = npages;
1878 1880  
1879 1881          xlate->xlt_idx = idx;
1880 1882  }
1881 1883  
1882 1884  static void
1883 1885  xlate_setup(uint64_t dvma, xlate_t *xlate, int nlevels)
1884 1886  {
1885 1887          int level;
1886 1888          uint64_t offbits;
1887 1889  
1888 1890          /*
1889 1891           * Skip the first 12 bits which is the offset into
1890 1892           * 4K PFN (phys page frame based on IMMU_PAGESIZE)
1891 1893           */
1892 1894          offbits = dvma >> IMMU_PAGESHIFT;
1893 1895  
1894 1896          /* skip to level 1 i.e. leaf PTE */
1895 1897          for (level = 1, xlate++; level <= nlevels; level++, xlate++) {
1896 1898                  xlate->xlt_level = level;
1897 1899                  xlate->xlt_idx = (offbits & IMMU_PGTABLE_LEVEL_MASK);
1898 1900                  ASSERT(xlate->xlt_idx <= IMMU_PGTABLE_MAXIDX);
1899 1901                  xlate->xlt_pgtable = NULL;
1900 1902                  offbits >>= IMMU_PGTABLE_LEVEL_STRIDE;
1901 1903          }
1902 1904  }
1903 1905  
1904 1906  /*
1905 1907   * Read the pgtables
1906 1908   */
1907 1909  static boolean_t
1908 1910  PDE_lookup(domain_t *domain, xlate_t *xlate, int nlevels)
1909 1911  {
1910 1912          pgtable_t *pgtable;
1911 1913          pgtable_t *next;
1912 1914          uint_t idx;
1913 1915  
1914 1916          /* start with highest level pgtable i.e. root */
1915 1917          xlate += nlevels;
1916 1918  
1917 1919          if (xlate->xlt_pgtable == NULL) {
1918 1920                  xlate->xlt_pgtable = domain->dom_pgtable_root;
1919 1921          }
1920 1922  
1921 1923          for (; xlate->xlt_level > 1; xlate--) {
1922 1924                  idx = xlate->xlt_idx;
1923 1925                  pgtable = xlate->xlt_pgtable;
1924 1926  
1925 1927                  if ((xlate - 1)->xlt_pgtable) {
1926 1928                          continue;
1927 1929                  }
1928 1930  
1929 1931                  /* Lock the pgtable in read mode */
1930 1932                  rw_enter(&(pgtable->swpg_rwlock), RW_READER);
1931 1933  
1932 1934                  /*
1933 1935                   * since we are unmapping, the pgtable should
1934 1936                   * already point to a leafier pgtable.
1935 1937                   */
1936 1938                  next = *(pgtable->swpg_next_array + idx);
1937 1939                  (xlate - 1)->xlt_pgtable = next;
1938 1940                  rw_exit(&(pgtable->swpg_rwlock));
1939 1941                  if (next == NULL)
1940 1942                          return (B_FALSE);
1941 1943          }
1942 1944  
1943 1945          return (B_TRUE);
1944 1946  }
1945 1947  
1946 1948  static void
1947 1949  immu_fault_walk(void *arg, void *base, size_t len)
1948 1950  {
1949 1951          uint64_t dvma, start;
1950 1952  
1951 1953          dvma = *(uint64_t *)arg;
1952 1954          start = (uint64_t)(uintptr_t)base;
1953 1955  
1954 1956          if (dvma >= start && dvma < (start + len)) {
1955 1957                  ddi_err(DER_WARN, NULL,
1956 1958                      "faulting DVMA address is in vmem arena "
1957 1959                      "(%" PRIx64 "-%" PRIx64 ")",
1958 1960                      start, start + len);
1959 1961                  *(uint64_t *)arg = ~0ULL;
1960 1962          }
1961 1963  }
1962 1964  
1963 1965  void
1964 1966  immu_print_fault_info(uint_t sid, uint64_t dvma)
1965 1967  {
1966 1968          int nlevels;
1967 1969          xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0};
1968 1970          xlate_t *xlatep;
1969 1971          hw_pdte_t pte;
1970 1972          domain_t *domain;
1971 1973          immu_t *immu;
1972 1974          uint64_t dvma_arg;
1973 1975  
1974 1976          if (mod_hash_find(bdf_domain_hash,
1975 1977              (void *)(uintptr_t)sid, (void *)&domain) != 0) {
1976 1978                  ddi_err(DER_WARN, NULL,
1977 1979                      "no domain for faulting SID %08x", sid);
1978 1980                  return;
1979 1981          }
1980 1982  
1981 1983          immu = domain->dom_immu;
1982 1984  
1983 1985          dvma_arg = dvma;
1984 1986          vmem_walk(domain->dom_dvma_arena, VMEM_ALLOC, immu_fault_walk,
1985 1987              (void *)&dvma_arg);
1986 1988          if (dvma_arg != ~0ULL)
1987 1989                  ddi_err(DER_WARN, domain->dom_dip,
1988 1990                      "faulting DVMA address is not in vmem arena");
1989 1991  
1990 1992          nlevels = immu->immu_dvma_nlevels;
1991 1993          xlate_setup(dvma, xlate, nlevels);
1992 1994  
1993 1995          if (!PDE_lookup(domain, xlate, nlevels)) {
1994 1996                  ddi_err(DER_WARN, domain->dom_dip,
1995 1997                      "pte not found in domid %d for faulting addr %" PRIx64,
1996 1998                      domain->dom_did, dvma);
1997 1999                  return;
1998 2000          }
1999 2001  
2000 2002          xlatep = &xlate[1];
2001 2003          pte = *((hw_pdte_t *)
2002 2004              (xlatep->xlt_pgtable->hwpg_vaddr) + xlatep->xlt_idx);
2003 2005  
2004 2006          ddi_err(DER_WARN, domain->dom_dip,
2005 2007              "domid %d pte: %" PRIx64 "(paddr %" PRIx64 ")", domain->dom_did,
2006 2008              (unsigned long long)pte, (unsigned long long)PDTE_PADDR(pte));
2007 2009  }
2008 2010  
2009 2011  /*ARGSUSED*/
2010 2012  static void
2011 2013  PTE_set_one(immu_t *immu, hw_pdte_t *hwp, paddr_t paddr,
2012 2014      dev_info_t *rdip, immu_flags_t immu_flags)
2013 2015  {
2014 2016          hw_pdte_t pte;
2015 2017  
2016 2018  #ifndef DEBUG
2017 2019          pte = immu->immu_ptemask;
2018 2020          PDTE_SET_PADDR(pte, paddr);
2019 2021  #else
2020 2022          pte = *hwp;
2021 2023  
2022 2024          if (PDTE_P(pte)) {
2023 2025                  if (PDTE_PADDR(pte) != paddr) {
2024 2026                          ddi_err(DER_MODE, rdip, "PTE paddr %lx != paddr %lx",
2025 2027                              PDTE_PADDR(pte), paddr);
2026 2028                  }
2027 2029  #ifdef BUGGY_DRIVERS
2028 2030                  return;
2029 2031  #else
2030 2032                  goto out;
2031 2033  #endif
2032 2034          }
2033 2035  
2034 2036          /* clear TM field if not reserved */
2035 2037          if (immu->immu_TM_reserved == B_FALSE) {
2036 2038                  PDTE_CLEAR_TM(pte);
2037 2039          }
2038 2040  
2039 2041          /* Clear 3rd field for system software  - not used */
2040 2042          PDTE_CLEAR_SW3(pte);
2041 2043  
2042 2044          /* Set paddr */
2043 2045          ASSERT(paddr % IMMU_PAGESIZE == 0);
2044 2046          PDTE_CLEAR_PADDR(pte);
2045 2047          PDTE_SET_PADDR(pte, paddr);
2046 2048  
2047 2049          /*  clear SNP field if not reserved. */
2048 2050          if (immu->immu_SNP_reserved == B_FALSE) {
2049 2051                  PDTE_CLEAR_SNP(pte);
2050 2052          }
2051 2053  
2052 2054          /* Clear SW2 field available for software */
2053 2055          PDTE_CLEAR_SW2(pte);
2054 2056  
2055 2057  
2056 2058          /* SP is don't care for PTEs. Clear it for cleanliness */
2057 2059          PDTE_CLEAR_SP(pte);
2058 2060  
2059 2061          /* Clear SW1 field available for software */
2060 2062          PDTE_CLEAR_SW1(pte);
2061 2063  
2062 2064          /*
2063 2065           * Now that we are done writing the PTE
2064 2066           * set the "present" flag. Note this present
2065 2067           * flag is a bit in the PDE/PTE that the
2066 2068           * spec says is available for system software.
2067 2069           * This is an implementation detail of Solaris
2068 2070           * bare-metal Intel IOMMU.
2069 2071           * The present field in a PDE/PTE is not defined
2070 2072           * by the Vt-d spec
2071 2073           */
2072 2074  
2073 2075          PDTE_SET_P(pte);
2074 2076  
2075 2077          pte |= immu->immu_ptemask;
2076 2078  
2077 2079  out:
2078 2080  #endif /* DEBUG */
2079 2081  #ifdef BUGGY_DRIVERS
2080 2082          PDTE_SET_READ(pte);
2081 2083          PDTE_SET_WRITE(pte);
2082 2084  #else
2083 2085          if (immu_flags & IMMU_FLAGS_READ)
2084 2086                  PDTE_SET_READ(pte);
2085 2087          if (immu_flags & IMMU_FLAGS_WRITE)
2086 2088                  PDTE_SET_WRITE(pte);
2087 2089  #endif /* BUGGY_DRIVERS */
2088 2090  
2089 2091          *hwp = pte;
2090 2092  }
2091 2093  
2092 2094  /*ARGSUSED*/
2093 2095  static void
2094 2096  PTE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate,
2095 2097      uint64_t *dvma_ptr, uint64_t *nvpages_ptr, immu_dcookie_t *dcookies,
2096 2098      int dcount, dev_info_t *rdip, immu_flags_t immu_flags)
2097 2099  {
2098 2100          paddr_t paddr;
2099 2101          uint64_t nvpages;
2100 2102          uint64_t nppages;
2101 2103          uint64_t dvma;
2102 2104          pgtable_t *pgtable;
2103 2105          hw_pdte_t *hwp;
2104 2106          hw_pdte_t *shwp;
2105 2107          int idx, nset;
2106 2108          int j;
2107 2109  
2108 2110          pgtable = xlate->xlt_pgtable;
2109 2111          idx = xlate->xlt_idx;
2110 2112  
2111 2113          dvma = *dvma_ptr;
2112 2114          nvpages = *nvpages_ptr;
2113 2115  
2114 2116          /*
2115 2117           * since a caller gets a unique dvma for a physical address,
2116 2118           * no other concurrent thread will be writing to the same
2117 2119           * PTE even if it has the same paddr. So no locks needed.
  
    | ↓ open down ↓ | 2046 lines elided | ↑ open up ↑ | 
2118 2120           */
2119 2121          shwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx;
2120 2122  
2121 2123          hwp = shwp;
2122 2124          for (j = dcount - 1; j >= 0; j--) {
2123 2125                  if (nvpages <= dcookies[j].dck_npages)
2124 2126                          break;
2125 2127                  nvpages -= dcookies[j].dck_npages;
2126 2128          }
2127 2129  
     2130 +        VERIFY(j >= 0);
2128 2131          nppages = nvpages;
2129 2132          paddr = dcookies[j].dck_paddr +
2130 2133              (dcookies[j].dck_npages - nppages) * IMMU_PAGESIZE;
2131 2134  
2132 2135          nvpages = *nvpages_ptr;
2133 2136          nset = 0;
2134 2137          for (; nvpages > 0 && idx <= IMMU_PGTABLE_MAXIDX; idx++, hwp++) {
2135 2138                  PTE_set_one(immu, hwp, paddr, rdip, immu_flags);
2136 2139                  nset++;
2137 2140  
2138 2141                  ASSERT(PDTE_check(immu, *hwp, NULL, paddr, rdip, immu_flags)
2139 2142                      == B_TRUE);
2140 2143                  nppages--;
2141 2144                  nvpages--;
2142 2145                  paddr += IMMU_PAGESIZE;
2143 2146                  dvma += IMMU_PAGESIZE;
2144 2147  
2145 2148                  if (nppages == 0) {
2146 2149                          j++;
2147 2150                  }
2148 2151  
2149 2152                  if (j == dcount)
2150 2153                          break;
2151 2154  
2152 2155                  if (nppages == 0) {
2153 2156                          nppages = dcookies[j].dck_npages;
2154 2157                          paddr = dcookies[j].dck_paddr;
2155 2158                  }
2156 2159          }
2157 2160  
2158 2161          if (nvpages) {
2159 2162                  *dvma_ptr = dvma;
2160 2163                  *nvpages_ptr = nvpages;
2161 2164          } else {
2162 2165                  *dvma_ptr = 0;
2163 2166                  *nvpages_ptr = 0;
2164 2167          }
2165 2168  
2166 2169          xlate->xlt_idx = idx;
2167 2170  }
2168 2171  
2169 2172  /*ARGSUSED*/
2170 2173  static void
2171 2174  PDE_set_one(immu_t *immu, hw_pdte_t *hwp, pgtable_t *next,
2172 2175      dev_info_t *rdip, immu_flags_t immu_flags)
2173 2176  {
2174 2177          hw_pdte_t pde;
2175 2178  
2176 2179          pde = *hwp;
2177 2180  
2178 2181          /* if PDE is already set, make sure it is correct */
2179 2182          if (PDTE_P(pde)) {
2180 2183                  ASSERT(PDTE_PADDR(pde) == next->hwpg_paddr);
2181 2184  #ifdef BUGGY_DRIVERS
2182 2185                  return;
2183 2186  #else
2184 2187                  goto out;
2185 2188  #endif
2186 2189          }
2187 2190  
2188 2191          /* Dont touch SW4, it is the present bit */
2189 2192  
2190 2193          /* don't touch TM field it is reserved for PDEs */
2191 2194  
2192 2195          /* 3rd field available for system software is not used */
2193 2196          PDTE_CLEAR_SW3(pde);
2194 2197  
2195 2198          /* Set next level pgtable-paddr for PDE */
2196 2199          PDTE_CLEAR_PADDR(pde);
2197 2200          PDTE_SET_PADDR(pde, next->hwpg_paddr);
2198 2201  
2199 2202          /* don't touch SNP field it is reserved for PDEs */
2200 2203  
2201 2204          /* Clear second field available for system software */
2202 2205          PDTE_CLEAR_SW2(pde);
2203 2206  
2204 2207          /* No super pages for PDEs */
2205 2208          PDTE_CLEAR_SP(pde);
2206 2209  
2207 2210          /* Clear SW1 for software */
2208 2211          PDTE_CLEAR_SW1(pde);
2209 2212  
2210 2213          /*
2211 2214           * Now that we are done writing the PDE
2212 2215           * set the "present" flag. Note this present
2213 2216           * flag is a bit in the PDE/PTE that the
2214 2217           * spec says is available for system software.
2215 2218           * This is an implementation detail of Solaris
2216 2219           * base-metal Intel IOMMU.
2217 2220           * The present field in a PDE/PTE is not defined
2218 2221           * by the Vt-d spec
2219 2222           */
2220 2223  
2221 2224  out:
2222 2225  #ifdef  BUGGY_DRIVERS
2223 2226          PDTE_SET_READ(pde);
2224 2227          PDTE_SET_WRITE(pde);
2225 2228  #else
2226 2229          if (immu_flags & IMMU_FLAGS_READ)
2227 2230                  PDTE_SET_READ(pde);
2228 2231          if (immu_flags & IMMU_FLAGS_WRITE)
2229 2232                  PDTE_SET_WRITE(pde);
2230 2233  #endif
2231 2234  
2232 2235          PDTE_SET_P(pde);
2233 2236  
2234 2237          *hwp = pde;
2235 2238  }
2236 2239  
2237 2240  /*
2238 2241   * Used to set PDEs
2239 2242   */
2240 2243  static boolean_t
2241 2244  PDE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate, int nlevels,
2242 2245      dev_info_t *rdip, immu_flags_t immu_flags)
2243 2246  {
2244 2247          pgtable_t *pgtable;
2245 2248          pgtable_t *new;
2246 2249          pgtable_t *next;
2247 2250          hw_pdte_t *hwp;
2248 2251          int level;
2249 2252          uint_t idx;
2250 2253          krw_t rwtype;
2251 2254          boolean_t set = B_FALSE;
2252 2255  
2253 2256          /* start with highest level pgtable i.e. root */
2254 2257          xlate += nlevels;
2255 2258  
2256 2259          new = NULL;
2257 2260          xlate->xlt_pgtable = domain->dom_pgtable_root;
2258 2261          for (level = nlevels; level > 1; level--, xlate--) {
2259 2262                  idx = xlate->xlt_idx;
2260 2263                  pgtable = xlate->xlt_pgtable;
2261 2264  
2262 2265                  /* Lock the pgtable in READ mode first */
2263 2266                  rw_enter(&(pgtable->swpg_rwlock), RW_READER);
2264 2267                  rwtype = RW_READER;
2265 2268  again:
2266 2269                  hwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx;
2267 2270                  next = (pgtable->swpg_next_array)[idx];
2268 2271  
2269 2272                  /*
2270 2273                   * check if leafier level already has a pgtable
2271 2274                   * if yes, verify
2272 2275                   */
2273 2276                  if (next == NULL) {
2274 2277                          if (new == NULL) {
2275 2278  
2276 2279                                  IMMU_DPROBE2(immu__pdp__alloc, dev_info_t *,
2277 2280                                      rdip, int, level);
2278 2281  
2279 2282                                  new = pgtable_alloc(immu, immu_flags);
2280 2283                                  if (new == NULL) {
2281 2284                                          ddi_err(DER_PANIC, rdip,
2282 2285                                              "pgtable alloc err");
2283 2286                                  }
2284 2287                                  pgtable_zero(new);
2285 2288                          }
2286 2289  
2287 2290                          /* Change to a write lock */
2288 2291                          if (rwtype == RW_READER &&
2289 2292                              rw_tryupgrade(&(pgtable->swpg_rwlock)) == 0) {
2290 2293                                  rw_exit(&(pgtable->swpg_rwlock));
2291 2294                                  rw_enter(&(pgtable->swpg_rwlock), RW_WRITER);
2292 2295                                  rwtype = RW_WRITER;
2293 2296                                  goto again;
2294 2297                          }
2295 2298                          rwtype = RW_WRITER;
2296 2299                          next = new;
2297 2300                          (pgtable->swpg_next_array)[idx] = next;
2298 2301                          new = NULL;
2299 2302                          PDE_set_one(immu, hwp, next, rdip, immu_flags);
2300 2303                          set = B_TRUE;
2301 2304                          rw_downgrade(&(pgtable->swpg_rwlock));
2302 2305                          rwtype = RW_READER;
2303 2306                  }
2304 2307  #ifndef  BUGGY_DRIVERS
2305 2308                  else {
2306 2309                          hw_pdte_t pde = *hwp;
2307 2310  
2308 2311                          /*
2309 2312                           * If buggy driver we already set permission
2310 2313                           * READ+WRITE so nothing to do for that case
2311 2314                           * XXX Check that read writer perms change before
2312 2315                           * actually setting perms. Also need to hold lock
2313 2316                           */
2314 2317                          if (immu_flags & IMMU_FLAGS_READ)
2315 2318                                  PDTE_SET_READ(pde);
2316 2319                          if (immu_flags & IMMU_FLAGS_WRITE)
2317 2320                                  PDTE_SET_WRITE(pde);
2318 2321  
2319 2322                          *hwp = pde;
2320 2323                  }
2321 2324  #endif
2322 2325  
2323 2326                  ASSERT(PDTE_check(immu, *hwp, next, 0, rdip, immu_flags)
2324 2327                      == B_TRUE);
2325 2328  
2326 2329                  (xlate - 1)->xlt_pgtable = next;
2327 2330                  rw_exit(&(pgtable->swpg_rwlock));
2328 2331          }
2329 2332  
2330 2333          if (new) {
2331 2334                  pgtable_free(immu, new);
2332 2335          }
2333 2336  
2334 2337          return (set);
2335 2338  }
2336 2339  
2337 2340  /*
2338 2341   * dvma_map()
2339 2342   *     map a contiguous range of DVMA pages
2340 2343   *
2341 2344   *     immu: IOMMU unit for which we are generating DVMA cookies
2342 2345   *   domain: domain
2343 2346   *    sdvma: Starting dvma
2344 2347   *   spaddr: Starting paddr
2345 2348   *   npages: Number of pages
2346 2349   *     rdip: requesting device
2347 2350   *     immu_flags: flags
2348 2351   */
2349 2352  static boolean_t
2350 2353  dvma_map(domain_t *domain, uint64_t sdvma, uint64_t snvpages,
2351 2354      immu_dcookie_t *dcookies, int dcount, dev_info_t *rdip,
2352 2355      immu_flags_t immu_flags)
2353 2356  {
2354 2357          uint64_t dvma;
2355 2358          uint64_t n;
2356 2359          immu_t *immu = domain->dom_immu;
2357 2360          int nlevels = immu->immu_dvma_nlevels;
2358 2361          xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0};
2359 2362          boolean_t pde_set = B_FALSE;
2360 2363  
2361 2364          n = snvpages;
2362 2365          dvma = sdvma;
2363 2366  
2364 2367          while (n > 0) {
2365 2368                  xlate_setup(dvma, xlate, nlevels);
2366 2369  
2367 2370                  /* Lookup or allocate PGDIRs and PGTABLEs if necessary */
2368 2371                  if (PDE_set_all(immu, domain, xlate, nlevels, rdip, immu_flags)
2369 2372                      == B_TRUE) {
2370 2373                          pde_set = B_TRUE;
2371 2374                  }
2372 2375  
2373 2376                  /* set all matching ptes that fit into this leaf pgtable */
2374 2377                  PTE_set_all(immu, domain, &xlate[1], &dvma, &n, dcookies,
2375 2378                      dcount, rdip, immu_flags);
2376 2379          }
2377 2380  
2378 2381          return (pde_set);
2379 2382  }
2380 2383  
2381 2384  /*
2382 2385   * dvma_unmap()
2383 2386   *   unmap a range of DVMAs
2384 2387   *
2385 2388   * immu: IOMMU unit state
2386 2389   * domain: domain for requesting device
2387 2390   * ddip: domain-dip
2388 2391   * dvma: starting DVMA
2389 2392   * npages: Number of IMMU pages to be unmapped
2390 2393   * rdip: requesting device
2391 2394   */
2392 2395  static void
2393 2396  dvma_unmap(domain_t *domain, uint64_t sdvma, uint64_t snpages,
2394 2397      dev_info_t *rdip)
2395 2398  {
2396 2399          immu_t *immu = domain->dom_immu;
2397 2400          int nlevels = immu->immu_dvma_nlevels;
2398 2401          xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0};
2399 2402          uint64_t n;
2400 2403          uint64_t dvma;
2401 2404  
2402 2405          dvma = sdvma;
2403 2406          n = snpages;
2404 2407  
2405 2408          while (n > 0) {
2406 2409                  /* setup the xlate array */
2407 2410                  xlate_setup(dvma, xlate, nlevels);
2408 2411  
2409 2412                  /* just lookup existing pgtables. Should never fail */
2410 2413                  if (!PDE_lookup(domain, xlate, nlevels))
2411 2414                          ddi_err(DER_PANIC, rdip,
2412 2415                              "PTE not found for addr %" PRIx64,
2413 2416                              (unsigned long long)dvma);
2414 2417  
2415 2418                  /* clear all matching ptes that fit into this leaf pgtable */
2416 2419                  PTE_clear_all(immu, domain, &xlate[1], &dvma, &n, rdip);
2417 2420          }
2418 2421  
2419 2422          /* No need to flush IOTLB after unmap */
2420 2423  }
2421 2424  
2422 2425  static uint64_t
2423 2426  dvma_alloc(domain_t *domain, ddi_dma_attr_t *dma_attr, uint_t npages, int kmf)
2424 2427  {
2425 2428          uint64_t dvma;
2426 2429          size_t xsize, align;
2427 2430          uint64_t minaddr, maxaddr;
2428 2431  
2429 2432          /* parameters */
2430 2433          xsize = npages * IMMU_PAGESIZE;
2431 2434          align = MAX((size_t)(dma_attr->dma_attr_align), IMMU_PAGESIZE);
2432 2435          minaddr = dma_attr->dma_attr_addr_lo;
2433 2436          maxaddr = dma_attr->dma_attr_addr_hi + 1;
2434 2437  
2435 2438          /* handle the rollover cases */
2436 2439          if (maxaddr < dma_attr->dma_attr_addr_hi) {
2437 2440                  maxaddr = dma_attr->dma_attr_addr_hi;
2438 2441          }
2439 2442  
2440 2443          /*
2441 2444           * allocate from vmem arena.
2442 2445           */
2443 2446          dvma = (uint64_t)(uintptr_t)vmem_xalloc(domain->dom_dvma_arena,
2444 2447              xsize, align, 0, 0, (void *)(uintptr_t)minaddr,
2445 2448              (void *)(uintptr_t)maxaddr, kmf);
2446 2449  
2447 2450          return (dvma);
2448 2451  }
2449 2452  
2450 2453  static void
2451 2454  dvma_prealloc(dev_info_t *rdip, immu_hdl_priv_t *ihp, ddi_dma_attr_t *dma_attr)
2452 2455  {
2453 2456          int nlevels;
2454 2457          xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0}, *xlp;
2455 2458          uint64_t dvma, n;
2456 2459          size_t xsize, align;
2457 2460          uint64_t minaddr, maxaddr, dmamax;
2458 2461          int on, npte, pindex;
2459 2462          hw_pdte_t *shwp;
2460 2463          immu_t *immu;
2461 2464          domain_t *domain;
2462 2465  
2463 2466          /* parameters */
2464 2467          domain = IMMU_DEVI(rdip)->imd_domain;
2465 2468          immu = domain->dom_immu;
2466 2469          nlevels = immu->immu_dvma_nlevels;
2467 2470          xsize = IMMU_NPREPTES * IMMU_PAGESIZE;
2468 2471          align = MAX((size_t)(dma_attr->dma_attr_align), IMMU_PAGESIZE);
2469 2472          minaddr = dma_attr->dma_attr_addr_lo;
2470 2473          if (dma_attr->dma_attr_flags & _DDI_DMA_BOUNCE_ON_SEG)
2471 2474                  dmamax = dma_attr->dma_attr_seg;
2472 2475          else
2473 2476                  dmamax = dma_attr->dma_attr_addr_hi;
2474 2477          maxaddr = dmamax + 1;
2475 2478  
2476 2479          if (maxaddr < dmamax)
2477 2480                  maxaddr = dmamax;
2478 2481  
2479 2482          dvma = (uint64_t)(uintptr_t)vmem_xalloc(domain->dom_dvma_arena,
2480 2483              xsize, align, 0, dma_attr->dma_attr_seg + 1,
2481 2484              (void *)(uintptr_t)minaddr, (void *)(uintptr_t)maxaddr, VM_NOSLEEP);
2482 2485  
2483 2486          ihp->ihp_predvma = dvma;
2484 2487          ihp->ihp_npremapped = 0;
2485 2488          if (dvma == 0)
2486 2489                  return;
2487 2490  
2488 2491          n = IMMU_NPREPTES;
2489 2492          pindex = 0;
2490 2493  
2491 2494          /*
2492 2495           * Set up a mapping at address 0, just so that all PDPs get allocated
2493 2496           * now. Although this initial mapping should never be used,
2494 2497           * explicitly set it to read-only, just to be safe.
2495 2498           */
2496 2499          while (n > 0) {
2497 2500                  xlate_setup(dvma, xlate, nlevels);
2498 2501  
2499 2502                  (void) PDE_set_all(immu, domain, xlate, nlevels, rdip,
2500 2503                      IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
2501 2504  
2502 2505                  xlp = &xlate[1];
2503 2506                  shwp = (hw_pdte_t *)(xlp->xlt_pgtable->hwpg_vaddr)
2504 2507                      + xlp->xlt_idx;
2505 2508                  on = n;
2506 2509  
2507 2510                  PTE_set_all(immu, domain, xlp, &dvma, &n, &immu_precookie,
2508 2511                      1, rdip, IMMU_FLAGS_READ);
2509 2512  
2510 2513                  npte = on - n;
2511 2514  
2512 2515                  while (npte > 0) {
2513 2516                          ihp->ihp_preptes[pindex++] = shwp;
2514 2517  #ifdef BUGGY_DRIVERS
2515 2518                          PDTE_CLEAR_WRITE(*shwp);
2516 2519  #endif
2517 2520                          shwp++;
2518 2521                          npte--;
2519 2522                  }
2520 2523          }
2521 2524  }
2522 2525  
2523 2526  static void
2524 2527  dvma_prefree(dev_info_t *rdip, immu_hdl_priv_t *ihp)
2525 2528  {
2526 2529          domain_t *domain;
2527 2530  
2528 2531          domain = IMMU_DEVI(rdip)->imd_domain;
2529 2532  
2530 2533          if (ihp->ihp_predvma != 0) {
2531 2534                  dvma_unmap(domain, ihp->ihp_predvma, IMMU_NPREPTES, rdip);
2532 2535                  vmem_free(domain->dom_dvma_arena,
2533 2536                      (void *)(uintptr_t)ihp->ihp_predvma,
2534 2537                      IMMU_NPREPTES * IMMU_PAGESIZE);
2535 2538          }
2536 2539  }
2537 2540  
2538 2541  static void
2539 2542  dvma_free(domain_t *domain, uint64_t dvma, uint64_t npages)
2540 2543  {
2541 2544          uint64_t size = npages * IMMU_PAGESIZE;
2542 2545  
2543 2546          if (domain->dom_maptype != IMMU_MAPTYPE_XLATE)
2544 2547                  return;
2545 2548  
2546 2549          vmem_free(domain->dom_dvma_arena, (void *)(uintptr_t)dvma, size);
2547 2550  }
2548 2551  
2549 2552  static int
2550 2553  immu_map_dvmaseg(dev_info_t *rdip, ddi_dma_handle_t handle,
2551 2554      immu_hdl_priv_t *ihp, struct ddi_dma_req *dmareq,
2552 2555      ddi_dma_obj_t *dma_out)
2553 2556  {
2554 2557          domain_t *domain;
2555 2558          immu_t *immu;
2556 2559          immu_flags_t immu_flags;
2557 2560          ddi_dma_atyp_t buftype;
2558 2561          ddi_dma_obj_t *dmar_object;
2559 2562          ddi_dma_attr_t *attrp;
2560 2563          uint64_t offset, paddr, dvma, sdvma, rwmask;
2561 2564          size_t npages, npgalloc;
2562 2565          uint_t psize, size, pcnt, dmax;
2563 2566          page_t **pparray;
2564 2567          caddr_t vaddr;
2565 2568          page_t *page;
2566 2569          struct as *vas;
2567 2570          immu_dcookie_t *dcookies;
2568 2571          int pde_set;
2569 2572  
2570 2573          domain = IMMU_DEVI(rdip)->imd_domain;
2571 2574          immu = domain->dom_immu;
2572 2575          immu_flags = dma_to_immu_flags(dmareq);
2573 2576  
2574 2577          attrp = &((ddi_dma_impl_t *)handle)->dmai_attr;
2575 2578  
2576 2579          dmar_object = &dmareq->dmar_object;
2577 2580          pparray = dmar_object->dmao_obj.virt_obj.v_priv;
2578 2581          vaddr = dmar_object->dmao_obj.virt_obj.v_addr;
2579 2582          buftype = dmar_object->dmao_type;
2580 2583          size = dmar_object->dmao_size;
2581 2584  
2582 2585          IMMU_DPROBE3(immu__map__dvma, dev_info_t *, rdip, ddi_dma_atyp_t,
2583 2586              buftype, uint_t, size);
2584 2587  
2585 2588          dcookies = &ihp->ihp_dcookies[0];
2586 2589  
2587 2590          pcnt = dmax = 0;
2588 2591  
2589 2592          /* retrieve paddr, psize, offset from dmareq */
2590 2593          if (buftype == DMA_OTYP_PAGES) {
2591 2594                  page = dmar_object->dmao_obj.pp_obj.pp_pp;
2592 2595                  offset =  dmar_object->dmao_obj.pp_obj.pp_offset &
2593 2596                      MMU_PAGEOFFSET;
2594 2597                  paddr = pfn_to_pa(page->p_pagenum) + offset;
2595 2598                  psize = MIN((MMU_PAGESIZE - offset), size);
2596 2599                  page = page->p_next;
2597 2600                  vas = dmar_object->dmao_obj.virt_obj.v_as;
2598 2601          } else {
2599 2602                  if (vas == NULL) {
2600 2603                          vas = &kas;
2601 2604                  }
2602 2605                  offset = (uintptr_t)vaddr & MMU_PAGEOFFSET;
2603 2606                  if (pparray != NULL) {
2604 2607                          paddr = pfn_to_pa(pparray[pcnt]->p_pagenum) + offset;
2605 2608                          psize = MIN((MMU_PAGESIZE - offset), size);
2606 2609                          pcnt++;
2607 2610                  } else {
2608 2611                          paddr = pfn_to_pa(hat_getpfnum(vas->a_hat,
2609 2612                              vaddr)) + offset;
2610 2613                          psize = MIN(size, (MMU_PAGESIZE - offset));
2611 2614                          vaddr += psize;
2612 2615                  }
2613 2616          }
2614 2617  
2615 2618          npgalloc = IMMU_BTOPR(size + offset);
2616 2619  
2617 2620          if (npgalloc <= IMMU_NPREPTES && ihp->ihp_predvma != 0) {
2618 2621  #ifdef BUGGY_DRIVERS
2619 2622                  rwmask = PDTE_MASK_R | PDTE_MASK_W | immu->immu_ptemask;
2620 2623  #else
2621 2624                  rwmask = immu->immu_ptemask;
2622 2625                  if (immu_flags & IMMU_FLAGS_READ)
2623 2626                          rwmask |= PDTE_MASK_R;
2624 2627                  if (immu_flags & IMMU_FLAGS_WRITE)
2625 2628                          rwmask |= PDTE_MASK_W;
2626 2629  #endif
2627 2630  #ifdef DEBUG
2628 2631                  rwmask |= PDTE_MASK_P;
2629 2632  #endif
2630 2633                  sdvma = ihp->ihp_predvma;
2631 2634                  ihp->ihp_npremapped = npgalloc;
2632 2635                  *ihp->ihp_preptes[0] =
2633 2636                      PDTE_PADDR(paddr & ~MMU_PAGEOFFSET) | rwmask;
2634 2637          } else {
2635 2638                  ihp->ihp_npremapped = 0;
2636 2639                  sdvma = dvma_alloc(domain, attrp, npgalloc,
2637 2640                      dmareq->dmar_fp == DDI_DMA_SLEEP ? VM_SLEEP : VM_NOSLEEP);
2638 2641                  if (sdvma == 0)
2639 2642                          return (DDI_DMA_NORESOURCES);
2640 2643  
2641 2644                  dcookies[0].dck_paddr = (paddr & ~MMU_PAGEOFFSET);
2642 2645                  dcookies[0].dck_npages = 1;
2643 2646          }
2644 2647  
2645 2648          IMMU_DPROBE3(immu__dvma__alloc, dev_info_t *, rdip, uint64_t, npgalloc,
2646 2649              uint64_t, sdvma);
2647 2650  
2648 2651          dvma = sdvma;
2649 2652          pde_set = 0;
2650 2653          npages = 1;
2651 2654          size -= psize;
2652 2655          while (size > 0) {
2653 2656                  /* get the size for this page (i.e. partial or full page) */
2654 2657                  psize = MIN(size, MMU_PAGESIZE);
2655 2658                  if (buftype == DMA_OTYP_PAGES) {
2656 2659                          /* get the paddr from the page_t */
2657 2660                          paddr = pfn_to_pa(page->p_pagenum);
2658 2661                          page = page->p_next;
  
    | ↓ open down ↓ | 521 lines elided | ↑ open up ↑ | 
2659 2662                  } else if (pparray != NULL) {
2660 2663                          /* index into the array of page_t's to get the paddr */
2661 2664                          paddr = pfn_to_pa(pparray[pcnt]->p_pagenum);
2662 2665                          pcnt++;
2663 2666                  } else {
2664 2667                          /* call into the VM to get the paddr */
2665 2668                          paddr = pfn_to_pa(hat_getpfnum(vas->a_hat, vaddr));
2666 2669                          vaddr += psize;
2667 2670                  }
2668 2671  
2669      -                npages++;
2670      -
2671 2672                  if (ihp->ihp_npremapped > 0) {
2672      -                        *ihp->ihp_preptes[npages - 1] =
     2673 +                        *ihp->ihp_preptes[npages] =
2673 2674                              PDTE_PADDR(paddr) | rwmask;
2674 2675                  } else if (IMMU_CONTIG_PADDR(dcookies[dmax], paddr)) {
2675 2676                          dcookies[dmax].dck_npages++;
2676 2677                  } else {
2677 2678                          /* No, we need a new dcookie */
2678 2679                          if (dmax == (IMMU_NDCK - 1)) {
2679 2680                                  /*
2680 2681                                   * Ran out of dcookies. Map them now.
2681 2682                                   */
2682 2683                                  if (dvma_map(domain, dvma,
2683 2684                                      npages, dcookies, dmax + 1, rdip,
  
    | ↓ open down ↓ | 1 lines elided | ↑ open up ↑ | 
2684 2685                                      immu_flags))
2685 2686                                          pde_set++;
2686 2687  
2687 2688                                  IMMU_DPROBE4(immu__dvmamap__early,
2688 2689                                      dev_info_t *, rdip, uint64_t, dvma,
2689 2690                                      uint_t, npages, uint_t, dmax+1);
2690 2691  
2691 2692                                  dvma += (npages << IMMU_PAGESHIFT);
2692 2693                                  npages = 0;
2693 2694                                  dmax = 0;
2694      -                        } else
     2695 +                        } else {
2695 2696                                  dmax++;
     2697 +                        }
2696 2698                          dcookies[dmax].dck_paddr = paddr;
2697 2699                          dcookies[dmax].dck_npages = 1;
2698 2700                  }
2699 2701                  size -= psize;
     2702 +                if (npages != 0)
     2703 +                        npages++;
2700 2704          }
2701 2705  
2702 2706          /*
2703 2707           * Finish up, mapping all, or all of the remaining,
2704 2708           * physical memory ranges.
2705 2709           */
2706 2710          if (ihp->ihp_npremapped == 0 && npages > 0) {
2707 2711                  IMMU_DPROBE4(immu__dvmamap__late, dev_info_t *, rdip, \
2708 2712                      uint64_t, dvma, uint_t, npages, uint_t, dmax+1);
2709 2713  
2710 2714                  if (dvma_map(domain, dvma, npages, dcookies,
2711 2715                      dmax + 1, rdip, immu_flags))
2712 2716                          pde_set++;
2713 2717          }
2714 2718  
2715 2719          /* Invalidate the IOTLB */
2716 2720          immu_flush_iotlb_psi(immu, domain->dom_did, sdvma, npgalloc,
2717 2721              pde_set > 0 ? TLB_IVA_WHOLE : TLB_IVA_LEAF,
2718 2722              &ihp->ihp_inv_wait);
2719 2723  
2720 2724          ihp->ihp_ndvseg = 1;
2721 2725          ihp->ihp_dvseg[0].dvs_start = sdvma;
2722 2726          ihp->ihp_dvseg[0].dvs_len = dmar_object->dmao_size;
2723 2727  
2724 2728          dma_out->dmao_size = dmar_object->dmao_size;
2725 2729          dma_out->dmao_obj.dvma_obj.dv_off = offset & IMMU_PAGEOFFSET;
2726 2730          dma_out->dmao_obj.dvma_obj.dv_nseg = 1;
2727 2731          dma_out->dmao_obj.dvma_obj.dv_seg = &ihp->ihp_dvseg[0];
2728 2732          dma_out->dmao_type = DMA_OTYP_DVADDR;
2729 2733  
2730 2734          return (DDI_DMA_MAPPED);
2731 2735  }
2732 2736  
2733 2737  static int
2734 2738  immu_unmap_dvmaseg(dev_info_t *rdip, ddi_dma_obj_t *dmao)
2735 2739  {
2736 2740          uint64_t dvma, npages;
2737 2741          domain_t *domain;
2738 2742          struct dvmaseg *dvs;
2739 2743  
2740 2744          domain = IMMU_DEVI(rdip)->imd_domain;
2741 2745          dvs = dmao->dmao_obj.dvma_obj.dv_seg;
2742 2746  
2743 2747          dvma = dvs[0].dvs_start;
2744 2748          npages = IMMU_BTOPR(dvs[0].dvs_len + dmao->dmao_obj.dvma_obj.dv_off);
2745 2749  
2746 2750  #ifdef DEBUG
2747 2751          /* Unmap only in DEBUG mode */
2748 2752          dvma_unmap(domain, dvma, npages, rdip);
2749 2753  #endif
2750 2754          dvma_free(domain, dvma, npages);
2751 2755  
2752 2756          IMMU_DPROBE3(immu__dvma__free, dev_info_t *, rdip, uint_t, npages,
2753 2757              uint64_t, dvma);
2754 2758  
2755 2759  #ifdef DEBUG
2756 2760          /*
2757 2761           * In the DEBUG case, the unmap was actually done,
2758 2762           * but an IOTLB flush was not done. So, an explicit
2759 2763           * write back flush is needed.
2760 2764           */
2761 2765          immu_regs_wbf_flush(domain->dom_immu);
2762 2766  #endif
2763 2767  
2764 2768          return (DDI_SUCCESS);
2765 2769  }
2766 2770  
2767 2771  /* ############################# Functions exported ######################## */
2768 2772  
2769 2773  /*
2770 2774   * setup the DVMA subsystem
2771 2775   * this code runs only for the first IOMMU unit
2772 2776   */
2773 2777  void
2774 2778  immu_dvma_setup(list_t *listp)
2775 2779  {
2776 2780          immu_t *immu;
2777 2781          uint_t kval;
2778 2782          size_t nchains;
2779 2783  
2780 2784          /* locks */
2781 2785          mutex_init(&immu_domain_lock, NULL, MUTEX_DEFAULT, NULL);
2782 2786  
2783 2787          /* Create lists */
2784 2788          list_create(&immu_unity_domain_list, sizeof (domain_t),
2785 2789              offsetof(domain_t, dom_maptype_node));
2786 2790          list_create(&immu_xlate_domain_list, sizeof (domain_t),
2787 2791              offsetof(domain_t, dom_maptype_node));
2788 2792  
2789 2793          /* Setup BDF domain hash */
2790 2794          nchains = 0xff;
2791 2795          kval = mod_hash_iddata_gen(nchains);
2792 2796  
2793 2797          bdf_domain_hash = mod_hash_create_extended("BDF-DOMAIN_HASH",
2794 2798              nchains, mod_hash_null_keydtor, mod_hash_null_valdtor,
2795 2799              mod_hash_byid, (void *)(uintptr_t)kval, mod_hash_idkey_cmp,
2796 2800              KM_NOSLEEP);
2797 2801  
2798 2802          immu = list_head(listp);
2799 2803          for (; immu; immu = list_next(listp, immu)) {
2800 2804                  create_unity_domain(immu);
2801 2805                  did_init(immu);
2802 2806                  context_init(immu);
2803 2807                  immu->immu_dvma_setup = B_TRUE;
2804 2808          }
2805 2809  }
2806 2810  
2807 2811  /*
2808 2812   * Startup up one DVMA unit
2809 2813   */
2810 2814  void
2811 2815  immu_dvma_startup(immu_t *immu)
2812 2816  {
2813 2817          if (immu_gfxdvma_enable == B_FALSE &&
2814 2818              immu->immu_dvma_gfx_only == B_TRUE) {
2815 2819                  return;
2816 2820          }
2817 2821  
2818 2822          /*
2819 2823           * DVMA will start once IOMMU is "running"
2820 2824           */
2821 2825          immu->immu_dvma_running = B_TRUE;
2822 2826  }
2823 2827  
2824 2828  /*
2825 2829   * immu_dvma_physmem_update()
2826 2830   *       called when the installed memory on a
2827 2831   *       system increases, to expand domain DVMA
2828 2832   *       for domains with UNITY mapping
2829 2833   */
2830 2834  void
2831 2835  immu_dvma_physmem_update(uint64_t addr, uint64_t size)
2832 2836  {
2833 2837          uint64_t start;
2834 2838          uint64_t npages;
2835 2839          int dcount;
2836 2840          immu_dcookie_t dcookies[1] = {0};
2837 2841          domain_t *domain;
2838 2842  
2839 2843          /*
2840 2844           * Just walk the system-wide list of domains with
2841 2845           * UNITY mapping. Both the list of *all* domains
2842 2846           * and *UNITY* domains is protected by the same
2843 2847           * single lock
2844 2848           */
2845 2849          mutex_enter(&immu_domain_lock);
2846 2850          domain = list_head(&immu_unity_domain_list);
2847 2851          for (; domain; domain = list_next(&immu_unity_domain_list, domain)) {
2848 2852                  /*
2849 2853                   * Nothing to do if the IOMMU supports passthrough.
2850 2854                   */
2851 2855                  if (IMMU_ECAP_GET_PT(domain->dom_immu->immu_regs_excap))
2852 2856                          continue;
2853 2857  
2854 2858                  /* There is no vmem_arena for unity domains. Just map it */
2855 2859                  ddi_err(DER_LOG, domain->dom_dip,
2856 2860                      "iommu: unity-domain: Adding map "
2857 2861                      "[0x%" PRIx64 " - 0x%" PRIx64 "]", addr, addr + size);
2858 2862  
2859 2863                  start = IMMU_ROUNDOWN(addr);
2860 2864                  npages = (IMMU_ROUNDUP(size) / IMMU_PAGESIZE) + 1;
2861 2865  
2862 2866                  dcookies[0].dck_paddr = start;
2863 2867                  dcookies[0].dck_npages = npages;
2864 2868                  dcount = 1;
2865 2869                  (void) dvma_map(domain, start, npages,
2866 2870                      dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
2867 2871  
2868 2872          }
2869 2873          mutex_exit(&immu_domain_lock);
2870 2874  }
2871 2875  
2872 2876  int
2873 2877  immu_dvma_device_setup(dev_info_t *rdip, immu_flags_t immu_flags)
2874 2878  {
2875 2879          dev_info_t *ddip, *odip;
2876 2880          immu_t *immu;
2877 2881          domain_t *domain;
2878 2882  
2879 2883          odip = rdip;
2880 2884  
2881 2885          immu = immu_dvma_get_immu(rdip, immu_flags);
2882 2886          if (immu == NULL) {
2883 2887                  /*
2884 2888                   * possible that there is no IOMMU unit for this device
2885 2889                   * - BIOS bugs are one example.
2886 2890                   */
2887 2891                  ddi_err(DER_WARN, rdip, "No iommu unit found for device");
2888 2892                  return (DDI_DMA_NORESOURCES);
2889 2893          }
2890 2894  
2891 2895          /*
2892 2896           * redirect isa devices attached under lpc to lpc dip
2893 2897           */
2894 2898          if (strcmp(ddi_node_name(ddi_get_parent(rdip)), "isa") == 0) {
2895 2899                  rdip = get_lpc_devinfo(immu, rdip, immu_flags);
2896 2900                  if (rdip == NULL) {
2897 2901                          ddi_err(DER_PANIC, rdip, "iommu redirect failed");
2898 2902                          /*NOTREACHED*/
2899 2903                  }
2900 2904          }
2901 2905  
2902 2906          /* Reset immu, as redirection can change IMMU */
2903 2907          immu = NULL;
2904 2908  
2905 2909          /*
2906 2910           * for gart, redirect to the real graphic devinfo
2907 2911           */
2908 2912          if (strcmp(ddi_node_name(rdip), "agpgart") == 0) {
2909 2913                  rdip = get_gfx_devinfo(rdip);
2910 2914                  if (rdip == NULL) {
2911 2915                          ddi_err(DER_PANIC, rdip, "iommu redirect failed");
2912 2916                          /*NOTREACHED*/
2913 2917                  }
2914 2918          }
2915 2919  
2916 2920          /*
2917 2921           * Setup DVMA domain for the device. This does
2918 2922           * work only the first time we do DVMA for a
2919 2923           * device.
2920 2924           */
2921 2925          ddip = NULL;
2922 2926          domain = device_domain(rdip, &ddip, immu_flags);
2923 2927          if (domain == NULL) {
2924 2928                  ddi_err(DER_MODE, rdip, "Intel IOMMU setup failed for device");
2925 2929                  return (DDI_DMA_NORESOURCES);
2926 2930          }
2927 2931  
2928 2932          immu = domain->dom_immu;
2929 2933  
2930 2934          /*
2931 2935           * If a domain is found, we must also have a domain dip
2932 2936           * which is the topmost ancestor dip of rdip that shares
2933 2937           * the same domain with rdip.
2934 2938           */
2935 2939          if (domain->dom_did == 0 || ddip == NULL) {
2936 2940                  ddi_err(DER_MODE, rdip, "domain did 0(%d) or ddip NULL(%p)",
2937 2941                      domain->dom_did, ddip);
2938 2942                  return (DDI_DMA_NORESOURCES);
2939 2943          }
2940 2944  
2941 2945          if (odip != rdip)
2942 2946                  set_domain(odip, ddip, domain);
2943 2947  
2944 2948          /*
2945 2949           * Update the root and context entries
2946 2950           */
2947 2951          if (immu_context_update(immu, domain, ddip, rdip, immu_flags)
2948 2952              != DDI_SUCCESS) {
2949 2953                  ddi_err(DER_MODE, rdip, "DVMA map: context update failed");
2950 2954                  return (DDI_DMA_NORESOURCES);
2951 2955          }
2952 2956  
2953 2957          return (DDI_SUCCESS);
2954 2958  }
2955 2959  
2956 2960  int
2957 2961  immu_map_memrange(dev_info_t *rdip, memrng_t *mrng)
2958 2962  {
2959 2963          immu_dcookie_t dcookies[1] = {0};
2960 2964          boolean_t pde_set;
2961 2965          immu_t *immu;
2962 2966          domain_t *domain;
2963 2967          immu_inv_wait_t iw;
2964 2968  
2965 2969          dcookies[0].dck_paddr = mrng->mrng_start;
2966 2970          dcookies[0].dck_npages = mrng->mrng_npages;
2967 2971  
2968 2972          domain = IMMU_DEVI(rdip)->imd_domain;
2969 2973          immu = domain->dom_immu;
2970 2974  
2971 2975          pde_set = dvma_map(domain, mrng->mrng_start,
2972 2976              mrng->mrng_npages, dcookies, 1, rdip,
2973 2977              IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
2974 2978  
2975 2979          immu_init_inv_wait(&iw, "memrange", B_TRUE);
2976 2980  
2977 2981          immu_flush_iotlb_psi(immu, domain->dom_did, mrng->mrng_start,
2978 2982              mrng->mrng_npages, pde_set == B_TRUE ?
2979 2983              TLB_IVA_WHOLE : TLB_IVA_LEAF, &iw);
2980 2984  
2981 2985          return (DDI_SUCCESS);
2982 2986  }
2983 2987  
2984 2988  immu_devi_t *
2985 2989  immu_devi_get(dev_info_t *rdip)
2986 2990  {
2987 2991          immu_devi_t *immu_devi;
2988 2992          volatile uintptr_t *vptr = (uintptr_t *)&(DEVI(rdip)->devi_iommu);
2989 2993  
2990 2994          /* Just want atomic reads. No need for lock */
2991 2995          immu_devi = (immu_devi_t *)(uintptr_t)atomic_or_64_nv((uint64_t *)vptr,
2992 2996              0);
2993 2997          return (immu_devi);
2994 2998  }
2995 2999  
2996 3000  /*ARGSUSED*/
2997 3001  int
2998 3002  immu_hdl_priv_ctor(void *buf, void *arg, int kmf)
2999 3003  {
3000 3004          immu_hdl_priv_t *ihp;
3001 3005  
3002 3006          ihp = buf;
3003 3007          immu_init_inv_wait(&ihp->ihp_inv_wait, "dmahandle", B_FALSE);
3004 3008  
3005 3009          return (0);
3006 3010  }
3007 3011  
3008 3012  /*
3009 3013   * iommulib interface functions
3010 3014   */
3011 3015  static int
3012 3016  immu_probe(iommulib_handle_t handle, dev_info_t *dip)
3013 3017  {
3014 3018          immu_devi_t *immu_devi;
3015 3019          int ret;
3016 3020  
3017 3021          if (!immu_enable)
3018 3022                  return (DDI_FAILURE);
3019 3023  
3020 3024          /*
3021 3025           * Make sure the device has all the IOMMU structures
3022 3026           * initialized. If this device goes through an IOMMU
3023 3027           * unit (e.g. this probe function returns success),
3024 3028           * this will be called at most N times, with N being
3025 3029           * the number of IOMMUs in the system.
3026 3030           *
3027 3031           * After that, when iommulib_nex_open succeeds,
3028 3032           * we can always assume that this device has all
3029 3033           * the structures initialized. IOMMU_USED(dip) will
3030 3034           * be true. There is no need to find the controlling
3031 3035           * IOMMU/domain again.
3032 3036           */
3033 3037          ret = immu_dvma_device_setup(dip, IMMU_FLAGS_NOSLEEP);
3034 3038          if (ret != DDI_SUCCESS)
3035 3039                  return (ret);
3036 3040  
3037 3041          immu_devi = IMMU_DEVI(dip);
3038 3042  
3039 3043          /*
3040 3044           * For unity domains, there is no need to call in to
3041 3045           * the IOMMU code.
3042 3046           */
3043 3047          if (immu_devi->imd_domain->dom_did == IMMU_UNITY_DID)
3044 3048                  return (DDI_FAILURE);
3045 3049  
3046 3050          if (immu_devi->imd_immu->immu_dip == iommulib_iommu_getdip(handle))
3047 3051                  return (DDI_SUCCESS);
3048 3052  
3049 3053          return (DDI_FAILURE);
3050 3054  }
3051 3055  
3052 3056  /*ARGSUSED*/
3053 3057  static int
3054 3058  immu_allochdl(iommulib_handle_t handle,
3055 3059      dev_info_t *dip, dev_info_t *rdip, ddi_dma_attr_t *attr,
3056 3060      int (*waitfp)(caddr_t), caddr_t arg, ddi_dma_handle_t *dma_handlep)
3057 3061  {
3058 3062          int ret;
3059 3063          immu_hdl_priv_t *ihp;
3060 3064          immu_t *immu;
3061 3065  
3062 3066          ret = iommulib_iommu_dma_allochdl(dip, rdip, attr, waitfp,
3063 3067              arg, dma_handlep);
3064 3068          if (ret == DDI_SUCCESS) {
3065 3069                  immu = IMMU_DEVI(rdip)->imd_immu;
3066 3070  
3067 3071                  ihp = kmem_cache_alloc(immu->immu_hdl_cache,
3068 3072                      waitfp == DDI_DMA_SLEEP ? KM_SLEEP : KM_NOSLEEP);
3069 3073                  if (ihp == NULL) {
3070 3074                          (void) iommulib_iommu_dma_freehdl(dip, rdip,
3071 3075                              *dma_handlep);
3072 3076                          return (DDI_DMA_NORESOURCES);
3073 3077                  }
3074 3078  
3075 3079                  if (IMMU_DEVI(rdip)->imd_use_premap)
3076 3080                          dvma_prealloc(rdip, ihp, attr);
3077 3081                  else {
3078 3082                          ihp->ihp_npremapped = 0;
3079 3083                          ihp->ihp_predvma = 0;
3080 3084                  }
3081 3085                  ret = iommulib_iommu_dmahdl_setprivate(dip, rdip, *dma_handlep,
3082 3086                      ihp);
3083 3087          }
3084 3088          return (ret);
3085 3089  }
3086 3090  
3087 3091  /*ARGSUSED*/
3088 3092  static int
3089 3093  immu_freehdl(iommulib_handle_t handle,
3090 3094      dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t dma_handle)
3091 3095  {
3092 3096          immu_hdl_priv_t *ihp;
3093 3097  
3094 3098          ihp = iommulib_iommu_dmahdl_getprivate(dip, rdip, dma_handle);
3095 3099          if (ihp != NULL) {
3096 3100                  if (IMMU_DEVI(rdip)->imd_use_premap)
3097 3101                          dvma_prefree(rdip, ihp);
3098 3102                  kmem_cache_free(IMMU_DEVI(rdip)->imd_immu->immu_hdl_cache, ihp);
3099 3103          }
3100 3104  
3101 3105          return (iommulib_iommu_dma_freehdl(dip, rdip, dma_handle));
3102 3106  }
3103 3107  
3104 3108  
3105 3109  /*ARGSUSED*/
3106 3110  static int
3107 3111  immu_bindhdl(iommulib_handle_t handle, dev_info_t *dip,
3108 3112      dev_info_t *rdip, ddi_dma_handle_t dma_handle,
3109 3113      struct ddi_dma_req *dma_req, ddi_dma_cookie_t *cookiep,
3110 3114      uint_t *ccountp)
3111 3115  {
3112 3116          int ret;
3113 3117          immu_hdl_priv_t *ihp;
3114 3118  
3115 3119          ret = iommulib_iommu_dma_bindhdl(dip, rdip, dma_handle,
3116 3120              dma_req, cookiep, ccountp);
3117 3121  
3118 3122          if (ret == DDI_DMA_MAPPED) {
3119 3123                  ihp = iommulib_iommu_dmahdl_getprivate(dip, rdip, dma_handle);
3120 3124                  immu_flush_wait(IMMU_DEVI(rdip)->imd_immu, &ihp->ihp_inv_wait);
3121 3125          }
3122 3126  
3123 3127          return (ret);
3124 3128  }
3125 3129  
3126 3130  /*ARGSUSED*/
3127 3131  static int
3128 3132  immu_unbindhdl(iommulib_handle_t handle,
3129 3133      dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t dma_handle)
3130 3134  {
3131 3135          return (iommulib_iommu_dma_unbindhdl(dip, rdip, dma_handle));
3132 3136  }
3133 3137  
3134 3138  /*ARGSUSED*/
3135 3139  static int
3136 3140  immu_sync(iommulib_handle_t handle, dev_info_t *dip,
3137 3141      dev_info_t *rdip, ddi_dma_handle_t dma_handle, off_t off,
3138 3142      size_t len, uint_t cachefl)
3139 3143  {
3140 3144          return (iommulib_iommu_dma_sync(dip, rdip, dma_handle, off, len,
3141 3145              cachefl));
3142 3146  }
3143 3147  
3144 3148  /*ARGSUSED*/
3145 3149  static int
3146 3150  immu_win(iommulib_handle_t handle, dev_info_t *dip,
3147 3151      dev_info_t *rdip, ddi_dma_handle_t dma_handle, uint_t win,
3148 3152      off_t *offp, size_t *lenp, ddi_dma_cookie_t *cookiep,
3149 3153      uint_t *ccountp)
3150 3154  {
3151 3155          return (iommulib_iommu_dma_win(dip, rdip, dma_handle, win, offp,
3152 3156              lenp, cookiep, ccountp));
3153 3157  }
3154 3158  
3155 3159  /*ARGSUSED*/
3156 3160  static int
3157 3161  immu_mapobject(iommulib_handle_t handle, dev_info_t *dip,
3158 3162      dev_info_t *rdip, ddi_dma_handle_t dma_handle,
3159 3163      struct ddi_dma_req *dmareq, ddi_dma_obj_t *dmao)
3160 3164  {
3161 3165          immu_hdl_priv_t *ihp;
3162 3166  
3163 3167          ihp = iommulib_iommu_dmahdl_getprivate(dip, rdip, dma_handle);
3164 3168  
3165 3169          return (immu_map_dvmaseg(rdip, dma_handle, ihp, dmareq, dmao));
3166 3170  }
3167 3171  
3168 3172  /*ARGSUSED*/
3169 3173  static int
3170 3174  immu_unmapobject(iommulib_handle_t handle, dev_info_t *dip,
3171 3175      dev_info_t *rdip, ddi_dma_handle_t dma_handle, ddi_dma_obj_t *dmao)
3172 3176  {
3173 3177          immu_hdl_priv_t *ihp;
3174 3178  
3175 3179          ihp = iommulib_iommu_dmahdl_getprivate(dip, rdip, dma_handle);
3176 3180          if (ihp->ihp_npremapped > 0)
3177 3181                  return (DDI_SUCCESS);
3178 3182          return (immu_unmap_dvmaseg(rdip, dmao));
3179 3183  }
  
    | ↓ open down ↓ | 470 lines elided | ↑ open up ↑ | 
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX