Print this page
2650 AMD family 0x15 PG support

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/i86pc/os/cpuid.c
          +++ new/usr/src/uts/i86pc/os/cpuid.c
↓ open down ↓ 152 lines elided ↑ open up ↑
 153  153          "sse4_1",
 154  154          "sse4_2",
 155  155          "1gpg",
 156  156          "clfsh",
 157  157          "64",
 158  158          "aes",
 159  159          "pclmulqdq",
 160  160          "xsave",
 161  161          "avx",
 162  162          "vmx",
 163      -        "svm"
      163 +        "svm",
      164 +        "topoext"
 164  165  };
 165  166  
 166  167  boolean_t
 167  168  is_x86_feature(void *featureset, uint_t feature)
 168  169  {
 169  170          ASSERT(feature < NUM_X86_FEATURES);
 170  171          return (BT_TEST((ulong_t *)featureset, feature));
 171  172  }
 172  173  
 173  174  void
↓ open down ↓ 88 lines elided ↑ open up ↑
 262  263  };
 263  264  
 264  265  
 265  266  /*
 266  267   * These constants determine how many of the elements of the
 267  268   * cpuid we cache in the cpuid_info data structure; the
 268  269   * remaining elements are accessible via the cpuid instruction.
 269  270   */
 270  271  
 271  272  #define NMAX_CPI_STD    6               /* eax = 0 .. 5 */
 272      -#define NMAX_CPI_EXTD   0x1c            /* eax = 0x80000000 .. 0x8000001b */
      273 +#define NMAX_CPI_EXTD   0x1f            /* eax = 0x80000000 .. 0x8000001e */
 273  274  
 274  275  /*
 275  276   * Some terminology needs to be explained:
 276  277   *  - Socket: Something that can be plugged into a motherboard.
 277  278   *  - Package: Same as socket
 278  279   *  - Chip: Same as socket. Note that AMD's documentation uses the term "chip"
 279  280   *    differently: there, chip is the same as processor node (below)
 280  281   *  - Processor node: Some AMD processors have more than one
 281  282   *    "subprocessor" embedded in a package. These subprocessors (nodes)
 282  283   *    are fully-functional processors themselves with cores, caches,
 283  284   *    memory controllers, PCI configuration spaces. They are connected
 284  285   *    inside the package with Hypertransport links. On single-node
 285  286   *    processors, processor node is equivalent to chip/socket/package.
      287 + *  - Compute Unit: Some AMD processors pair cores in "compute units" that
      288 + *    share the FPU and the I$ and L2 caches.
 286  289   */
 287  290  
 288  291  struct cpuid_info {
 289  292          uint_t cpi_pass;                /* last pass completed */
 290  293          /*
 291  294           * standard function information
 292  295           */
 293  296          uint_t cpi_maxeax;              /* fn 0: %eax */
 294  297          char cpi_vendorstr[13];         /* fn 0: %ebx:%ecx:%edx */
 295  298          uint_t cpi_vendor;              /* enum of cpi_vendorstr */
↓ open down ↓ 40 lines elided ↑ open up ↑
 336  339           */
 337  340          uint32_t cpi_chiprev;           /* See X86_CHIPREV_* in x86_archext.h */
 338  341          const char *cpi_chiprevstr;     /* May be NULL if chiprev unknown */
 339  342          uint32_t cpi_socket;            /* Chip package/socket type */
 340  343  
 341  344          struct mwait_info cpi_mwait;    /* fn 5: monitor/mwait info */
 342  345          uint32_t cpi_apicid;
 343  346          uint_t cpi_procnodeid;          /* AMD: nodeID on HT, Intel: chipid */
 344  347          uint_t cpi_procnodes_per_pkg;   /* AMD: # of nodes in the package */
 345  348                                          /* Intel: 1 */
      349 +        uint_t cpi_compunitid;          /* AMD: ComputeUnit ID, Intel: coreid */
      350 +        uint_t cpi_cores_per_compunit;  /* AMD: # of cores in the ComputeUnit */
 346  351  
 347  352          struct xsave_info cpi_xsave;    /* fn D: xsave/xrestor info */
 348  353  };
 349  354  
 350  355  
 351  356  static struct cpuid_info cpuid_info0;
 352  357  
 353  358  /*
 354  359   * These bit fields are defined by the Intel Application Note AP-485
 355  360   * "Intel Processor Identification and the CPUID Instruction"
↓ open down ↓ 364 lines elided ↑ open up ↑
 720  725                  cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift;
 721  726                  cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
 722  727          } else if (is_x86_feature(feature, X86FSET_HTT)) {
 723  728                  /*
 724  729                   * Single-core multi-threaded processors.
 725  730                   */
 726  731                  cpi->cpi_coreid = cpi->cpi_chipid;
 727  732                  cpi->cpi_pkgcoreid = 0;
 728  733          }
 729  734          cpi->cpi_procnodeid = cpi->cpi_chipid;
      735 +        cpi->cpi_compunitid = cpi->cpi_coreid;
 730  736  }
 731  737  
 732  738  static void
 733  739  cpuid_amd_getids(cpu_t *cpu)
 734  740  {
 735  741          int i, first_half, coreidsz;
 736  742          uint32_t nb_caps_reg;
 737  743          uint_t node2_1;
 738  744          struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
      745 +        struct cpuid_regs *cp;
 739  746  
 740  747          /*
 741  748           * AMD CMP chips currently have a single thread per core.
 742  749           *
 743  750           * Since no two cpus share a core we must assign a distinct coreid
 744  751           * per cpu, and we do this by using the cpu_id.  This scheme does not,
 745  752           * however, guarantee that sibling cores of a chip will have sequential
 746  753           * coreids starting at a multiple of the number of cores per chip -
 747  754           * that is usually the case, but if the ACPI MADT table is presented
 748  755           * in a different order then we need to perform a few more gymnastics
 749  756           * for the pkgcoreid.
 750  757           *
 751  758           * All processors in the system have the same number of enabled
 752  759           * cores. Cores within a processor are always numbered sequentially
 753  760           * from 0 regardless of how many or which are disabled, and there
 754  761           * is no way for the operating system to discover the real core id when
 755  762           * some are disabled.
      763 +         *
      764 +         * In family 0x15, the cores come in pairs called compute units. They
      765 +         * share I$ and L2 caches and the FPU. Enumeration of this feature is
      766 +         * simplified by the new topology extensions CPUID leaf, indicated by
      767 +         * the X86 feature X86FSET_TOPOEXT.
 756  768           */
 757  769  
 758  770          cpi->cpi_coreid = cpu->cpu_id;
      771 +        cpi->cpi_compunitid = cpu->cpu_id;
 759  772  
 760  773          if (cpi->cpi_xmaxeax >= 0x80000008) {
 761  774  
 762  775                  coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12);
 763  776  
 764  777                  /*
 765  778                   * In AMD parlance chip is really a node while Solaris
 766  779                   * sees chip as equivalent to socket/package.
 767  780                   */
 768  781                  cpi->cpi_ncore_per_chip =
↓ open down ↓ 8 lines elided ↑ open up ↑
 777  790          } else {
 778  791                  /* Assume single-core part */
 779  792                  cpi->cpi_ncore_per_chip = 1;
 780  793                  coreidsz = 1;
 781  794          }
 782  795  
 783  796          cpi->cpi_clogid = cpi->cpi_pkgcoreid =
 784  797              cpi->cpi_apicid & ((1<<coreidsz) - 1);
 785  798          cpi->cpi_ncpu_per_chip = cpi->cpi_ncore_per_chip;
 786  799  
 787      -        /* Get nodeID */
 788      -        if (cpi->cpi_family == 0xf) {
      800 +        /* Get node ID, compute unit ID */
      801 +        if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
      802 +            cpi->cpi_xmaxeax >= 0x8000001e) {
      803 +                cp = &cpi->cpi_extd[0x1e];
      804 +                cp->cp_eax = 0x8000001e;
      805 +                (void) __cpuid_insn(cp);
      806 +
      807 +                cpi->cpi_procnodes_per_pkg = BITX(cp->cp_ecx, 10, 8) + 1;
      808 +                cpi->cpi_procnodeid = BITX(cp->cp_ecx, 7, 0);
      809 +                cpi->cpi_cores_per_compunit = BITX(cp->cp_ebx, 15, 8) + 1;
      810 +                cpi->cpi_compunitid = BITX(cp->cp_ebx, 7, 0)
      811 +                    + (cpi->cpi_ncore_per_chip / cpi->cpi_cores_per_compunit)
      812 +                    * (cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg);
      813 +        } else if (cpi->cpi_family == 0xf || cpi->cpi_family >= 0x11) {
 789  814                  cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
 790      -                cpi->cpi_chipid = cpi->cpi_procnodeid;
 791  815          } else if (cpi->cpi_family == 0x10) {
 792  816                  /*
 793  817                   * See if we are a multi-node processor.
 794  818                   * All processors in the system have the same number of nodes
 795  819                   */
 796  820                  nb_caps_reg =  pci_getl_func(0, 24, 3, 0xe8);
 797  821                  if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) {
 798  822                          /* Single-node */
 799  823                          cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5,
 800  824                              coreidsz);
 801      -                        cpi->cpi_chipid = cpi->cpi_procnodeid;
 802  825                  } else {
 803  826  
 804  827                          /*
 805  828                           * Multi-node revision D (2 nodes per package
 806  829                           * are supported)
 807  830                           */
 808  831                          cpi->cpi_procnodes_per_pkg = 2;
 809  832  
 810  833                          first_half = (cpi->cpi_pkgcoreid <=
 811  834                              (cpi->cpi_ncore_per_chip/2 - 1));
 812  835  
 813  836                          if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) {
 814  837                                  /* We are BSP */
 815  838                                  cpi->cpi_procnodeid = (first_half ? 0 : 1);
 816      -                                cpi->cpi_chipid = cpi->cpi_procnodeid >> 1;
 817  839                          } else {
 818  840  
 819  841                                  /* We are AP */
 820  842                                  /* NodeId[2:1] bits to use for reading F3xe8 */
 821  843                                  node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1;
 822  844  
 823  845                                  nb_caps_reg =
 824  846                                      pci_getl_func(0, 24 + node2_1, 3, 0xe8);
 825  847  
 826  848                                  /*
 827  849                                   * Check IntNodeNum bit (31:30, but bit 31 is
 828  850                                   * always 0 on dual-node processors)
 829  851                                   */
 830  852                                  if (BITX(nb_caps_reg, 30, 30) == 0)
 831  853                                          cpi->cpi_procnodeid = node2_1 +
 832  854                                              !first_half;
 833  855                                  else
 834  856                                          cpi->cpi_procnodeid = node2_1 +
 835  857                                              first_half;
 836      -
 837      -                                cpi->cpi_chipid = cpi->cpi_procnodeid >> 1;
 838  858                          }
 839  859                  }
 840      -        } else if (cpi->cpi_family >= 0x11) {
 841      -                cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
 842      -                cpi->cpi_chipid = cpi->cpi_procnodeid;
 843  860          } else {
 844  861                  cpi->cpi_procnodeid = 0;
 845      -                cpi->cpi_chipid = cpi->cpi_procnodeid;
 846  862          }
      863 +
      864 +        cpi->cpi_chipid =
      865 +            cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg;
 847  866  }
 848  867  
 849  868  /*
 850  869   * Setup XFeature_Enabled_Mask register. Required by xsave feature.
 851  870   */
 852  871  void
 853  872  setup_xfem(void)
 854  873  {
 855  874          uint64_t flags = XFEATURE_LEGACY_FP;
 856  875  
↓ open down ↓ 573 lines elided ↑ open up ↑
1430 1449                                  remove_x86_feature(featureset, X86FSET_SEP);
1431 1450                          }
1432 1451  #endif
1433 1452                          if (cp->cp_edx & CPUID_AMD_EDX_TSCP) {
1434 1453                                  add_x86_feature(featureset, X86FSET_TSCP);
1435 1454                          }
1436 1455  
1437 1456                          if (cp->cp_ecx & CPUID_AMD_ECX_SVM) {
1438 1457                                  add_x86_feature(featureset, X86FSET_SVM);
1439 1458                          }
     1459 +
     1460 +                        if (cp->cp_ecx & CPUID_AMD_ECX_TOPOEXT) {
     1461 +                                add_x86_feature(featureset, X86FSET_TOPOEXT);
     1462 +                        }
1440 1463                          break;
1441 1464                  default:
1442 1465                          break;
1443 1466                  }
1444 1467  
1445 1468                  /*
1446 1469                   * Get CPUID data about processor cores and hyperthreads.
1447 1470                   */
1448 1471                  switch (cpi->cpi_vendor) {
1449 1472                  case X86_VENDOR_Intel:
↓ open down ↓ 88 lines elided ↑ open up ↑
1538 1561          /*
1539 1562           * If the number of cores is the same as the number
1540 1563           * of CPUs, then we cannot have HyperThreading.
1541 1564           */
1542 1565          if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) {
1543 1566                  remove_x86_feature(featureset, X86FSET_HTT);
1544 1567          }
1545 1568  
1546 1569          cpi->cpi_apicid = CPI_APIC_ID(cpi);
1547 1570          cpi->cpi_procnodes_per_pkg = 1;
     1571 +        cpi->cpi_cores_per_compunit = 1;
1548 1572          if (is_x86_feature(featureset, X86FSET_HTT) == B_FALSE &&
1549 1573              is_x86_feature(featureset, X86FSET_CMP) == B_FALSE) {
1550 1574                  /*
1551 1575                   * Single-core single-threaded processors.
1552 1576                   */
1553 1577                  cpi->cpi_chipid = -1;
1554 1578                  cpi->cpi_clogid = 0;
1555 1579                  cpi->cpi_coreid = cpu->cpu_id;
1556 1580                  cpi->cpi_pkgcoreid = 0;
1557 1581                  if (cpi->cpi_vendor == X86_VENDOR_AMD)
↓ open down ↓ 6 lines elided ↑ open up ↑
1564 1588                  else if (cpi->cpi_vendor == X86_VENDOR_AMD)
1565 1589                          cpuid_amd_getids(cpu);
1566 1590                  else {
1567 1591                          /*
1568 1592                           * All other processors are currently
1569 1593                           * assumed to have single cores.
1570 1594                           */
1571 1595                          cpi->cpi_coreid = cpi->cpi_chipid;
1572 1596                          cpi->cpi_pkgcoreid = 0;
1573 1597                          cpi->cpi_procnodeid = cpi->cpi_chipid;
     1598 +                        cpi->cpi_compunitid = cpi->cpi_chipid;
1574 1599                  }
1575 1600          }
1576 1601  
1577 1602          /*
1578 1603           * Synthesize chip "revision" and socket type
1579 1604           */
1580 1605          cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family,
1581 1606              cpi->cpi_model, cpi->cpi_step);
1582 1607          cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor,
1583 1608              cpi->cpi_family, cpi->cpi_model, cpi->cpi_step);
↓ open down ↓ 1413 lines elided ↑ open up ↑
2997 3022          return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid);
2998 3023  }
2999 3024  
3000 3025  uint_t
3001 3026  cpuid_get_procnodes_per_pkg(cpu_t *cpu)
3002 3027  {
3003 3028          ASSERT(cpuid_checkpass(cpu, 1));
3004 3029          return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg);
3005 3030  }
3006 3031  
     3032 +uint_t
     3033 +cpuid_get_compunitid(cpu_t *cpu)
     3034 +{
     3035 +        ASSERT(cpuid_checkpass(cpu, 1));
     3036 +        return (cpu->cpu_m.mcpu_cpi->cpi_compunitid);
     3037 +}
     3038 +
     3039 +uint_t
     3040 +cpuid_get_cores_per_compunit(cpu_t *cpu)
     3041 +{
     3042 +        ASSERT(cpuid_checkpass(cpu, 1));
     3043 +        return (cpu->cpu_m.mcpu_cpi->cpi_cores_per_compunit);
     3044 +}
     3045 +
3007 3046  /*ARGSUSED*/
3008 3047  int
3009 3048  cpuid_have_cr8access(cpu_t *cpu)
3010 3049  {
3011 3050  #if defined(__amd64)
3012 3051          return (1);
3013 3052  #else
3014 3053          struct cpuid_info *cpi;
3015 3054  
3016 3055          ASSERT(cpu != NULL);
↓ open down ↓ 1641 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX