1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2011 by Delphix. All rights reserved.
  24  */
  25 /*
  26  * Copyright (c) 2010, Intel Corporation.
  27  * All rights reserved.
  28  */
  29 /*
  30  * Portions Copyright 2009 Advanced Micro Devices, Inc.
  31  */
  32 /*
  33  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  34  */
  35 /*
  36  * Various routines to handle identification
  37  * and classification of x86 processors.
  38  */
  39 
  40 #include <sys/types.h>
  41 #include <sys/archsystm.h>
  42 #include <sys/x86_archext.h>
  43 #include <sys/kmem.h>
  44 #include <sys/systm.h>
  45 #include <sys/cmn_err.h>
  46 #include <sys/sunddi.h>
  47 #include <sys/sunndi.h>
  48 #include <sys/cpuvar.h>
  49 #include <sys/processor.h>
  50 #include <sys/sysmacros.h>
  51 #include <sys/pg.h>
  52 #include <sys/fp.h>
  53 #include <sys/controlregs.h>
  54 #include <sys/bitmap.h>
  55 #include <sys/auxv_386.h>
  56 #include <sys/memnode.h>
  57 #include <sys/pci_cfgspace.h>
  58 
  59 #ifdef __xpv
  60 #include <sys/hypervisor.h>
  61 #else
  62 #include <sys/ontrap.h>
  63 #endif
  64 
  65 /*
  66  * Pass 0 of cpuid feature analysis happens in locore. It contains special code
  67  * to recognize Cyrix processors that are not cpuid-compliant, and to deal with
  68  * them accordingly. For most modern processors, feature detection occurs here
  69  * in pass 1.
  70  *
  71  * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup()
  72  * for the boot CPU and does the basic analysis that the early kernel needs.
  73  * x86_featureset is set based on the return value of cpuid_pass1() of the boot
  74  * CPU.
  75  *
  76  * Pass 1 includes:
  77  *
  78  *      o Determining vendor/model/family/stepping and setting x86_type and
  79  *        x86_vendor accordingly.
  80  *      o Processing the feature flags returned by the cpuid instruction while
  81  *        applying any workarounds or tricks for the specific processor.
  82  *      o Mapping the feature flags into Solaris feature bits (X86_*).
  83  *      o Processing extended feature flags if supported by the processor,
  84  *        again while applying specific processor knowledge.
  85  *      o Determining the CMT characteristics of the system.
  86  *
  87  * Pass 1 is done on non-boot CPUs during their initialization and the results
  88  * are used only as a meager attempt at ensuring that all processors within the
  89  * system support the same features.
  90  *
  91  * Pass 2 of cpuid feature analysis happens just at the beginning
  92  * of startup().  It just copies in and corrects the remainder
  93  * of the cpuid data we depend on: standard cpuid functions that we didn't
  94  * need for pass1 feature analysis, and extended cpuid functions beyond the
  95  * simple feature processing done in pass1.
  96  *
  97  * Pass 3 of cpuid analysis is invoked after basic kernel services; in
  98  * particular kernel memory allocation has been made available. It creates a
  99  * readable brand string based on the data collected in the first two passes.
 100  *
 101  * Pass 4 of cpuid analysis is invoked after post_startup() when all
 102  * the support infrastructure for various hardware features has been
 103  * initialized. It determines which processor features will be reported
 104  * to userland via the aux vector.
 105  *
 106  * All passes are executed on all CPUs, but only the boot CPU determines what
 107  * features the kernel will use.
 108  *
 109  * Much of the worst junk in this file is for the support of processors
 110  * that didn't really implement the cpuid instruction properly.
 111  *
 112  * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon,
 113  * the pass numbers.  Accordingly, changes to the pass code may require changes
 114  * to the accessor code.
 115  */
 116 
 117 uint_t x86_vendor = X86_VENDOR_IntelClone;
 118 uint_t x86_type = X86_TYPE_OTHER;
 119 uint_t x86_clflush_size = 0;
 120 
 121 uint_t pentiumpro_bug4046376;
 122 uint_t pentiumpro_bug4064495;
 123 
 124 uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)];
 125 
 126 static char *x86_feature_names[NUM_X86_FEATURES] = {
 127         "lgpg",
 128         "tsc",
 129         "msr",
 130         "mtrr",
 131         "pge",
 132         "de",
 133         "cmov",
 134         "mmx",
 135         "mca",
 136         "pae",
 137         "cv8",
 138         "pat",
 139         "sep",
 140         "sse",
 141         "sse2",
 142         "htt",
 143         "asysc",
 144         "nx",
 145         "sse3",
 146         "cx16",
 147         "cmp",
 148         "tscp",
 149         "mwait",
 150         "sse4a",
 151         "cpuid",
 152         "ssse3",
 153         "sse4_1",
 154         "sse4_2",
 155         "1gpg",
 156         "clfsh",
 157         "64",
 158         "aes",
 159         "pclmulqdq",
 160         "xsave",
 161         "avx",
 162         "vmx",
 163         "svm",
 164         "topoext"
 165 };
 166 
 167 boolean_t
 168 is_x86_feature(void *featureset, uint_t feature)
 169 {
 170         ASSERT(feature < NUM_X86_FEATURES);
 171         return (BT_TEST((ulong_t *)featureset, feature));
 172 }
 173 
 174 void
 175 add_x86_feature(void *featureset, uint_t feature)
 176 {
 177         ASSERT(feature < NUM_X86_FEATURES);
 178         BT_SET((ulong_t *)featureset, feature);
 179 }
 180 
 181 void
 182 remove_x86_feature(void *featureset, uint_t feature)
 183 {
 184         ASSERT(feature < NUM_X86_FEATURES);
 185         BT_CLEAR((ulong_t *)featureset, feature);
 186 }
 187 
 188 boolean_t
 189 compare_x86_featureset(void *setA, void *setB)
 190 {
 191         /*
 192          * We assume that the unused bits of the bitmap are always zero.
 193          */
 194         if (memcmp(setA, setB, BT_SIZEOFMAP(NUM_X86_FEATURES)) == 0) {
 195                 return (B_TRUE);
 196         } else {
 197                 return (B_FALSE);
 198         }
 199 }
 200 
 201 void
 202 print_x86_featureset(void *featureset)
 203 {
 204         uint_t i;
 205 
 206         for (i = 0; i < NUM_X86_FEATURES; i++) {
 207                 if (is_x86_feature(featureset, i)) {
 208                         cmn_err(CE_CONT, "?x86_feature: %s\n",
 209                             x86_feature_names[i]);
 210                 }
 211         }
 212 }
 213 
 214 uint_t enable486;
 215 
 216 static size_t xsave_state_size = 0;
 217 uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE);
 218 boolean_t xsave_force_disable = B_FALSE;
 219 
 220 /*
  221  * This is set to the platform type Solaris is running on.
 222  */
 223 static int platform_type = -1;
 224 
 225 #if !defined(__xpv)
 226 /*
 227  * Variable to patch if hypervisor platform detection needs to be
 228  * disabled (e.g. platform_type will always be HW_NATIVE if this is 0).
 229  */
 230 int enable_platform_detection = 1;
 231 #endif
 232 
 233 /*
 234  * monitor/mwait info.
 235  *
 236  * size_actual and buf_actual are the real address and size allocated to get
  237  * proper mwait_buf alignment.  buf_actual and size_actual should be passed
  238  * to kmem_free().  Currently kmem_alloc() and mwait happen to both use
  239  * processor cache-line alignment, but this is not guaranteed in the future.
 240  */
 241 struct mwait_info {
 242         size_t          mon_min;        /* min size to avoid missed wakeups */
 243         size_t          mon_max;        /* size to avoid false wakeups */
 244         size_t          size_actual;    /* size actually allocated */
 245         void            *buf_actual;    /* memory actually allocated */
 246         uint32_t        support;        /* processor support of monitor/mwait */
 247 };
 248 
 249 /*
  250  * xsave/xrstor info.
 251  *
 252  * This structure contains HW feature bits and size of the xsave save area.
 253  * Note: the kernel will use the maximum size required for all hardware
  254  * features. It is not optimized for potential memory savings if features at
 255  * the end of the save area are not enabled.
 256  */
 257 struct xsave_info {
 258         uint32_t        xsav_hw_features_low;   /* Supported HW features */
 259         uint32_t        xsav_hw_features_high;  /* Supported HW features */
 260         size_t          xsav_max_size;  /* max size save area for HW features */
 261         size_t          ymm_size;       /* AVX: size of ymm save area */
 262         size_t          ymm_offset;     /* AVX: offset for ymm save area */
 263 };
 264 
 265 
 266 /*
 267  * These constants determine how many of the elements of the
  268  * cpuid data we cache in the cpuid_info data structure; the
 269  * remaining elements are accessible via the cpuid instruction.
 270  */
 271 
 272 #define NMAX_CPI_STD    6               /* eax = 0 .. 5 */
 273 #define NMAX_CPI_EXTD   0x1f            /* eax = 0x80000000 .. 0x8000001e */
 274 
 275 /*
 276  * Some terminology needs to be explained:
 277  *  - Socket: Something that can be plugged into a motherboard.
 278  *  - Package: Same as socket
  279  *  - Chip: Same as socket. Note that AMD's documentation uses the term
  280  *    "chip" differently: there, a chip is the same as a processor node (below).
  281  *  - Processor node: Some AMD processors have more than one
  282  *    "subprocessor" embedded in a package. These subprocessors (nodes)
  283  *    are fully-functional processors themselves with cores, caches,
  284  *    memory controllers, and PCI configuration spaces. They are connected
  285  *    inside the package with HyperTransport links. On single-node
  286  *    processors, the processor node is equivalent to chip/socket/package.
 287  *  - Compute Unit: Some AMD processors pair cores in "compute units" that
 288  *    share the FPU and the I$ and L2 caches.
 289  */
 290 
 291 struct cpuid_info {
 292         uint_t cpi_pass;                /* last pass completed */
 293         /*
 294          * standard function information
 295          */
 296         uint_t cpi_maxeax;              /* fn 0: %eax */
 297         char cpi_vendorstr[13];         /* fn 0: %ebx:%ecx:%edx */
 298         uint_t cpi_vendor;              /* enum of cpi_vendorstr */
 299 
 300         uint_t cpi_family;              /* fn 1: extended family */
 301         uint_t cpi_model;               /* fn 1: extended model */
 302         uint_t cpi_step;                /* fn 1: stepping */
 303         chipid_t cpi_chipid;            /* fn 1: %ebx:  Intel: chip # */
 304                                         /*              AMD: package/socket # */
 305         uint_t cpi_brandid;             /* fn 1: %ebx: brand ID */
 306         int cpi_clogid;                 /* fn 1: %ebx: thread # */
 307         uint_t cpi_ncpu_per_chip;       /* fn 1: %ebx: logical cpu count */
 308         uint8_t cpi_cacheinfo[16];      /* fn 2: intel-style cache desc */
 309         uint_t cpi_ncache;              /* fn 2: number of elements */
 310         uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */
 311         id_t cpi_last_lvl_cacheid;      /* fn 4: %eax: derived cache id */
 312         uint_t cpi_std_4_size;          /* fn 4: number of fn 4 elements */
 313         struct cpuid_regs **cpi_std_4;  /* fn 4: %ecx == 0 .. fn4_size */
 314         struct cpuid_regs cpi_std[NMAX_CPI_STD];        /* 0 .. 5 */
 315         /*
 316          * extended function information
 317          */
 318         uint_t cpi_xmaxeax;             /* fn 0x80000000: %eax */
 319         char cpi_brandstr[49];          /* fn 0x8000000[234] */
 320         uint8_t cpi_pabits;             /* fn 0x80000006: %eax */
 321         uint8_t cpi_vabits;             /* fn 0x80000006: %eax */
 322         struct  cpuid_regs cpi_extd[NMAX_CPI_EXTD];     /* 0x800000XX */
 323 
 324         id_t cpi_coreid;                /* same coreid => strands share core */
 325         int cpi_pkgcoreid;              /* core number within single package */
 326         uint_t cpi_ncore_per_chip;      /* AMD: fn 0x80000008: %ecx[7-0] */
 327                                         /* Intel: fn 4: %eax[31-26] */
 328         /*
 329          * supported feature information
 330          */
 331         uint32_t cpi_support[5];
 332 #define STD_EDX_FEATURES        0
 333 #define AMD_EDX_FEATURES        1
 334 #define TM_EDX_FEATURES         2
 335 #define STD_ECX_FEATURES        3
 336 #define AMD_ECX_FEATURES        4
 337         /*
 338          * Synthesized information, where known.
 339          */
 340         uint32_t cpi_chiprev;           /* See X86_CHIPREV_* in x86_archext.h */
 341         const char *cpi_chiprevstr;     /* May be NULL if chiprev unknown */
 342         uint32_t cpi_socket;            /* Chip package/socket type */
 343 
 344         struct mwait_info cpi_mwait;    /* fn 5: monitor/mwait info */
 345         uint32_t cpi_apicid;
 346         uint_t cpi_procnodeid;          /* AMD: nodeID on HT, Intel: chipid */
 347         uint_t cpi_procnodes_per_pkg;   /* AMD: # of nodes in the package */
 348                                         /* Intel: 1 */
 349         uint_t cpi_compunitid;          /* AMD: ComputeUnit ID, Intel: coreid */
 350         uint_t cpi_cores_per_compunit;  /* AMD: # of cores in the ComputeUnit */
 351 
  352         struct xsave_info cpi_xsave;    /* fn D: xsave/xrstor info */
 353 };
 354 
 355 
 356 static struct cpuid_info cpuid_info0;
 357 
 358 /*
 359  * These bit fields are defined by the Intel Application Note AP-485
 360  * "Intel Processor Identification and the CPUID Instruction"
 361  */
 362 #define CPI_FAMILY_XTD(cpi)     BITX((cpi)->cpi_std[1].cp_eax, 27, 20)
 363 #define CPI_MODEL_XTD(cpi)      BITX((cpi)->cpi_std[1].cp_eax, 19, 16)
 364 #define CPI_TYPE(cpi)           BITX((cpi)->cpi_std[1].cp_eax, 13, 12)
 365 #define CPI_FAMILY(cpi)         BITX((cpi)->cpi_std[1].cp_eax, 11, 8)
 366 #define CPI_STEP(cpi)           BITX((cpi)->cpi_std[1].cp_eax, 3, 0)
 367 #define CPI_MODEL(cpi)          BITX((cpi)->cpi_std[1].cp_eax, 7, 4)
 368 
 369 #define CPI_FEATURES_EDX(cpi)           ((cpi)->cpi_std[1].cp_edx)
 370 #define CPI_FEATURES_ECX(cpi)           ((cpi)->cpi_std[1].cp_ecx)
 371 #define CPI_FEATURES_XTD_EDX(cpi)       ((cpi)->cpi_extd[1].cp_edx)
 372 #define CPI_FEATURES_XTD_ECX(cpi)       ((cpi)->cpi_extd[1].cp_ecx)
 373 
 374 #define CPI_BRANDID(cpi)        BITX((cpi)->cpi_std[1].cp_ebx, 7, 0)
 375 #define CPI_CHUNKS(cpi)         BITX((cpi)->cpi_std[1].cp_ebx, 15, 7)
 376 #define CPI_CPU_COUNT(cpi)      BITX((cpi)->cpi_std[1].cp_ebx, 23, 16)
 377 #define CPI_APIC_ID(cpi)        BITX((cpi)->cpi_std[1].cp_ebx, 31, 24)
 378 
 379 #define CPI_MAXEAX_MAX          0x100           /* sanity control */
 380 #define CPI_XMAXEAX_MAX         0x80000100
 381 #define CPI_FN4_ECX_MAX         0x20            /* sanity: max fn 4 levels */
 382 #define CPI_FNB_ECX_MAX         0x20            /* sanity: max fn B levels */
 383 
 384 /*
 385  * Function 4 (Deterministic Cache Parameters) macros
 386  * Defined by Intel Application Note AP-485
 387  */
 388 #define CPI_NUM_CORES(regs)             BITX((regs)->cp_eax, 31, 26)
 389 #define CPI_NTHR_SHR_CACHE(regs)        BITX((regs)->cp_eax, 25, 14)
 390 #define CPI_FULL_ASSOC_CACHE(regs)      BITX((regs)->cp_eax, 9, 9)
 391 #define CPI_SELF_INIT_CACHE(regs)       BITX((regs)->cp_eax, 8, 8)
 392 #define CPI_CACHE_LVL(regs)             BITX((regs)->cp_eax, 7, 5)
 393 #define CPI_CACHE_TYPE(regs)            BITX((regs)->cp_eax, 4, 0)
 394 #define CPI_CPU_LEVEL_TYPE(regs)        BITX((regs)->cp_ecx, 15, 8)
 395 
 396 #define CPI_CACHE_WAYS(regs)            BITX((regs)->cp_ebx, 31, 22)
 397 #define CPI_CACHE_PARTS(regs)           BITX((regs)->cp_ebx, 21, 12)
 398 #define CPI_CACHE_COH_LN_SZ(regs)       BITX((regs)->cp_ebx, 11, 0)
 399 
 400 #define CPI_CACHE_SETS(regs)            BITX((regs)->cp_ecx, 31, 0)
 401 
 402 #define CPI_PREFCH_STRIDE(regs)         BITX((regs)->cp_edx, 9, 0)
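/*
 * As a worked example of the macros above (illustrative values): the leaf-4
 * fields are encoded as "value - 1", so a cache's size in bytes follows from
 *
 *      size = (CPI_CACHE_WAYS(regs) + 1) *
 *          (CPI_CACHE_PARTS(regs) + 1) *
 *          (CPI_CACHE_COH_LN_SZ(regs) + 1) *
 *          (CPI_CACHE_SETS(regs) + 1);
 *
 * e.g. raw fields of 7 ways, 0 partitions, 63 line size and 63 sets describe
 * an 8-way cache with 64-byte lines and 64 sets, i.e. 32KB.
 */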
 403 
 404 
 405 /*
 406  * A couple of shorthand macros to identify "later" P6-family chips
 407  * like the Pentium M and Core.  First, the "older" P6-based stuff
 408  * (loosely defined as "pre-Pentium-4"):
 409  * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon
 410  */
 411 
 412 #define IS_LEGACY_P6(cpi) (                     \
 413         cpi->cpi_family == 6 &&              \
 414                 (cpi->cpi_model == 1 ||              \
 415                 cpi->cpi_model == 3 ||               \
 416                 cpi->cpi_model == 5 ||               \
 417                 cpi->cpi_model == 6 ||               \
 418                 cpi->cpi_model == 7 ||               \
 419                 cpi->cpi_model == 8 ||               \
 420                 cpi->cpi_model == 0xA ||     \
 421                 cpi->cpi_model == 0xB)               \
 422 )
 423 
 424 /* A "new F6" is everything with family 6 that's not the above */
 425 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi))
 426 
 427 /* Extended family/model support */
 428 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \
 429         cpi->cpi_family >= 0xf)
 430 
 431 /*
 432  * Info for monitor/mwait idle loop.
 433  *
 434  * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's
 435  * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November
 436  * 2006.
 437  * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual
 438  * Documentation Updates" #33633, Rev 2.05, December 2006.
 439  */
 440 #define MWAIT_SUPPORT           (0x00000001)    /* mwait supported */
  441 #define MWAIT_EXTENSIONS        (0x00000002)    /* extension supported */
 442 #define MWAIT_ECX_INT_ENABLE    (0x00000004)    /* ecx 1 extension supported */
 443 #define MWAIT_SUPPORTED(cpi)    ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON)
 444 #define MWAIT_INT_ENABLE(cpi)   ((cpi)->cpi_std[5].cp_ecx & 0x2)
 445 #define MWAIT_EXTENSION(cpi)    ((cpi)->cpi_std[5].cp_ecx & 0x1)
 446 #define MWAIT_SIZE_MIN(cpi)     BITX((cpi)->cpi_std[5].cp_eax, 15, 0)
 447 #define MWAIT_SIZE_MAX(cpi)     BITX((cpi)->cpi_std[5].cp_ebx, 15, 0)
 448 /*
 449  * Number of sub-cstates for a given c-state.
 450  */
 451 #define MWAIT_NUM_SUBC_STATES(cpi, c_state)                     \
 452         BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)
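/*
 * For example (illustrative register contents): if cp_edx were 0x00000020,
 * MWAIT_NUM_SUBC_STATES(cpi, 4) would extract bits [7:4] and yield 2, i.e.
 * two sub-states for the C-state whose field starts at bit 4 (C1).
 */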
 453 
 454 /*
 455  * XSAVE leaf 0xD enumeration
 456  */
 457 #define CPUID_LEAFD_2_YMM_OFFSET        576
 458 #define CPUID_LEAFD_2_YMM_SIZE          256
 459 
 460 /*
  461  * Functions we consume from cpuid_subr.c; don't publish these in a header
 462  * file to try and keep people using the expected cpuid_* interfaces.
 463  */
 464 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t);
 465 extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t);
 466 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t);
 467 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t);
 468 extern uint_t _cpuid_vendorstr_to_vendorcode(char *);
 469 
 470 /*
  471  * Apply various platform-dependent restrictions where the
 472  * underlying platform restrictions mean the CPU can be marked
 473  * as less capable than its cpuid instruction would imply.
 474  */
 475 #if defined(__xpv)
 476 static void
 477 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp)
 478 {
 479         switch (eax) {
 480         case 1: {
 481                 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ?
 482                     0 : CPUID_INTC_EDX_MCA;
 483                 cp->cp_edx &=
 484                     ~(mcamask |
 485                     CPUID_INTC_EDX_PSE |
 486                     CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
 487                     CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR |
 488                     CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT |
 489                     CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
 490                     CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT);
 491                 break;
 492         }
 493 
 494         case 0x80000001:
 495                 cp->cp_edx &=
 496                     ~(CPUID_AMD_EDX_PSE |
 497                     CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
 498                     CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE |
 499                     CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 |
 500                     CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
 501                     CPUID_AMD_EDX_TSCP);
 502                 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY;
 503                 break;
 504         default:
 505                 break;
 506         }
 507 
 508         switch (vendor) {
 509         case X86_VENDOR_Intel:
 510                 switch (eax) {
 511                 case 4:
 512                         /*
 513                          * Zero out the (ncores-per-chip - 1) field
 514                          */
 515                         cp->cp_eax &= 0x03fffffff;
 516                         break;
 517                 default:
 518                         break;
 519                 }
 520                 break;
 521         case X86_VENDOR_AMD:
 522                 switch (eax) {
 523 
 524                 case 0x80000001:
 525                         cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D;
 526                         break;
 527 
 528                 case 0x80000008:
 529                         /*
 530                          * Zero out the (ncores-per-chip - 1) field
 531                          */
 532                         cp->cp_ecx &= 0xffffff00;
 533                         break;
 534                 default:
 535                         break;
 536                 }
 537                 break;
 538         default:
 539                 break;
 540         }
 541 }
 542 #else
 543 #define platform_cpuid_mangle(vendor, eax, cp)  /* nothing */
 544 #endif
 545 
 546 /*
 547  *  Some undocumented ways of patching the results of the cpuid
 548  *  instruction to permit running Solaris 10 on future cpus that
 549  *  we don't currently support.  Could be set to non-zero values
 550  *  via settings in eeprom.
 551  */
 552 
 553 uint32_t cpuid_feature_ecx_include;
 554 uint32_t cpuid_feature_ecx_exclude;
 555 uint32_t cpuid_feature_edx_include;
 556 uint32_t cpuid_feature_edx_exclude;
 557 
 558 /*
 559  * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs.
 560  */
 561 void
 562 cpuid_alloc_space(cpu_t *cpu)
 563 {
 564         /*
 565          * By convention, cpu0 is the boot cpu, which is set up
 566          * before memory allocation is available.  All other cpus get
 567          * their cpuid_info struct allocated here.
 568          */
 569         ASSERT(cpu->cpu_id != 0);
 570         ASSERT(cpu->cpu_m.mcpu_cpi == NULL);
 571         cpu->cpu_m.mcpu_cpi =
 572             kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP);
 573 }
 574 
 575 void
 576 cpuid_free_space(cpu_t *cpu)
 577 {
 578         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
 579         int i;
 580 
 581         ASSERT(cpi != NULL);
 582         ASSERT(cpi != &cpuid_info0);
 583 
 584         /*
 585          * Free up any function 4 related dynamic storage
 586          */
 587         for (i = 1; i < cpi->cpi_std_4_size; i++)
 588                 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs));
 589         if (cpi->cpi_std_4_size > 0)
 590                 kmem_free(cpi->cpi_std_4,
 591                     cpi->cpi_std_4_size * sizeof (struct cpuid_regs *));
 592 
 593         kmem_free(cpi, sizeof (*cpi));
 594         cpu->cpu_m.mcpu_cpi = NULL;
 595 }
 596 
 597 #if !defined(__xpv)
 598 
 599 /*
 600  * Determine the type of the underlying platform. This is used to customize
 601  * initialization of various subsystems (e.g. TSC). determine_platform() must
  602  * only ever be called once, to prevent two processors from seeing different
  603  * values of platform_type; it must be called before cpuid_pass1(), the
 604  * earliest consumer to execute.
 605  */
 606 void
 607 determine_platform(void)
 608 {
 609         struct cpuid_regs cp;
 610         char *xen_str;
 611         uint32_t xen_signature[4], base;
 612 
 613         ASSERT(platform_type == -1);
 614 
 615         platform_type = HW_NATIVE;
 616 
 617         if (!enable_platform_detection)
 618                 return;
 619 
 620         /*
 621          * In a fully virtualized domain, Xen's pseudo-cpuid function
 622          * returns a string representing the Xen signature in %ebx, %ecx,
 623          * and %edx. %eax contains the maximum supported cpuid function.
 624          * We need at least a (base + 2) leaf value to do what we want
 625          * to do. Try different base values, since the hypervisor might
  626          * use a different one depending on whether Hyper-V emulation
 627          * is switched on by default or not.
 628          */
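        /*
         * For reference, the "XenVMMXenVMM" signature matched below
         * corresponds to these little-endian register values:
         *
         *      %ebx = 0x566e6558      ("XenV")
         *      %ecx = 0x65584d4d      ("MMXe")
         *      %edx = 0x4d4d566e      ("nVMM")
         */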
 629         for (base = 0x40000000; base < 0x40010000; base += 0x100) {
 630                 cp.cp_eax = base;
 631                 (void) __cpuid_insn(&cp);
 632                 xen_signature[0] = cp.cp_ebx;
 633                 xen_signature[1] = cp.cp_ecx;
 634                 xen_signature[2] = cp.cp_edx;
 635                 xen_signature[3] = 0;
 636                 xen_str = (char *)xen_signature;
 637                 if (strcmp("XenVMMXenVMM", xen_str) == 0 &&
 638                     cp.cp_eax >= (base + 2)) {
 639                         platform_type = HW_XEN_HVM;
 640                         return;
 641                 }
 642         }
 643 
 644         if (vmware_platform()) /* running under vmware hypervisor? */
 645                 platform_type = HW_VMWARE;
 646 }
 647 
 648 int
 649 get_hwenv(void)
 650 {
 651         ASSERT(platform_type != -1);
 652         return (platform_type);
 653 }
 654 
 655 int
 656 is_controldom(void)
 657 {
 658         return (0);
 659 }
 660 
 661 #else
 662 
 663 int
 664 get_hwenv(void)
 665 {
 666         return (HW_XEN_PV);
 667 }
 668 
 669 int
 670 is_controldom(void)
 671 {
 672         return (DOMAIN_IS_INITDOMAIN(xen_info));
 673 }
 674 
 675 #endif  /* __xpv */
 676 
 677 static void
 678 cpuid_intel_getids(cpu_t *cpu, void *feature)
 679 {
 680         uint_t i;
 681         uint_t chipid_shift = 0;
 682         uint_t coreid_shift = 0;
 683         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
 684 
 685         for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1)
 686                 chipid_shift++;
 687 
 688         cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift;
 689         cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1);
 690 
 691         if (is_x86_feature(feature, X86FSET_CMP)) {
 692                 /*
 693                  * Multi-core (and possibly multi-threaded)
 694                  * processors.
 695                  */
 696                 uint_t ncpu_per_core;
 697                 if (cpi->cpi_ncore_per_chip == 1)
 698                         ncpu_per_core = cpi->cpi_ncpu_per_chip;
 699                 else if (cpi->cpi_ncore_per_chip > 1)
 700                         ncpu_per_core = cpi->cpi_ncpu_per_chip /
 701                             cpi->cpi_ncore_per_chip;
 702                 /*
 703                  * 8bit APIC IDs on dual core Pentiums
 704                  * look like this:
 705                  *
 706                  * +-----------------------+------+------+
 707                  * | Physical Package ID   |  MC  |  HT  |
 708                  * +-----------------------+------+------+
 709                  * <------- chipid -------->
 710                  * <------- coreid --------------->
 711                  *                         <--- clogid -->
 712                  *                         <------>
 713                  *                         pkgcoreid
 714                  *
 715                  * Where the number of bits necessary to
  716                  * represent MC and HT fields together equals
  717                  * the minimum number of bits necessary to
 718                  * store the value of cpi->cpi_ncpu_per_chip.
 719                  * Of those bits, the MC part uses the number
 720                  * of bits necessary to store the value of
 721                  * cpi->cpi_ncore_per_chip.
 722                  */
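                /*
                 * Worked example (hypothetical values): with
                 * cpi_ncpu_per_chip = 4 and cpi_ncore_per_chip = 2,
                 * chipid_shift is 2 and coreid_shift becomes 1, so an
                 * APIC ID of 6 (binary 0110) decomposes into chipid = 1,
                 * clogid = 2, coreid = 3 and pkgcoreid = 1.
                 */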
 723                 for (i = 1; i < ncpu_per_core; i <<= 1)
 724                         coreid_shift++;
 725                 cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift;
 726                 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
 727         } else if (is_x86_feature(feature, X86FSET_HTT)) {
 728                 /*
 729                  * Single-core multi-threaded processors.
 730                  */
 731                 cpi->cpi_coreid = cpi->cpi_chipid;
 732                 cpi->cpi_pkgcoreid = 0;
 733         }
 734         cpi->cpi_procnodeid = cpi->cpi_chipid;
 735         cpi->cpi_compunitid = cpi->cpi_coreid;
 736 }
 737 
 738 static void
 739 cpuid_amd_getids(cpu_t *cpu)
 740 {
 741         int i, first_half, coreidsz;
 742         uint32_t nb_caps_reg;
 743         uint_t node2_1;
 744         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
 745         struct cpuid_regs *cp;
 746 
 747         /*
 748          * AMD CMP chips currently have a single thread per core.
 749          *
 750          * Since no two cpus share a core we must assign a distinct coreid
 751          * per cpu, and we do this by using the cpu_id.  This scheme does not,
 752          * however, guarantee that sibling cores of a chip will have sequential
 753          * coreids starting at a multiple of the number of cores per chip -
 754          * that is usually the case, but if the ACPI MADT table is presented
 755          * in a different order then we need to perform a few more gymnastics
 756          * for the pkgcoreid.
 757          *
 758          * All processors in the system have the same number of enabled
 759          * cores. Cores within a processor are always numbered sequentially
 760          * from 0 regardless of how many or which are disabled, and there
  761          * is no way for the operating system to discover the real core id when some
 762          * are disabled.
 763          *
 764          * In family 0x15, the cores come in pairs called compute units. They
 765          * share I$ and L2 caches and the FPU. Enumeration of this feature is
 766          * simplified by the new topology extensions CPUID leaf, indicated by
 767          * the X86 feature X86FSET_TOPOEXT.
 768          */
 769 
 770         cpi->cpi_coreid = cpu->cpu_id;
 771         cpi->cpi_compunitid = cpu->cpu_id;
 772 
 773         if (cpi->cpi_xmaxeax >= 0x80000008) {
 774 
 775                 coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12);
 776 
 777                 /*
  778                  * In AMD parlance, a chip is really a node, while Solaris
  779                  * sees a chip as equivalent to a socket/package.
 780                  */
 781                 cpi->cpi_ncore_per_chip =
 782                     BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1;
 783                 if (coreidsz == 0) {
 784                         /* Use legacy method */
 785                         for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1)
 786                                 coreidsz++;
 787                         if (coreidsz == 0)
 788                                 coreidsz = 1;
 789                 }
 790         } else {
 791                 /* Assume single-core part */
 792                 cpi->cpi_ncore_per_chip = 1;
 793                 coreidsz = 1;
 794         }
 795 
 796         cpi->cpi_clogid = cpi->cpi_pkgcoreid =
 797             cpi->cpi_apicid & ((1<<coreidsz) - 1);
 798         cpi->cpi_ncpu_per_chip = cpi->cpi_ncore_per_chip;
 799 
 800         /* Get node ID, compute unit ID */
 801         if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
 802             cpi->cpi_xmaxeax >= 0x8000001e) {
 803                 cp = &cpi->cpi_extd[0x1e];
 804                 cp->cp_eax = 0x8000001e;
 805                 (void) __cpuid_insn(cp);
 806 
 807                 cpi->cpi_procnodes_per_pkg = BITX(cp->cp_ecx, 10, 8) + 1;
 808                 cpi->cpi_procnodeid = BITX(cp->cp_ecx, 7, 0);
 809                 cpi->cpi_cores_per_compunit = BITX(cp->cp_ebx, 15, 8) + 1;
 810                 cpi->cpi_compunitid = BITX(cp->cp_ebx, 7, 0)
 811                     + (cpi->cpi_ncore_per_chip / cpi->cpi_cores_per_compunit)
 812                     * (cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg);
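                /*
                 * Plugging hypothetical values into the expression above:
                 * with 8 cores per chip, 2 cores per compute unit, 2 nodes
                 * per package, a local compute-unit id (%ebx[7:0]) of 1 and
                 * procnodeid 3, cpi_compunitid = 1 + (8 / 2) * (3 / 2) = 5.
                 */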
 813         } else if (cpi->cpi_family == 0xf || cpi->cpi_family >= 0x11) {
 814                 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
 815         } else if (cpi->cpi_family == 0x10) {
 816                 /*
 817                  * See if we are a multi-node processor.
  818                  * All processors in the system have the same number of nodes.
 819                  */
 820                 nb_caps_reg =  pci_getl_func(0, 24, 3, 0xe8);
 821                 if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) {
 822                         /* Single-node */
 823                         cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5,
 824                             coreidsz);
 825                 } else {
 826 
 827                         /*
 828                          * Multi-node revision D (2 nodes per package
 829                          * are supported)
 830                          */
 831                         cpi->cpi_procnodes_per_pkg = 2;
 832 
 833                         first_half = (cpi->cpi_pkgcoreid <=
 834                             (cpi->cpi_ncore_per_chip/2 - 1));
 835 
 836                         if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) {
 837                                 /* We are BSP */
 838                                 cpi->cpi_procnodeid = (first_half ? 0 : 1);
 839                         } else {
 840 
 841                                 /* We are AP */
 842                                 /* NodeId[2:1] bits to use for reading F3xe8 */
 843                                 node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1;
 844 
 845                                 nb_caps_reg =
 846                                     pci_getl_func(0, 24 + node2_1, 3, 0xe8);
 847 
 848                                 /*
 849                                  * Check IntNodeNum bit (31:30, but bit 31 is
 850                                  * always 0 on dual-node processors)
 851                                  */
 852                                 if (BITX(nb_caps_reg, 30, 30) == 0)
 853                                         cpi->cpi_procnodeid = node2_1 +
 854                                             !first_half;
 855                                 else
 856                                         cpi->cpi_procnodeid = node2_1 +
 857                                             first_half;
 858                         }
 859                 }
 860         } else {
 861                 cpi->cpi_procnodeid = 0;
 862         }
 863 
 864         cpi->cpi_chipid =
 865             cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg;
 866 }
 867 
 868 /*
  869  * Set up the XFeature_Enabled_Mask register. Required by the xsave feature.
 870  */
 871 void
 872 setup_xfem(void)
 873 {
 874         uint64_t flags = XFEATURE_LEGACY_FP;
 875 
 876         ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
 877 
 878         if (is_x86_feature(x86_featureset, X86FSET_SSE))
 879                 flags |= XFEATURE_SSE;
 880 
 881         if (is_x86_feature(x86_featureset, X86FSET_AVX))
 882                 flags |= XFEATURE_AVX;
 883 
 884         set_xcr(XFEATURE_ENABLED_MASK, flags);
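        /*
         * For example, on a CPU with both SSE and AVX present, the mask
         * written to XCR0 above is XFEATURE_LEGACY_FP | XFEATURE_SSE |
         * XFEATURE_AVX, i.e. bits 0, 1 and 2 (x87, SSE and AVX state), 0x7.
         */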
 885 
 886         xsave_bv_all = flags;
 887 }
 888 
 889 void
 890 cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
 891 {
 892         uint32_t mask_ecx, mask_edx;
 893         struct cpuid_info *cpi;
 894         struct cpuid_regs *cp;
 895         int xcpuid;
 896 #if !defined(__xpv)
 897         extern int idle_cpu_prefer_mwait;
 898 #endif
 899 
 900         /*
  901          * Space is statically allocated for the BSP; ensure the pointer is set.
 902          */
 903         if (cpu->cpu_id == 0) {
 904                 if (cpu->cpu_m.mcpu_cpi == NULL)
 905                         cpu->cpu_m.mcpu_cpi = &cpuid_info0;
 906         }
 907 
 908         add_x86_feature(featureset, X86FSET_CPUID);
 909 
 910         cpi = cpu->cpu_m.mcpu_cpi;
 911         ASSERT(cpi != NULL);
 912         cp = &cpi->cpi_std[0];
 913         cp->cp_eax = 0;
 914         cpi->cpi_maxeax = __cpuid_insn(cp);
 915         {
 916                 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr;
 917                 *iptr++ = cp->cp_ebx;
 918                 *iptr++ = cp->cp_edx;
 919                 *iptr++ = cp->cp_ecx;
 920                 *(char *)&cpi->cpi_vendorstr[12] = '\0';
 921         }
 922 
 923         cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr);
 924         x86_vendor = cpi->cpi_vendor; /* for compatibility */
 925 
 926         /*
 927          * Limit the range in case of weird hardware
 928          */
 929         if (cpi->cpi_maxeax > CPI_MAXEAX_MAX)
 930                 cpi->cpi_maxeax = CPI_MAXEAX_MAX;
 931         if (cpi->cpi_maxeax < 1)
 932                 goto pass1_done;
 933 
 934         cp = &cpi->cpi_std[1];
 935         cp->cp_eax = 1;
 936         (void) __cpuid_insn(cp);
 937 
 938         /*
 939          * Extract identifying constants for easy access.
 940          */
 941         cpi->cpi_model = CPI_MODEL(cpi);
 942         cpi->cpi_family = CPI_FAMILY(cpi);
 943 
 944         if (cpi->cpi_family == 0xf)
 945                 cpi->cpi_family += CPI_FAMILY_XTD(cpi);
 946 
 947         /*
 948          * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf.
 949          * Intel, and presumably everyone else, uses model == 0xf, as
 950          * one would expect (max value means possible overflow).  Sigh.
 951          */
 952 
 953         switch (cpi->cpi_vendor) {
 954         case X86_VENDOR_Intel:
 955                 if (IS_EXTENDED_MODEL_INTEL(cpi))
 956                         cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
 957                 break;
 958         case X86_VENDOR_AMD:
 959                 if (CPI_FAMILY(cpi) == 0xf)
 960                         cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
 961                 break;
 962         default:
 963                 if (cpi->cpi_model == 0xf)
 964                         cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
 965                 break;
 966         }
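        /*
         * As an illustration (hypothetical leaf-1 %eax value 0x000206a7):
         * base family 0x6, extended model 0x2, base model 0xa, stepping 0x7;
         * the adjustments above yield cpi_family = 6 and cpi_model = 0x2a.
         */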
 967 
 968         cpi->cpi_step = CPI_STEP(cpi);
 969         cpi->cpi_brandid = CPI_BRANDID(cpi);
 970 
 971         /*
 972          * *default* assumptions:
 973          * - believe %edx feature word
 974          * - ignore %ecx feature word
 975          * - 32-bit virtual and physical addressing
 976          */
 977         mask_edx = 0xffffffff;
 978         mask_ecx = 0;
 979 
 980         cpi->cpi_pabits = cpi->cpi_vabits = 32;
 981 
 982         switch (cpi->cpi_vendor) {
 983         case X86_VENDOR_Intel:
 984                 if (cpi->cpi_family == 5)
 985                         x86_type = X86_TYPE_P5;
 986                 else if (IS_LEGACY_P6(cpi)) {
 987                         x86_type = X86_TYPE_P6;
 988                         pentiumpro_bug4046376 = 1;
 989                         pentiumpro_bug4064495 = 1;
 990                         /*
 991                          * Clear the SEP bit when it was set erroneously
 992                          */
 993                         if (cpi->cpi_model < 3 && cpi->cpi_step < 3)
 994                                 cp->cp_edx &= ~CPUID_INTC_EDX_SEP;
 995                 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) {
 996                         x86_type = X86_TYPE_P4;
 997                         /*
 998                          * We don't currently depend on any of the %ecx
 999                          * features until Prescott, so we'll only check
1000                          * this from P4 onwards.  We might want to revisit
1001                          * that idea later.
1002                          */
1003                         mask_ecx = 0xffffffff;
1004                 } else if (cpi->cpi_family > 0xf)
1005                         mask_ecx = 0xffffffff;
1006                 /*
1007                  * We don't support MONITOR/MWAIT if leaf 5 is not available
1008                  * to obtain the monitor linesize.
1009                  */
1010                 if (cpi->cpi_maxeax < 5)
1011                         mask_ecx &= ~CPUID_INTC_ECX_MON;
1012                 break;
1013         case X86_VENDOR_IntelClone:
1014         default:
1015                 break;
1016         case X86_VENDOR_AMD:
1017 #if defined(OPTERON_ERRATUM_108)
1018                 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) {
1019                         cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0;
1020                         cpi->cpi_model = 0xc;
1021                 } else
1022 #endif
1023                 if (cpi->cpi_family == 5) {
1024                         /*
1025                          * AMD K5 and K6
1026                          *
1027                          * These CPUs have an incomplete implementation
1028                          * of MCA/MCE which we mask away.
1029                          */
1030                         mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA);
1031 
1032                         /*
1033                          * Model 0 uses the wrong (APIC) bit
1034                          * to indicate PGE.  Fix it here.
1035                          */
1036                         if (cpi->cpi_model == 0) {
1037                                 if (cp->cp_edx & 0x200) {
1038                                         cp->cp_edx &= ~0x200;
1039                                         cp->cp_edx |= CPUID_INTC_EDX_PGE;
1040                                 }
1041                         }
1042 
1043                         /*
1044                          * Early models had problems w/ MMX; disable.
1045                          */
1046                         if (cpi->cpi_model < 6)
1047                                 mask_edx &= ~CPUID_INTC_EDX_MMX;
1048                 }
1049 
1050                 /*
1051                  * For newer families, SSE3 and CX16, at least, are valid;
1052                  * enable all
1053                  */
1054                 if (cpi->cpi_family >= 0xf)
1055                         mask_ecx = 0xffffffff;
1056                 /*
1057                  * We don't support MONITOR/MWAIT if leaf 5 is not available
1058                  * to obtain the monitor linesize.
1059                  */
1060                 if (cpi->cpi_maxeax < 5)
1061                         mask_ecx &= ~CPUID_INTC_ECX_MON;
1062 
1063 #if !defined(__xpv)
1064                 /*
1065                  * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD
1066                  * processors.  AMD does not intend MWAIT to be used in the cpu
1067                  * idle loop on current and future processors.  10h and future
1068                  * AMD processors use more power in MWAIT than HLT.
1069                  * Pre-family-10h Opterons do not have the MWAIT instruction.
1070                  */
1071                 idle_cpu_prefer_mwait = 0;
1072 #endif
1073 
1074                 break;
1075         case X86_VENDOR_TM:
1076                 /*
1077                  * workaround the NT workaround in CMS 4.1
1078                  */
1079                 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 &&
1080                     (cpi->cpi_step == 2 || cpi->cpi_step == 3))
1081                         cp->cp_edx |= CPUID_INTC_EDX_CX8;
1082                 break;
1083         case X86_VENDOR_Centaur:
1084                 /*
1085                  * workaround the NT workarounds again
1086                  */
1087                 if (cpi->cpi_family == 6)
1088                         cp->cp_edx |= CPUID_INTC_EDX_CX8;
1089                 break;
1090         case X86_VENDOR_Cyrix:
1091                 /*
1092                  * We rely heavily on the probing in locore
1093                  * to actually figure out what parts, if any,
1094                  * of the Cyrix cpuid instruction to believe.
1095                  */
1096                 switch (x86_type) {
1097                 case X86_TYPE_CYRIX_486:
1098                         mask_edx = 0;
1099                         break;
1100                 case X86_TYPE_CYRIX_6x86:
1101                         mask_edx = 0;
1102                         break;
1103                 case X86_TYPE_CYRIX_6x86L:
1104                         mask_edx =
1105                             CPUID_INTC_EDX_DE |
1106                             CPUID_INTC_EDX_CX8;
1107                         break;
1108                 case X86_TYPE_CYRIX_6x86MX:
1109                         mask_edx =
1110                             CPUID_INTC_EDX_DE |
1111                             CPUID_INTC_EDX_MSR |
1112                             CPUID_INTC_EDX_CX8 |
1113                             CPUID_INTC_EDX_PGE |
1114                             CPUID_INTC_EDX_CMOV |
1115                             CPUID_INTC_EDX_MMX;
1116                         break;
1117                 case X86_TYPE_CYRIX_GXm:
1118                         mask_edx =
1119                             CPUID_INTC_EDX_MSR |
1120                             CPUID_INTC_EDX_CX8 |
1121                             CPUID_INTC_EDX_CMOV |
1122                             CPUID_INTC_EDX_MMX;
1123                         break;
1124                 case X86_TYPE_CYRIX_MediaGX:
1125                         break;
1126                 case X86_TYPE_CYRIX_MII:
1127                 case X86_TYPE_VIA_CYRIX_III:
1128                         mask_edx =
1129                             CPUID_INTC_EDX_DE |
1130                             CPUID_INTC_EDX_TSC |
1131                             CPUID_INTC_EDX_MSR |
1132                             CPUID_INTC_EDX_CX8 |
1133                             CPUID_INTC_EDX_PGE |
1134                             CPUID_INTC_EDX_CMOV |
1135                             CPUID_INTC_EDX_MMX;
1136                         break;
1137                 default:
1138                         break;
1139                 }
1140                 break;
1141         }
1142 
1143 #if defined(__xpv)
1144         /*
1145          * Do not support MONITOR/MWAIT under a hypervisor
1146          */
1147         mask_ecx &= ~CPUID_INTC_ECX_MON;
1148         /*
1149          * Do not support XSAVE under a hypervisor for now
1150          */
1151         xsave_force_disable = B_TRUE;
1152 
1153 #endif  /* __xpv */
1154 
1155         if (xsave_force_disable) {
1156                 mask_ecx &= ~CPUID_INTC_ECX_XSAVE;
1157                 mask_ecx &= ~CPUID_INTC_ECX_AVX;
1158         }
1159 
1160         /*
1161          * Now we've figured out the masks that determine
1162          * which bits we choose to believe, apply the masks
1163          * to the feature words, then map the kernel's view
1164          * of these feature words into its feature word.
1165          */
1166         cp->cp_edx &= mask_edx;
1167         cp->cp_ecx &= mask_ecx;
1168 
1169         /*
1170          * apply any platform restrictions (we don't call this
1171          * immediately after __cpuid_insn here, because we need the
1172          * workarounds applied above first)
1173          */
1174         platform_cpuid_mangle(cpi->cpi_vendor, 1, cp);
1175 
1176         /*
1177          * fold in overrides from the "eeprom" mechanism
1178          */
1179         cp->cp_edx |= cpuid_feature_edx_include;
1180         cp->cp_edx &= ~cpuid_feature_edx_exclude;
1181 
1182         cp->cp_ecx |= cpuid_feature_ecx_include;
1183         cp->cp_ecx &= ~cpuid_feature_ecx_exclude;
1184 
1185         if (cp->cp_edx & CPUID_INTC_EDX_PSE) {
1186                 add_x86_feature(featureset, X86FSET_LARGEPAGE);
1187         }
1188         if (cp->cp_edx & CPUID_INTC_EDX_TSC) {
1189                 add_x86_feature(featureset, X86FSET_TSC);
1190         }
1191         if (cp->cp_edx & CPUID_INTC_EDX_MSR) {
1192                 add_x86_feature(featureset, X86FSET_MSR);
1193         }
1194         if (cp->cp_edx & CPUID_INTC_EDX_MTRR) {
1195                 add_x86_feature(featureset, X86FSET_MTRR);
1196         }
1197         if (cp->cp_edx & CPUID_INTC_EDX_PGE) {
1198                 add_x86_feature(featureset, X86FSET_PGE);
1199         }
1200         if (cp->cp_edx & CPUID_INTC_EDX_CMOV) {
1201                 add_x86_feature(featureset, X86FSET_CMOV);
1202         }
1203         if (cp->cp_edx & CPUID_INTC_EDX_MMX) {
1204                 add_x86_feature(featureset, X86FSET_MMX);
1205         }
1206         if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 &&
1207             (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) {
1208                 add_x86_feature(featureset, X86FSET_MCA);
1209         }
1210         if (cp->cp_edx & CPUID_INTC_EDX_PAE) {
1211                 add_x86_feature(featureset, X86FSET_PAE);
1212         }
1213         if (cp->cp_edx & CPUID_INTC_EDX_CX8) {
1214                 add_x86_feature(featureset, X86FSET_CX8);
1215         }
1216         if (cp->cp_ecx & CPUID_INTC_ECX_CX16) {
1217                 add_x86_feature(featureset, X86FSET_CX16);
1218         }
1219         if (cp->cp_edx & CPUID_INTC_EDX_PAT) {
1220                 add_x86_feature(featureset, X86FSET_PAT);
1221         }
1222         if (cp->cp_edx & CPUID_INTC_EDX_SEP) {
1223                 add_x86_feature(featureset, X86FSET_SEP);
1224         }
1225         if (cp->cp_edx & CPUID_INTC_EDX_FXSR) {
1226                 /*
1227                  * In our implementation, fxsave/fxrstor
1228                  * are prerequisites before we'll even
1229                  * try and do SSE things.
1230                  */
1231                 if (cp->cp_edx & CPUID_INTC_EDX_SSE) {
1232                         add_x86_feature(featureset, X86FSET_SSE);
1233                 }
1234                 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) {
1235                         add_x86_feature(featureset, X86FSET_SSE2);
1236                 }
1237                 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) {
1238                         add_x86_feature(featureset, X86FSET_SSE3);
1239                 }
1240                 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) {
1241                         add_x86_feature(featureset, X86FSET_SSSE3);
1242                 }
1243                 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) {
1244                         add_x86_feature(featureset, X86FSET_SSE4_1);
1245                 }
1246                 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) {
1247                         add_x86_feature(featureset, X86FSET_SSE4_2);
1248                 }
1249                 if (cp->cp_ecx & CPUID_INTC_ECX_AES) {
1250                         add_x86_feature(featureset, X86FSET_AES);
1251                 }
1252                 if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) {
1253                         add_x86_feature(featureset, X86FSET_PCLMULQDQ);
1254                 }
1255 
1256                 if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) {
1257                         add_x86_feature(featureset, X86FSET_XSAVE);
1258                         /* We only test AVX when there is XSAVE */
1259                         if (cp->cp_ecx & CPUID_INTC_ECX_AVX) {
1260                                 add_x86_feature(featureset,
1261                                     X86FSET_AVX);
1262                         }
1263                 }
1264         }
1265         if (cp->cp_edx & CPUID_INTC_EDX_DE) {
1266                 add_x86_feature(featureset, X86FSET_DE);
1267         }
1268 #if !defined(__xpv)
1269         if (cp->cp_ecx & CPUID_INTC_ECX_MON) {
1270 
1271                 /*
 1272                  * We require the CLFLUSH instruction for an erratum
 1273                  * workaround needed to use MONITOR/MWAIT.
1274                  */
1275                 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
1276                         cpi->cpi_mwait.support |= MWAIT_SUPPORT;
1277                         add_x86_feature(featureset, X86FSET_MWAIT);
1278                 } else {
1279                         extern int idle_cpu_assert_cflush_monitor;
1280 
1281                         /*
1282                          * All processors we are aware of which have
1283                          * MONITOR/MWAIT also have CLFLUSH.
1284                          */
1285                         if (idle_cpu_assert_cflush_monitor) {
1286                                 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) &&
1287                                     (cp->cp_edx & CPUID_INTC_EDX_CLFSH));
1288                         }
1289                 }
1290         }
1291 #endif  /* __xpv */
1292 
1293         if (cp->cp_ecx & CPUID_INTC_ECX_VMX) {
1294                 add_x86_feature(featureset, X86FSET_VMX);
1295         }
1296 
1297         /*
 1298          * Only needed the first time; the rest of the cpus follow suit.
 1299          * We only capture this for the boot cpu.
1300          */
1301         if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
1302                 add_x86_feature(featureset, X86FSET_CLFSH);
1303                 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8);
1304         }
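        /*
         * E.g. a CLFLUSH line-size field (%ebx[15:8]) of 8 corresponds to an
         * x86_clflush_size of 64 bytes, since the field is in 8-byte units.
         */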
1305         if (is_x86_feature(featureset, X86FSET_PAE))
1306                 cpi->cpi_pabits = 36;
1307 
1308         /*
1309          * Hyperthreading configuration is slightly tricky on Intel
1310          * and pure clones, and even trickier on AMD.
1311          *
1312          * (AMD chose to set the HTT bit on their CMP processors,
1313          * even though they're not actually hyperthreaded.  Thus it
1314          * takes a bit more work to figure out what's really going
1315          * on ... see the handling of the CMP_LGCY bit below)
1316          */
1317         if (cp->cp_edx & CPUID_INTC_EDX_HTT) {
1318                 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi);
1319                 if (cpi->cpi_ncpu_per_chip > 1)
1320                         add_x86_feature(featureset, X86FSET_HTT);
1321         } else {
1322                 cpi->cpi_ncpu_per_chip = 1;
1323         }
1324 
1325         /*
1326          * Work on the "extended" feature information, doing
1327          * some basic initialization for cpuid_pass2()
1328          */
1329         xcpuid = 0;
1330         switch (cpi->cpi_vendor) {
1331         case X86_VENDOR_Intel:
1332                 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf)
1333                         xcpuid++;
1334                 break;
1335         case X86_VENDOR_AMD:
1336                 if (cpi->cpi_family > 5 ||
1337                     (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
1338                         xcpuid++;
1339                 break;
1340         case X86_VENDOR_Cyrix:
1341                 /*
1342                  * Only these Cyrix CPUs are -known- to support
1343                  * extended cpuid operations.
1344                  */
1345                 if (x86_type == X86_TYPE_VIA_CYRIX_III ||
1346                     x86_type == X86_TYPE_CYRIX_GXm)
1347                         xcpuid++;
1348                 break;
1349         case X86_VENDOR_Centaur:
1350         case X86_VENDOR_TM:
1351         default:
1352                 xcpuid++;
1353                 break;
1354         }
1355 
1356         if (xcpuid) {
1357                 cp = &cpi->cpi_extd[0];
1358                 cp->cp_eax = 0x80000000;
1359                 cpi->cpi_xmaxeax = __cpuid_insn(cp);
1360         }
1361 
1362         if (cpi->cpi_xmaxeax & 0x80000000) {
1363 
1364                 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX)
1365                         cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX;
1366 
1367                 switch (cpi->cpi_vendor) {
1368                 case X86_VENDOR_Intel:
1369                 case X86_VENDOR_AMD:
1370                         if (cpi->cpi_xmaxeax < 0x80000001)
1371                                 break;
1372                         cp = &cpi->cpi_extd[1];
1373                         cp->cp_eax = 0x80000001;
1374                         (void) __cpuid_insn(cp);
1375 
1376                         if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1377                             cpi->cpi_family == 5 &&
1378                             cpi->cpi_model == 6 &&
1379                             cpi->cpi_step == 6) {
1380                                 /*
1381                                  * K6 model 6 uses bit 10 to indicate SYSC;
1382                                  * later models use bit 11. Fix it here.
1383                                  */
1384                                 if (cp->cp_edx & 0x400) {
1385                                         cp->cp_edx &= ~0x400;
1386                                         cp->cp_edx |= CPUID_AMD_EDX_SYSC;
1387                                 }
1388                         }
1389 
1390                         platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp);
1391 
1392                         /*
1393                          * Compute the additions to the kernel's feature word.
1394                          */
1395                         if (cp->cp_edx & CPUID_AMD_EDX_NX) {
1396                                 add_x86_feature(featureset, X86FSET_NX);
1397                         }
1398 
1399                         /*
1400                          * Regardless of whether we boot 64-bit,
1401                          * we should have a way to identify whether
1402                          * the CPU is capable of running 64-bit.
1403                          */
1404                         if (cp->cp_edx & CPUID_AMD_EDX_LM) {
1405                                 add_x86_feature(featureset, X86FSET_64);
1406                         }
1407 
1408 #if defined(__amd64)
1409                         /* 1 GB large page - enable only for 64 bit kernel */
1410                         if (cp->cp_edx & CPUID_AMD_EDX_1GPG) {
1411                                 add_x86_feature(featureset, X86FSET_1GPG);
1412                         }
1413 #endif
1414 
1415                         if ((cpi->cpi_vendor == X86_VENDOR_AMD) &&
1416                             (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) &&
1417                             (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) {
1418                                 add_x86_feature(featureset, X86FSET_SSE4A);
1419                         }
1420 
1421                         /*
1422                          * If both the HTT and CMP_LGCY bits are set,
1423                          * then we're not actually HyperThreaded.  Read
1424                          * "AMD CPUID Specification" for more details.
1425                          */
1426                         if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1427                             is_x86_feature(featureset, X86FSET_HTT) &&
1428                             (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) {
1429                                 remove_x86_feature(featureset, X86FSET_HTT);
1430                                 add_x86_feature(featureset, X86FSET_CMP);
1431                         }
1432 #if defined(__amd64)
1433                         /*
1434                          * It's really tricky to support syscall/sysret in
1435                          * the i386 kernel; we rely on sysenter/sysexit
1436                          * instead.  In the amd64 kernel, things are -way-
1437                          * better.
1438                          */
1439                         if (cp->cp_edx & CPUID_AMD_EDX_SYSC) {
1440                                 add_x86_feature(featureset, X86FSET_ASYSC);
1441                         }
1442 
1443                         /*
1444                          * While we're thinking about system calls, note
1445                          * that AMD processors don't support sysenter
1446                          * in long mode at all, so don't try to program them.
1447                          */
1448                         if (x86_vendor == X86_VENDOR_AMD) {
1449                                 remove_x86_feature(featureset, X86FSET_SEP);
1450                         }
1451 #endif
1452                         if (cp->cp_edx & CPUID_AMD_EDX_TSCP) {
1453                                 add_x86_feature(featureset, X86FSET_TSCP);
1454                         }
1455 
1456                         if (cp->cp_ecx & CPUID_AMD_ECX_SVM) {
1457                                 add_x86_feature(featureset, X86FSET_SVM);
1458                         }
1459 
1460                         if (cp->cp_ecx & CPUID_AMD_ECX_TOPOEXT) {
1461                                 add_x86_feature(featureset, X86FSET_TOPOEXT);
1462                         }
1463                         break;
1464                 default:
1465                         break;
1466                 }
1467 
1468                 /*
1469                  * Get CPUID data about processor cores and hyperthreads.
1470                  */
1471                 switch (cpi->cpi_vendor) {
1472                 case X86_VENDOR_Intel:
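                             /*
                              * Leaf 4 (%ecx == 0) also reports, in %eax[31:26],
                              * one less than the maximum number of cores per
                              * package; it is decoded below to derive
                              * cpi_ncore_per_chip.
                              */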
1473                         if (cpi->cpi_maxeax >= 4) {
1474                                 cp = &cpi->cpi_std[4];
1475                                 cp->cp_eax = 4;
1476                                 cp->cp_ecx = 0;
1477                                 (void) __cpuid_insn(cp);
1478                                 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp);
1479                         }
1480                         /*FALLTHROUGH*/
1481                 case X86_VENDOR_AMD:
1482                         if (cpi->cpi_xmaxeax < 0x80000008)
1483                                 break;
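                             /*
                              * Fn 0x80000008 %ecx also carries the core
                              * count ("NC"), decoded further below.
                              */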
1484                         cp = &cpi->cpi_extd[8];
1485                         cp->cp_eax = 0x80000008;
1486                         (void) __cpuid_insn(cp);
1487                         platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp);
1488 
1489                         /*
1490                          * Virtual and physical address limits from
1491                          * cpuid override previously guessed values.
1492                          */
1493                         cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0);
1494                         cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8);
1495                         break;
1496                 default:
1497                         break;
1498                 }
1499 
1500                 /*
1501                  * Derive the number of cores per chip
1502                  */
1503                 switch (cpi->cpi_vendor) {
1504                 case X86_VENDOR_Intel:
1505                         if (cpi->cpi_maxeax < 4) {
1506                                 cpi->cpi_ncore_per_chip = 1;
1507                                 break;
1508                         } else {
1509                                 cpi->cpi_ncore_per_chip =
1510                                     BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1;
1511                         }
1512                         break;
1513                 case X86_VENDOR_AMD:
1514                         if (cpi->cpi_xmaxeax < 0x80000008) {
1515                                 cpi->cpi_ncore_per_chip = 1;
1516                                 break;
1517                         } else {
1518                                 /*
1519                                  * On family 0xf, cpuid fn 0x80000008 ECX[7:0]
1520                                  * "NC" is 1 less than the number of physical
1521                                  * cores on the chip.  In family 0x10 this value can
1522                                  * be affected by "downcoring" - it reflects
1523                                  * 1 less than the number of cores actually
1524                                  * enabled on this node.
1525                                  */
1526                                 cpi->cpi_ncore_per_chip =
1527                                     BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1;
1528                         }
1529                         break;
1530                 default:
1531                         cpi->cpi_ncore_per_chip = 1;
1532                         break;
1533                 }
1534 
1535                 /*
1536                  * Get CPUID data about TSC Invariance in Deep C-State.
1537                  */
1538                 switch (cpi->cpi_vendor) {
1539                 case X86_VENDOR_Intel:
1540                         if (cpi->cpi_xmaxeax >= 0x80000007) {
1541                                 cp = &cpi->cpi_extd[7];
1542                                 cp->cp_eax = 0x80000007;
1543                                 cp->cp_ecx = 0;
1544                                 (void) __cpuid_insn(cp);
1545                         }
1546                         break;
1547                 default:
1548                         break;
1549                 }
1550         } else {
1551                 cpi->cpi_ncore_per_chip = 1;
1552         }
1553 
1554         /*
1555          * If more than one core, then this processor is CMP.
1556          */
1557         if (cpi->cpi_ncore_per_chip > 1) {
1558                 add_x86_feature(featureset, X86FSET_CMP);
1559         }
1560 
1561         /*
1562          * If the number of cores is the same as the number
1563          * of CPUs, then we cannot have HyperThreading.
1564          */
1565         if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) {
1566                 remove_x86_feature(featureset, X86FSET_HTT);
1567         }
1568 
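             /*
              * Establish default topology identifiers; the vendor-specific
              * code below refines them for multi-core and multi-threaded
              * parts.
              */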
1569         cpi->cpi_apicid = CPI_APIC_ID(cpi);
1570         cpi->cpi_procnodes_per_pkg = 1;
1571         cpi->cpi_cores_per_compunit = 1;
1572         if (is_x86_feature(featureset, X86FSET_HTT) == B_FALSE &&
1573             is_x86_feature(featureset, X86FSET_CMP) == B_FALSE) {
1574                 /*
1575                  * Single-core single-threaded processors.
1576                  */
1577                 cpi->cpi_chipid = -1;
1578                 cpi->cpi_clogid = 0;
1579                 cpi->cpi_coreid = cpu->cpu_id;
1580                 cpi->cpi_pkgcoreid = 0;
1581                 if (cpi->cpi_vendor == X86_VENDOR_AMD)
1582                         cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0);
1583                 else
1584                         cpi->cpi_procnodeid = cpi->cpi_chipid;
1585         } else if (cpi->cpi_ncpu_per_chip > 1) {
1586                 if (cpi->cpi_vendor == X86_VENDOR_Intel)
1587                         cpuid_intel_getids(cpu, featureset);
1588                 else if (cpi->cpi_vendor == X86_VENDOR_AMD)
1589                         cpuid_amd_getids(cpu);
1590                 else {
1591                         /*
1592                          * All other processors are currently
1593                          * assumed to have single cores.
1594                          */
1595                         cpi->cpi_coreid = cpi->cpi_chipid;
1596                         cpi->cpi_pkgcoreid = 0;
1597                         cpi->cpi_procnodeid = cpi->cpi_chipid;
1598                         cpi->cpi_compunitid = cpi->cpi_chipid;
1599                 }
1600         }
1601 
1602         /*
1603          * Synthesize chip "revision" and socket type
1604          */
1605         cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family,
1606             cpi->cpi_model, cpi->cpi_step);
1607         cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor,
1608             cpi->cpi_family, cpi->cpi_model, cpi->cpi_step);
1609         cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family,
1610             cpi->cpi_model, cpi->cpi_step);
1611 
1612 pass1_done:
1613         cpi->cpi_pass = 1;
1614 }
1615 
1616 /*
1617  * Make copies of the cpuid table entries we depend on, in
1618  * part for ease of parsing now, in part so that we have only
1619  * one place to correct any of it, in part for ease of
1620  * later export to userland, and in part so we can look at
1621  * this stuff in a crash dump.
1622  */
1623 
1624 /*ARGSUSED*/
1625 void
1626 cpuid_pass2(cpu_t *cpu)
1627 {
1628         uint_t n, nmax;
1629         int i;
1630         struct cpuid_regs *cp;
1631         uint8_t *dp;
1632         uint32_t *iptr;
1633         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1634 
1635         ASSERT(cpi->cpi_pass == 1);
1636 
1637         if (cpi->cpi_maxeax < 1)
1638                 goto pass2_done;
1639 
1640         if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD)
1641                 nmax = NMAX_CPI_STD;
1642         /*
1643          * (We already handled n == 0 and n == 1 in pass 1)
1644          */
1645         for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) {
1646                 cp->cp_eax = n;
1647 
1648                 /*
1649                  * CPUID function 4 expects %ecx to be initialized
1650                  * with an index which indicates which cache to return
1651                  * information about. The OS is expected to call function 4
1652                  * with %ecx set to 0, 1, 2, ... until it returns with
1653                  * EAX[4:0] set to 0, which indicates there are no more
1654                  * caches.
1655                  *
1656                  * Here, populate cpi_std[4] with the information returned by
1657                  * function 4 when %ecx == 0, and do the rest in cpuid_pass3()
1658                  * when dynamic memory allocation becomes available.
1659                  *
1660                  * Note: we need to explicitly initialize %ecx here, since
1661                  * function 4 may have been previously invoked.
1662                  */
1663                 if (n == 4)
1664                         cp->cp_ecx = 0;
1665 
1666                 (void) __cpuid_insn(cp);
1667                 platform_cpuid_mangle(cpi->cpi_vendor, n, cp);
1668                 switch (n) {
1669                 case 2:
1670                         /*
1671                          * "the lower 8 bits of the %eax register
1672                          * contain a value that identifies the number
1673                          * of times the cpuid [instruction] has to be
1674                          * executed to obtain a complete image of the
1675                          * processor's caching systems."
1676                          *
1677                          * How *do* they make this stuff up?
1678                          */
1679                         cpi->cpi_ncache = sizeof (*cp) *
1680                             BITX(cp->cp_eax, 7, 0);
1681                         if (cpi->cpi_ncache == 0)
1682                                 break;
1683                         cpi->cpi_ncache--;   /* skip count byte */
1684 
1685                         /*
1686                          * Well, for now, rather than attempt to implement
1687                          * this slightly dubious algorithm, we just look
1688                          * at the first 15 ..
1689                          */
1690                         if (cpi->cpi_ncache > (sizeof (*cp) - 1))
1691                                 cpi->cpi_ncache = sizeof (*cp) - 1;
1692 
1693                         dp = cpi->cpi_cacheinfo;
1694                         if (BITX(cp->cp_eax, 31, 31) == 0) {
1695                                 uint8_t *p = (void *)&cp->cp_eax;
1696                                 for (i = 1; i < 4; i++)
1697                                         if (p[i] != 0)
1698                                                 *dp++ = p[i];
1699                         }
1700                         if (BITX(cp->cp_ebx, 31, 31) == 0) {
1701                                 uint8_t *p = (void *)&cp->cp_ebx;
1702                                 for (i = 0; i < 4; i++)
1703                                         if (p[i] != 0)
1704                                                 *dp++ = p[i];
1705                         }
1706                         if (BITX(cp->cp_ecx, 31, 31) == 0) {
1707                                 uint8_t *p = (void *)&cp->cp_ecx;
1708                                 for (i = 0; i < 4; i++)
1709                                         if (p[i] != 0)
1710                                                 *dp++ = p[i];
1711                         }
1712                         if (BITX(cp->cp_edx, 31, 31) == 0) {
1713                                 uint8_t *p = (void *)&cp->cp_edx;
1714                                 for (i = 0; i < 4; i++)
1715                                         if (p[i] != 0)
1716                                                 *dp++ = p[i];
1717                         }
1718                         break;
1719 
1720                 case 3: /* Processor serial number, if PSN supported */
1721                         break;
1722 
1723                 case 4: /* Deterministic cache parameters */
1724                         break;
1725 
1726                 case 5: /* Monitor/Mwait parameters */
1727                 {
1728                         size_t mwait_size;
1729 
1730                         /*
1731                          * Check cpi_mwait.support, which was set in cpuid_pass1().
1732                          */
1733                         if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT))
1734                                 break;
1735 
1736                         /*
1737                          * Protect ourselves from an insane mwait line size.
1738                          * Workaround for incomplete hardware emulator(s).
1739                          */
1740                         mwait_size = (size_t)MWAIT_SIZE_MAX(cpi);
1741                         if (mwait_size < sizeof (uint32_t) ||
1742                             !ISP2(mwait_size)) {
1743 #if DEBUG
1744                                 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait "
1745                                     "size %ld", cpu->cpu_id, (long)mwait_size);
1746 #endif
1747                                 break;
1748                         }
1749 
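                             /*
                              * Leaf 5 reports the smallest monitor-line size
                              * in %eax[15:0] and the largest in %ebx[15:0].
                              */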
1750                         cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi);
1751                         cpi->cpi_mwait.mon_max = mwait_size;
1752                         if (MWAIT_EXTENSION(cpi)) {
1753                                 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS;
1754                                 if (MWAIT_INT_ENABLE(cpi))
1755                                         cpi->cpi_mwait.support |=
1756                                             MWAIT_ECX_INT_ENABLE;
1757                         }
1758                         break;
1759                 }
1760                 default:
1761                         break;
1762                 }
1763         }
1764 
1765         if (cpi->cpi_maxeax >= 0xB && cpi->cpi_vendor == X86_VENDOR_Intel) {
1766                 struct cpuid_regs regs;
1767 
1768                 cp = &regs;
1769                 cp->cp_eax = 0xB;
1770                 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1771 
1772                 (void) __cpuid_insn(cp);
1773 
1774                 /*
1775                  * A non-zero %ebx from CPUID.EAX=0BH, ECX=0H indicates
1776                  * that the extended topology enumeration leaf is
1777                  * available.
1778                  */
1779                 if (cp->cp_ebx) {
1780                         uint32_t x2apic_id;
1781                         uint_t coreid_shift = 0;
1782                         uint_t ncpu_per_core = 1;
1783                         uint_t chipid_shift = 0;
1784                         uint_t ncpu_per_chip = 1;
1785                         uint_t i;
1786                         uint_t level;
1787 
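                             /*
                              * Walk the sub-leaves: %ecx[15:8] gives the level
                              * type (1 = SMT, 2 = core), %eax[4:0] the number
                              * of APIC id bits to shift away at that level,
                              * and %ebx[15:0] the number of logical CPUs there.
                              */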
1788                         for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
1789                                 cp->cp_eax = 0xB;
1790                                 cp->cp_ecx = i;
1791 
1792                                 (void) __cpuid_insn(cp);
1793                                 level = CPI_CPU_LEVEL_TYPE(cp);
1794 
1795                                 if (level == 1) {
1796                                         x2apic_id = cp->cp_edx;
1797                                         coreid_shift = BITX(cp->cp_eax, 4, 0);
1798                                         ncpu_per_core = BITX(cp->cp_ebx, 15, 0);
1799                                 } else if (level == 2) {
1800                                         x2apic_id = cp->cp_edx;
1801                                         chipid_shift = BITX(cp->cp_eax, 4, 0);
1802                                         ncpu_per_chip = BITX(cp->cp_ebx, 15, 0);
1803                                 }
1804                         }
1805 
1806                         cpi->cpi_apicid = x2apic_id;
1807                         cpi->cpi_ncpu_per_chip = ncpu_per_chip;
1808                         cpi->cpi_ncore_per_chip = ncpu_per_chip /
1809                             ncpu_per_core;
1810                         cpi->cpi_chipid = x2apic_id >> chipid_shift;
1811                         cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1);
1812                         cpi->cpi_coreid = x2apic_id >> coreid_shift;
1813                         cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
1814                 }
1815 
1816                 /* Make cp NULL so that we don't stumble on others */
1817                 cp = NULL;
1818         }
1819 
1820         /*
1821          * XSAVE enumeration
1822          */
1823         if (cpi->cpi_maxeax >= 0xD) {
1824                 struct cpuid_regs regs;
1825                 boolean_t cpuid_d_valid = B_TRUE;
1826 
1827                 cp = &regs;
1828                 cp->cp_eax = 0xD;
1829                 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1830 
1831                 (void) __cpuid_insn(cp);
1832 
1833                 /*
1834                  * Sanity checks for debug
1835                  */
1836                 if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 ||
1837                     (cp->cp_eax & XFEATURE_SSE) == 0) {
1838                         cpuid_d_valid = B_FALSE;
1839                 }
1840 
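                     /*
                      * Leaf 0xD, %ecx == 0: %eax/%edx report the supported
                      * XCR0 feature bits (low/high 32 bits), and %ecx the
                      * save area size needed if every supported feature is
                      * enabled.
                      */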
1841                 cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax;
1842                 cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx;
1843                 cpi->cpi_xsave.xsav_max_size = cp->cp_ecx;
1844 
1845                 /*
1846                  * If the hw supports AVX, get the size and offset in the save
1847                  * area for the ymm state.
1848                  */
1849                 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) {
1850                         cp->cp_eax = 0xD;
1851                         cp->cp_ecx = 2;
1852                         cp->cp_edx = cp->cp_ebx = 0;
1853 
1854                         (void) __cpuid_insn(cp);
1855 
1856                         if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET ||
1857                             cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) {
1858                                 cpuid_d_valid = B_FALSE;
1859                         }
1860 
1861                         cpi->cpi_xsave.ymm_size = cp->cp_eax;
1862                         cpi->cpi_xsave.ymm_offset = cp->cp_ebx;
1863                 }
1864 
1865                 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
1866                         xsave_state_size = 0;
1867                 } else if (cpuid_d_valid) {
1868                         xsave_state_size = cpi->cpi_xsave.xsav_max_size;
1869                 } else {
1870                         /* Broken CPUID 0xD, probably in HVM */
1871                         cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid "
1872                             "value: hw_low = %d, hw_high = %d, xsave_size = %d"
1873                             ", ymm_size = %d, ymm_offset = %d\n",
1874                             cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low,
1875                             cpi->cpi_xsave.xsav_hw_features_high,
1876                             (int)cpi->cpi_xsave.xsav_max_size,
1877                             (int)cpi->cpi_xsave.ymm_size,
1878                             (int)cpi->cpi_xsave.ymm_offset);
1879 
1880                         if (xsave_state_size != 0) {
1881                                 /*
1882                                  * This must be a non-boot CPU. We cannot
1883                                  * continue, because the boot CPU has already
1884                                  * enabled XSAVE.
1885                                  */
1886                                 ASSERT(cpu->cpu_id != 0);
1887                                 cmn_err(CE_PANIC, "cpu%d: we have already "
1888                                     "enabled XSAVE on boot cpu, cannot "
1889                                     "continue.", cpu->cpu_id);
1890                         } else {
1891                                 /*
1892                                  * Must be from boot CPU, OK to disable XSAVE.
1893                                  */
1894                                 ASSERT(cpu->cpu_id == 0);
1895                                 remove_x86_feature(x86_featureset,
1896                                     X86FSET_XSAVE);
1897                                 remove_x86_feature(x86_featureset, X86FSET_AVX);
1898                                 CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_XSAVE;
1899                                 CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_AVX;
1900                                 xsave_force_disable = B_TRUE;
1901                         }
1902                 }
1903         }
1904 
1905 
1906         if ((cpi->cpi_xmaxeax & 0x80000000) == 0)
1907                 goto pass2_done;
1908 
1909         if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD)
1910                 nmax = NMAX_CPI_EXTD;
1911         /*
1912          * Copy the extended properties, fixing them as we go.
1913          * (We already handled n == 0 and n == 1 in pass 1)
1914          */
1915         iptr = (void *)cpi->cpi_brandstr;
1916         for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) {
1917                 cp->cp_eax = 0x80000000 + n;
1918                 (void) __cpuid_insn(cp);
1919                 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp);
1920                 switch (n) {
1921                 case 2:
1922                 case 3:
1923                 case 4:
1924                         /*
1925                          * Extract the brand string
1926                          */
1927                         *iptr++ = cp->cp_eax;
1928                         *iptr++ = cp->cp_ebx;
1929                         *iptr++ = cp->cp_ecx;
1930                         *iptr++ = cp->cp_edx;
1931                         break;
1932                 case 5:
1933                         switch (cpi->cpi_vendor) {
1934                         case X86_VENDOR_AMD:
1935                                 /*
1936                                  * The Athlon and Duron were the first
1937                                  * parts to report the sizes of the
1938                                  * TLB for large pages. Before then,
1939                                  * we don't trust the data.
1940                                  */
1941                                 if (cpi->cpi_family < 6 ||
1942                                     (cpi->cpi_family == 6 &&
1943                                     cpi->cpi_model < 1))
1944                                         cp->cp_eax = 0;
1945                                 break;
1946                         default:
1947                                 break;
1948                         }
1949                         break;
1950                 case 6:
1951                         switch (cpi->cpi_vendor) {
1952                         case X86_VENDOR_AMD:
1953                                 /*
1954                                  * The Athlon and Duron were the first
1955                                  * AMD parts with L2 TLB's.
1956                                  * Before then, don't trust the data.
1957                                  */
1958                                 if (cpi->cpi_family < 6 ||
1959                                     (cpi->cpi_family == 6 &&
1960                                     cpi->cpi_model < 1))
1961                                         cp->cp_eax = cp->cp_ebx = 0;
1962                                 /*
1963                                  * AMD Duron rev A0 reports L2
1964                                  * cache size incorrectly as 1K
1965                                  * when it is really 64K
1966                                  */
1967                                 if (cpi->cpi_family == 6 &&
1968                                     cpi->cpi_model == 3 &&
1969                                     cpi->cpi_step == 0) {
1970                                         cp->cp_ecx &= 0xffff;
1971                                         cp->cp_ecx |= 0x400000;
1972                                 }
1973                                 break;
1974                         case X86_VENDOR_Cyrix:  /* VIA C3 */
1975                                 /*
1976                                  * VIA C3 processors are a bit messed
1977                                  * up w.r.t. encoding cache sizes in %ecx
1978                                  */
1979                                 if (cpi->cpi_family != 6)
1980                                         break;
1981                                 /*
1982                                  * models 7 and 8 were incorrectly encoded
1983                                  *
1984                                  * xxx is model 8 really broken?
1985                                  */
1986                                 if (cpi->cpi_model == 7 ||
1987                                     cpi->cpi_model == 8)
1988                                         cp->cp_ecx =
1989                                             BITX(cp->cp_ecx, 31, 24) << 16 |
1990                                             BITX(cp->cp_ecx, 23, 16) << 12 |
1991                                             BITX(cp->cp_ecx, 15, 8) << 8 |
1992                                             BITX(cp->cp_ecx, 7, 0);
1993                                 /*
1994                                  * model 9 stepping 1 has wrong associativity
1995                                  */
1996                                 if (cpi->cpi_model == 9 && cpi->cpi_step == 1)
1997                                         cp->cp_ecx |= 8 << 12;
1998                                 break;
1999                         case X86_VENDOR_Intel:
2000                                 /*
2001                                  * Extended L2 Cache features function.
2002                                  * First appeared on Prescott.
2003                                  */
2004                         default:
2005                                 break;
2006                         }
2007                         break;
2008                 default:
2009                         break;
2010                 }
2011         }
2012 
2013 pass2_done:
2014         cpi->cpi_pass = 2;
2015 }
2016 
2017 static const char *
2018 intel_cpubrand(const struct cpuid_info *cpi)
2019 {
2020         int i;
2021 
2022         if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
2023             cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
2024                 return ("i486");
2025 
2026         switch (cpi->cpi_family) {
2027         case 5:
2028                 return ("Intel Pentium(r)");
2029         case 6:
2030                 switch (cpi->cpi_model) {
2031                         uint_t celeron, xeon;
2032                         const struct cpuid_regs *cp;
2033                 case 0:
2034                 case 1:
2035                 case 2:
2036                         return ("Intel Pentium(r) Pro");
2037                 case 3:
2038                 case 4:
2039                         return ("Intel Pentium(r) II");
2040                 case 6:
2041                         return ("Intel Celeron(r)");
2042                 case 5:
2043                 case 7:
2044                         celeron = xeon = 0;
2045                         cp = &cpi->cpi_std[2];   /* cache info */
2046 
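                             /*
                              * Leaf 2 cache descriptor 0x40 means no L2 cache
                              * (a Celeron tell); descriptors 0x44 and 0x45 are
                              * the 1MB and 2MB L2 caches found on the Xeons of
                              * this era.
                              */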
2047                         for (i = 1; i < 4; i++) {
2048                                 uint_t tmp;
2049 
2050                                 tmp = (cp->cp_eax >> (8 * i)) & 0xff;
2051                                 if (tmp == 0x40)
2052                                         celeron++;
2053                                 if (tmp >= 0x44 && tmp <= 0x45)
2054                                         xeon++;
2055                         }
2056 
2057                         for (i = 0; i < 2; i++) {
2058                                 uint_t tmp;
2059 
2060                                 tmp = (cp->cp_ebx >> (8 * i)) & 0xff;
2061                                 if (tmp == 0x40)
2062                                         celeron++;
2063                                 else if (tmp >= 0x44 && tmp <= 0x45)
2064                                         xeon++;
2065                         }
2066 
2067                         for (i = 0; i < 4; i++) {
2068                                 uint_t tmp;
2069 
2070                                 tmp = (cp->cp_ecx >> (8 * i)) & 0xff;
2071                                 if (tmp == 0x40)
2072                                         celeron++;
2073                                 else if (tmp >= 0x44 && tmp <= 0x45)
2074                                         xeon++;
2075                         }
2076 
2077                         for (i = 0; i < 4; i++) {
2078                                 uint_t tmp;
2079 
2080                                 tmp = (cp->cp_edx >> (8 * i)) & 0xff;
2081                                 if (tmp == 0x40)
2082                                         celeron++;
2083                                 else if (tmp >= 0x44 && tmp <= 0x45)
2084                                         xeon++;
2085                         }
2086 
2087                         if (celeron)
2088                                 return ("Intel Celeron(r)");
2089                         if (xeon)
2090                                 return (cpi->cpi_model == 5 ?
2091                                     "Intel Pentium(r) II Xeon(tm)" :
2092                                     "Intel Pentium(r) III Xeon(tm)");
2093                         return (cpi->cpi_model == 5 ?
2094                             "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" :
2095                             "Intel Pentium(r) III or Pentium(r) III Xeon(tm)");
2096                 default:
2097                         break;
2098                 }
2099         default:
2100                 break;
2101         }
2102 
2103         /* BrandID is present if the field is nonzero */
2104         if (cpi->cpi_brandid != 0) {
2105                 static const struct {
2106                         uint_t bt_bid;
2107                         const char *bt_str;
2108                 } brand_tbl[] = {
2109                         { 0x1,  "Intel(r) Celeron(r)" },
2110                         { 0x2,  "Intel(r) Pentium(r) III" },
2111                         { 0x3,  "Intel(r) Pentium(r) III Xeon(tm)" },
2112                         { 0x4,  "Intel(r) Pentium(r) III" },
2113                         { 0x6,  "Mobile Intel(r) Pentium(r) III" },
2114                         { 0x7,  "Mobile Intel(r) Celeron(r)" },
2115                         { 0x8,  "Intel(r) Pentium(r) 4" },
2116                         { 0x9,  "Intel(r) Pentium(r) 4" },
2117                         { 0xa,  "Intel(r) Celeron(r)" },
2118                         { 0xb,  "Intel(r) Xeon(tm)" },
2119                         { 0xc,  "Intel(r) Xeon(tm) MP" },
2120                         { 0xe,  "Mobile Intel(r) Pentium(r) 4" },
2121                         { 0xf,  "Mobile Intel(r) Celeron(r)" },
2122                         { 0x11, "Mobile Genuine Intel(r)" },
2123                         { 0x12, "Intel(r) Celeron(r) M" },
2124                         { 0x13, "Mobile Intel(r) Celeron(r)" },
2125                         { 0x14, "Intel(r) Celeron(r)" },
2126                         { 0x15, "Mobile Genuine Intel(r)" },
2127                         { 0x16, "Intel(r) Pentium(r) M" },
2128                         { 0x17, "Mobile Intel(r) Celeron(r)" }
2129                 };
2130                 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]);
2131                 uint_t sgn;
2132 
2133                 sgn = (cpi->cpi_family << 8) |
2134                     (cpi->cpi_model << 4) | cpi->cpi_step;
2135 
2136                 for (i = 0; i < btblmax; i++)
2137                         if (brand_tbl[i].bt_bid == cpi->cpi_brandid)
2138                                 break;
2139                 if (i < btblmax) {
2140                         if (sgn == 0x6b1 && cpi->cpi_brandid == 3)
2141                                 return ("Intel(r) Celeron(r)");
2142                         if (sgn < 0xf13 && cpi->cpi_brandid == 0xb)
2143                                 return ("Intel(r) Xeon(tm) MP");
2144                         if (sgn < 0xf13 && cpi->cpi_brandid == 0xe)
2145                                 return ("Intel(r) Xeon(tm)");
2146                         return (brand_tbl[i].bt_str);
2147                 }
2148         }
2149 
2150         return (NULL);
2151 }
2152 
2153 static const char *
2154 amd_cpubrand(const struct cpuid_info *cpi)
2155 {
2156         if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
2157             cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
2158                 return ("i486 compatible");
2159 
2160         switch (cpi->cpi_family) {
2161         case 5:
2162                 switch (cpi->cpi_model) {
2163                 case 0:
2164                 case 1:
2165                 case 2:
2166                 case 3:
2167                 case 4:
2168                 case 5:
2169                         return ("AMD-K5(r)");
2170                 case 6:
2171                 case 7:
2172                         return ("AMD-K6(r)");
2173                 case 8:
2174                         return ("AMD-K6(r)-2");
2175                 case 9:
2176                         return ("AMD-K6(r)-III");
2177                 default:
2178                         return ("AMD (family 5)");
2179                 }
2180         case 6:
2181                 switch (cpi->cpi_model) {
2182                 case 1:
2183                         return ("AMD-K7(tm)");
2184                 case 0:
2185                 case 2:
2186                 case 4:
2187                         return ("AMD Athlon(tm)");
2188                 case 3:
2189                 case 7:
2190                         return ("AMD Duron(tm)");
2191                 case 6:
2192                 case 8:
2193                 case 10:
2194                         /*
2195                          * Use the L2 cache size to distinguish
2196                          */
2197                         return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ?
2198                             "AMD Athlon(tm)" : "AMD Duron(tm)");
2199                 default:
2200                         return ("AMD (family 6)");
2201                 }
2202         default:
2203                 break;
2204         }
2205 
2206         if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 &&
2207             cpi->cpi_brandid != 0) {
2208                 switch (BITX(cpi->cpi_brandid, 7, 5)) {
2209                 case 3:
2210                         return ("AMD Opteron(tm) UP 1xx");
2211                 case 4:
2212                         return ("AMD Opteron(tm) DP 2xx");
2213                 case 5:
2214                         return ("AMD Opteron(tm) MP 8xx");
2215                 default:
2216                         return ("AMD Opteron(tm)");
2217                 }
2218         }
2219 
2220         return (NULL);
2221 }
2222 
2223 static const char *
2224 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type)
2225 {
2226         if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
2227             cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 ||
2228             type == X86_TYPE_CYRIX_486)
2229                 return ("i486 compatible");
2230 
2231         switch (type) {
2232         case X86_TYPE_CYRIX_6x86:
2233                 return ("Cyrix 6x86");
2234         case X86_TYPE_CYRIX_6x86L:
2235                 return ("Cyrix 6x86L");
2236         case X86_TYPE_CYRIX_6x86MX:
2237                 return ("Cyrix 6x86MX");
2238         case X86_TYPE_CYRIX_GXm:
2239                 return ("Cyrix GXm");
2240         case X86_TYPE_CYRIX_MediaGX:
2241                 return ("Cyrix MediaGX");
2242         case X86_TYPE_CYRIX_MII:
2243                 return ("Cyrix M2");
2244         case X86_TYPE_VIA_CYRIX_III:
2245                 return ("VIA Cyrix M3");
2246         default:
2247                 /*
2248                  * Have another wild guess ..
2249                  */
2250                 if (cpi->cpi_family == 4 && cpi->cpi_model == 9)
2251                         return ("Cyrix 5x86");
2252                 else if (cpi->cpi_family == 5) {
2253                         switch (cpi->cpi_model) {
2254                         case 2:
2255                                 return ("Cyrix 6x86");  /* Cyrix M1 */
2256                         case 4:
2257                                 return ("Cyrix MediaGX");
2258                         default:
2259                                 break;
2260                         }
2261                 } else if (cpi->cpi_family == 6) {
2262                         switch (cpi->cpi_model) {
2263                         case 0:
2264                                 return ("Cyrix 6x86MX"); /* Cyrix M2? */
2265                         case 5:
2266                         case 6:
2267                         case 7:
2268                         case 8:
2269                         case 9:
2270                                 return ("VIA C3");
2271                         default:
2272                                 break;
2273                         }
2274                 }
2275                 break;
2276         }
2277         return (NULL);
2278 }
2279 
2280 /*
2281  * This only gets called when the CPU extended feature brand
2282  * string functions (0x80000002, 0x80000003, 0x80000004) aren't
2283  * available, or contain null bytes for some reason.
2284  */
2285 static void
2286 fabricate_brandstr(struct cpuid_info *cpi)
2287 {
2288         const char *brand = NULL;
2289 
2290         switch (cpi->cpi_vendor) {
2291         case X86_VENDOR_Intel:
2292                 brand = intel_cpubrand(cpi);
2293                 break;
2294         case X86_VENDOR_AMD:
2295                 brand = amd_cpubrand(cpi);
2296                 break;
2297         case X86_VENDOR_Cyrix:
2298                 brand = cyrix_cpubrand(cpi, x86_type);
2299                 break;
2300         case X86_VENDOR_NexGen:
2301                 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
2302                         brand = "NexGen Nx586";
2303                 break;
2304         case X86_VENDOR_Centaur:
2305                 if (cpi->cpi_family == 5)
2306                         switch (cpi->cpi_model) {
2307                         case 4:
2308                                 brand = "Centaur C6";
2309                                 break;
2310                         case 8:
2311                                 brand = "Centaur C2";
2312                                 break;
2313                         case 9:
2314                                 brand = "Centaur C3";
2315                                 break;
2316                         default:
2317                                 break;
2318                         }
2319                 break;
2320         case X86_VENDOR_Rise:
2321                 if (cpi->cpi_family == 5 &&
2322                     (cpi->cpi_model == 0 || cpi->cpi_model == 2))
2323                         brand = "Rise mP6";
2324                 break;
2325         case X86_VENDOR_SiS:
2326                 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
2327                         brand = "SiS 55x";
2328                 break;
2329         case X86_VENDOR_TM:
2330                 if (cpi->cpi_family == 5 && cpi->cpi_model == 4)
2331                         brand = "Transmeta Crusoe TM3x00 or TM5x00";
2332                 break;
2333         case X86_VENDOR_NSC:
2334         case X86_VENDOR_UMC:
2335         default:
2336                 break;
2337         }
2338         if (brand) {
2339                 (void) strcpy((char *)cpi->cpi_brandstr, brand);
2340                 return;
2341         }
2342 
2343         /*
2344          * If all else fails ...
2345          */
2346         (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr),
2347             "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family,
2348             cpi->cpi_model, cpi->cpi_step);
2349 }
2350 
2351 /*
2352  * This routine is called just after kernel memory allocation
2353  * becomes available on cpu0, and as part of mp_startup() on
2354  * the other cpus.
2355  *
2356  * Fix up the brand string, and collect any information from cpuid
2357  * that requires dynamically allocated storage to represent.
2358  */
2359 /*ARGSUSED*/
2360 void
2361 cpuid_pass3(cpu_t *cpu)
2362 {
2363         int     i, max, shft, level, size;
2364         struct cpuid_regs regs;
2365         struct cpuid_regs *cp;
2366         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2367 
2368         ASSERT(cpi->cpi_pass == 2);
2369 
2370         /*
2371          * Function 4: Deterministic cache parameters
2372          *
2373          * Take this opportunity to detect the number of threads
2374          * sharing the last level cache, and construct a corresponding
2375          * cache id. The respective cpuid_info members are initialized
2376          * to the default case of "no last level cache sharing".
2377          */
2378         cpi->cpi_ncpu_shr_last_cache = 1;
2379         cpi->cpi_last_lvl_cacheid = cpu->cpu_id;
2380 
2381         if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) {
2382 
2383                 /*
2384                  * Find the # of elements (size) returned by fn 4, and along
2385                  * the way detect last level cache sharing details.
2386                  */
2387                 bzero(&regs, sizeof (regs));
2388                 cp = &regs;
2389                 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) {
2390                         cp->cp_eax = 4;
2391                         cp->cp_ecx = i;
2392 
2393                         (void) __cpuid_insn(cp);
2394 
2395                         if (CPI_CACHE_TYPE(cp) == 0)
2396                                 break;
2397                         level = CPI_CACHE_LVL(cp);
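                             /*
                              * fn 4 %eax[25:14] holds one less than the number
                              * of threads sharing this cache; remember it for
                              * the highest (last) cache level seen.
                              */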
2398                         if (level > max) {
2399                                 max = level;
2400                                 cpi->cpi_ncpu_shr_last_cache =
2401                                     CPI_NTHR_SHR_CACHE(cp) + 1;
2402                         }
2403                 }
2404                 cpi->cpi_std_4_size = size = i;
2405 
2406                 /*
2407                  * Allocate the cpi_std_4 array. The first element
2408                  * references the regs for fn 4, %ecx == 0, which
2409                  * cpuid_pass2() stashed in cpi->cpi_std[4].
2410                  */
2411                 if (size > 0) {
2412                         cpi->cpi_std_4 =
2413                             kmem_alloc(size * sizeof (cp), KM_SLEEP);
2414                         cpi->cpi_std_4[0] = &cpi->cpi_std[4];
2415 
2416                         /*
2417                          * Allocate storage to hold the additional regs
2418                          * for function 4, %ecx == 1 .. cpi_std_4_size.
2419                          *
2420                          * The regs for fn 4, %ecx == 0 has already
2421                          * been allocated as indicated above.
2422                          */
2423                         for (i = 1; i < size; i++) {
2424                                 cp = cpi->cpi_std_4[i] =
2425                                     kmem_zalloc(sizeof (regs), KM_SLEEP);
2426                                 cp->cp_eax = 4;
2427                                 cp->cp_ecx = i;
2428 
2429                                 (void) __cpuid_insn(cp);
2430                         }
2431                 }
2432                 /*
2433                  * Determine the number of bits needed to represent
2434                  * the number of CPUs sharing the last level cache.
2435                  *
2436                  * Shift off that number of bits from the APIC id to
2437                  * derive the cache id.
2438                  */
2439                 shft = 0;
2440                 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1)
2441                         shft++;
2442                 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft;
2443         }
2444 
2445         /*
2446          * Now fixup the brand string
2447          */
2448         if ((cpi->cpi_xmaxeax & 0x80000000) == 0) {
2449                 fabricate_brandstr(cpi);
2450         } else {
2451 
2452                 /*
2453                  * If we successfully extracted a brand string from the cpuid
2454                  * instruction, clean it up by removing leading spaces and
2455                  * similar junk.
2456                  */
2457                 if (cpi->cpi_brandstr[0]) {
2458                         size_t maxlen = sizeof (cpi->cpi_brandstr);
2459                         char *src, *dst;
2460 
2461                         dst = src = (char *)cpi->cpi_brandstr;
2462                         src[maxlen - 1] = '\0';
2463                         /*
2464                          * strip leading spaces
2465                          */
2466                         while (*src == ' ')
2467                                 src++;
2468                         /*
2469                          * Remove any "Genuine" or "Authentic" prefixes.
2470                          */
2471                         if (strncmp(src, "Genuine ", 8) == 0)
2472                                 src += 8;
2473                         if (strncmp(src, "Authentic ", 10) == 0)
2474                                 src += 10;
2475 
2476                         /*
2477                          * Now do an in-place copy.
2478                          * Map (R) to (r) and (TM) to (tm).
2479                          * The era of teletypes is long gone, and there's
2480                          * -really- no need to shout.
2481                          */
2482                         while (*src != '\0') {
2483                                 if (src[0] == '(') {
2484                                         if (strncmp(src + 1, "R)", 2) == 0) {
2485                                                 (void) strncpy(dst, "(r)", 3);
2486                                                 src += 3;
2487                                                 dst += 3;
2488                                                 continue;
2489                                         }
2490                                         if (strncmp(src + 1, "TM)", 3) == 0) {
2491                                                 (void) strncpy(dst, "(tm)", 4);
2492                                                 src += 4;
2493                                                 dst += 4;
2494                                                 continue;
2495                                         }
2496                                 }
2497                                 *dst++ = *src++;
2498                         }
2499                         *dst = '\0';
2500 
2501                         /*
2502                          * Finally, remove any trailing spaces
2503                          */
2504                         while (--dst > cpi->cpi_brandstr)
2505                                 if (*dst == ' ')
2506                                         *dst = '\0';
2507                                 else
2508                                         break;
2509                 } else
2510                         fabricate_brandstr(cpi);
2511         }
2512         cpi->cpi_pass = 3;
2513 }
2514 
2515 /*
2516  * This routine is called out of bind_hwcap() much later in the life
2517  * of the kernel (post_startup()).  The job of this routine is to resolve
2518  * the hardware feature support and kernel support for those features into
2519  * what we're actually going to tell applications via the aux vector.
2520  */
2521 uint_t
2522 cpuid_pass4(cpu_t *cpu)
2523 {
2524         struct cpuid_info *cpi;
2525         uint_t hwcap_flags = 0;
2526 
2527         if (cpu == NULL)
2528                 cpu = CPU;
2529         cpi = cpu->cpu_m.mcpu_cpi;
2530 
2531         ASSERT(cpi->cpi_pass == 3);
2532 
2533         if (cpi->cpi_maxeax >= 1) {
2534                 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES];
2535                 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES];
2536 
2537                 *edx = CPI_FEATURES_EDX(cpi);
2538                 *ecx = CPI_FEATURES_ECX(cpi);
2539 
2540                 /*
2541                  * [these require explicit kernel support]
2542                  */
2543                 if (!is_x86_feature(x86_featureset, X86FSET_SEP))
2544                         *edx &= ~CPUID_INTC_EDX_SEP;
2545 
2546                 if (!is_x86_feature(x86_featureset, X86FSET_SSE))
2547                         *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE);
2548                 if (!is_x86_feature(x86_featureset, X86FSET_SSE2))
2549                         *edx &= ~CPUID_INTC_EDX_SSE2;
2550 
2551                 if (!is_x86_feature(x86_featureset, X86FSET_HTT))
2552                         *edx &= ~CPUID_INTC_EDX_HTT;
2553 
2554                 if (!is_x86_feature(x86_featureset, X86FSET_SSE3))
2555                         *ecx &= ~CPUID_INTC_ECX_SSE3;
2556 
2557                 if (!is_x86_feature(x86_featureset, X86FSET_SSSE3))
2558                         *ecx &= ~CPUID_INTC_ECX_SSSE3;
2559                 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_1))
2560                         *ecx &= ~CPUID_INTC_ECX_SSE4_1;
2561                 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_2))
2562                         *ecx &= ~CPUID_INTC_ECX_SSE4_2;
2563                 if (!is_x86_feature(x86_featureset, X86FSET_AES))
2564                         *ecx &= ~CPUID_INTC_ECX_AES;
2565                 if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ))
2566                         *ecx &= ~CPUID_INTC_ECX_PCLMULQDQ;
2567                 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE))
2568                         *ecx &= ~(CPUID_INTC_ECX_XSAVE |
2569                             CPUID_INTC_ECX_OSXSAVE);
2570                 if (!is_x86_feature(x86_featureset, X86FSET_AVX))
2571                         *ecx &= ~CPUID_INTC_ECX_AVX;
2572 
2573                 /*
2574                  * [no explicit support required beyond x87 fp context]
2575                  */
2576                 if (!fpu_exists)
2577                         *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX);
2578 
2579                 /*
2580                  * Now map the supported feature vector to things that we
2581                  * think userland will care about.
2582                  */
2583                 if (*edx & CPUID_INTC_EDX_SEP)
2584                         hwcap_flags |= AV_386_SEP;
2585                 if (*edx & CPUID_INTC_EDX_SSE)
2586                         hwcap_flags |= AV_386_FXSR | AV_386_SSE;
2587                 if (*edx & CPUID_INTC_EDX_SSE2)
2588                         hwcap_flags |= AV_386_SSE2;
2589                 if (*ecx & CPUID_INTC_ECX_SSE3)
2590                         hwcap_flags |= AV_386_SSE3;
2591                 if (*ecx & CPUID_INTC_ECX_SSSE3)
2592                         hwcap_flags |= AV_386_SSSE3;
2593                 if (*ecx & CPUID_INTC_ECX_SSE4_1)
2594                         hwcap_flags |= AV_386_SSE4_1;
2595                 if (*ecx & CPUID_INTC_ECX_SSE4_2)
2596                         hwcap_flags |= AV_386_SSE4_2;
2597                 if (*ecx & CPUID_INTC_ECX_MOVBE)
2598                         hwcap_flags |= AV_386_MOVBE;
2599                 if (*ecx & CPUID_INTC_ECX_AES)
2600                         hwcap_flags |= AV_386_AES;
2601                 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ)
2602                         hwcap_flags |= AV_386_PCLMULQDQ;
2603                 if ((*ecx & CPUID_INTC_ECX_XSAVE) &&
2604                     (*ecx & CPUID_INTC_ECX_OSXSAVE)) {
2605                         hwcap_flags |= AV_386_XSAVE;
2606 
2607                         if (*ecx & CPUID_INTC_ECX_AVX)
2608                                 hwcap_flags |= AV_386_AVX;
2609                 }
2610                 if (*ecx & CPUID_INTC_ECX_VMX)
2611                         hwcap_flags |= AV_386_VMX;
2612                 if (*ecx & CPUID_INTC_ECX_POPCNT)
2613                         hwcap_flags |= AV_386_POPCNT;
2614                 if (*edx & CPUID_INTC_EDX_FPU)
2615                         hwcap_flags |= AV_386_FPU;
2616                 if (*edx & CPUID_INTC_EDX_MMX)
2617                         hwcap_flags |= AV_386_MMX;
2618 
2619                 if (*edx & CPUID_INTC_EDX_TSC)
2620                         hwcap_flags |= AV_386_TSC;
2621                 if (*edx & CPUID_INTC_EDX_CX8)
2622                         hwcap_flags |= AV_386_CX8;
2623                 if (*edx & CPUID_INTC_EDX_CMOV)
2624                         hwcap_flags |= AV_386_CMOV;
2625                 if (*ecx & CPUID_INTC_ECX_CX16)
2626                         hwcap_flags |= AV_386_CX16;
2627         }
2628 
2629         if (cpi->cpi_xmaxeax < 0x80000001)
2630                 goto pass4_done;
2631 
2632         switch (cpi->cpi_vendor) {
2633                 struct cpuid_regs cp;
2634                 uint32_t *edx, *ecx;
2635 
2636         case X86_VENDOR_Intel:
2637                 /*
2638                  * Seems like Intel duplicated what was necessary
2639                  * here to make the initial crop of 64-bit OS's work.
2640                  * Hopefully, those are the only "extended" bits
2641                  * they'll add.
2642                  */
2643                 /*FALLTHROUGH*/
2644 
2645         case X86_VENDOR_AMD:
2646                 edx = &cpi->cpi_support[AMD_EDX_FEATURES];
2647                 ecx = &cpi->cpi_support[AMD_ECX_FEATURES];
2648 
2649                 *edx = CPI_FEATURES_XTD_EDX(cpi);
2650                 *ecx = CPI_FEATURES_XTD_ECX(cpi);
2651 
2652                 /*
2653                  * [these features require explicit kernel support]
2654                  */
2655                 switch (cpi->cpi_vendor) {
2656                 case X86_VENDOR_Intel:
2657                         if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
2658                                 *edx &= ~CPUID_AMD_EDX_TSCP;
2659                         break;
2660 
2661                 case X86_VENDOR_AMD:
2662                         if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
2663                                 *edx &= ~CPUID_AMD_EDX_TSCP;
2664                         if (!is_x86_feature(x86_featureset, X86FSET_SSE4A))
2665                                 *ecx &= ~CPUID_AMD_ECX_SSE4A;
2666                         break;
2667 
2668                 default:
2669                         break;
2670                 }
2671 
2672                 /*
2673                  * [no explicit support required beyond
2674                  * x87 fp context and exception handlers]
2675                  */
2676                 if (!fpu_exists)
2677                         *edx &= ~(CPUID_AMD_EDX_MMXamd |
2678                             CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx);
2679 
2680                 if (!is_x86_feature(x86_featureset, X86FSET_NX))
2681                         *edx &= ~CPUID_AMD_EDX_NX;
2682 #if !defined(__amd64)
2683                 *edx &= ~CPUID_AMD_EDX_LM;
2684 #endif
2685                 /*
2686                  * Now map the supported feature vector to
2687                  * things that we think userland will care about.
2688                  */
2689 #if defined(__amd64)
2690                 if (*edx & CPUID_AMD_EDX_SYSC)
2691                         hwcap_flags |= AV_386_AMD_SYSC;
2692 #endif
2693                 if (*edx & CPUID_AMD_EDX_MMXamd)
2694                         hwcap_flags |= AV_386_AMD_MMX;
2695                 if (*edx & CPUID_AMD_EDX_3DNow)
2696                         hwcap_flags |= AV_386_AMD_3DNow;
2697                 if (*edx & CPUID_AMD_EDX_3DNowx)
2698                         hwcap_flags |= AV_386_AMD_3DNowx;
2699                 if (*ecx & CPUID_AMD_ECX_SVM)
2700                         hwcap_flags |= AV_386_AMD_SVM;
2701 
2702                 switch (cpi->cpi_vendor) {
2703                 case X86_VENDOR_AMD:
2704                         if (*edx & CPUID_AMD_EDX_TSCP)
2705                                 hwcap_flags |= AV_386_TSCP;
2706                         if (*ecx & CPUID_AMD_ECX_AHF64)
2707                                 hwcap_flags |= AV_386_AHF;
2708                         if (*ecx & CPUID_AMD_ECX_SSE4A)
2709                                 hwcap_flags |= AV_386_AMD_SSE4A;
2710                         if (*ecx & CPUID_AMD_ECX_LZCNT)
2711                                 hwcap_flags |= AV_386_AMD_LZCNT;
2712                         break;
2713 
2714                 case X86_VENDOR_Intel:
2715                         if (*edx & CPUID_AMD_EDX_TSCP)
2716                                 hwcap_flags |= AV_386_TSCP;
2717                         /*
2718                          * Aarrgh.
2719                          * Intel uses a different bit in the same word.
2720                          */
2721                         if (*ecx & CPUID_INTC_ECX_AHF64)
2722                                 hwcap_flags |= AV_386_AHF;
2723                         break;
2724 
2725                 default:
2726                         break;
2727                 }
2728                 break;
2729 
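        /*
         * Transmeta reports its extended feature bits in the vendor-specific
         * cpuid leaf 0x80860001; just cache the %edx word in cpi_support[].
         */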
2730         case X86_VENDOR_TM:
2731                 cp.cp_eax = 0x80860001;
2732                 (void) __cpuid_insn(&cp);
2733                 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx;
2734                 break;
2735 
2736         default:
2737                 break;
2738         }
2739 
2740 pass4_done:
2741         cpi->cpi_pass = 4;
2742         return (hwcap_flags);
2743 }
2744 
2745 
2746 /*
2747  * Simulate the cpuid instruction using the data we previously
2748  * captured about this CPU.  We try our best to return the truth
2749  * about the hardware, independently of kernel support.
2750  */
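/*
 * For example, a hypothetical caller wanting the cached leaf 0 data for
 * the current CPU might do:
 *
 *	struct cpuid_regs regs = { 0 };
 *
 *	regs.cp_eax = 0;
 *	(void) cpuid_insn(NULL, &regs);
 *
 * which is satisfied from the cached cpi_std[0] values rather than by
 * executing the cpuid instruction again.
 */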
2751 uint32_t
2752 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp)
2753 {
2754         struct cpuid_info *cpi;
2755         struct cpuid_regs *xcp;
2756 
2757         if (cpu == NULL)
2758                 cpu = CPU;
2759         cpi = cpu->cpu_m.mcpu_cpi;
2760 
2761         ASSERT(cpuid_checkpass(cpu, 3));
2762 
2763         /*
2764          * CPUID data is cached in two separate places: cpi_std for standard
2765          * CPUID functions, and cpi_extd for extended CPUID functions.
2766          */
2767         if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD)
2768                 xcp = &cpi->cpi_std[cp->cp_eax];
2769         else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax &&
2770             cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD)
2771                 xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000];
2772         else
2773                 /*
2774                  * The caller is asking for data from an input parameter which
2775                  * the kernel has not cached.  In this case we go fetch from
2776                  * the hardware and return the data directly to the user.
2777                  */
2778                 return (__cpuid_insn(cp));
2779 
2780         cp->cp_eax = xcp->cp_eax;
2781         cp->cp_ebx = xcp->cp_ebx;
2782         cp->cp_ecx = xcp->cp_ecx;
2783         cp->cp_edx = xcp->cp_edx;
2784         return (cp->cp_eax);
2785 }
2786 
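/*
 * Return non-zero if cpuid analysis of 'cpu' has completed at least
 * the given pass.
 */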
2787 int
2788 cpuid_checkpass(cpu_t *cpu, int pass)
2789 {
2790         return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL &&
2791             cpu->cpu_m.mcpu_cpi->cpi_pass >= pass);
2792 }
2793 
2794 int
2795 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n)
2796 {
2797         ASSERT(cpuid_checkpass(cpu, 3));
2798 
2799         return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr));
2800 }
2801 
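/*
 * Return non-zero if this processor was assigned a valid chipid during
 * cpuid analysis, i.e. it is part of a chip multi-threaded (CMT) package.
 */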
2802 int
2803 cpuid_is_cmt(cpu_t *cpu)
2804 {
2805         if (cpu == NULL)
2806                 cpu = CPU;
2807 
2808         ASSERT(cpuid_checkpass(cpu, 1));
2809 
2810         return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0);
2811 }
2812 
2813 /*
2814  * AMD and Intel both implement the 64-bit variant of the syscall
2815  * instruction (syscallq), so if there's -any- support for syscall,
2816  * cpuid currently says "yes, we support this".
2817  *
2818  * However, Intel decided to -not- implement the 32-bit variant of the
2819  * syscall instruction, so we provide a predicate to allow our caller
2820  * to test that subtlety here.
2821  *
2822  * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor,
2823  *      even in the case where the hardware would in fact support it.
2824  */
2825 /*ARGSUSED*/
2826 int
2827 cpuid_syscall32_insn(cpu_t *cpu)
2828 {
2829         ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1));
2830 
2831 #if !defined(__xpv)
2832         if (cpu == NULL)
2833                 cpu = CPU;
2834 
2835         /*CSTYLED*/
2836         {
2837                 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2838 
2839                 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
2840                     cpi->cpi_xmaxeax >= 0x80000001 &&
2841                     (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC))
2842                         return (1);
2843         }
2844 #endif
2845         return (0);
2846 }
2847 
2848 int
2849 cpuid_getidstr(cpu_t *cpu, char *s, size_t n)
2850 {
2851         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2852 
2853         static const char fmt[] =
2854             "x86 (%s %X family %d model %d step %d clock %d MHz)";
2855         static const char fmt_ht[] =
2856             "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)";
2857 
2858         ASSERT(cpuid_checkpass(cpu, 1));
2859 
2860         if (cpuid_is_cmt(cpu))
2861                 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid,
2862                     cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
2863                     cpi->cpi_family, cpi->cpi_model,
2864                     cpi->cpi_step, cpu->cpu_type_info.pi_clock));
2865         return (snprintf(s, n, fmt,
2866             cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
2867             cpi->cpi_family, cpi->cpi_model,
2868             cpi->cpi_step, cpu->cpu_type_info.pi_clock));
2869 }
2870 
2871 const char *
2872 cpuid_getvendorstr(cpu_t *cpu)
2873 {
2874         ASSERT(cpuid_checkpass(cpu, 1));
2875         return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr);
2876 }
2877 
2878 uint_t
2879 cpuid_getvendor(cpu_t *cpu)
2880 {
2881         ASSERT(cpuid_checkpass(cpu, 1));
2882         return (cpu->cpu_m.mcpu_cpi->cpi_vendor);
2883 }
2884 
2885 uint_t
2886 cpuid_getfamily(cpu_t *cpu)
2887 {
2888         ASSERT(cpuid_checkpass(cpu, 1));
2889         return (cpu->cpu_m.mcpu_cpi->cpi_family);
2890 }
2891 
2892 uint_t
2893 cpuid_getmodel(cpu_t *cpu)
2894 {
2895         ASSERT(cpuid_checkpass(cpu, 1));
2896         return (cpu->cpu_m.mcpu_cpi->cpi_model);
2897 }
2898 
2899 uint_t
2900 cpuid_get_ncpu_per_chip(cpu_t *cpu)
2901 {
2902         ASSERT(cpuid_checkpass(cpu, 1));
2903         return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip);
2904 }
2905 
2906 uint_t
2907 cpuid_get_ncore_per_chip(cpu_t *cpu)
2908 {
2909         ASSERT(cpuid_checkpass(cpu, 1));
2910         return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip);
2911 }
2912 
2913 uint_t
2914 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu)
2915 {
2916         ASSERT(cpuid_checkpass(cpu, 2));
2917         return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache);
2918 }
2919 
2920 id_t
2921 cpuid_get_last_lvl_cacheid(cpu_t *cpu)
2922 {
2923         ASSERT(cpuid_checkpass(cpu, 2));
2924         return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
2925 }
2926 
2927 uint_t
2928 cpuid_getstep(cpu_t *cpu)
2929 {
2930         ASSERT(cpuid_checkpass(cpu, 1));
2931         return (cpu->cpu_m.mcpu_cpi->cpi_step);
2932 }
2933 
2934 uint_t
2935 cpuid_getsig(struct cpu *cpu)
2936 {
2937         ASSERT(cpuid_checkpass(cpu, 1));
2938         return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax);
2939 }
2940 
2941 uint32_t
2942 cpuid_getchiprev(struct cpu *cpu)
2943 {
2944         ASSERT(cpuid_checkpass(cpu, 1));
2945         return (cpu->cpu_m.mcpu_cpi->cpi_chiprev);
2946 }
2947 
2948 const char *
2949 cpuid_getchiprevstr(struct cpu *cpu)
2950 {
2951         ASSERT(cpuid_checkpass(cpu, 1));
2952         return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr);
2953 }
2954 
2955 uint32_t
2956 cpuid_getsockettype(struct cpu *cpu)
2957 {
2958         ASSERT(cpuid_checkpass(cpu, 1));
2959         return (cpu->cpu_m.mcpu_cpi->cpi_socket);
2960 }
2961 
2962 const char *
2963 cpuid_getsocketstr(cpu_t *cpu)
2964 {
2965         static const char *socketstr = NULL;
2966         struct cpuid_info *cpi;
2967 
2968         ASSERT(cpuid_checkpass(cpu, 1));
2969         cpi = cpu->cpu_m.mcpu_cpi;
2970 
2971         /* Assume that socket types are the same across the system */
2972         if (socketstr == NULL)
2973                 socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family,
2974                     cpi->cpi_model, cpi->cpi_step);
2975 
2976 
2977         return (socketstr);
2978 }
2979 
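/*
 * Return the chip (socket) id derived from cpuid data on CMT systems;
 * otherwise fall back to the cpu id itself.
 */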
2980 int
2981 cpuid_get_chipid(cpu_t *cpu)
2982 {
2983         ASSERT(cpuid_checkpass(cpu, 1));
2984 
2985         if (cpuid_is_cmt(cpu))
2986                 return (cpu->cpu_m.mcpu_cpi->cpi_chipid);
2987         return (cpu->cpu_id);
2988 }
2989 
2990 id_t
2991 cpuid_get_coreid(cpu_t *cpu)
2992 {
2993         ASSERT(cpuid_checkpass(cpu, 1));
2994         return (cpu->cpu_m.mcpu_cpi->cpi_coreid);
2995 }
2996 
2997 int
2998 cpuid_get_pkgcoreid(cpu_t *cpu)
2999 {
3000         ASSERT(cpuid_checkpass(cpu, 1));
3001         return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid);
3002 }
3003 
3004 int
3005 cpuid_get_clogid(cpu_t *cpu)
3006 {
3007         ASSERT(cpuid_checkpass(cpu, 1));
3008         return (cpu->cpu_m.mcpu_cpi->cpi_clogid);
3009 }
3010 
3011 int
3012 cpuid_get_cacheid(cpu_t *cpu)
3013 {
3014         ASSERT(cpuid_checkpass(cpu, 1));
3015         return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
3016 }
3017 
3018 uint_t
3019 cpuid_get_procnodeid(cpu_t *cpu)
3020 {
3021         ASSERT(cpuid_checkpass(cpu, 1));
3022         return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid);
3023 }
3024 
3025 uint_t
3026 cpuid_get_procnodes_per_pkg(cpu_t *cpu)
3027 {
3028         ASSERT(cpuid_checkpass(cpu, 1));
3029         return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg);
3030 }
3031 
3032 uint_t
3033 cpuid_get_compunitid(cpu_t *cpu)
3034 {
3035         ASSERT(cpuid_checkpass(cpu, 1));
3036         return (cpu->cpu_m.mcpu_cpi->cpi_compunitid);
3037 }
3038 
3039 uint_t
3040 cpuid_get_cores_per_compunit(cpu_t *cpu)
3041 {
3042         ASSERT(cpuid_checkpass(cpu, 1));
3043         return (cpu->cpu_m.mcpu_cpi->cpi_cores_per_compunit);
3044 }
3045 
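/*
 * Return 1 if this processor can access %cr8 (the task priority register):
 * always the case in 64-bit mode, and on 32-bit kernels only when an AMD
 * processor advertises the CR8D extended feature bit.
 */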
3046 /*ARGSUSED*/
3047 int
3048 cpuid_have_cr8access(cpu_t *cpu)
3049 {
3050 #if defined(__amd64)
3051         return (1);
3052 #else
3053         struct cpuid_info *cpi;
3054 
3055         ASSERT(cpu != NULL);
3056         cpi = cpu->cpu_m.mcpu_cpi;
3057         if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 &&
3058             (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0)
3059                 return (1);
3060         return (0);
3061 #endif
3062 }
3063 
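/*
 * Return the local APIC id recorded during cpuid analysis, or UINT32_MAX
 * if cpuid leaf 1 is unavailable and the id is therefore unknown.
 */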
3064 uint32_t
3065 cpuid_get_apicid(cpu_t *cpu)
3066 {
3067         ASSERT(cpuid_checkpass(cpu, 1));
3068         if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) {
3069                 return (UINT32_MAX);
3070         } else {
3071                 return (cpu->cpu_m.mcpu_cpi->cpi_apicid);
3072         }
3073 }
3074 
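/*
 * Report the physical and virtual address widths (in bits) discovered
 * during cpuid analysis; either pointer may be NULL if not of interest.
 */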
3075 void
3076 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits)
3077 {
3078         struct cpuid_info *cpi;
3079 
3080         if (cpu == NULL)
3081                 cpu = CPU;
3082         cpi = cpu->cpu_m.mcpu_cpi;
3083 
3084         ASSERT(cpuid_checkpass(cpu, 1));
3085 
3086         if (pabits)
3087                 *pabits = cpi->cpi_pabits;
3088         if (vabits)
3089                 *vabits = cpi->cpi_vabits;
3090 }
3091 
3092 /*
3093  * Returns the number of data TLB entries for the given pagesize.
3094  * If the count can't be computed, or isn't known, the routine
3095  * returns zero.  If you ask about an architecturally impossible
3096  * pagesize, the routine panics (so that the hat implementor knows
3097  * that things are inconsistent).
3098  */
3099 uint_t
3100 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize)
3101 {
3102         struct cpuid_info *cpi;
3103         uint_t dtlb_nent = 0;
3104 
3105         if (cpu == NULL)
3106                 cpu = CPU;
3107         cpi = cpu->cpu_m.mcpu_cpi;
3108 
3109         ASSERT(cpuid_checkpass(cpu, 1));
3110 
3111         /*
3112          * Check the L2 TLB info
3113          */
3114         if (cpi->cpi_xmaxeax >= 0x80000006) {
3115                 struct cpuid_regs *cp = &cpi->cpi_extd[6];
3116 
3117                 switch (pagesize) {
3118 
3119                 case 4 * 1024:
3120                         /*
3121                          * All zero in the top 16 bits of the register
3122                          * indicates a unified TLB. Size is in low 16 bits.
3123                          */
3124                         if ((cp->cp_ebx & 0xffff0000) == 0)
3125                                 dtlb_nent = cp->cp_ebx & 0x0000ffff;
3126                         else
3127                                 dtlb_nent = BITX(cp->cp_ebx, 27, 16);
3128                         break;
3129 
3130                 case 2 * 1024 * 1024:
3131                         if ((cp->cp_eax & 0xffff0000) == 0)
3132                                 dtlb_nent = cp->cp_eax & 0x0000ffff;
3133                         else
3134                                 dtlb_nent = BITX(cp->cp_eax, 27, 16);
3135                         break;
3136 
3137                 default:
3138                         panic("unknown L2 pagesize");
3139                         /*NOTREACHED*/
3140                 }
3141         }
3142 
3143         if (dtlb_nent != 0)
3144                 return (dtlb_nent);
3145 
3146         /*
3147          * No L2 TLB support for this size, try L1.
3148          */
3149         if (cpi->cpi_xmaxeax >= 0x80000005) {
3150                 struct cpuid_regs *cp = &cpi->cpi_extd[5];
3151 
3152                 switch (pagesize) {
3153                 case 4 * 1024:
3154                         dtlb_nent = BITX(cp->cp_ebx, 23, 16);
3155                         break;
3156                 case 2 * 1024 * 1024:
3157                         dtlb_nent = BITX(cp->cp_eax, 23, 16);
3158                         break;
3159                 default:
3160                         panic("unknown L1 d-TLB pagesize");
3161                         /*NOTREACHED*/
3162                 }
3163         }
3164 
3165         return (dtlb_nent);
3166 }
3167 
3168 /*
3169  * Return 0 if the erratum is not present or not applicable, positive
3170  * if it is, and negative if the status of the erratum is unknown.
3171  *
3172  * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm)
3173  * Processors" #25759, Rev 3.57, August 2005
3174  */
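/*
 * For example, a hypothetical caller checking for erratum 95 might do:
 *
 *	if (cpuid_opteron_erratum(cpu, 95) > 0)
 *		apply_the_95_workaround();
 *
 * (where the workaround routine is purely illustrative), deciding
 * separately how to treat the "unknown" (negative) return.
 */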
3175 int
3176 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum)
3177 {
3178         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
3179         uint_t eax;
3180 
3181         /*
3182          * Bail out if this CPU isn't an AMD CPU, or if it's
3183          * a legacy (32-bit) AMD CPU.
3184          */
3185         if (cpi->cpi_vendor != X86_VENDOR_AMD ||
3186             cpi->cpi_family == 4 || cpi->cpi_family == 5 ||
3187             cpi->cpi_family == 6)
3188 
3189                 return (0);
3190 
3191         eax = cpi->cpi_std[1].cp_eax;
3192 
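/*
 * The macros below match the cpuid function 1 %eax signatures
 * (family/model/stepping) of the individual silicon revisions named
 * in the revision guide cited above.
 */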
3193 #define SH_B0(eax)      (eax == 0xf40 || eax == 0xf50)
3194 #define SH_B3(eax)      (eax == 0xf51)
3195 #define B(eax)          (SH_B0(eax) || SH_B3(eax))
3196 
3197 #define SH_C0(eax)      (eax == 0xf48 || eax == 0xf58)
3198 
3199 #define SH_CG(eax)      (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a)
3200 #define DH_CG(eax)      (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0)
3201 #define CH_CG(eax)      (eax == 0xf82 || eax == 0xfb2)
3202 #define CG(eax)         (SH_CG(eax) || DH_CG(eax) || CH_CG(eax))
3203 
3204 #define SH_D0(eax)      (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70)
3205 #define DH_D0(eax)      (eax == 0x10fc0 || eax == 0x10ff0)
3206 #define CH_D0(eax)      (eax == 0x10f80 || eax == 0x10fb0)
3207 #define D0(eax)         (SH_D0(eax) || DH_D0(eax) || CH_D0(eax))
3208 
3209 #define SH_E0(eax)      (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70)
3210 #define JH_E1(eax)      (eax == 0x20f10)        /* JH8_E0 had 0x20f30 */
3211 #define DH_E3(eax)      (eax == 0x20fc0 || eax == 0x20ff0)
3212 #define SH_E4(eax)      (eax == 0x20f51 || eax == 0x20f71)
3213 #define BH_E4(eax)      (eax == 0x20fb1)
3214 #define SH_E5(eax)      (eax == 0x20f42)
3215 #define DH_E6(eax)      (eax == 0x20ff2 || eax == 0x20fc2)
3216 #define JH_E6(eax)      (eax == 0x20f12 || eax == 0x20f32)
3217 #define EX(eax)         (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \
3218                             SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \
3219                             DH_E6(eax) || JH_E6(eax))
3220 
3221 #define DR_AX(eax)      (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02)
3222 #define DR_B0(eax)      (eax == 0x100f20)
3223 #define DR_B1(eax)      (eax == 0x100f21)
3224 #define DR_BA(eax)      (eax == 0x100f2a)
3225 #define DR_B2(eax)      (eax == 0x100f22)
3226 #define DR_B3(eax)      (eax == 0x100f23)
3227 #define RB_C0(eax)      (eax == 0x100f40)
3228 
3229         switch (erratum) {
3230         case 1:
3231                 return (cpi->cpi_family < 0x10);
3232         case 51:        /* what does the asterisk mean? */
3233                 return (B(eax) || SH_C0(eax) || CG(eax));
3234         case 52:
3235                 return (B(eax));
3236         case 57:
3237                 return (cpi->cpi_family <= 0x11);
3238         case 58:
3239                 return (B(eax));
3240         case 60:
3241                 return (cpi->cpi_family <= 0x11);
3242         case 61:
3243         case 62:
3244         case 63:
3245         case 64:
3246         case 65:
3247         case 66:
3248         case 68:
3249         case 69:
3250         case 70:
3251         case 71:
3252                 return (B(eax));
3253         case 72:
3254                 return (SH_B0(eax));
3255         case 74:
3256                 return (B(eax));
3257         case 75:
3258                 return (cpi->cpi_family < 0x10);
3259         case 76:
3260                 return (B(eax));
3261         case 77:
3262                 return (cpi->cpi_family <= 0x11);
3263         case 78:
3264                 return (B(eax) || SH_C0(eax));
3265         case 79:
3266                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3267         case 80:
3268         case 81:
3269         case 82:
3270                 return (B(eax));
3271         case 83:
3272                 return (B(eax) || SH_C0(eax) || CG(eax));
3273         case 85:
3274                 return (cpi->cpi_family < 0x10);
3275         case 86:
3276                 return (SH_C0(eax) || CG(eax));
3277         case 88:
3278 #if !defined(__amd64)
3279                 return (0);
3280 #else
3281                 return (B(eax) || SH_C0(eax));
3282 #endif
3283         case 89:
3284                 return (cpi->cpi_family < 0x10);
3285         case 90:
3286                 return (B(eax) || SH_C0(eax) || CG(eax));
3287         case 91:
3288         case 92:
3289                 return (B(eax) || SH_C0(eax));
3290         case 93:
3291                 return (SH_C0(eax));
3292         case 94:
3293                 return (B(eax) || SH_C0(eax) || CG(eax));
3294         case 95:
3295 #if !defined(__amd64)
3296                 return (0);
3297 #else
3298                 return (B(eax) || SH_C0(eax));
3299 #endif
3300         case 96:
3301                 return (B(eax) || SH_C0(eax) || CG(eax));
3302         case 97:
3303         case 98:
3304                 return (SH_C0(eax) || CG(eax));
3305         case 99:
3306                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3307         case 100:
3308                 return (B(eax) || SH_C0(eax));
3309         case 101:
3310         case 103:
3311                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3312         case 104:
3313                 return (SH_C0(eax) || CG(eax) || D0(eax));
3314         case 105:
3315         case 106:
3316         case 107:
3317                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3318         case 108:
3319                 return (DH_CG(eax));
3320         case 109:
3321                 return (SH_C0(eax) || CG(eax) || D0(eax));
3322         case 110:
3323                 return (D0(eax) || EX(eax));
3324         case 111:
3325                 return (CG(eax));
3326         case 112:
3327                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3328         case 113:
3329                 return (eax == 0x20fc0);
3330         case 114:
3331                 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
3332         case 115:
3333                 return (SH_E0(eax) || JH_E1(eax));
3334         case 116:
3335                 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
3336         case 117:
3337                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3338         case 118:
3339                 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) ||
3340                     JH_E6(eax));
3341         case 121:
3342                 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3343         case 122:
3344                 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11);
3345         case 123:
3346                 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax));
3347         case 131:
3348                 return (cpi->cpi_family < 0x10);
3349         case 6336786:
3350                 /*
3351                  * Test for AdvPowerMgmtInfo.TscPStateInvariant
3352                  * if this is a K8 family or newer processor
3353                  */
3354                 if (CPI_FAMILY(cpi) == 0xf) {
3355                         struct cpuid_regs regs;
3356                         regs.cp_eax = 0x80000007;
3357                         (void) __cpuid_insn(&regs);
3358                         return (!(regs.cp_edx & 0x100));
3359                 }
3360                 return (0);
3361         case 6323525:
3362                 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) |
3363                     (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40);
3364 
3365         case 6671130:
3366                 /*
3367                  * Check for processors (pre-Shanghai) that do not provide
3368                  * optimal management of 1GB PTEs in their TLBs.
3369                  */
3370                 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4);
3371 
3372         case 298:
3373                 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) ||
3374                     DR_B2(eax) || RB_C0(eax));
3375 
3376         case 721:
3377 #if defined(__amd64)
3378                 return (cpi->cpi_family == 0x10 || cpi->cpi_family == 0x12);
3379 #else
3380                 return (0);
3381 #endif
3382 
3383         default:
3384                 return (-1);
3385 
3386         }
3387 }
3388 
3389 /*
3390  * Determine if specified erratum is present via OSVW (OS Visible Workaround).
3391  * Return 1 if erratum is present, 0 if not present and -1 if indeterminate.
3392  */
3393 int
3394 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum)
3395 {
3396         struct cpuid_info       *cpi;
3397         uint_t                  osvwid;
3398         static int              osvwfeature = -1;
3399         uint64_t                osvwlength;
3400 
3401 
3402         cpi = cpu->cpu_m.mcpu_cpi;
3403 
3404         /* confirm OSVW supported */
3405         if (osvwfeature == -1) {
3406                 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW;
3407         } else {
3408                 /* assert that osvw feature setting is consistent on all cpus */
3409                 ASSERT(osvwfeature ==
3410                     (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW));
3411         }
3412         if (!osvwfeature)
3413                 return (-1);
3414 
3415         osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK;
3416 
3417         switch (erratum) {
3418         case 298:       /* osvwid is 0 */
3419                 osvwid = 0;
3420                 if (osvwlength <= (uint64_t)osvwid) {
3421                         /* osvwid 0 is unknown */
3422                         return (-1);
3423                 }
3424 
3425                 /*
3426                  * Check the OSVW STATUS MSR to determine the state
3427                  * of the erratum where:
3428                  *   0 - fixed by HW
3429                  *   1 - BIOS has applied the workaround when BIOS
3430                  *   workaround is available. (Or for other errata,
3431                  *   OS workaround is required.)
3432                  * For a value of 1, caller will confirm that the
3433                  * erratum 298 workaround has indeed been applied by BIOS.
3434                  *
3435                  * A 1 may also be set on cpus that already have the HW fix
3436                  * in a mixed-cpu system.  Regarding erratum 298:
3437                  *   In a multiprocessor platform, the workaround above
3438                  *   should be applied to all processors regardless of
3439                  *   silicon revision when an affected processor is
3440                  *   present.
3441                  */
3442 
3443                 return (rdmsr(MSR_AMD_OSVW_STATUS +
3444                     (osvwid / OSVW_ID_CNT_PER_MSR)) &
3445                     (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR)));
3446 
3447         default:
3448                 return (-1);
3449         }
3450 }
3451 
3452 static const char assoc_str[] = "associativity";
3453 static const char line_str[] = "line-size";
3454 static const char size_str[] = "size";
3455 
3456 static void
3457 add_cache_prop(dev_info_t *devi, const char *label, const char *type,
3458     uint32_t val)
3459 {
3460         char buf[128];
3461 
3462         /*
3463          * ndi_prop_update_int() is used because it is desirable for
3464          * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set.
3465          */
3466         if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf))
3467                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val);
3468 }
3469 
3470 /*
3471  * Intel-style cache/tlb description
3472  *
3473  * Standard cpuid level 2 gives a randomly ordered
3474  * selection of tags that index into a table that describes
3475  * cache and tlb properties.
3476  */
3477 
3478 static const char l1_icache_str[] = "l1-icache";
3479 static const char l1_dcache_str[] = "l1-dcache";
3480 static const char l2_cache_str[] = "l2-cache";
3481 static const char l3_cache_str[] = "l3-cache";
3482 static const char itlb4k_str[] = "itlb-4K";
3483 static const char dtlb4k_str[] = "dtlb-4K";
3484 static const char itlb2M_str[] = "itlb-2M";
3485 static const char itlb4M_str[] = "itlb-4M";
3486 static const char dtlb4M_str[] = "dtlb-4M";
3487 static const char dtlb24_str[] = "dtlb0-2M-4M";
3488 static const char itlb424_str[] = "itlb-4K-2M-4M";
3489 static const char itlb24_str[] = "itlb-2M-4M";
3490 static const char dtlb44_str[] = "dtlb-4K-4M";
3491 static const char sl1_dcache_str[] = "sectored-l1-dcache";
3492 static const char sl2_cache_str[] = "sectored-l2-cache";
3493 static const char itrace_str[] = "itrace-cache";
3494 static const char sl3_cache_str[] = "sectored-l3-cache";
3495 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k";
3496 
3497 static const struct cachetab {
3498         uint8_t         ct_code;
3499         uint8_t         ct_assoc;
3500         uint16_t        ct_line_size;
3501         size_t          ct_size;
3502         const char      *ct_label;
3503 } intel_ctab[] = {
3504         /*
3505          * maintain descending order!
3506          *
3507          * Codes ignored - Reason
3508          * ----------------------
3509          * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache
3510          * f0H/f1H - Currently we do not interpret prefetch size by design
3511          */
3512         { 0xe4, 16, 64, 8*1024*1024, l3_cache_str},
3513         { 0xe3, 16, 64, 4*1024*1024, l3_cache_str},
3514         { 0xe2, 16, 64, 2*1024*1024, l3_cache_str},
3515         { 0xde, 12, 64, 6*1024*1024, l3_cache_str},
3516         { 0xdd, 12, 64, 3*1024*1024, l3_cache_str},
3517         { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str},
3518         { 0xd8, 8, 64, 4*1024*1024, l3_cache_str},
3519         { 0xd7, 8, 64, 2*1024*1024, l3_cache_str},
3520         { 0xd6, 8, 64, 1*1024*1024, l3_cache_str},
3521         { 0xd2, 4, 64, 2*1024*1024, l3_cache_str},
3522         { 0xd1, 4, 64, 1*1024*1024, l3_cache_str},
3523         { 0xd0, 4, 64, 512*1024, l3_cache_str},
3524         { 0xca, 4, 0, 512, sh_l2_tlb4k_str},
3525         { 0xc0, 4, 0, 8, dtlb44_str },
3526         { 0xba, 4, 0, 64, dtlb4k_str },
3527         { 0xb4, 4, 0, 256, dtlb4k_str },
3528         { 0xb3, 4, 0, 128, dtlb4k_str },
3529         { 0xb2, 4, 0, 64, itlb4k_str },
3530         { 0xb0, 4, 0, 128, itlb4k_str },
3531         { 0x87, 8, 64, 1024*1024, l2_cache_str},
3532         { 0x86, 4, 64, 512*1024, l2_cache_str},
3533         { 0x85, 8, 32, 2*1024*1024, l2_cache_str},
3534         { 0x84, 8, 32, 1024*1024, l2_cache_str},
3535         { 0x83, 8, 32, 512*1024, l2_cache_str},
3536         { 0x82, 8, 32, 256*1024, l2_cache_str},
3537         { 0x80, 8, 64, 512*1024, l2_cache_str},
3538         { 0x7f, 2, 64, 512*1024, l2_cache_str},
3539         { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str},
3540         { 0x7c, 8, 64, 1024*1024, sl2_cache_str},
3541         { 0x7b, 8, 64, 512*1024, sl2_cache_str},
3542         { 0x7a, 8, 64, 256*1024, sl2_cache_str},
3543         { 0x79, 8, 64, 128*1024, sl2_cache_str},
3544         { 0x78, 8, 64, 1024*1024, l2_cache_str},
3545         { 0x73, 8, 0, 64*1024, itrace_str},
3546         { 0x72, 8, 0, 32*1024, itrace_str},
3547         { 0x71, 8, 0, 16*1024, itrace_str},
3548         { 0x70, 8, 0, 12*1024, itrace_str},
3549         { 0x68, 4, 64, 32*1024, sl1_dcache_str},
3550         { 0x67, 4, 64, 16*1024, sl1_dcache_str},
3551         { 0x66, 4, 64, 8*1024, sl1_dcache_str},
3552         { 0x60, 8, 64, 16*1024, sl1_dcache_str},
3553         { 0x5d, 0, 0, 256, dtlb44_str},
3554         { 0x5c, 0, 0, 128, dtlb44_str},
3555         { 0x5b, 0, 0, 64, dtlb44_str},
3556         { 0x5a, 4, 0, 32, dtlb24_str},
3557         { 0x59, 0, 0, 16, dtlb4k_str},
3558         { 0x57, 4, 0, 16, dtlb4k_str},
3559         { 0x56, 4, 0, 16, dtlb4M_str},
3560         { 0x55, 0, 0, 7, itlb24_str},
3561         { 0x52, 0, 0, 256, itlb424_str},
3562         { 0x51, 0, 0, 128, itlb424_str},
3563         { 0x50, 0, 0, 64, itlb424_str},
3564         { 0x4f, 0, 0, 32, itlb4k_str},
3565         { 0x4e, 24, 64, 6*1024*1024, l2_cache_str},
3566         { 0x4d, 16, 64, 16*1024*1024, l3_cache_str},
3567         { 0x4c, 12, 64, 12*1024*1024, l3_cache_str},
3568         { 0x4b, 16, 64, 8*1024*1024, l3_cache_str},
3569         { 0x4a, 12, 64, 6*1024*1024, l3_cache_str},
3570         { 0x49, 16, 64, 4*1024*1024, l3_cache_str},
3571         { 0x48, 12, 64, 3*1024*1024, l2_cache_str},
3572         { 0x47, 8, 64, 8*1024*1024, l3_cache_str},
3573         { 0x46, 4, 64, 4*1024*1024, l3_cache_str},
3574         { 0x45, 4, 32, 2*1024*1024, l2_cache_str},
3575         { 0x44, 4, 32, 1024*1024, l2_cache_str},
3576         { 0x43, 4, 32, 512*1024, l2_cache_str},
3577         { 0x42, 4, 32, 256*1024, l2_cache_str},
3578         { 0x41, 4, 32, 128*1024, l2_cache_str},
3579         { 0x3e, 4, 64, 512*1024, sl2_cache_str},
3580         { 0x3d, 6, 64, 384*1024, sl2_cache_str},
3581         { 0x3c, 4, 64, 256*1024, sl2_cache_str},
3582         { 0x3b, 2, 64, 128*1024, sl2_cache_str},
3583         { 0x3a, 6, 64, 192*1024, sl2_cache_str},
3584         { 0x39, 4, 64, 128*1024, sl2_cache_str},
3585         { 0x30, 8, 64, 32*1024, l1_icache_str},
3586         { 0x2c, 8, 64, 32*1024, l1_dcache_str},
3587         { 0x29, 8, 64, 4096*1024, sl3_cache_str},
3588         { 0x25, 8, 64, 2048*1024, sl3_cache_str},
3589         { 0x23, 8, 64, 1024*1024, sl3_cache_str},
3590         { 0x22, 4, 64, 512*1024, sl3_cache_str},
3591         { 0x0e, 6, 64, 24*1024, l1_dcache_str},
3592         { 0x0d, 4, 32, 16*1024, l1_dcache_str},
3593         { 0x0c, 4, 32, 16*1024, l1_dcache_str},
3594         { 0x0b, 4, 0, 4, itlb4M_str},
3595         { 0x0a, 2, 32, 8*1024, l1_dcache_str},
3596         { 0x08, 4, 32, 16*1024, l1_icache_str},
3597         { 0x06, 4, 32, 8*1024, l1_icache_str},
3598         { 0x05, 4, 0, 32, dtlb4M_str},
3599         { 0x04, 4, 0, 8, dtlb4M_str},
3600         { 0x03, 4, 0, 64, dtlb4k_str},
3601         { 0x02, 4, 0, 2, itlb4M_str},
3602         { 0x01, 4, 0, 32, itlb4k_str},
3603         { 0 }
3604 };
3605 
3606 static const struct cachetab cyrix_ctab[] = {
3607         { 0x70, 4, 0, 32, "tlb-4K" },
3608         { 0x80, 4, 16, 16*1024, "l1-cache" },
3609         { 0 }
3610 };
3611 
3612 /*
3613  * Search a cache table, sorted in descending code order, for a matching entry
3614  */
3615 static const struct cachetab *
3616 find_cacheent(const struct cachetab *ct, uint_t code)
3617 {
3618         if (code != 0) {
3619                 for (; ct->ct_code != 0; ct++)
3620                         if (ct->ct_code <= code)
3621                                 break;
3622                 if (ct->ct_code == code)
3623                         return (ct);
3624         }
3625         return (NULL);
3626 }
3627 
3628 /*
3629  * Populate cachetab entry with L2 or L3 cache-information using
3630  * cpuid function 4. This function is called from intel_walk_cacheinfo()
3631  * when descriptor 0x49 is encountered. It returns 0 if no such cache
3632  * information is found.
3633  */
3634 static int
3635 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi)
3636 {
3637         uint32_t level, i;
3638         int ret = 0;
3639 
3640         for (i = 0; i < cpi->cpi_std_4_size; i++) {
3641                 level = CPI_CACHE_LVL(cpi->cpi_std_4[i]);
3642 
3643                 if (level == 2 || level == 3) {
3644                         ct->ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1;
3645                         ct->ct_line_size =
3646                             CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1;
3647                         ct->ct_size = ct->ct_assoc *
3648                             (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) *
3649                             ct->ct_line_size *
3650                             (cpi->cpi_std_4[i]->cp_ecx + 1);
3651 
3652                         if (level == 2) {
3653                                 ct->ct_label = l2_cache_str;
3654                         } else if (level == 3) {
3655                                 ct->ct_label = l3_cache_str;
3656                         }
3657                         ret = 1;
3658                 }
3659         }
3660 
3661         return (ret);
3662 }
3663 
3664 /*
3665  * Walk the cacheinfo descriptor, applying 'func' to every valid element.
3666  * The walk is terminated if the walker returns non-zero.
3667  */
3668 static void
3669 intel_walk_cacheinfo(struct cpuid_info *cpi,
3670     void *arg, int (*func)(void *, const struct cachetab *))
3671 {
3672         const struct cachetab *ct;
3673         struct cachetab des_49_ct, des_b1_ct;
3674         uint8_t *dp;
3675         int i;
3676 
3677         if ((dp = cpi->cpi_cacheinfo) == NULL)
3678                 return;
3679         for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
3680                 /*
3681                  * For overloaded descriptor 0x49 we use cpuid function 4
3682                  * if supported by the current processor, to create
3683                  * cache information.
3684                  * For overloaded descriptor 0xb1 we use X86_PAE flag
3685                  * to disambiguate the cache information.
3686                  */
3687                 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 &&
3688                     intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) {
3689                                 ct = &des_49_ct;
3690                 } else if (*dp == 0xb1) {
3691                         des_b1_ct.ct_code = 0xb1;
3692                         des_b1_ct.ct_assoc = 4;
3693                         des_b1_ct.ct_line_size = 0;
3694                         if (is_x86_feature(x86_featureset, X86FSET_PAE)) {
3695                                 des_b1_ct.ct_size = 8;
3696                                 des_b1_ct.ct_label = itlb2M_str;
3697                         } else {
3698                                 des_b1_ct.ct_size = 4;
3699                                 des_b1_ct.ct_label = itlb4M_str;
3700                         }
3701                         ct = &des_b1_ct;
3702                 } else {
3703                         if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) {
3704                                 continue;
3705                         }
3706                 }
3707 
3708                 if (func(arg, ct) != 0) {
3709                         break;
3710                 }
3711         }
3712 }
3713 
3714 /*
3715  * (Like the Intel one, except for Cyrix CPUs)
3716  */
3717 static void
3718 cyrix_walk_cacheinfo(struct cpuid_info *cpi,
3719     void *arg, int (*func)(void *, const struct cachetab *))
3720 {
3721         const struct cachetab *ct;
3722         uint8_t *dp;
3723         int i;
3724 
3725         if ((dp = cpi->cpi_cacheinfo) == NULL)
3726                 return;
3727         for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
3728                 /*
3729                  * Search Cyrix-specific descriptor table first ..
3730                  */
3731                 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) {
3732                         if (func(arg, ct) != 0)
3733                                 break;
3734                         continue;
3735                 }
3736                 /*
3737                  * .. else fall back to the Intel one
3738                  */
3739                 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) {
3740                         if (func(arg, ct) != 0)
3741                                 break;
3742                         continue;
3743                 }
3744         }
3745 }
3746 
3747 /*
3748  * A cacheinfo walker that adds associativity, line-size, and size properties
3749  * to the devinfo node it is passed as an argument.
3750  */
3751 static int
3752 add_cacheent_props(void *arg, const struct cachetab *ct)
3753 {
3754         dev_info_t *devi = arg;
3755 
3756         add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc);
3757         if (ct->ct_line_size != 0)
3758                 add_cache_prop(devi, ct->ct_label, line_str,
3759                     ct->ct_line_size);
3760         add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size);
3761         return (0);
3762 }
3763 
3764 
3765 static const char fully_assoc[] = "fully-associative?";
3766 
3767 /*
3768  * AMD style cache/tlb description
3769  *
3770  * Extended functions 5 and 6 directly describe properties of
3771  * tlbs and various cache levels.
3772  */
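/*
 * Encode an L1 (extended function 5) associativity field as a devinfo
 * property: zero is reserved, 0xff means fully associative, and any other
 * value is the literal number of ways.
 */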
3773 static void
3774 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc)
3775 {
3776         switch (assoc) {
3777         case 0: /* reserved; ignore */
3778                 break;
3779         default:
3780                 add_cache_prop(devi, label, assoc_str, assoc);
3781                 break;
3782         case 0xff:
3783                 add_cache_prop(devi, label, fully_assoc, 1);
3784                 break;
3785         }
3786 }
3787 
3788 static void
3789 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
3790 {
3791         if (size == 0)
3792                 return;
3793         add_cache_prop(devi, label, size_str, size);
3794         add_amd_assoc(devi, label, assoc);
3795 }
3796 
3797 static void
3798 add_amd_cache(dev_info_t *devi, const char *label,
3799     uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
3800 {
3801         if (size == 0 || line_size == 0)
3802                 return;
3803         add_amd_assoc(devi, label, assoc);
3804         /*
3805          * Most AMD parts have a sectored cache. Multiple cache lines are
3806          * associated with each tag. A sector consists of all cache lines
3807          * associated with a tag. For example, the AMD K6-III has a sector
3808          * size of 2 cache lines per tag.
3809          */
3810         if (lines_per_tag != 0)
3811                 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
3812         add_cache_prop(devi, label, line_str, line_size);
3813         add_cache_prop(devi, label, size_str, size * 1024);
3814 }
3815 
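/*
 * The L2/L3 (extended function 6) associativity encoding differs from the
 * L1 encoding above: a few discrete values map to specific way counts,
 * 0xf means fully associative, and the remaining values are reserved.
 */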
3816 static void
3817 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc)
3818 {
3819         switch (assoc) {
3820         case 0: /* off */
3821                 break;
3822         case 1:
3823         case 2:
3824         case 4:
3825                 add_cache_prop(devi, label, assoc_str, assoc);
3826                 break;
3827         case 6:
3828                 add_cache_prop(devi, label, assoc_str, 8);
3829                 break;
3830         case 8:
3831                 add_cache_prop(devi, label, assoc_str, 16);
3832                 break;
3833         case 0xf:
3834                 add_cache_prop(devi, label, fully_assoc, 1);
3835                 break;
3836         default: /* reserved; ignore */
3837                 break;
3838         }
3839 }
3840 
3841 static void
3842 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
3843 {
3844         if (size == 0 || assoc == 0)
3845                 return;
3846         add_amd_l2_assoc(devi, label, assoc);
3847         add_cache_prop(devi, label, size_str, size);
3848 }
3849 
3850 static void
3851 add_amd_l2_cache(dev_info_t *devi, const char *label,
3852     uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
3853 {
3854         if (size == 0 || assoc == 0 || line_size == 0)
3855                 return;
3856         add_amd_l2_assoc(devi, label, assoc);
3857         if (lines_per_tag != 0)
3858                 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
3859         add_cache_prop(devi, label, line_str, line_size);
3860         add_cache_prop(devi, label, size_str, size * 1024);
3861 }
3862 
3863 static void
3864 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi)
3865 {
3866         struct cpuid_regs *cp;
3867 
3868         if (cpi->cpi_xmaxeax < 0x80000005)
3869                 return;
3870         cp = &cpi->cpi_extd[5];
3871 
3872         /*
3873          * 4M/2M L1 TLB configuration
3874          *
3875          * We report the size for 2M pages because AMD uses two
3876          * TLB entries for one 4M page.
3877          */
3878         add_amd_tlb(devi, "dtlb-2M",
3879             BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16));
3880         add_amd_tlb(devi, "itlb-2M",
3881             BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0));
3882 
3883         /*
3884          * 4K L1 TLB configuration
3885          */
3886 
3887         switch (cpi->cpi_vendor) {
3888                 uint_t nentries;
3889         case X86_VENDOR_TM:
3890                 if (cpi->cpi_family >= 5) {
3891                         /*
3892                          * Crusoe processors have 256 TLB entries, but
3893                          * the cpuid data format constrains them to
3894                          * reporting only 255 of them.
3895                          */
3896                         if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255)
3897                                 nentries = 256;
3898                         /*
3899                          * Crusoe processors also have a unified TLB
3900                          */
3901                         add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24),
3902                             nentries);
3903                         break;
3904                 }
3905                 /*FALLTHROUGH*/
3906         default:
3907                 add_amd_tlb(devi, itlb4k_str,
3908                     BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16));
3909                 add_amd_tlb(devi, dtlb4k_str,
3910                     BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0));
3911                 break;
3912         }
3913 
3914         /*
3915          * data L1 cache configuration
3916          */
3917 
3918         add_amd_cache(devi, l1_dcache_str,
3919             BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16),
3920             BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0));
3921 
3922         /*
3923          * code L1 cache configuration
3924          */
3925 
3926         add_amd_cache(devi, l1_icache_str,
3927             BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16),
3928             BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0));
3929 
3930         if (cpi->cpi_xmaxeax < 0x80000006)
3931                 return;
3932         cp = &cpi->cpi_extd[6];
3933 
3934         /* Check for a unified L2 TLB for large pages */
3935 
3936         if (BITX(cp->cp_eax, 31, 16) == 0)
3937                 add_amd_l2_tlb(devi, "l2-tlb-2M",
3938                     BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
3939         else {
3940                 add_amd_l2_tlb(devi, "l2-dtlb-2M",
3941                     BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
3942                 add_amd_l2_tlb(devi, "l2-itlb-2M",
3943                     BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
3944         }
3945 
3946         /* Check for a unified L2 TLB for 4K pages */
3947 
3948         if (BITX(cp->cp_ebx, 31, 16) == 0) {
3949                 add_amd_l2_tlb(devi, "l2-tlb-4K",
3950                     BITX(cp->cp_ebx, 15, 12), BITX(cp->cp_ebx, 11, 0));
3951         } else {
3952                 add_amd_l2_tlb(devi, "l2-dtlb-4K",
3953                     BITX(cp->cp_ebx, 31, 28), BITX(cp->cp_ebx, 27, 16));
3954                 add_amd_l2_tlb(devi, "l2-itlb-4K",
3955                     BITX(cp->cp_ebx, 15, 12), BITX(cp->cp_ebx, 11, 0));
3956         }
3957 
3958         add_amd_l2_cache(devi, l2_cache_str,
3959             BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12),
3960             BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0));
3961 }
3962 
3963 /*
3964  * There are two basic ways that the x86 world describes its cache
3965  * and tlb architecture - Intel's way and AMD's way.
3966  *
3967  * Return which flavor of cache architecture we should use
3968  */
3969 static int
3970 x86_which_cacheinfo(struct cpuid_info *cpi)
3971 {
3972         switch (cpi->cpi_vendor) {
3973         case X86_VENDOR_Intel:
3974                 if (cpi->cpi_maxeax >= 2)
3975                         return (X86_VENDOR_Intel);
3976                 break;
3977         case X86_VENDOR_AMD:
3978                 /*
3979                  * The K5 model 1 was the first part from AMD that reported
3980                  * cache sizes via extended cpuid functions.
3981                  */
3982                 if (cpi->cpi_family > 5 ||
3983                     (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
3984                         return (X86_VENDOR_AMD);
3985                 break;
3986         case X86_VENDOR_TM:
3987                 if (cpi->cpi_family >= 5)
3988                         return (X86_VENDOR_AMD);
3989                 /*FALLTHROUGH*/
3990         default:
3991                 /*
3992                  * If they have extended CPU data for 0x80000005
3993                  * then we assume they have AMD-format cache
3994                  * information.
3995                  *
3996                  * If not, and the vendor happens to be Cyrix,
3997                  * then try our Cyrix-specific handler.
3998                  *
3999                  * If we're not Cyrix, then assume we're using Intel's
4000                  * table-driven format instead.
4001                  */
4002                 if (cpi->cpi_xmaxeax >= 0x80000005)
4003                         return (X86_VENDOR_AMD);
4004                 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix)
4005                         return (X86_VENDOR_Cyrix);
4006                 else if (cpi->cpi_maxeax >= 2)
4007                         return (X86_VENDOR_Intel);
4008                 break;
4009         }
4010         return (-1);
4011 }
4012 
4013 void
4014 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
4015     struct cpuid_info *cpi)
4016 {
4017         dev_info_t *cpu_devi;
4018         int create;
4019 
4020         cpu_devi = (dev_info_t *)dip;
4021 
4022         /* device_type */
4023         (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
4024             "device_type", "cpu");
4025 
4026         /* reg */
4027         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4028             "reg", cpu_id);
4029 
4030         /* cpu-mhz, and clock-frequency */
4031         if (cpu_freq > 0) {
4032                 long long mul;
4033 
4034                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4035                     "cpu-mhz", cpu_freq);
4036                 if ((mul = cpu_freq * 1000000LL) <= INT_MAX)
4037                         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4038                             "clock-frequency", (int)mul);
4039         }
4040 
4041         if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) {
4042                 return;
4043         }
4044 
4045         /* vendor-id */
4046         (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
4047             "vendor-id", cpi->cpi_vendorstr);
4048 
4049         if (cpi->cpi_maxeax == 0) {
4050                 return;
4051         }
4052 
4053         /*
4054          * family, model, and step
4055          */
4056         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4057             "family", CPI_FAMILY(cpi));
4058         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4059             "cpu-model", CPI_MODEL(cpi));
4060         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4061             "stepping-id", CPI_STEP(cpi));
4062 
4063         /* type */
4064         switch (cpi->cpi_vendor) {
4065         case X86_VENDOR_Intel:
4066                 create = 1;
4067                 break;
4068         default:
4069                 create = 0;
4070                 break;
4071         }
4072         if (create)
4073                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4074                     "type", CPI_TYPE(cpi));
4075 
4076         /* ext-family */
4077         switch (cpi->cpi_vendor) {
4078         case X86_VENDOR_Intel:
4079         case X86_VENDOR_AMD:
4080                 create = cpi->cpi_family >= 0xf;
4081                 break;
4082         default:
4083                 create = 0;
4084                 break;
4085         }
4086         if (create)
4087                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4088                     "ext-family", CPI_FAMILY_XTD(cpi));
4089 
4090         /* ext-model */
4091         switch (cpi->cpi_vendor) {
4092         case X86_VENDOR_Intel:
4093                 create = IS_EXTENDED_MODEL_INTEL(cpi);
4094                 break;
4095         case X86_VENDOR_AMD:
4096                 create = CPI_FAMILY(cpi) == 0xf;
4097                 break;
4098         default:
4099                 create = 0;
4100                 break;
4101         }
4102         if (create)
4103                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4104                     "ext-model", CPI_MODEL_XTD(cpi));
4105 
4106         /* generation */
4107         switch (cpi->cpi_vendor) {
4108         case X86_VENDOR_AMD:
4109                 /*
4110                  * AMD K5 model 1 was the first part to support this
4111                  */
4112                 create = cpi->cpi_xmaxeax >= 0x80000001;
4113                 break;
4114         default:
4115                 create = 0;
4116                 break;
4117         }
4118         if (create)
4119                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4120                     "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8));
4121 
4122         /* brand-id */
4123         switch (cpi->cpi_vendor) {
4124         case X86_VENDOR_Intel:
4125                 /*
4126                  * brand id first appeared on Pentium III Xeon model 8 and
4127                  * Celeron model 8 processors; on AMD, with the Opteron.
4128                  */
4129                 create = cpi->cpi_family > 6 ||
4130                     (cpi->cpi_family == 6 && cpi->cpi_model >= 8);
4131                 break;
4132         case X86_VENDOR_AMD:
4133                 create = cpi->cpi_family >= 0xf;
4134                 break;
4135         default:
4136                 create = 0;
4137                 break;
4138         }
4139         if (create && cpi->cpi_brandid != 0) {
4140                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4141                     "brand-id", cpi->cpi_brandid);
4142         }
4143 
4144         /* chunks, and apic-id */
4145         switch (cpi->cpi_vendor) {
4146                 /*
4147                  * first available on Pentium IV and Opteron (K8)
4148                  */
4149         case X86_VENDOR_Intel:
4150                 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
4151                 break;
4152         case X86_VENDOR_AMD:
4153                 create = cpi->cpi_family >= 0xf;
4154                 break;
4155         default:
4156                 create = 0;
4157                 break;
4158         }
4159         if (create) {
4160                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4161                     "chunks", CPI_CHUNKS(cpi));
4162                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4163                     "apic-id", cpi->cpi_apicid);
4164                 if (cpi->cpi_chipid >= 0) {
4165                         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4166                             "chip#", cpi->cpi_chipid);
4167                         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4168                             "clog#", cpi->cpi_clogid);
4169                 }
4170         }
4171 
4172         /* cpuid-features */
4173         (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4174             "cpuid-features", CPI_FEATURES_EDX(cpi));
4175 
4176 
4177         /* cpuid-features-ecx */
4178         switch (cpi->cpi_vendor) {
4179         case X86_VENDOR_Intel:
4180                 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
4181                 break;
4182         case X86_VENDOR_AMD:
4183                 create = cpi->cpi_family >= 0xf;
4184                 break;
4185         default:
4186                 create = 0;
4187                 break;
4188         }
4189         if (create)
4190                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4191                     "cpuid-features-ecx", CPI_FEATURES_ECX(cpi));
4192 
4193         /* ext-cpuid-features */
4194         switch (cpi->cpi_vendor) {
4195         case X86_VENDOR_Intel:
4196         case X86_VENDOR_AMD:
4197         case X86_VENDOR_Cyrix:
4198         case X86_VENDOR_TM:
4199         case X86_VENDOR_Centaur:
4200                 create = cpi->cpi_xmaxeax >= 0x80000001;
4201                 break;
4202         default:
4203                 create = 0;
4204                 break;
4205         }
4206         if (create) {
4207                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4208                     "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi));
4209                 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4210                     "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi));
4211         }
4212 
4213         /*
4214          * Brand String first appeared in Intel Pentium IV, AMD K5
4215          * model 1, and Cyrix GXm.  On earlier models we try to
4216          * simulate something similar .. so this string should always
4217          * say -something- about the processor, however lame.
4218          */
4219         (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
4220             "brand-string", cpi->cpi_brandstr);
4221 
4222         /*
4223          * Finally, cache and tlb information
4224          */
4225         switch (x86_which_cacheinfo(cpi)) {
4226         case X86_VENDOR_Intel:
4227                 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
4228                 break;
4229         case X86_VENDOR_Cyrix:
4230                 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
4231                 break;
4232         case X86_VENDOR_AMD:
4233                 amd_cache_info(cpi, cpu_devi);
4234                 break;
4235         default:
4236                 break;
4237         }
4238 }
4239 
4240 struct l2info {
4241         int *l2i_csz;
4242         int *l2i_lsz;
4243         int *l2i_assoc;
4244         int l2i_ret;
4245 };
4246 
4247 /*
4248  * A cacheinfo walker that fetches the size, line-size and associativity
4249  * of the L2 cache
4250  */
4251 static int
4252 intel_l2cinfo(void *arg, const struct cachetab *ct)
4253 {
4254         struct l2info *l2i = arg;
4255         int *ip;
4256 
4257         if (ct->ct_label != l2_cache_str &&
4258             ct->ct_label != sl2_cache_str)
4259                 return (0);     /* not an L2 -- keep walking */
4260 
4261         if ((ip = l2i->l2i_csz) != NULL)
4262                 *ip = ct->ct_size;
4263         if ((ip = l2i->l2i_lsz) != NULL)
4264                 *ip = ct->ct_line_size;
4265         if ((ip = l2i->l2i_assoc) != NULL)
4266                 *ip = ct->ct_assoc;
4267         l2i->l2i_ret = ct->ct_size;
4268         return (1);             /* was an L2 -- terminate walk */
4269 }
4270 
4271 /*
4272  * AMD L2/L3 Cache and TLB Associativity Field Definition:
4273  *
4274  *      Unlike the associativity for the L1 cache and tlb where the 8 bit
4275  *      value is the associativity, the associativity for the L2 cache and
4276  *      tlb is encoded in the following table. The 4 bit L2 value serves as
4277  *      an index into the amd_afd[] array to determine the associativity.
4278  *      -1 is undefined. 0 is fully associative.
4279  */
4280 
4281 static int amd_afd[] =
4282         {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0};
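/*
 * Worked example of the encoding above: a field value of 0x1 is direct
 * mapped (1-way), 0x6 indexes amd_afd[6] and yields 8-way set associative,
 * 0xf yields 0 (fully associative), and 0x0 yields -1 (undefined).
 */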
4283 
4284 static void
4285 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i)
4286 {
4287         struct cpuid_regs *cp;
4288         uint_t size, assoc;
4289         int i;
4290         int *ip;
4291 
4292         if (cpi->cpi_xmaxeax < 0x80000006)
4293                 return;
4294         cp = &cpi->cpi_extd[6];
4295 
4296         if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 &&
4297             (size = BITX(cp->cp_ecx, 31, 16)) != 0) {
4298                 uint_t cachesz = size * 1024;
4299                 assoc = amd_afd[i];
4300 
4301                 ASSERT(assoc != -1);
4302 
4303                 if ((ip = l2i->l2i_csz) != NULL)
4304                         *ip = cachesz;
4305                 if ((ip = l2i->l2i_lsz) != NULL)
4306                         *ip = BITX(cp->cp_ecx, 7, 0);
4307                 if ((ip = l2i->l2i_assoc) != NULL)
4308                         *ip = assoc;
4309                 l2i->l2i_ret = cachesz;
4310         }
4311 }
4312 
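/*
 * Fetch L2 cache information for the given cpu.  The size, line size and
 * associativity are returned through the caller-supplied pointers (each of
 * which may be NULL); the return value is the L2 cache size, or -1 if it
 * could not be determined.
 */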
4313 int
4314 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc)
4315 {
4316         struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4317         struct l2info __l2info, *l2i = &__l2info;
4318 
4319         l2i->l2i_csz = csz;
4320         l2i->l2i_lsz = lsz;
4321         l2i->l2i_assoc = assoc;
4322         l2i->l2i_ret = -1;
4323 
4324         switch (x86_which_cacheinfo(cpi)) {
4325         case X86_VENDOR_Intel:
4326                 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
4327                 break;
4328         case X86_VENDOR_Cyrix:
4329                 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
4330                 break;
4331         case X86_VENDOR_AMD:
4332                 amd_l2cacheinfo(cpi, l2i);
4333                 break;
4334         default:
4335                 break;
4336         }
4337         return (l2i->l2i_ret);
4338 }
4339 
4340 #if !defined(__xpv)
4341 
4342 uint32_t *
4343 cpuid_mwait_alloc(cpu_t *cpu)
4344 {
4345         uint32_t        *ret;
4346         size_t          mwait_size;
4347 
4348         ASSERT(cpuid_checkpass(CPU, 2));
4349 
4350         mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max;
4351         if (mwait_size == 0)
4352                 return (NULL);
4353 
4354         /*
4355          * kmem_alloc() returns cache line size aligned data for mwait_size
4356          * allocations.  mwait_size is currently cache line sized.  Neither
4357          * of these implementation details is guaranteed to be true in the
4358          * future.
4359          *
4360          * First try allocating mwait_size as kmem_alloc() currently returns
4361          * correctly aligned memory.  If kmem_alloc() does not return
4362          * mwait_size aligned memory, then use mwait_size ROUNDUP.
4363          *
4364          * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we
4365          * decide to free this memory.
4366          */
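        /*
         * Illustrative example (values assumed): with mwait_size == 64, a
         * kmem_zalloc() return value of 0x...040 is already 64-byte aligned
         * and is used as-is; a return value of 0x...020 is not, so a buffer
         * of twice the size is allocated and the pointer rounded up to the
         * next 64-byte boundary within it.
         */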
4367         ret = kmem_zalloc(mwait_size, KM_SLEEP);
4368         if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) {
4369                 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
4370                 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size;
4371                 *ret = MWAIT_RUNNING;
4372                 return (ret);
4373         } else {
4374                 kmem_free(ret, mwait_size);
4375                 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP);
4376                 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
4377                 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2;
4378                 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size);
4379                 *ret = MWAIT_RUNNING;
4380                 return (ret);
4381         }
4382 }
4383 
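/*
 * Release the monitor/mwait buffer recorded in cpi_mwait by
 * cpuid_mwait_alloc(), if any, and clear the bookkeeping fields.
 */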
4384 void
4385 cpuid_mwait_free(cpu_t *cpu)
4386 {
4387         if (cpu->cpu_m.mcpu_cpi == NULL) {
4388                 return;
4389         }
4390 
4391         if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL &&
4392             cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) {
4393                 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual,
4394                     cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual);
4395         }
4396 
4397         cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL;
4398         cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0;
4399 }
4400 
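/*
 * Patch tsc_read() at run time with the variant appropriate to this
 * processor, selected by flag: a version for processors without a TSC, an
 * rdtscp-based version, or an mfence- or lfence-serialized rdtsc.  The
 * chosen implementation is simply copied over the default routine.
 */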
4401 void
4402 patch_tsc_read(int flag)
4403 {
4404         size_t cnt;
4405 
4406         switch (flag) {
4407         case X86_NO_TSC:
4408                 cnt = &_no_rdtsc_end - &_no_rdtsc_start;
4409                 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt);
4410                 break;
4411         case X86_HAVE_TSCP:
4412                 cnt = &_tscp_end - &_tscp_start;
4413                 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt);
4414                 break;
4415         case X86_TSC_MFENCE:
4416                 cnt = &_tsc_mfence_end - &_tsc_mfence_start;
4417                 (void) memcpy((void *)tsc_read,
4418                     (void *)&_tsc_mfence_start, cnt);
4419                 break;
4420         case X86_TSC_LFENCE:
4421                 cnt = &_tsc_lfence_end - &_tsc_lfence_start;
4422                 (void) memcpy((void *)tsc_read,
4423                     (void *)&_tsc_lfence_start, cnt);
4424                 break;
4425         default:
4426                 break;
4427         }
4428 }
4429 
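/*
 * Report whether deep ACPI C-states may be used on this processor by
 * checking whether the TSC runs at a constant rate across C-states
 * (Intel leaf 0x80000007).  Returns non-zero if so.
 */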
4430 int
4431 cpuid_deep_cstates_supported(void)
4432 {
4433         struct cpuid_info *cpi;
4434         struct cpuid_regs regs;
4435 
4436         ASSERT(cpuid_checkpass(CPU, 1));
4437 
4438         cpi = CPU->cpu_m.mcpu_cpi;
4439 
4440         if (!is_x86_feature(x86_featureset, X86FSET_CPUID))
4441                 return (0);
4442 
4443         switch (cpi->cpi_vendor) {
4444         case X86_VENDOR_Intel:
4445                 if (cpi->cpi_xmaxeax < 0x80000007)
4446                         return (0);
4447 
4448                 /*
4449                  * Does the TSC run at a constant rate in all ACPI C-states?
4450                  */
4451                 regs.cp_eax = 0x80000007;
4452                 (void) __cpuid_insn(&regs);
4453                 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE);
4454 
4455         default:
4456                 return (0);
4457         }
4458 }
4459 
4460 #endif  /* !__xpv */
4461 
4462 void
4463 post_startup_cpu_fixups(void)
4464 {
4465 #ifndef __xpv
4466         /*
4467          * Some AMD processors support C1E state. Entering this state will
4468          * cause the local APIC timer to stop, which we can't deal with at
4469          * this time.
4470          */
4471         if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) {
4472                 on_trap_data_t otd;
4473                 uint64_t reg;
4474 
4475                 if (!on_trap(&otd, OT_DATA_ACCESS)) {
4476                         reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT);
4477                         /* Disable C1E state if it is enabled by BIOS */
4478                         if ((reg >> AMD_ACTONCMPHALT_SHIFT) &
4479                             AMD_ACTONCMPHALT_MASK) {
4480                                 reg &= ~(AMD_ACTONCMPHALT_MASK <<
4481                                     AMD_ACTONCMPHALT_SHIFT);
4482                                 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg);
4483                         }
4484                 }
4485                 no_trap();
4486         }
4487 #endif  /* !__xpv */
4488 }
4489 
4490 /*
4491  * Setup necessary registers to enable XSAVE feature on this processor.
4492  * This function needs to be called early enough, so that no xsave/xrstor
4493  * ops will execute on the processor before the MSRs are properly set up.
4494  *
4495  * Current implementation has the following assumption:
4496  * - cpuid_pass1() is done, so that X86 features are known.
4497  * - fpu_probe() is done, so that fp_save_mech is chosen.
4498  */
4499 void
4500 xsave_setup_msr(cpu_t *cpu)
4501 {
4502         ASSERT(fp_save_mech == FP_XSAVE);
4503         ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
4504 
4505         /* Enable OSXSAVE in CR4. */
4506         setcr4(getcr4() | CR4_OSXSAVE);
4507         /*
4508          * Update SW copy of ECX, so that /dev/cpu/self/cpuid will report
4509          * correct value.
4510          */
4511         cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE;
4512         setup_xfem();
4513 }
4514 
4515 /*
4516  * Starting with the Westmere processor, the local
4517  * APIC timer will continue running in all C-states,
4518  * including the deepest C-states.
4519  */
4520 int
4521 cpuid_arat_supported(void)
4522 {
4523         struct cpuid_info *cpi;
4524         struct cpuid_regs regs;
4525 
4526         ASSERT(cpuid_checkpass(CPU, 1));
4527         ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
4528 
4529         cpi = CPU->cpu_m.mcpu_cpi;
4530 
4531         switch (cpi->cpi_vendor) {
4532         case X86_VENDOR_Intel:
4533                 /*
4534                  * Always-running Local APIC Timer is
4535                  * indicated by CPUID.6.EAX[2].
4536                  */
4537                 if (cpi->cpi_maxeax >= 6) {
4538                         regs.cp_eax = 6;
4539                         (void) cpuid_insn(NULL, &regs);
4540                         return (regs.cp_eax & CPUID_CSTATE_ARAT);
4541                 } else {
4542                         return (0);
4543                 }
4544         default:
4545                 return (0);
4546         }
4547 }
4548 
4549 /*
4550  * Check support for Intel ENERGY_PERF_BIAS feature
4551  */
4552 int
4553 cpuid_iepb_supported(struct cpu *cp)
4554 {
4555         struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi;
4556         struct cpuid_regs regs;
4557 
4558         ASSERT(cpuid_checkpass(cp, 1));
4559 
4560         if (!(is_x86_feature(x86_featureset, X86FSET_CPUID)) ||
4561             !(is_x86_feature(x86_featureset, X86FSET_MSR))) {
4562                 return (0);
4563         }
4564 
4565         /*
4566          * Intel ENERGY_PERF_BIAS MSR is indicated by
4567          * capability bit CPUID.6.ECX.3
4568          */
4569         if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6))
4570                 return (0);
4571 
4572         regs.cp_eax = 0x6;
4573         (void) cpuid_insn(NULL, &regs);
4574         return (regs.cp_ecx & CPUID_EPB_SUPPORT);
4575 }
4576 
4577 /*
4578  * Check support for TSC deadline timer
4579  *
4580  * The TSC deadline timer provides a superior software programming
4581  * model over the local APIC timer, one that eliminates "time drifts".
4582  * Instead of specifying a relative time, software specifies an
4583  * absolute time as the target at which the processor should
4584  * generate a timer event.
4585  */
4586 int
4587 cpuid_deadline_tsc_supported(void)
4588 {
4589         struct cpuid_info *cpi = CPU->cpu_m.mcpu_cpi;
4590         struct cpuid_regs regs;
4591 
4592         ASSERT(cpuid_checkpass(CPU, 1));
4593         ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
4594 
4595         switch (cpi->cpi_vendor) {
4596         case X86_VENDOR_Intel:
4597                 if (cpi->cpi_maxeax >= 1) {
4598                         regs.cp_eax = 1;
4599                         (void) cpuid_insn(NULL, &regs);
4600                         return (regs.cp_ecx & CPUID_DEADLINE_TSC);
4601                 } else {
4602                         return (0);
4603                 }
4604         default:
4605                 return (0);
4606         }
4607 }
4608 
4609 #if defined(__amd64) && !defined(__xpv)
4610 /*
4611  * Patch in versions of bcopy for high performance Intel Nehalem
4612  * processors and later...
4613  */
4614 void
4615 patch_memops(uint_t vendor)
4616 {
4617         size_t cnt, i;
4618         caddr_t to, from;
4619 
4620         if ((vendor == X86_VENDOR_Intel) &&
4621             is_x86_feature(x86_featureset, X86FSET_SSE4_2)) {
4622                 cnt = &bcopy_patch_end - &bcopy_patch_start;
4623                 to = &bcopy_ck_size;
4624                 from = &bcopy_patch_start;
4625                 for (i = 0; i < cnt; i++) {
4626                         *to++ = *from++;
4627                 }
4628         }
4629 }
4630 #endif  /* __amd64 && !__xpv */
4631 
4632 /*
4633  * This function finds the number of bits to represent the number of cores per
4634  * chip and the number of strands per core on Intel platforms.
4635  * It re-uses the x2APIC cpuid logic from cpuid_pass2().
4636  */
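/*
 * Worked example (values assumed for illustration): if leaf 0xB reports a
 * thread-level shift of 1 and a core-level shift of 5, then *strand_nbits
 * is set to 1 (two strands per core) and *core_nbits to 5 - 1 = 4 (up to
 * sixteen cores per chip).
 */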
4637 void
4638 cpuid_get_ext_topo(uint_t vendor, uint_t *core_nbits, uint_t *strand_nbits)
4639 {
4640         struct cpuid_regs regs;
4641         struct cpuid_regs *cp = &regs;
4642 
4643         if (vendor != X86_VENDOR_Intel) {
4644                 return;
4645         }
4646 
4647         /* if the cpuid level is at least 0xB, extended topo is available. */
4648         cp->cp_eax = 0;
4649         if (__cpuid_insn(cp) >= 0xB) {
4650 
4651                 cp->cp_eax = 0xB;
4652                 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
4653                 (void) __cpuid_insn(cp);
4654 
4655                 /*
4656                  * Check that CPUID.(EAX=0BH, ECX=0H):EBX is non-zero, which
4657                  * indicates that the extended topology enumeration leaf is
4658                  * available.
4659                  */
4660                 if (cp->cp_ebx) {
4661                         uint_t coreid_shift = 0;
4662                         uint_t chipid_shift = 0;
4663                         uint_t i;
4664                         uint_t level;
4665 
4666                         for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
4667                                 cp->cp_eax = 0xB;
4668                                 cp->cp_ecx = i;
4669 
4670                                 (void) __cpuid_insn(cp);
4671                                 level = CPI_CPU_LEVEL_TYPE(cp);
4672 
4673                                 if (level == 1) {
4674                                         /*
4675                                          * Thread level processor topology
4676                                          * Number of bits shift right APIC ID
4677                                          * to get the coreid.
4678                                          */
4679                                         coreid_shift = BITX(cp->cp_eax, 4, 0);
4680                                 } else if (level == 2) {
4681                                         /*
4682                                          * Core level processor topology
4683                                          * Number of bits shift right APIC ID
4684                                          * to get the chipid.
4685                                          */
4686                                         chipid_shift = BITX(cp->cp_eax, 4, 0);
4687                                 }
4688                         }
4689 
4690                         if (coreid_shift > 0 && chipid_shift > coreid_shift) {
4691                                 *strand_nbits = coreid_shift;
4692                                 *core_nbits = chipid_shift - coreid_shift;
4693                         }
4694                 }
4695         }
4696 }