--- old/usr/src/uts/i86pc/os/cpuid.c
+++ new/usr/src/uts/i86pc/os/cpuid.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright (c) 2011 by Delphix. All rights reserved.
24 24 */
25 25 /*
26 26 * Copyright (c) 2010, Intel Corporation.
27 27 * All rights reserved.
28 28 */
29 29 /*
30 30 * Portions Copyright 2009 Advanced Micro Devices, Inc.
31 31 */
32 32 /*
33 33 * Copyright (c) 2011, Joyent, Inc. All rights reserved.
34 34 */
35 35 /*
36 36 * Various routines to handle identification
37 37 * and classification of x86 processors.
38 38 */
39 39
40 40 #include <sys/types.h>
41 41 #include <sys/archsystm.h>
42 42 #include <sys/x86_archext.h>
43 43 #include <sys/kmem.h>
44 44 #include <sys/systm.h>
45 45 #include <sys/cmn_err.h>
46 46 #include <sys/sunddi.h>
47 47 #include <sys/sunndi.h>
48 48 #include <sys/cpuvar.h>
49 49 #include <sys/processor.h>
50 50 #include <sys/sysmacros.h>
51 51 #include <sys/pg.h>
52 52 #include <sys/fp.h>
53 53 #include <sys/controlregs.h>
54 54 #include <sys/bitmap.h>
55 55 #include <sys/auxv_386.h>
56 56 #include <sys/memnode.h>
57 57 #include <sys/pci_cfgspace.h>
58 58
59 59 #ifdef __xpv
60 60 #include <sys/hypervisor.h>
61 61 #else
62 62 #include <sys/ontrap.h>
63 63 #endif
64 64
65 65 /*
66 66 * Pass 0 of cpuid feature analysis happens in locore. It contains special code
67 67 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with
68 68 * them accordingly. For most modern processors, feature detection occurs here
69 69 * in pass 1.
70 70 *
71 71 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup()
72 72 * for the boot CPU and does the basic analysis that the early kernel needs.
73 73 * x86_featureset is set based on the return value of cpuid_pass1() of the boot
74 74 * CPU.
75 75 *
76 76 * Pass 1 includes:
77 77 *
78 78 * o Determining vendor/model/family/stepping and setting x86_type and
79 79 * x86_vendor accordingly.
80 80 * o Processing the feature flags returned by the cpuid instruction while
81 81 * applying any workarounds or tricks for the specific processor.
82 82 * o Mapping the feature flags into Solaris feature bits (X86_*).
83 83 * o Processing extended feature flags if supported by the processor,
84 84 * again while applying specific processor knowledge.
85 85 * o Determining the CMT characteristics of the system.
86 86 *
87 87 * Pass 1 is done on non-boot CPUs during their initialization and the results
88 88 * are used only as a meager attempt at ensuring that all processors within the
89 89 * system support the same features.
90 90 *
91 91 * Pass 2 of cpuid feature analysis happens just at the beginning
92 92 * of startup(). It just copies in and corrects the remainder
93 93 * of the cpuid data we depend on: standard cpuid functions that we didn't
94 94 * need for pass1 feature analysis, and extended cpuid functions beyond the
95 95 * simple feature processing done in pass1.
96 96 *
97 97 * Pass 3 of cpuid analysis is invoked after basic kernel services; in
98 98 * particular kernel memory allocation has been made available. It creates a
99 99 * readable brand string based on the data collected in the first two passes.
100 100 *
101 101 * Pass 4 of cpuid analysis is invoked after post_startup() when all
102 102 * the support infrastructure for various hardware features has been
103 103 * initialized. It determines which processor features will be reported
104 104 * to userland via the aux vector.
105 105 *
106 106 * All passes are executed on all CPUs, but only the boot CPU determines what
107 107 * features the kernel will use.
108 108 *
109 109 * Much of the worst junk in this file is for the support of processors
110 110 * that didn't really implement the cpuid instruction properly.
111 111 *
112 112 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon,
113 113 * the pass numbers. Accordingly, changes to the pass code may require changes
114 114 * to the accessor code.
115 115 */
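For a concrete sense of how the pass results are consumed, here is a minimal, hypothetical sketch of a kernel consumer checking a feature bit once pass 1 has populated x86_featureset (X86FSET_SSE2 is only an example; any X86FSET_* constant is tested the same way):

        /* hypothetical consumer, not part of the change under review */
        if (is_x86_feature(x86_featureset, X86FSET_SSE2)) {
                /* the boot CPU reported SSE2; safe to rely on it */
        }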
116 116
117 117 uint_t x86_vendor = X86_VENDOR_IntelClone;
118 118 uint_t x86_type = X86_TYPE_OTHER;
119 119 uint_t x86_clflush_size = 0;
120 120
121 121 uint_t pentiumpro_bug4046376;
122 122 uint_t pentiumpro_bug4064495;
123 123
124 124 uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)];
125 125
126 126 static char *x86_feature_names[NUM_X86_FEATURES] = {
127 127 "lgpg",
128 128 "tsc",
129 129 "msr",
130 130 "mtrr",
131 131 "pge",
132 132 "de",
133 133 "cmov",
134 134 "mmx",
135 135 "mca",
136 136 "pae",
137 137 "cv8",
138 138 "pat",
139 139 "sep",
140 140 "sse",
141 141 "sse2",
142 142 "htt",
143 143 "asysc",
144 144 "nx",
145 145 "sse3",
146 146 "cx16",
147 147 "cmp",
148 148 "tscp",
149 149 "mwait",
150 150 "sse4a",
151 151 "cpuid",
152 152 "ssse3",
[ 152 lines elided ]
153 153 "sse4_1",
154 154 "sse4_2",
155 155 "1gpg",
156 156 "clfsh",
157 157 "64",
158 158 "aes",
159 159 "pclmulqdq",
160 160 "xsave",
161 161 "avx",
162 162 "vmx",
163 - "svm"
163 + "svm",
164 + "topoext"
164 165 };
165 166
166 167 boolean_t
167 168 is_x86_feature(void *featureset, uint_t feature)
168 169 {
169 170 ASSERT(feature < NUM_X86_FEATURES);
170 171 return (BT_TEST((ulong_t *)featureset, feature));
171 172 }
172 173
173 174 void
174 175 add_x86_feature(void *featureset, uint_t feature)
175 176 {
176 177 ASSERT(feature < NUM_X86_FEATURES);
177 178 BT_SET((ulong_t *)featureset, feature);
178 179 }
179 180
180 181 void
181 182 remove_x86_feature(void *featureset, uint_t feature)
182 183 {
183 184 ASSERT(feature < NUM_X86_FEATURES);
184 185 BT_CLEAR((ulong_t *)featureset, feature);
185 186 }
186 187
187 188 boolean_t
188 189 compare_x86_featureset(void *setA, void *setB)
189 190 {
190 191 /*
191 192 * We assume that the unused bits of the bitmap are always zero.
192 193 */
193 194 if (memcmp(setA, setB, BT_SIZEOFMAP(NUM_X86_FEATURES)) == 0) {
194 195 return (B_TRUE);
195 196 } else {
196 197 return (B_FALSE);
197 198 }
198 199 }
199 200
200 201 void
201 202 print_x86_featureset(void *featureset)
202 203 {
203 204 uint_t i;
204 205
205 206 for (i = 0; i < NUM_X86_FEATURES; i++) {
206 207 if (is_x86_feature(featureset, i)) {
207 208 cmn_err(CE_CONT, "?x86_feature: %s\n",
208 209 x86_feature_names[i]);
209 210 }
210 211 }
211 212 }
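A minimal sketch (hypothetical call site, assuming a cpu_t *cp for the starting CPU) of how these helpers fit together when a non-boot CPU comes up: its pass-1 results can be compared against the boot CPU's x86_featureset and dumped if they differ:

        uchar_t new_set[BT_SIZEOFMAP(NUM_X86_FEATURES)] = { 0 };

        cpuid_pass1(cp, new_set);
        if (!compare_x86_featureset(x86_featureset, new_set))
                print_x86_featureset(new_set);  /* show what this CPU reports */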
212 213
213 214 uint_t enable486;
214 215
215 216 static size_t xsave_state_size = 0;
216 217 uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE);
217 218 boolean_t xsave_force_disable = B_FALSE;
218 219
219 220 /*
220 221 * This is set to platform type Solaris is running on.
221 222 */
222 223 static int platform_type = -1;
223 224
224 225 #if !defined(__xpv)
225 226 /*
226 227 * Variable to patch if hypervisor platform detection needs to be
227 228 * disabled (e.g. platform_type will always be HW_NATIVE if this is 0).
228 229 */
229 230 int enable_platform_detection = 1;
230 231 #endif
231 232
232 233 /*
233 234 * monitor/mwait info.
234 235 *
 235  236  * size_actual and buf_actual are the real size and address allocated to get
 236  237  * proper mwait_buf alignment. buf_actual and size_actual should be passed
 237  238  * to kmem_free(). Currently kmem_alloc() and mwait happen to both use
 238  239  * processor cache-line alignment, but this is not guaranteed in the future.
239 240 */
240 241 struct mwait_info {
241 242 size_t mon_min; /* min size to avoid missed wakeups */
242 243 size_t mon_max; /* size to avoid false wakeups */
243 244 size_t size_actual; /* size actually allocated */
244 245 void *buf_actual; /* memory actually allocated */
245 246 uint32_t support; /* processor support of monitor/mwait */
246 247 };
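As the comment above notes, size_actual/buf_actual exist so the monitor buffer can later be freed exactly as it was allocated. A rough, hypothetical sketch of the intended pairing (the real allocation lives in the idle/mwait code, not here):

        struct mwait_info *mi = &cpi->cpi_mwait;

        mi->size_actual = mi->mon_max;          /* or a larger, aligned size */
        mi->buf_actual = kmem_zalloc(mi->size_actual, KM_SLEEP);
        /* ... hand buf_actual to monitor/mwait ... */
        kmem_free(mi->buf_actual, mi->size_actual);     /* must match the alloc */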
247 248
248 249 /*
249 250 * xsave/xrestor info.
250 251 *
251 252 * This structure contains HW feature bits and size of the xsave save area.
252 253 * Note: the kernel will use the maximum size required for all hardware
 253  254  * features. It is not optimized for potential memory savings if features at
254 255 * the end of the save area are not enabled.
255 256 */
256 257 struct xsave_info {
257 258 uint32_t xsav_hw_features_low; /* Supported HW features */
258 259 uint32_t xsav_hw_features_high; /* Supported HW features */
259 260 size_t xsav_max_size; /* max size save area for HW features */
260 261 size_t ymm_size; /* AVX: size of ymm save area */
261 262 size_t ymm_offset; /* AVX: offset for ymm save area */
[ 88 lines elided ]
262 263 };
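Worked example, assuming the architectural layout of a 512-byte legacy (FXSAVE) region followed by a 64-byte XSAVE header: with AVX enabled, ymm_offset = 512 + 64 = 576 and ymm_size = 256 (matching the CPUID_LEAFD_2_YMM_OFFSET/SIZE constants defined further down), so xsav_max_size works out to 576 + 256 = 832 bytes.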
263 264
264 265
265 266 /*
266 267 * These constants determine how many of the elements of the
267 268 * cpuid we cache in the cpuid_info data structure; the
268 269 * remaining elements are accessible via the cpuid instruction.
269 270 */
270 271
271 272 #define NMAX_CPI_STD 6 /* eax = 0 .. 5 */
272 -#define NMAX_CPI_EXTD 0x1c /* eax = 0x80000000 .. 0x8000001b */
273 +#define NMAX_CPI_EXTD 0x1f /* eax = 0x80000000 .. 0x8000001e */
273 274
274 275 /*
275 276 * Some terminology needs to be explained:
276 277 * - Socket: Something that can be plugged into a motherboard.
277 278 * - Package: Same as socket
278 279 * - Chip: Same as socket. Note that AMD's documentation uses term "chip"
279 280 * differently: there, chip is the same as processor node (below)
280 281 * - Processor node: Some AMD processors have more than one
281 282 * "subprocessor" embedded in a package. These subprocessors (nodes)
282 283 * are fully-functional processors themselves with cores, caches,
283 284 * memory controllers, PCI configuration spaces. They are connected
284 285 * inside the package with Hypertransport links. On single-node
285 286 * processors, processor node is equivalent to chip/socket/package.
287 + * - Compute Unit: Some AMD processors pair cores in "compute units" that
288 + * share the FPU and the L1I and L2 caches.
286 289 */
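To make the terms concrete: a hypothetical two-node family-0x15 package with four compute units per node would present 2 nodes x 4 compute units x 2 cores = 16 cores to the OS, while only 8 FPUs and 8 L2 caches physically exist (one per compute unit).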
287 290
288 291 struct cpuid_info {
289 292 uint_t cpi_pass; /* last pass completed */
290 293 /*
291 294 * standard function information
292 295 */
293 296 uint_t cpi_maxeax; /* fn 0: %eax */
294 297 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */
295 298 uint_t cpi_vendor; /* enum of cpi_vendorstr */
296 299
297 300 uint_t cpi_family; /* fn 1: extended family */
298 301 uint_t cpi_model; /* fn 1: extended model */
299 302 uint_t cpi_step; /* fn 1: stepping */
300 303 chipid_t cpi_chipid; /* fn 1: %ebx: Intel: chip # */
301 304 /* AMD: package/socket # */
302 305 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */
303 306 int cpi_clogid; /* fn 1: %ebx: thread # */
304 307 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */
305 308 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */
306 309 uint_t cpi_ncache; /* fn 2: number of elements */
307 310 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */
308 311 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */
309 312 uint_t cpi_std_4_size; /* fn 4: number of fn 4 elements */
310 313 struct cpuid_regs **cpi_std_4; /* fn 4: %ecx == 0 .. fn4_size */
311 314 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 5 */
312 315 /*
313 316 * extended function information
314 317 */
315 318 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */
316 319 char cpi_brandstr[49]; /* fn 0x8000000[234] */
317 320 uint8_t cpi_pabits; /* fn 0x80000006: %eax */
318 321 uint8_t cpi_vabits; /* fn 0x80000006: %eax */
319 322 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x800000XX */
320 323
321 324 id_t cpi_coreid; /* same coreid => strands share core */
322 325 int cpi_pkgcoreid; /* core number within single package */
323 326 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */
324 327 /* Intel: fn 4: %eax[31-26] */
325 328 /*
326 329 * supported feature information
327 330 */
328 331 uint32_t cpi_support[5];
329 332 #define STD_EDX_FEATURES 0
330 333 #define AMD_EDX_FEATURES 1
331 334 #define TM_EDX_FEATURES 2
332 335 #define STD_ECX_FEATURES 3
333 336 #define AMD_ECX_FEATURES 4
334 337 /*
335 338 * Synthesized information, where known.
[ 40 lines elided ]
336 339 */
337 340 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */
338 341 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */
339 342 uint32_t cpi_socket; /* Chip package/socket type */
340 343
341 344 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */
342 345 uint32_t cpi_apicid;
343 346 uint_t cpi_procnodeid; /* AMD: nodeID on HT, Intel: chipid */
344 347 uint_t cpi_procnodes_per_pkg; /* AMD: # of nodes in the package */
345 348 /* Intel: 1 */
349 + uint_t cpi_compunitid; /* AMD: compute unit ID, Intel: coreid */
350 + uint_t cpi_cores_per_compunit; /* AMD: # of cores in the compute unit */
346 351
347 352 struct xsave_info cpi_xsave; /* fn D: xsave/xrestor info */
348 353 };
349 354
350 355
351 356 static struct cpuid_info cpuid_info0;
352 357
353 358 /*
354 359 * These bit fields are defined by the Intel Application Note AP-485
355 360 * "Intel Processor Identification and the CPUID Instruction"
356 361 */
357 362 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20)
358 363 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16)
359 364 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12)
360 365 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8)
361 366 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0)
362 367 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4)
363 368
364 369 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx)
365 370 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx)
366 371 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx)
367 372 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx)
368 373
369 374 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0)
370 375 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7)
371 376 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16)
372 377 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24)
373 378
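As a worked example of the fn-1 bitfield macros above (eax value chosen purely for illustration): for cp_eax = 0x00100F42, CPI_FAMILY() = 0xf and CPI_FAMILY_XTD() = 0x1, so cpuid_pass1() derives cpi_family = 0xf + 0x1 = 0x10; CPI_MODEL() = 0x4 with CPI_MODEL_XTD() = 0x0 leaves cpi_model = 0x4, and CPI_STEP() = 0x2.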
374 379 #define CPI_MAXEAX_MAX 0x100 /* sanity control */
375 380 #define CPI_XMAXEAX_MAX 0x80000100
376 381 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */
377 382 #define CPI_FNB_ECX_MAX 0x20 /* sanity: max fn B levels */
378 383
379 384 /*
380 385 * Function 4 (Deterministic Cache Parameters) macros
381 386 * Defined by Intel Application Note AP-485
382 387 */
383 388 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26)
384 389 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14)
385 390 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9)
386 391 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8)
387 392 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5)
388 393 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0)
389 394 #define CPI_CPU_LEVEL_TYPE(regs) BITX((regs)->cp_ecx, 15, 8)
390 395
391 396 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22)
392 397 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12)
393 398 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0)
394 399
395 400 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0)
396 401
397 402 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0)
398 403
399 404
400 405 /*
401 406 * A couple of shorthand macros to identify "later" P6-family chips
402 407 * like the Pentium M and Core. First, the "older" P6-based stuff
403 408 * (loosely defined as "pre-Pentium-4"):
404 409 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon
405 410 */
406 411
407 412 #define IS_LEGACY_P6(cpi) ( \
408 413 cpi->cpi_family == 6 && \
409 414 (cpi->cpi_model == 1 || \
410 415 cpi->cpi_model == 3 || \
411 416 cpi->cpi_model == 5 || \
412 417 cpi->cpi_model == 6 || \
413 418 cpi->cpi_model == 7 || \
414 419 cpi->cpi_model == 8 || \
415 420 cpi->cpi_model == 0xA || \
416 421 cpi->cpi_model == 0xB) \
417 422 )
418 423
419 424 /* A "new F6" is everything with family 6 that's not the above */
420 425 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi))
421 426
422 427 /* Extended family/model support */
423 428 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \
424 429 cpi->cpi_family >= 0xf)
425 430
426 431 /*
427 432 * Info for monitor/mwait idle loop.
428 433 *
429 434 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's
430 435 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November
431 436 * 2006.
432 437 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual
433 438 * Documentation Updates" #33633, Rev 2.05, December 2006.
434 439 */
435 440 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */
 436  441  #define MWAIT_EXTENSIONS (0x00000002) /* extension supported */
437 442 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */
438 443 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON)
439 444 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2)
440 445 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1)
441 446 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0)
442 447 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0)
443 448 /*
444 449 * Number of sub-cstates for a given c-state.
445 450 */
446 451 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \
447 452 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)
448 453
449 454 /*
450 455 * XSAVE leaf 0xD enumeration
451 456 */
452 457 #define CPUID_LEAFD_2_YMM_OFFSET 576
453 458 #define CPUID_LEAFD_2_YMM_SIZE 256
454 459
455 460 /*
 456  461  * Functions we consume from cpuid_subr.c; don't publish these in a header
457 462 * file to try and keep people using the expected cpuid_* interfaces.
458 463 */
459 464 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t);
460 465 extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t);
461 466 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t);
462 467 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t);
463 468 extern uint_t _cpuid_vendorstr_to_vendorcode(char *);
464 469
465 470 /*
 466  471  * Apply various platform-dependent restrictions where the
467 472 * underlying platform restrictions mean the CPU can be marked
468 473 * as less capable than its cpuid instruction would imply.
469 474 */
470 475 #if defined(__xpv)
471 476 static void
472 477 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp)
473 478 {
474 479 switch (eax) {
475 480 case 1: {
476 481 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ?
477 482 0 : CPUID_INTC_EDX_MCA;
478 483 cp->cp_edx &=
479 484 ~(mcamask |
480 485 CPUID_INTC_EDX_PSE |
481 486 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
482 487 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR |
483 488 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT |
484 489 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
485 490 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT);
486 491 break;
487 492 }
488 493
489 494 case 0x80000001:
490 495 cp->cp_edx &=
491 496 ~(CPUID_AMD_EDX_PSE |
492 497 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
493 498 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE |
494 499 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 |
495 500 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
496 501 CPUID_AMD_EDX_TSCP);
497 502 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY;
498 503 break;
499 504 default:
500 505 break;
501 506 }
502 507
503 508 switch (vendor) {
504 509 case X86_VENDOR_Intel:
505 510 switch (eax) {
506 511 case 4:
507 512 /*
508 513 * Zero out the (ncores-per-chip - 1) field
509 514 */
510 515 cp->cp_eax &= 0x03fffffff;
511 516 break;
512 517 default:
513 518 break;
514 519 }
515 520 break;
516 521 case X86_VENDOR_AMD:
517 522 switch (eax) {
518 523
519 524 case 0x80000001:
520 525 cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D;
521 526 break;
522 527
523 528 case 0x80000008:
524 529 /*
525 530 * Zero out the (ncores-per-chip - 1) field
526 531 */
527 532 cp->cp_ecx &= 0xffffff00;
528 533 break;
529 534 default:
530 535 break;
531 536 }
532 537 break;
533 538 default:
534 539 break;
535 540 }
536 541 }
537 542 #else
538 543 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */
539 544 #endif
540 545
541 546 /*
542 547 * Some undocumented ways of patching the results of the cpuid
543 548 * instruction to permit running Solaris 10 on future cpus that
544 549 * we don't currently support. Could be set to non-zero values
545 550 * via settings in eeprom.
546 551 */
547 552
548 553 uint32_t cpuid_feature_ecx_include;
549 554 uint32_t cpuid_feature_ecx_exclude;
550 555 uint32_t cpuid_feature_edx_include;
551 556 uint32_t cpuid_feature_edx_exclude;
552 557
553 558 /*
554 559 * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs.
555 560 */
556 561 void
557 562 cpuid_alloc_space(cpu_t *cpu)
558 563 {
559 564 /*
560 565 * By convention, cpu0 is the boot cpu, which is set up
561 566 * before memory allocation is available. All other cpus get
562 567 * their cpuid_info struct allocated here.
563 568 */
564 569 ASSERT(cpu->cpu_id != 0);
565 570 ASSERT(cpu->cpu_m.mcpu_cpi == NULL);
566 571 cpu->cpu_m.mcpu_cpi =
567 572 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP);
568 573 }
569 574
570 575 void
571 576 cpuid_free_space(cpu_t *cpu)
572 577 {
573 578 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
574 579 int i;
575 580
576 581 ASSERT(cpi != NULL);
577 582 ASSERT(cpi != &cpuid_info0);
578 583
579 584 /*
580 585 * Free up any function 4 related dynamic storage
581 586 */
582 587 for (i = 1; i < cpi->cpi_std_4_size; i++)
583 588 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs));
584 589 if (cpi->cpi_std_4_size > 0)
585 590 kmem_free(cpi->cpi_std_4,
586 591 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *));
587 592
588 593 kmem_free(cpi, sizeof (*cpi));
589 594 cpu->cpu_m.mcpu_cpi = NULL;
590 595 }
591 596
592 597 #if !defined(__xpv)
593 598
594 599 /*
595 600 * Determine the type of the underlying platform. This is used to customize
596 601 * initialization of various subsystems (e.g. TSC). determine_platform() must
597 602 * only ever be called once to prevent two processors from seeing different
 598  603  * values of platform_type; it must be called before cpuid_pass1(), the
599 604 * earliest consumer to execute.
600 605 */
601 606 void
602 607 determine_platform(void)
603 608 {
604 609 struct cpuid_regs cp;
605 610 char *xen_str;
606 611 uint32_t xen_signature[4], base;
607 612
608 613 ASSERT(platform_type == -1);
609 614
610 615 platform_type = HW_NATIVE;
611 616
612 617 if (!enable_platform_detection)
613 618 return;
614 619
615 620 /*
616 621 * In a fully virtualized domain, Xen's pseudo-cpuid function
617 622 * returns a string representing the Xen signature in %ebx, %ecx,
618 623 * and %edx. %eax contains the maximum supported cpuid function.
619 624 * We need at least a (base + 2) leaf value to do what we want
620 625 * to do. Try different base values, since the hypervisor might
621 626 * use a different one depending on whether hyper-v emulation
622 627 * is switched on by default or not.
623 628 */
624 629 for (base = 0x40000000; base < 0x40010000; base += 0x100) {
625 630 cp.cp_eax = base;
626 631 (void) __cpuid_insn(&cp);
627 632 xen_signature[0] = cp.cp_ebx;
628 633 xen_signature[1] = cp.cp_ecx;
629 634 xen_signature[2] = cp.cp_edx;
630 635 xen_signature[3] = 0;
631 636 xen_str = (char *)xen_signature;
632 637 if (strcmp("XenVMMXenVMM", xen_str) == 0 &&
633 638 cp.cp_eax >= (base + 2)) {
634 639 platform_type = HW_XEN_HVM;
635 640 return;
636 641 }
637 642 }
638 643
639 644 if (vmware_platform()) /* running under vmware hypervisor? */
640 645 platform_type = HW_VMWARE;
641 646 }
642 647
643 648 int
644 649 get_hwenv(void)
645 650 {
646 651 ASSERT(platform_type != -1);
647 652 return (platform_type);
648 653 }
649 654
650 655 int
651 656 is_controldom(void)
652 657 {
653 658 return (0);
654 659 }
655 660
656 661 #else
657 662
658 663 int
659 664 get_hwenv(void)
660 665 {
661 666 return (HW_XEN_PV);
662 667 }
663 668
664 669 int
665 670 is_controldom(void)
666 671 {
667 672 return (DOMAIN_IS_INITDOMAIN(xen_info));
668 673 }
669 674
670 675 #endif /* __xpv */
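A minimal sketch of a hypothetical consumer of the routines above; the HW_* constants are the same ones determine_platform() and get_hwenv() traffic in:

        /* hypothetical: pick a code path based on the detected platform */
        switch (get_hwenv()) {
        case HW_XEN_HVM:
        case HW_VMWARE:
                /* virtualized: e.g. be more conservative about the TSC */
                break;
        case HW_NATIVE:
        default:
                break;
        }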
671 676
672 677 static void
673 678 cpuid_intel_getids(cpu_t *cpu, void *feature)
674 679 {
675 680 uint_t i;
676 681 uint_t chipid_shift = 0;
677 682 uint_t coreid_shift = 0;
678 683 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
679 684
680 685 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1)
681 686 chipid_shift++;
682 687
683 688 cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift;
684 689 cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1);
685 690
686 691 if (is_x86_feature(feature, X86FSET_CMP)) {
687 692 /*
688 693 * Multi-core (and possibly multi-threaded)
689 694 * processors.
690 695 */
691 696 uint_t ncpu_per_core;
692 697 if (cpi->cpi_ncore_per_chip == 1)
693 698 ncpu_per_core = cpi->cpi_ncpu_per_chip;
694 699 else if (cpi->cpi_ncore_per_chip > 1)
695 700 ncpu_per_core = cpi->cpi_ncpu_per_chip /
696 701 cpi->cpi_ncore_per_chip;
697 702 /*
698 703 * 8bit APIC IDs on dual core Pentiums
699 704 * look like this:
700 705 *
701 706 * +-----------------------+------+------+
702 707 * | Physical Package ID | MC | HT |
703 708 * +-----------------------+------+------+
704 709 * <------- chipid -------->
705 710 * <------- coreid --------------->
706 711 * <--- clogid -->
707 712 * <------>
708 713 * pkgcoreid
709 714 *
710 715 * Where the number of bits necessary to
711 716 * represent MC and HT fields together equals
712 717 * to the minimum number of bits necessary to
713 718 * store the value of cpi->cpi_ncpu_per_chip.
714 719 * Of those bits, the MC part uses the number
715 720 * of bits necessary to store the value of
716 721 * cpi->cpi_ncore_per_chip.
717 722 */
718 723 for (i = 1; i < ncpu_per_core; i <<= 1)
719 724 coreid_shift++;
[ 364 lines elided ]
720 725 cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift;
721 726 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
722 727 } else if (is_x86_feature(feature, X86FSET_HTT)) {
723 728 /*
724 729 * Single-core multi-threaded processors.
725 730 */
726 731 cpi->cpi_coreid = cpi->cpi_chipid;
727 732 cpi->cpi_pkgcoreid = 0;
728 733 }
729 734 cpi->cpi_procnodeid = cpi->cpi_chipid;
735 + cpi->cpi_compunitid = cpi->cpi_coreid;
730 736 }
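Worked example of the shifts above (APIC ID chosen for illustration): with cpi_ncpu_per_chip = 4 and cpi_ncore_per_chip = 2, chipid_shift = 2 and coreid_shift = 1. An APIC ID of 0xb (binary 1011) then gives cpi_chipid = 0xb >> 2 = 2, cpi_clogid = 0xb & 3 = 3, cpi_coreid = 0xb >> 1 = 5 and cpi_pkgcoreid = 3 >> 1 = 1, matching the package/MC/HT diagram in the comment.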
731 737
732 738 static void
733 739 cpuid_amd_getids(cpu_t *cpu)
734 740 {
735 741 int i, first_half, coreidsz;
736 742 uint32_t nb_caps_reg;
737 743 uint_t node2_1;
738 744 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
745 + struct cpuid_regs *cp;
739 746
740 747 /*
741 748 * AMD CMP chips currently have a single thread per core.
742 749 *
743 750 * Since no two cpus share a core we must assign a distinct coreid
744 751 * per cpu, and we do this by using the cpu_id. This scheme does not,
745 752 * however, guarantee that sibling cores of a chip will have sequential
746 753 * coreids starting at a multiple of the number of cores per chip -
747 754 * that is usually the case, but if the ACPI MADT table is presented
748 755 * in a different order then we need to perform a few more gymnastics
749 756 * for the pkgcoreid.
750 757 *
751 758 * All processors in the system have the same number of enabled
752 759 * cores. Cores within a processor are always numbered sequentially
753 760 * from 0 regardless of how many or which are disabled, and there
 754  761  * is no way for the operating system to discover the real core id when some
755 762 * are disabled.
763 + *
764 + * In family 0x15, the cores come in pairs called compute units. They
 765  + * share L1I and L2 caches and the FPU. Enumeration of this feature is
766 + * simplified by the new topology extensions CPUID leaf, indicated by the
767 + * X86 feature X86FSET_TOPOEXT.
756 768 */
757 769
758 770 cpi->cpi_coreid = cpu->cpu_id;
771 + cpi->cpi_compunitid = cpu->cpu_id;
759 772
760 773 if (cpi->cpi_xmaxeax >= 0x80000008) {
761 774
762 775 coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12);
763 776
764 777 /*
765 778 * In AMD parlance chip is really a node while Solaris
766 779 * sees chip as equivalent to socket/package.
767 780 */
768 781 cpi->cpi_ncore_per_chip =
769 782 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1;
770 783 if (coreidsz == 0) {
771 784 /* Use legacy method */
772 785 for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1)
773 786 coreidsz++;
774 787 if (coreidsz == 0)
775 788 coreidsz = 1;
776 789 }
[ 8 lines elided ]
777 790 } else {
778 791 /* Assume single-core part */
779 792 cpi->cpi_ncore_per_chip = 1;
780 793 coreidsz = 1;
781 794 }
782 795
783 796 cpi->cpi_clogid = cpi->cpi_pkgcoreid =
784 797 cpi->cpi_apicid & ((1<<coreidsz) - 1);
785 798 cpi->cpi_ncpu_per_chip = cpi->cpi_ncore_per_chip;
786 799
787 - /* Get nodeID */
788 - if (cpi->cpi_family == 0xf) {
800 + /* Get node ID, compute unit ID */
801 + if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
802 + cpi->cpi_xmaxeax >= 0x8000001e) {
803 + cp = &cpi->cpi_extd[0x1e];
804 + cp->cp_eax = 0x8000001e;
805 + (void) __cpuid_insn(cp);
806 +
807 + cpi->cpi_procnodes_per_pkg = BITX(cp->cp_ecx, 10, 8) + 1;
808 + cpi->cpi_procnodeid = BITX(cp->cp_ecx, 7, 0);
809 + cpi->cpi_cores_per_compunit = BITX(cp->cp_ebx, 15, 8) + 1;
810 + cpi->cpi_compunitid = BITX(cp->cp_ebx, 7, 0);
811 + } else if (cpi->cpi_family == 0xf || cpi->cpi_family >= 0x11) {
789 812 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
790 813 cpi->cpi_chipid = cpi->cpi_procnodeid;
791 814 } else if (cpi->cpi_family == 0x10) {
792 815 /*
793 816 * See if we are a multi-node processor.
794 817 * All processors in the system have the same number of nodes
795 818 */
796 819 nb_caps_reg = pci_getl_func(0, 24, 3, 0xe8);
797 820 if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) {
798 821 /* Single-node */
799 822 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5,
800 823 coreidsz);
801 824 cpi->cpi_chipid = cpi->cpi_procnodeid;
802 825 } else {
803 826
804 827 /*
805 828 * Multi-node revision D (2 nodes per package
806 829 * are supported)
807 830 */
808 831 cpi->cpi_procnodes_per_pkg = 2;
809 832
810 833 first_half = (cpi->cpi_pkgcoreid <=
811 834 (cpi->cpi_ncore_per_chip/2 - 1));
812 835
813 836 if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) {
814 837 /* We are BSP */
815 838 cpi->cpi_procnodeid = (first_half ? 0 : 1);
816 839 cpi->cpi_chipid = cpi->cpi_procnodeid >> 1;
817 840 } else {
818 841
819 842 /* We are AP */
820 843 /* NodeId[2:1] bits to use for reading F3xe8 */
821 844 node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1;
822 845
823 846 nb_caps_reg =
824 847 pci_getl_func(0, 24 + node2_1, 3, 0xe8);
825 848
826 849 /*
827 850 * Check IntNodeNum bit (31:30, but bit 31 is
828 851 * always 0 on dual-node processors)
829 852 */
[ 31 lines elided ]
830 853 if (BITX(nb_caps_reg, 30, 30) == 0)
831 854 cpi->cpi_procnodeid = node2_1 +
832 855 !first_half;
833 856 else
834 857 cpi->cpi_procnodeid = node2_1 +
835 858 first_half;
836 859
837 860 cpi->cpi_chipid = cpi->cpi_procnodeid >> 1;
838 861 }
839 862 }
840 - } else if (cpi->cpi_family >= 0x11) {
841 - cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
842 - cpi->cpi_chipid = cpi->cpi_procnodeid;
843 863 } else {
844 864 cpi->cpi_procnodeid = 0;
845 865 cpi->cpi_chipid = cpi->cpi_procnodeid;
846 866 }
847 867 }
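Worked example of the new leaf 0x8000001e decode (register values chosen for illustration): if the leaf returns ecx = 0x101 and ebx = 0x103, then cpi_procnodes_per_pkg = BITX(ecx, 10, 8) + 1 = 2, cpi_procnodeid = BITX(ecx, 7, 0) = 1, cpi_cores_per_compunit = BITX(ebx, 15, 8) + 1 = 2 and cpi_compunitid = BITX(ebx, 7, 0) = 3; i.e. this CPU is one of a pair of cores in compute unit 3, on node 1 of a two-node package.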
848 868
849 869 /*
850 870 * Setup XFeature_Enabled_Mask register. Required by xsave feature.
851 871 */
852 872 void
853 873 setup_xfem(void)
854 874 {
855 875 uint64_t flags = XFEATURE_LEGACY_FP;
856 876
857 877 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
858 878
859 879 if (is_x86_feature(x86_featureset, X86FSET_SSE))
860 880 flags |= XFEATURE_SSE;
861 881
862 882 if (is_x86_feature(x86_featureset, X86FSET_AVX))
863 883 flags |= XFEATURE_AVX;
864 884
865 885 set_xcr(XFEATURE_ENABLED_MASK, flags);
866 886
867 887 xsave_bv_all = flags;
868 888 }
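Assuming the XFEATURE_* constants follow the architectural XCR0 bit layout (x87 = bit 0, SSE = bit 1, AVX = bit 2), a CPU with both SSE and AVX ends up with flags = XFEATURE_LEGACY_FP | XFEATURE_SSE | XFEATURE_AVX = 0x1 | 0x2 | 0x4 = 0x7, which is written to XCR0 via set_xcr(XFEATURE_ENABLED_MASK, flags) and mirrored into xsave_bv_all.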
869 889
870 890 void
871 891 cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
872 892 {
873 893 uint32_t mask_ecx, mask_edx;
874 894 struct cpuid_info *cpi;
875 895 struct cpuid_regs *cp;
876 896 int xcpuid;
877 897 #if !defined(__xpv)
878 898 extern int idle_cpu_prefer_mwait;
879 899 #endif
880 900
881 901 /*
882 902 * Space statically allocated for BSP, ensure pointer is set
883 903 */
884 904 if (cpu->cpu_id == 0) {
885 905 if (cpu->cpu_m.mcpu_cpi == NULL)
886 906 cpu->cpu_m.mcpu_cpi = &cpuid_info0;
887 907 }
888 908
889 909 add_x86_feature(featureset, X86FSET_CPUID);
890 910
891 911 cpi = cpu->cpu_m.mcpu_cpi;
892 912 ASSERT(cpi != NULL);
893 913 cp = &cpi->cpi_std[0];
894 914 cp->cp_eax = 0;
895 915 cpi->cpi_maxeax = __cpuid_insn(cp);
896 916 {
897 917 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr;
898 918 *iptr++ = cp->cp_ebx;
899 919 *iptr++ = cp->cp_edx;
900 920 *iptr++ = cp->cp_ecx;
901 921 *(char *)&cpi->cpi_vendorstr[12] = '\0';
902 922 }
903 923
904 924 cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr);
905 925 x86_vendor = cpi->cpi_vendor; /* for compatibility */
906 926
907 927 /*
908 928 * Limit the range in case of weird hardware
909 929 */
910 930 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX)
911 931 cpi->cpi_maxeax = CPI_MAXEAX_MAX;
912 932 if (cpi->cpi_maxeax < 1)
913 933 goto pass1_done;
914 934
915 935 cp = &cpi->cpi_std[1];
916 936 cp->cp_eax = 1;
917 937 (void) __cpuid_insn(cp);
918 938
919 939 /*
920 940 * Extract identifying constants for easy access.
921 941 */
922 942 cpi->cpi_model = CPI_MODEL(cpi);
923 943 cpi->cpi_family = CPI_FAMILY(cpi);
924 944
925 945 if (cpi->cpi_family == 0xf)
926 946 cpi->cpi_family += CPI_FAMILY_XTD(cpi);
927 947
928 948 /*
929 949 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf.
930 950 * Intel, and presumably everyone else, uses model == 0xf, as
931 951 * one would expect (max value means possible overflow). Sigh.
932 952 */
933 953
934 954 switch (cpi->cpi_vendor) {
935 955 case X86_VENDOR_Intel:
936 956 if (IS_EXTENDED_MODEL_INTEL(cpi))
937 957 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
938 958 break;
939 959 case X86_VENDOR_AMD:
940 960 if (CPI_FAMILY(cpi) == 0xf)
941 961 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
942 962 break;
943 963 default:
944 964 if (cpi->cpi_model == 0xf)
945 965 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
946 966 break;
947 967 }
948 968
949 969 cpi->cpi_step = CPI_STEP(cpi);
950 970 cpi->cpi_brandid = CPI_BRANDID(cpi);
951 971
952 972 /*
953 973 * *default* assumptions:
954 974 * - believe %edx feature word
955 975 * - ignore %ecx feature word
956 976 * - 32-bit virtual and physical addressing
957 977 */
958 978 mask_edx = 0xffffffff;
959 979 mask_ecx = 0;
960 980
961 981 cpi->cpi_pabits = cpi->cpi_vabits = 32;
962 982
963 983 switch (cpi->cpi_vendor) {
964 984 case X86_VENDOR_Intel:
965 985 if (cpi->cpi_family == 5)
966 986 x86_type = X86_TYPE_P5;
967 987 else if (IS_LEGACY_P6(cpi)) {
968 988 x86_type = X86_TYPE_P6;
969 989 pentiumpro_bug4046376 = 1;
970 990 pentiumpro_bug4064495 = 1;
971 991 /*
972 992 * Clear the SEP bit when it was set erroneously
973 993 */
974 994 if (cpi->cpi_model < 3 && cpi->cpi_step < 3)
975 995 cp->cp_edx &= ~CPUID_INTC_EDX_SEP;
976 996 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) {
977 997 x86_type = X86_TYPE_P4;
978 998 /*
979 999 * We don't currently depend on any of the %ecx
980 1000 * features until Prescott, so we'll only check
981 1001 * this from P4 onwards. We might want to revisit
982 1002 * that idea later.
983 1003 */
984 1004 mask_ecx = 0xffffffff;
985 1005 } else if (cpi->cpi_family > 0xf)
986 1006 mask_ecx = 0xffffffff;
987 1007 /*
988 1008 * We don't support MONITOR/MWAIT if leaf 5 is not available
989 1009 * to obtain the monitor linesize.
990 1010 */
991 1011 if (cpi->cpi_maxeax < 5)
992 1012 mask_ecx &= ~CPUID_INTC_ECX_MON;
993 1013 break;
994 1014 case X86_VENDOR_IntelClone:
995 1015 default:
996 1016 break;
997 1017 case X86_VENDOR_AMD:
998 1018 #if defined(OPTERON_ERRATUM_108)
999 1019 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) {
1000 1020 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0;
1001 1021 cpi->cpi_model = 0xc;
1002 1022 } else
1003 1023 #endif
1004 1024 if (cpi->cpi_family == 5) {
1005 1025 /*
1006 1026 * AMD K5 and K6
1007 1027 *
1008 1028 * These CPUs have an incomplete implementation
1009 1029 * of MCA/MCE which we mask away.
1010 1030 */
1011 1031 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA);
1012 1032
1013 1033 /*
1014 1034 * Model 0 uses the wrong (APIC) bit
1015 1035 * to indicate PGE. Fix it here.
1016 1036 */
1017 1037 if (cpi->cpi_model == 0) {
1018 1038 if (cp->cp_edx & 0x200) {
1019 1039 cp->cp_edx &= ~0x200;
1020 1040 cp->cp_edx |= CPUID_INTC_EDX_PGE;
1021 1041 }
1022 1042 }
1023 1043
1024 1044 /*
1025 1045 * Early models had problems w/ MMX; disable.
1026 1046 */
1027 1047 if (cpi->cpi_model < 6)
1028 1048 mask_edx &= ~CPUID_INTC_EDX_MMX;
1029 1049 }
1030 1050
1031 1051 /*
1032 1052 * For newer families, SSE3 and CX16, at least, are valid;
1033 1053 * enable all
1034 1054 */
1035 1055 if (cpi->cpi_family >= 0xf)
1036 1056 mask_ecx = 0xffffffff;
1037 1057 /*
1038 1058 * We don't support MONITOR/MWAIT if leaf 5 is not available
1039 1059 * to obtain the monitor linesize.
1040 1060 */
1041 1061 if (cpi->cpi_maxeax < 5)
1042 1062 mask_ecx &= ~CPUID_INTC_ECX_MON;
1043 1063
1044 1064 #if !defined(__xpv)
1045 1065 /*
1046 1066 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD
1047 1067 * processors. AMD does not intend MWAIT to be used in the cpu
1048 1068 * idle loop on current and future processors. 10h and future
1049 1069 * AMD processors use more power in MWAIT than HLT.
1050 1070 * Pre-family-10h Opterons do not have the MWAIT instruction.
1051 1071 */
1052 1072 idle_cpu_prefer_mwait = 0;
1053 1073 #endif
1054 1074
1055 1075 break;
1056 1076 case X86_VENDOR_TM:
1057 1077 /*
1058 1078 * workaround the NT workaround in CMS 4.1
1059 1079 */
1060 1080 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 &&
1061 1081 (cpi->cpi_step == 2 || cpi->cpi_step == 3))
1062 1082 cp->cp_edx |= CPUID_INTC_EDX_CX8;
1063 1083 break;
1064 1084 case X86_VENDOR_Centaur:
1065 1085 /*
1066 1086 * workaround the NT workarounds again
1067 1087 */
1068 1088 if (cpi->cpi_family == 6)
1069 1089 cp->cp_edx |= CPUID_INTC_EDX_CX8;
1070 1090 break;
1071 1091 case X86_VENDOR_Cyrix:
1072 1092 /*
1073 1093 * We rely heavily on the probing in locore
1074 1094 * to actually figure out what parts, if any,
1075 1095 * of the Cyrix cpuid instruction to believe.
1076 1096 */
1077 1097 switch (x86_type) {
1078 1098 case X86_TYPE_CYRIX_486:
1079 1099 mask_edx = 0;
1080 1100 break;
1081 1101 case X86_TYPE_CYRIX_6x86:
1082 1102 mask_edx = 0;
1083 1103 break;
1084 1104 case X86_TYPE_CYRIX_6x86L:
1085 1105 mask_edx =
1086 1106 CPUID_INTC_EDX_DE |
1087 1107 CPUID_INTC_EDX_CX8;
1088 1108 break;
1089 1109 case X86_TYPE_CYRIX_6x86MX:
1090 1110 mask_edx =
1091 1111 CPUID_INTC_EDX_DE |
1092 1112 CPUID_INTC_EDX_MSR |
1093 1113 CPUID_INTC_EDX_CX8 |
1094 1114 CPUID_INTC_EDX_PGE |
1095 1115 CPUID_INTC_EDX_CMOV |
1096 1116 CPUID_INTC_EDX_MMX;
1097 1117 break;
1098 1118 case X86_TYPE_CYRIX_GXm:
1099 1119 mask_edx =
1100 1120 CPUID_INTC_EDX_MSR |
1101 1121 CPUID_INTC_EDX_CX8 |
1102 1122 CPUID_INTC_EDX_CMOV |
1103 1123 CPUID_INTC_EDX_MMX;
1104 1124 break;
1105 1125 case X86_TYPE_CYRIX_MediaGX:
1106 1126 break;
1107 1127 case X86_TYPE_CYRIX_MII:
1108 1128 case X86_TYPE_VIA_CYRIX_III:
1109 1129 mask_edx =
1110 1130 CPUID_INTC_EDX_DE |
1111 1131 CPUID_INTC_EDX_TSC |
1112 1132 CPUID_INTC_EDX_MSR |
1113 1133 CPUID_INTC_EDX_CX8 |
1114 1134 CPUID_INTC_EDX_PGE |
1115 1135 CPUID_INTC_EDX_CMOV |
1116 1136 CPUID_INTC_EDX_MMX;
1117 1137 break;
1118 1138 default:
1119 1139 break;
1120 1140 }
1121 1141 break;
1122 1142 }
1123 1143
1124 1144 #if defined(__xpv)
1125 1145 /*
1126 1146 * Do not support MONITOR/MWAIT under a hypervisor
1127 1147 */
1128 1148 mask_ecx &= ~CPUID_INTC_ECX_MON;
1129 1149 /*
1130 1150 * Do not support XSAVE under a hypervisor for now
1131 1151 */
1132 1152 xsave_force_disable = B_TRUE;
1133 1153
1134 1154 #endif /* __xpv */
1135 1155
1136 1156 if (xsave_force_disable) {
1137 1157 mask_ecx &= ~CPUID_INTC_ECX_XSAVE;
1138 1158 mask_ecx &= ~CPUID_INTC_ECX_AVX;
1139 1159 }
1140 1160
1141 1161 /*
1142 1162 * Now we've figured out the masks that determine
1143 1163 * which bits we choose to believe, apply the masks
1144 1164 * to the feature words, then map the kernel's view
1145 1165 * of these feature words into its feature word.
1146 1166 */
1147 1167 cp->cp_edx &= mask_edx;
1148 1168 cp->cp_ecx &= mask_ecx;
1149 1169
1150 1170 /*
1151 1171 * apply any platform restrictions (we don't call this
1152 1172 * immediately after __cpuid_insn here, because we need the
1153 1173 * workarounds applied above first)
1154 1174 */
1155 1175 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp);
1156 1176
1157 1177 /*
1158 1178 * fold in overrides from the "eeprom" mechanism
1159 1179 */
1160 1180 cp->cp_edx |= cpuid_feature_edx_include;
1161 1181 cp->cp_edx &= ~cpuid_feature_edx_exclude;
1162 1182
1163 1183 cp->cp_ecx |= cpuid_feature_ecx_include;
1164 1184 cp->cp_ecx &= ~cpuid_feature_ecx_exclude;
1165 1185
1166 1186 if (cp->cp_edx & CPUID_INTC_EDX_PSE) {
1167 1187 add_x86_feature(featureset, X86FSET_LARGEPAGE);
1168 1188 }
1169 1189 if (cp->cp_edx & CPUID_INTC_EDX_TSC) {
1170 1190 add_x86_feature(featureset, X86FSET_TSC);
1171 1191 }
1172 1192 if (cp->cp_edx & CPUID_INTC_EDX_MSR) {
1173 1193 add_x86_feature(featureset, X86FSET_MSR);
1174 1194 }
1175 1195 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) {
1176 1196 add_x86_feature(featureset, X86FSET_MTRR);
1177 1197 }
1178 1198 if (cp->cp_edx & CPUID_INTC_EDX_PGE) {
1179 1199 add_x86_feature(featureset, X86FSET_PGE);
1180 1200 }
1181 1201 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) {
1182 1202 add_x86_feature(featureset, X86FSET_CMOV);
1183 1203 }
1184 1204 if (cp->cp_edx & CPUID_INTC_EDX_MMX) {
1185 1205 add_x86_feature(featureset, X86FSET_MMX);
1186 1206 }
1187 1207 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 &&
1188 1208 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) {
1189 1209 add_x86_feature(featureset, X86FSET_MCA);
1190 1210 }
1191 1211 if (cp->cp_edx & CPUID_INTC_EDX_PAE) {
1192 1212 add_x86_feature(featureset, X86FSET_PAE);
1193 1213 }
1194 1214 if (cp->cp_edx & CPUID_INTC_EDX_CX8) {
1195 1215 add_x86_feature(featureset, X86FSET_CX8);
1196 1216 }
1197 1217 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) {
1198 1218 add_x86_feature(featureset, X86FSET_CX16);
1199 1219 }
1200 1220 if (cp->cp_edx & CPUID_INTC_EDX_PAT) {
1201 1221 add_x86_feature(featureset, X86FSET_PAT);
1202 1222 }
1203 1223 if (cp->cp_edx & CPUID_INTC_EDX_SEP) {
1204 1224 add_x86_feature(featureset, X86FSET_SEP);
1205 1225 }
1206 1226 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) {
1207 1227 /*
1208 1228 * In our implementation, fxsave/fxrstor
1209 1229 * are prerequisites before we'll even
1210 1230 * try and do SSE things.
1211 1231 */
1212 1232 if (cp->cp_edx & CPUID_INTC_EDX_SSE) {
1213 1233 add_x86_feature(featureset, X86FSET_SSE);
1214 1234 }
1215 1235 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) {
1216 1236 add_x86_feature(featureset, X86FSET_SSE2);
1217 1237 }
1218 1238 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) {
1219 1239 add_x86_feature(featureset, X86FSET_SSE3);
1220 1240 }
1221 1241 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) {
1222 1242 add_x86_feature(featureset, X86FSET_SSSE3);
1223 1243 }
1224 1244 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) {
1225 1245 add_x86_feature(featureset, X86FSET_SSE4_1);
1226 1246 }
1227 1247 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) {
1228 1248 add_x86_feature(featureset, X86FSET_SSE4_2);
1229 1249 }
1230 1250 if (cp->cp_ecx & CPUID_INTC_ECX_AES) {
1231 1251 add_x86_feature(featureset, X86FSET_AES);
1232 1252 }
1233 1253 if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) {
1234 1254 add_x86_feature(featureset, X86FSET_PCLMULQDQ);
1235 1255 }
1236 1256
1237 1257 if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) {
1238 1258 add_x86_feature(featureset, X86FSET_XSAVE);
1239 1259 /* We only test AVX when there is XSAVE */
1240 1260 if (cp->cp_ecx & CPUID_INTC_ECX_AVX) {
1241 1261 add_x86_feature(featureset,
1242 1262 X86FSET_AVX);
1243 1263 }
1244 1264 }
1245 1265 }
1246 1266 if (cp->cp_edx & CPUID_INTC_EDX_DE) {
1247 1267 add_x86_feature(featureset, X86FSET_DE);
1248 1268 }
1249 1269 #if !defined(__xpv)
1250 1270 if (cp->cp_ecx & CPUID_INTC_ECX_MON) {
1251 1271
1252 1272 /*
1253 1273 * We require the CLFLUSH instruction for erratum workaround
1254 1274 * to use MONITOR/MWAIT.
1255 1275 */
1256 1276 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
1257 1277 cpi->cpi_mwait.support |= MWAIT_SUPPORT;
1258 1278 add_x86_feature(featureset, X86FSET_MWAIT);
1259 1279 } else {
1260 1280 extern int idle_cpu_assert_cflush_monitor;
1261 1281
1262 1282 /*
1263 1283 * All processors we are aware of which have
1264 1284 * MONITOR/MWAIT also have CLFLUSH.
1265 1285 */
1266 1286 if (idle_cpu_assert_cflush_monitor) {
1267 1287 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) &&
1268 1288 (cp->cp_edx & CPUID_INTC_EDX_CLFSH));
1269 1289 }
1270 1290 }
1271 1291 }
1272 1292 #endif /* __xpv */
1273 1293
1274 1294 if (cp->cp_ecx & CPUID_INTC_ECX_VMX) {
1275 1295 add_x86_feature(featureset, X86FSET_VMX);
1276 1296 }
1277 1297
1278 1298 /*
 1279 1299  * Only need it the first time; the rest of the cpus will follow suit.
 1280 1300  * We only capture this for the boot cpu.
1281 1301 */
1282 1302 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
1283 1303 add_x86_feature(featureset, X86FSET_CLFSH);
1284 1304 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8);
1285 1305 }
1286 1306 if (is_x86_feature(featureset, X86FSET_PAE))
1287 1307 cpi->cpi_pabits = 36;
1288 1308
1289 1309 /*
1290 1310 * Hyperthreading configuration is slightly tricky on Intel
1291 1311 * and pure clones, and even trickier on AMD.
1292 1312 *
1293 1313 * (AMD chose to set the HTT bit on their CMP processors,
1294 1314 * even though they're not actually hyperthreaded. Thus it
1295 1315 * takes a bit more work to figure out what's really going
1296 1316 * on ... see the handling of the CMP_LGCY bit below)
1297 1317 */
1298 1318 if (cp->cp_edx & CPUID_INTC_EDX_HTT) {
1299 1319 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi);
1300 1320 if (cpi->cpi_ncpu_per_chip > 1)
1301 1321 add_x86_feature(featureset, X86FSET_HTT);
1302 1322 } else {
1303 1323 cpi->cpi_ncpu_per_chip = 1;
1304 1324 }
1305 1325
1306 1326 /*
1307 1327 * Work on the "extended" feature information, doing
1308 1328 * some basic initialization for cpuid_pass2()
1309 1329 */
1310 1330 xcpuid = 0;
1311 1331 switch (cpi->cpi_vendor) {
1312 1332 case X86_VENDOR_Intel:
1313 1333 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf)
1314 1334 xcpuid++;
1315 1335 break;
1316 1336 case X86_VENDOR_AMD:
1317 1337 if (cpi->cpi_family > 5 ||
1318 1338 (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
1319 1339 xcpuid++;
1320 1340 break;
1321 1341 case X86_VENDOR_Cyrix:
1322 1342 /*
1323 1343 * Only these Cyrix CPUs are -known- to support
1324 1344 * extended cpuid operations.
1325 1345 */
1326 1346 if (x86_type == X86_TYPE_VIA_CYRIX_III ||
1327 1347 x86_type == X86_TYPE_CYRIX_GXm)
1328 1348 xcpuid++;
1329 1349 break;
1330 1350 case X86_VENDOR_Centaur:
1331 1351 case X86_VENDOR_TM:
1332 1352 default:
1333 1353 xcpuid++;
1334 1354 break;
1335 1355 }
1336 1356
1337 1357 if (xcpuid) {
1338 1358 cp = &cpi->cpi_extd[0];
1339 1359 cp->cp_eax = 0x80000000;
1340 1360 cpi->cpi_xmaxeax = __cpuid_insn(cp);
1341 1361 }
1342 1362
1343 1363 if (cpi->cpi_xmaxeax & 0x80000000) {
1344 1364
1345 1365 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX)
1346 1366 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX;
1347 1367
1348 1368 switch (cpi->cpi_vendor) {
1349 1369 case X86_VENDOR_Intel:
1350 1370 case X86_VENDOR_AMD:
1351 1371 if (cpi->cpi_xmaxeax < 0x80000001)
1352 1372 break;
1353 1373 cp = &cpi->cpi_extd[1];
1354 1374 cp->cp_eax = 0x80000001;
1355 1375 (void) __cpuid_insn(cp);
1356 1376
1357 1377 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1358 1378 cpi->cpi_family == 5 &&
1359 1379 cpi->cpi_model == 6 &&
1360 1380 cpi->cpi_step == 6) {
1361 1381 /*
1362 1382 * K6 model 6 uses bit 10 to indicate SYSC
1363 1383 * Later models use bit 11. Fix it here.
1364 1384 */
1365 1385 if (cp->cp_edx & 0x400) {
1366 1386 cp->cp_edx &= ~0x400;
1367 1387 cp->cp_edx |= CPUID_AMD_EDX_SYSC;
1368 1388 }
1369 1389 }
1370 1390
1371 1391 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp);
1372 1392
1373 1393 /*
1374 1394 * Compute the additions to the kernel's feature word.
1375 1395 */
1376 1396 if (cp->cp_edx & CPUID_AMD_EDX_NX) {
1377 1397 add_x86_feature(featureset, X86FSET_NX);
1378 1398 }
1379 1399
1380 1400 /*
1381 1401 * Regardless whether or not we boot 64-bit,
1382 1402 * we should have a way to identify whether
1383 1403 * the CPU is capable of running 64-bit.
1384 1404 */
1385 1405 if (cp->cp_edx & CPUID_AMD_EDX_LM) {
1386 1406 add_x86_feature(featureset, X86FSET_64);
1387 1407 }
1388 1408
1389 1409 #if defined(__amd64)
1390 1410 /* 1 GB large page - enable only for 64 bit kernel */
1391 1411 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) {
1392 1412 add_x86_feature(featureset, X86FSET_1GPG);
1393 1413 }
1394 1414 #endif
1395 1415
1396 1416 if ((cpi->cpi_vendor == X86_VENDOR_AMD) &&
1397 1417 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) &&
1398 1418 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) {
1399 1419 add_x86_feature(featureset, X86FSET_SSE4A);
1400 1420 }
1401 1421
1402 1422 /*
1403 1423 * If both the HTT and CMP_LGCY bits are set,
1404 1424 * then we're not actually HyperThreaded. Read
1405 1425 * "AMD CPUID Specification" for more details.
1406 1426 */
1407 1427 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1408 1428 is_x86_feature(featureset, X86FSET_HTT) &&
1409 1429 (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) {
1410 1430 remove_x86_feature(featureset, X86FSET_HTT);
1411 1431 add_x86_feature(featureset, X86FSET_CMP);
1412 1432 }
1413 1433 #if defined(__amd64)
1414 1434 /*
1415 1435 * It's really tricky to support syscall/sysret in
1416 1436 * the i386 kernel; we rely on sysenter/sysexit
1417 1437 * instead. In the amd64 kernel, things are -way-
1418 1438 * better.
1419 1439 */
1420 1440 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) {
1421 1441 add_x86_feature(featureset, X86FSET_ASYSC);
1422 1442 }
1423 1443
1424 1444 /*
1425 1445 * While we're thinking about system calls, note
1426 1446 * that AMD processors don't support sysenter
1427 1447 * in long mode at all, so don't try to program them.
1428 1448 */
1429 1449 if (x86_vendor == X86_VENDOR_AMD) {
[ 577 lines elided ]
1430 1450 remove_x86_feature(featureset, X86FSET_SEP);
1431 1451 }
1432 1452 #endif
1433 1453 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) {
1434 1454 add_x86_feature(featureset, X86FSET_TSCP);
1435 1455 }
1436 1456
1437 1457 if (cp->cp_ecx & CPUID_AMD_ECX_SVM) {
1438 1458 add_x86_feature(featureset, X86FSET_SVM);
1439 1459 }
1460 +
1461 + if (cp->cp_ecx & CPUID_AMD_ECX_TOPOEXT) {
1462 + add_x86_feature(featureset, X86FSET_TOPOEXT);
1463 + }
1440 1464 break;
1441 1465 default:
1442 1466 break;
1443 1467 }
1444 1468
1445 1469 /*
1446 1470 * Get CPUID data about processor cores and hyperthreads.
1447 1471 */
1448 1472 switch (cpi->cpi_vendor) {
1449 1473 case X86_VENDOR_Intel:
1450 1474 if (cpi->cpi_maxeax >= 4) {
1451 1475 cp = &cpi->cpi_std[4];
1452 1476 cp->cp_eax = 4;
1453 1477 cp->cp_ecx = 0;
1454 1478 (void) __cpuid_insn(cp);
1455 1479 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp);
1456 1480 }
1457 1481 /*FALLTHROUGH*/
1458 1482 case X86_VENDOR_AMD:
1459 1483 if (cpi->cpi_xmaxeax < 0x80000008)
1460 1484 break;
1461 1485 cp = &cpi->cpi_extd[8];
1462 1486 cp->cp_eax = 0x80000008;
1463 1487 (void) __cpuid_insn(cp);
1464 1488 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp);
1465 1489
1466 1490 /*
1467 1491 * Virtual and physical address limits from
1468 1492 * cpuid override previously guessed values.
1469 1493 */
1470 1494 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0);
1471 1495 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8);
1472 1496 break;
1473 1497 default:
1474 1498 break;
1475 1499 }
1476 1500
1477 1501 /*
1478 1502 * Derive the number of cores per chip
1479 1503 */
1480 1504 switch (cpi->cpi_vendor) {
1481 1505 case X86_VENDOR_Intel:
1482 1506 if (cpi->cpi_maxeax < 4) {
1483 1507 cpi->cpi_ncore_per_chip = 1;
1484 1508 break;
1485 1509 } else {
1486 1510 cpi->cpi_ncore_per_chip =
1487 1511 BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1;
1488 1512 }
1489 1513 break;
1490 1514 case X86_VENDOR_AMD:
1491 1515 if (cpi->cpi_xmaxeax < 0x80000008) {
1492 1516 cpi->cpi_ncore_per_chip = 1;
1493 1517 break;
1494 1518 } else {
1495 1519 /*
1496 1520 * On family 0xf cpuid fn 2 ECX[7:0] "NC" is
1497 1521 * 1 less than the number of physical cores on
1498 1522 * the chip. In family 0x10 this value can
1499 1523 * be affected by "downcoring" - it reflects
1500 1524 * 1 less than the number of cores actually
1501 1525 * enabled on this node.
1502 1526 */
1503 1527 cpi->cpi_ncore_per_chip =
1504 1528 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1;
1505 1529 }
1506 1530 break;
1507 1531 default:
1508 1532 cpi->cpi_ncore_per_chip = 1;
1509 1533 break;
1510 1534 }
1511 1535
1512 1536 /*
1513 1537 * Get CPUID data about TSC Invariance in Deep C-State.
1514 1538 */
1515 1539 switch (cpi->cpi_vendor) {
1516 1540 case X86_VENDOR_Intel:
1517 1541 if (cpi->cpi_maxeax >= 7) {
1518 1542 cp = &cpi->cpi_extd[7];
1519 1543 cp->cp_eax = 0x80000007;
1520 1544 cp->cp_ecx = 0;
1521 1545 (void) __cpuid_insn(cp);
1522 1546 }
1523 1547 break;
1524 1548 default:
1525 1549 break;
1526 1550 }
1527 1551 } else {
1528 1552 cpi->cpi_ncore_per_chip = 1;
1529 1553 }
1530 1554
1531 1555 /*
1532 1556 * If more than one core, then this processor is CMP.
1533 1557 */
1534 1558 if (cpi->cpi_ncore_per_chip > 1) {
1535 1559 add_x86_feature(featureset, X86FSET_CMP);
1536 1560 }
1537 1561
[ 88 lines elided ]
1538 1562 /*
1539 1563 * If the number of cores is the same as the number
1540 1564 * of CPUs, then we cannot have HyperThreading.
1541 1565 */
1542 1566 if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) {
1543 1567 remove_x86_feature(featureset, X86FSET_HTT);
1544 1568 }
1545 1569
1546 1570 cpi->cpi_apicid = CPI_APIC_ID(cpi);
1547 1571 cpi->cpi_procnodes_per_pkg = 1;
1572 + cpi->cpi_cores_per_compunit = 1;
1548 1573 if (is_x86_feature(featureset, X86FSET_HTT) == B_FALSE &&
1549 1574 is_x86_feature(featureset, X86FSET_CMP) == B_FALSE) {
1550 1575 /*
1551 1576 * Single-core single-threaded processors.
1552 1577 */
1553 1578 cpi->cpi_chipid = -1;
1554 1579 cpi->cpi_clogid = 0;
1555 1580 cpi->cpi_coreid = cpu->cpu_id;
1556 1581 cpi->cpi_pkgcoreid = 0;
1557 1582 if (cpi->cpi_vendor == X86_VENDOR_AMD)
1558 1583 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0);
1559 1584 else
1560 1585 cpi->cpi_procnodeid = cpi->cpi_chipid;
1561 1586 } else if (cpi->cpi_ncpu_per_chip > 1) {
1562 1587 if (cpi->cpi_vendor == X86_VENDOR_Intel)
1563 1588 cpuid_intel_getids(cpu, featureset);
[ 6 lines elided ]
1564 1589 else if (cpi->cpi_vendor == X86_VENDOR_AMD)
1565 1590 cpuid_amd_getids(cpu);
1566 1591 else {
1567 1592 /*
1568 1593 * All other processors are currently
1569 1594 * assumed to have single cores.
1570 1595 */
1571 1596 cpi->cpi_coreid = cpi->cpi_chipid;
1572 1597 cpi->cpi_pkgcoreid = 0;
1573 1598 cpi->cpi_procnodeid = cpi->cpi_chipid;
1599 + cpi->cpi_compunitid = cpi->cpi_chipid;
1574 1600 }
1575 1601 }
1576 1602
1577 1603 /*
1578 1604 * Synthesize chip "revision" and socket type
1579 1605 */
1580 1606 cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family,
1581 1607 cpi->cpi_model, cpi->cpi_step);
1582 1608 cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor,
1583 1609 cpi->cpi_family, cpi->cpi_model, cpi->cpi_step);
1584 1610 cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family,
1585 1611 cpi->cpi_model, cpi->cpi_step);
1586 1612
1587 1613 pass1_done:
1588 1614 cpi->cpi_pass = 1;
1589 1615 }
1590 1616
1591 1617 /*
1592 1618 * Make copies of the cpuid table entries we depend on, in
1593 1619 * part for ease of parsing now, in part so that we have only
1594 1620 * one place to correct any of it, in part for ease of
1595 1621 * later export to userland, and in part so we can look at
1596 1622 * this stuff in a crash dump.
1597 1623 */
1598 1624
1599 1625 /*ARGSUSED*/
1600 1626 void
1601 1627 cpuid_pass2(cpu_t *cpu)
1602 1628 {
1603 1629 uint_t n, nmax;
1604 1630 int i;
1605 1631 struct cpuid_regs *cp;
1606 1632 uint8_t *dp;
1607 1633 uint32_t *iptr;
1608 1634 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1609 1635
1610 1636 ASSERT(cpi->cpi_pass == 1);
1611 1637
1612 1638 if (cpi->cpi_maxeax < 1)
1613 1639 goto pass2_done;
1614 1640
1615 1641 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD)
1616 1642 nmax = NMAX_CPI_STD;
1617 1643 /*
1618 1644 * (We already handled n == 0 and n == 1 in pass 1)
1619 1645 */
1620 1646 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) {
1621 1647 cp->cp_eax = n;
1622 1648
1623 1649 /*
1624 1650 * CPUID function 4 expects %ecx to be initialized
1625 1651 * with an index which indicates which cache to return
1626 1652 * information about. The OS is expected to call function 4
1627 1653 * with %ecx set to 0, 1, 2, ... until it returns with
1628 1654 * EAX[4:0] set to 0, which indicates there are no more
1629 1655 * caches.
1630 1656 *
1631 1657 * Here, populate cpi_std[4] with the information returned by
1632 1658 * function 4 when %ecx == 0, and do the rest in cpuid_pass3()
1633 1659 * when dynamic memory allocation becomes available.
1634 1660 *
1635 1661 * Note: we need to explicitly initialize %ecx here, since
1636 1662 * function 4 may have been previously invoked.
1637 1663 */
1638 1664 if (n == 4)
1639 1665 cp->cp_ecx = 0;
1640 1666
1641 1667 (void) __cpuid_insn(cp);
1642 1668 platform_cpuid_mangle(cpi->cpi_vendor, n, cp);
1643 1669 switch (n) {
1644 1670 case 2:
1645 1671 /*
1646 1672 * "the lower 8 bits of the %eax register
1647 1673 * contain a value that identifies the number
1648 1674 * of times the cpuid [instruction] has to be
1649 1675 * executed to obtain a complete image of the
1650 1676 * processor's caching systems."
1651 1677 *
1652 1678 * How *do* they make this stuff up?
1653 1679 */
1654 1680 cpi->cpi_ncache = sizeof (*cp) *
1655 1681 BITX(cp->cp_eax, 7, 0);
1656 1682 if (cpi->cpi_ncache == 0)
1657 1683 break;
1658 1684 cpi->cpi_ncache--; /* skip count byte */
1659 1685
1660 1686 /*
1661 1687 * Well, for now, rather than attempt to implement
1662 1688 * this slightly dubious algorithm, we just look
1663 1689 * at the first 15 ..
1664 1690 */
1665 1691 if (cpi->cpi_ncache > (sizeof (*cp) - 1))
1666 1692 cpi->cpi_ncache = sizeof (*cp) - 1;
1667 1693
1668 1694 dp = cpi->cpi_cacheinfo;
1669 1695 if (BITX(cp->cp_eax, 31, 31) == 0) {
1670 1696 uint8_t *p = (void *)&cp->cp_eax;
1671 1697 for (i = 1; i < 4; i++)
1672 1698 if (p[i] != 0)
1673 1699 *dp++ = p[i];
1674 1700 }
1675 1701 if (BITX(cp->cp_ebx, 31, 31) == 0) {
1676 1702 uint8_t *p = (void *)&cp->cp_ebx;
1677 1703 for (i = 0; i < 4; i++)
1678 1704 if (p[i] != 0)
1679 1705 *dp++ = p[i];
1680 1706 }
1681 1707 if (BITX(cp->cp_ecx, 31, 31) == 0) {
1682 1708 uint8_t *p = (void *)&cp->cp_ecx;
1683 1709 for (i = 0; i < 4; i++)
1684 1710 if (p[i] != 0)
1685 1711 *dp++ = p[i];
1686 1712 }
1687 1713 if (BITX(cp->cp_edx, 31, 31) == 0) {
1688 1714 uint8_t *p = (void *)&cp->cp_edx;
1689 1715 for (i = 0; i < 4; i++)
1690 1716 if (p[i] != 0)
1691 1717 *dp++ = p[i];
1692 1718 }
1693 1719 break;
1694 1720
1695 1721 case 3: /* Processor serial number, if PSN supported */
1696 1722 break;
1697 1723
1698 1724 case 4: /* Deterministic cache parameters */
1699 1725 break;
1700 1726
1701 1727 case 5: /* Monitor/Mwait parameters */
1702 1728 {
1703 1729 size_t mwait_size;
1704 1730
1705 1731 /*
1706 1732 * check cpi_mwait.support which was set in cpuid_pass1
1707 1733 */
1708 1734 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT))
1709 1735 break;
1710 1736
1711 1737 /*
1712 1738 			 * Protect ourselves from an insane mwait line size.
1713 1739 * Workaround for incomplete hardware emulator(s).
1714 1740 */
1715 1741 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi);
1716 1742 if (mwait_size < sizeof (uint32_t) ||
1717 1743 !ISP2(mwait_size)) {
1718 1744 #if DEBUG
1719 1745 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait "
1720 1746 "size %ld", cpu->cpu_id, (long)mwait_size);
1721 1747 #endif
1722 1748 break;
1723 1749 }
1724 1750
1725 1751 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi);
1726 1752 cpi->cpi_mwait.mon_max = mwait_size;
1727 1753 if (MWAIT_EXTENSION(cpi)) {
1728 1754 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS;
1729 1755 if (MWAIT_INT_ENABLE(cpi))
1730 1756 cpi->cpi_mwait.support |=
1731 1757 MWAIT_ECX_INT_ENABLE;
1732 1758 }
1733 1759 break;
1734 1760 }
1735 1761 default:
1736 1762 break;
1737 1763 }
1738 1764 }
1739 1765
1740 1766 if (cpi->cpi_maxeax >= 0xB && cpi->cpi_vendor == X86_VENDOR_Intel) {
1741 1767 struct cpuid_regs regs;
1742 1768
1743 1769 		cp = &regs;
1744 1770 cp->cp_eax = 0xB;
1745 1771 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1746 1772
1747 1773 (void) __cpuid_insn(cp);
1748 1774
1749 1775 /*
1750 1776 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
1751 1777 * indicates that the extended topology enumeration leaf is
1752 1778 * available.
1753 1779 */
1754 1780 if (cp->cp_ebx) {
1755 1781 uint32_t x2apic_id;
1756 1782 uint_t coreid_shift = 0;
1757 1783 uint_t ncpu_per_core = 1;
1758 1784 uint_t chipid_shift = 0;
1759 1785 uint_t ncpu_per_chip = 1;
1760 1786 uint_t i;
1761 1787 uint_t level;
1762 1788
1763 1789 for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
1764 1790 cp->cp_eax = 0xB;
1765 1791 cp->cp_ecx = i;
1766 1792
1767 1793 (void) __cpuid_insn(cp);
1768 1794 level = CPI_CPU_LEVEL_TYPE(cp);
1769 1795
1770 1796 if (level == 1) {
1771 1797 x2apic_id = cp->cp_edx;
1772 1798 coreid_shift = BITX(cp->cp_eax, 4, 0);
1773 1799 ncpu_per_core = BITX(cp->cp_ebx, 15, 0);
1774 1800 } else if (level == 2) {
1775 1801 x2apic_id = cp->cp_edx;
1776 1802 chipid_shift = BITX(cp->cp_eax, 4, 0);
1777 1803 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0);
1778 1804 }
1779 1805 }
1780 1806
1781 1807 cpi->cpi_apicid = x2apic_id;
1782 1808 cpi->cpi_ncpu_per_chip = ncpu_per_chip;
1783 1809 cpi->cpi_ncore_per_chip = ncpu_per_chip /
1784 1810 ncpu_per_core;
1785 1811 cpi->cpi_chipid = x2apic_id >> chipid_shift;
1786 1812 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1);
1787 1813 cpi->cpi_coreid = x2apic_id >> coreid_shift;
1788 1814 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
1789 1815 }
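			/*
			 * As a worked example of the shift math above, using
			 * hypothetical values (not taken from the source):
			 * suppose leaf 0xB reports EAX[4:0] == 1 at the SMT
			 * level and EAX[4:0] == 4 at the core level, and this
			 * CPU's x2APIC id is 0x1d.  Then:
			 *	coreid	  = 0x1d >> 1		  = 0x0e
			 *	chipid	  = 0x1d >> 4		  = 0x01
			 *	clogid	  = 0x1d & ((1 << 4) - 1) = 0x0d
			 *	pkgcoreid = 0x0d >> 1		  = 0x06
			 * i.e. chip 1, package core 6, second thread on that
			 * core.
			 */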
1790 1816
1791 1817 /* Make cp NULL so that we don't stumble on others */
1792 1818 cp = NULL;
1793 1819 }
1794 1820
1795 1821 /*
1796 1822 * XSAVE enumeration
1797 1823 */
1798 1824 if (cpi->cpi_maxeax >= 0xD) {
1799 1825 struct cpuid_regs regs;
1800 1826 boolean_t cpuid_d_valid = B_TRUE;
1801 1827
1802 1828 		cp = &regs;
1803 1829 cp->cp_eax = 0xD;
1804 1830 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1805 1831
1806 1832 (void) __cpuid_insn(cp);
1807 1833
1808 1834 /*
1809 1835 * Sanity checks for debug
1810 1836 */
1811 1837 if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 ||
1812 1838 (cp->cp_eax & XFEATURE_SSE) == 0) {
1813 1839 cpuid_d_valid = B_FALSE;
1814 1840 }
1815 1841
1816 1842 cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax;
1817 1843 cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx;
1818 1844 cpi->cpi_xsave.xsav_max_size = cp->cp_ecx;
1819 1845
1820 1846 /*
1821 1847 * If the hw supports AVX, get the size and offset in the save
1822 1848 * area for the ymm state.
1823 1849 */
1824 1850 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) {
1825 1851 cp->cp_eax = 0xD;
1826 1852 cp->cp_ecx = 2;
1827 1853 cp->cp_edx = cp->cp_ebx = 0;
1828 1854
1829 1855 (void) __cpuid_insn(cp);
1830 1856
1831 1857 if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET ||
1832 1858 cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) {
1833 1859 cpuid_d_valid = B_FALSE;
1834 1860 }
1835 1861
1836 1862 cpi->cpi_xsave.ymm_size = cp->cp_eax;
1837 1863 cpi->cpi_xsave.ymm_offset = cp->cp_ebx;
1838 1864 }
1839 1865
1840 1866 if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
1841 1867 xsave_state_size = 0;
1842 1868 } else if (cpuid_d_valid) {
1843 1869 xsave_state_size = cpi->cpi_xsave.xsav_max_size;
1844 1870 } else {
1845 1871 /* Broken CPUID 0xD, probably in HVM */
1846 1872 cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid "
1847 1873 "value: hw_low = %d, hw_high = %d, xsave_size = %d"
1848 1874 ", ymm_size = %d, ymm_offset = %d\n",
1849 1875 cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low,
1850 1876 cpi->cpi_xsave.xsav_hw_features_high,
1851 1877 (int)cpi->cpi_xsave.xsav_max_size,
1852 1878 (int)cpi->cpi_xsave.ymm_size,
1853 1879 (int)cpi->cpi_xsave.ymm_offset);
1854 1880
1855 1881 if (xsave_state_size != 0) {
1856 1882 /*
1857 1883 * This must be a non-boot CPU. We cannot
1858 1884 * continue, because boot cpu has already
1859 1885 * enabled XSAVE.
1860 1886 */
1861 1887 ASSERT(cpu->cpu_id != 0);
1862 1888 cmn_err(CE_PANIC, "cpu%d: we have already "
1863 1889 "enabled XSAVE on boot cpu, cannot "
1864 1890 "continue.", cpu->cpu_id);
1865 1891 } else {
1866 1892 /*
1867 1893 * Must be from boot CPU, OK to disable XSAVE.
1868 1894 */
1869 1895 ASSERT(cpu->cpu_id == 0);
1870 1896 remove_x86_feature(x86_featureset,
1871 1897 X86FSET_XSAVE);
1872 1898 remove_x86_feature(x86_featureset, X86FSET_AVX);
1873 1899 CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_XSAVE;
1874 1900 CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_AVX;
1875 1901 xsave_force_disable = B_TRUE;
1876 1902 }
1877 1903 }
1878 1904 }
1879 1905
1880 1906
1881 1907 if ((cpi->cpi_xmaxeax & 0x80000000) == 0)
1882 1908 goto pass2_done;
1883 1909
1884 1910 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD)
1885 1911 nmax = NMAX_CPI_EXTD;
1886 1912 /*
1887 1913 * Copy the extended properties, fixing them as we go.
1888 1914 * (We already handled n == 0 and n == 1 in pass 1)
1889 1915 */
1890 1916 iptr = (void *)cpi->cpi_brandstr;
1891 1917 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) {
1892 1918 cp->cp_eax = 0x80000000 + n;
1893 1919 (void) __cpuid_insn(cp);
1894 1920 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp);
1895 1921 switch (n) {
1896 1922 case 2:
1897 1923 case 3:
1898 1924 case 4:
1899 1925 /*
1900 1926 * Extract the brand string
1901 1927 */
1902 1928 *iptr++ = cp->cp_eax;
1903 1929 *iptr++ = cp->cp_ebx;
1904 1930 *iptr++ = cp->cp_ecx;
1905 1931 *iptr++ = cp->cp_edx;
1906 1932 break;
1907 1933 case 5:
1908 1934 switch (cpi->cpi_vendor) {
1909 1935 case X86_VENDOR_AMD:
1910 1936 /*
1911 1937 * The Athlon and Duron were the first
1912 1938 * parts to report the sizes of the
1913 1939 * TLB for large pages. Before then,
1914 1940 * we don't trust the data.
1915 1941 */
1916 1942 if (cpi->cpi_family < 6 ||
1917 1943 (cpi->cpi_family == 6 &&
1918 1944 cpi->cpi_model < 1))
1919 1945 cp->cp_eax = 0;
1920 1946 break;
1921 1947 default:
1922 1948 break;
1923 1949 }
1924 1950 break;
1925 1951 case 6:
1926 1952 switch (cpi->cpi_vendor) {
1927 1953 case X86_VENDOR_AMD:
1928 1954 /*
1929 1955 * The Athlon and Duron were the first
1930 1956 * AMD parts with L2 TLB's.
1931 1957 * Before then, don't trust the data.
1932 1958 */
1933 1959 if (cpi->cpi_family < 6 ||
1934 1960 cpi->cpi_family == 6 &&
1935 1961 cpi->cpi_model < 1)
1936 1962 cp->cp_eax = cp->cp_ebx = 0;
1937 1963 /*
1938 1964 * AMD Duron rev A0 reports L2
1939 1965 * cache size incorrectly as 1K
1940 1966 * when it is really 64K
1941 1967 */
1942 1968 if (cpi->cpi_family == 6 &&
1943 1969 cpi->cpi_model == 3 &&
1944 1970 cpi->cpi_step == 0) {
1945 1971 cp->cp_ecx &= 0xffff;
1946 1972 cp->cp_ecx |= 0x400000;
1947 1973 }
1948 1974 break;
1949 1975 case X86_VENDOR_Cyrix: /* VIA C3 */
1950 1976 /*
1951 1977 * VIA C3 processors are a bit messed
1952 1978 * up w.r.t. encoding cache sizes in %ecx
1953 1979 */
1954 1980 if (cpi->cpi_family != 6)
1955 1981 break;
1956 1982 /*
1957 1983 * model 7 and 8 were incorrectly encoded
1958 1984 *
1959 1985 * xxx is model 8 really broken?
1960 1986 */
1961 1987 if (cpi->cpi_model == 7 ||
1962 1988 cpi->cpi_model == 8)
1963 1989 cp->cp_ecx =
1964 1990 BITX(cp->cp_ecx, 31, 24) << 16 |
1965 1991 BITX(cp->cp_ecx, 23, 16) << 12 |
1966 1992 BITX(cp->cp_ecx, 15, 8) << 8 |
1967 1993 BITX(cp->cp_ecx, 7, 0);
1968 1994 /*
1969 1995 * model 9 stepping 1 has wrong associativity
1970 1996 */
1971 1997 if (cpi->cpi_model == 9 && cpi->cpi_step == 1)
1972 1998 cp->cp_ecx |= 8 << 12;
1973 1999 break;
1974 2000 case X86_VENDOR_Intel:
1975 2001 /*
1976 2002 * Extended L2 Cache features function.
1977 2003 * First appeared on Prescott.
1978 2004 */
1979 2005 default:
1980 2006 break;
1981 2007 }
1982 2008 break;
1983 2009 default:
1984 2010 break;
1985 2011 }
1986 2012 }
1987 2013
1988 2014 pass2_done:
1989 2015 cpi->cpi_pass = 2;
1990 2016 }
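The leaf 2 descriptor harvesting in cpuid_pass2() above can be reproduced from user space. The following is a minimal sketch, not part of this file, assuming the <cpuid.h> intrinsics shipped with GCC/Clang; it only mirrors the bit-31 validity check and the skip of the count byte in %al.

#include <cpuid.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Mirror the leaf 2 descriptor harvesting done in cpuid_pass2():
 * a register whose bit 31 is clear carries valid descriptor bytes,
 * and the low byte of %eax is an iteration count, not a descriptor.
 */
int
main(void)
{
	unsigned int r[4];
	int i, j;

	if (!__get_cpuid(2, &r[0], &r[1], &r[2], &r[3])) {
		(void) fprintf(stderr, "cpuid leaf 2 not supported\n");
		return (1);
	}

	for (i = 0; i < 4; i++) {
		const uint8_t *p = (const uint8_t *)&r[i];

		if (r[i] & 0x80000000U)		/* register is reserved */
			continue;
		for (j = (i == 0) ? 1 : 0; j < 4; j++) {
			if (p[j] != 0)
				(void) printf("descriptor 0x%02x\n", p[j]);
		}
	}
	return (0);
}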
1991 2017
1992 2018 static const char *
1993 2019 intel_cpubrand(const struct cpuid_info *cpi)
1994 2020 {
1995 2021 int i;
1996 2022
1997 2023 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
1998 2024 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
1999 2025 return ("i486");
2000 2026
2001 2027 switch (cpi->cpi_family) {
2002 2028 case 5:
2003 2029 return ("Intel Pentium(r)");
2004 2030 case 6:
2005 2031 switch (cpi->cpi_model) {
2006 2032 uint_t celeron, xeon;
2007 2033 const struct cpuid_regs *cp;
2008 2034 case 0:
2009 2035 case 1:
2010 2036 case 2:
2011 2037 return ("Intel Pentium(r) Pro");
2012 2038 case 3:
2013 2039 case 4:
2014 2040 return ("Intel Pentium(r) II");
2015 2041 case 6:
2016 2042 return ("Intel Celeron(r)");
2017 2043 case 5:
2018 2044 case 7:
2019 2045 celeron = xeon = 0;
2020 2046 cp = &cpi->cpi_std[2]; /* cache info */
2021 2047
2022 2048 for (i = 1; i < 4; i++) {
2023 2049 uint_t tmp;
2024 2050
2025 2051 tmp = (cp->cp_eax >> (8 * i)) & 0xff;
2026 2052 if (tmp == 0x40)
2027 2053 celeron++;
2028 2054 if (tmp >= 0x44 && tmp <= 0x45)
2029 2055 xeon++;
2030 2056 }
2031 2057
2032 2058 for (i = 0; i < 2; i++) {
2033 2059 uint_t tmp;
2034 2060
2035 2061 tmp = (cp->cp_ebx >> (8 * i)) & 0xff;
2036 2062 if (tmp == 0x40)
2037 2063 celeron++;
2038 2064 else if (tmp >= 0x44 && tmp <= 0x45)
2039 2065 xeon++;
2040 2066 }
2041 2067
2042 2068 for (i = 0; i < 4; i++) {
2043 2069 uint_t tmp;
2044 2070
2045 2071 tmp = (cp->cp_ecx >> (8 * i)) & 0xff;
2046 2072 if (tmp == 0x40)
2047 2073 celeron++;
2048 2074 else if (tmp >= 0x44 && tmp <= 0x45)
2049 2075 xeon++;
2050 2076 }
2051 2077
2052 2078 for (i = 0; i < 4; i++) {
2053 2079 uint_t tmp;
2054 2080
2055 2081 tmp = (cp->cp_edx >> (8 * i)) & 0xff;
2056 2082 if (tmp == 0x40)
2057 2083 celeron++;
2058 2084 else if (tmp >= 0x44 && tmp <= 0x45)
2059 2085 xeon++;
2060 2086 }
2061 2087
2062 2088 if (celeron)
2063 2089 return ("Intel Celeron(r)");
2064 2090 if (xeon)
2065 2091 return (cpi->cpi_model == 5 ?
2066 2092 "Intel Pentium(r) II Xeon(tm)" :
2067 2093 "Intel Pentium(r) III Xeon(tm)");
2068 2094 return (cpi->cpi_model == 5 ?
2069 2095 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" :
2070 2096 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)");
2071 2097 default:
2072 2098 break;
2073 2099 }
2074 2100 default:
2075 2101 break;
2076 2102 }
2077 2103
2078 2104 /* BrandID is present if the field is nonzero */
2079 2105 if (cpi->cpi_brandid != 0) {
2080 2106 static const struct {
2081 2107 uint_t bt_bid;
2082 2108 const char *bt_str;
2083 2109 } brand_tbl[] = {
2084 2110 { 0x1, "Intel(r) Celeron(r)" },
2085 2111 { 0x2, "Intel(r) Pentium(r) III" },
2086 2112 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" },
2087 2113 { 0x4, "Intel(r) Pentium(r) III" },
2088 2114 { 0x6, "Mobile Intel(r) Pentium(r) III" },
2089 2115 { 0x7, "Mobile Intel(r) Celeron(r)" },
2090 2116 { 0x8, "Intel(r) Pentium(r) 4" },
2091 2117 { 0x9, "Intel(r) Pentium(r) 4" },
2092 2118 { 0xa, "Intel(r) Celeron(r)" },
2093 2119 { 0xb, "Intel(r) Xeon(tm)" },
2094 2120 { 0xc, "Intel(r) Xeon(tm) MP" },
2095 2121 { 0xe, "Mobile Intel(r) Pentium(r) 4" },
2096 2122 { 0xf, "Mobile Intel(r) Celeron(r)" },
2097 2123 { 0x11, "Mobile Genuine Intel(r)" },
2098 2124 { 0x12, "Intel(r) Celeron(r) M" },
2099 2125 { 0x13, "Mobile Intel(r) Celeron(r)" },
2100 2126 { 0x14, "Intel(r) Celeron(r)" },
2101 2127 { 0x15, "Mobile Genuine Intel(r)" },
2102 2128 { 0x16, "Intel(r) Pentium(r) M" },
2103 2129 { 0x17, "Mobile Intel(r) Celeron(r)" }
2104 2130 };
2105 2131 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]);
2106 2132 uint_t sgn;
2107 2133
2108 2134 sgn = (cpi->cpi_family << 8) |
2109 2135 (cpi->cpi_model << 4) | cpi->cpi_step;
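		/*
		 * For example (hypothetical part): family 6, model 0xb,
		 * stepping 1 packs into sgn == 0x6b1, which is the Celeron
		 * special case tested below.
		 */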
2110 2136
2111 2137 for (i = 0; i < btblmax; i++)
2112 2138 if (brand_tbl[i].bt_bid == cpi->cpi_brandid)
2113 2139 break;
2114 2140 if (i < btblmax) {
2115 2141 if (sgn == 0x6b1 && cpi->cpi_brandid == 3)
2116 2142 return ("Intel(r) Celeron(r)");
2117 2143 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb)
2118 2144 return ("Intel(r) Xeon(tm) MP");
2119 2145 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe)
2120 2146 return ("Intel(r) Xeon(tm)");
2121 2147 return (brand_tbl[i].bt_str);
2122 2148 }
2123 2149 }
2124 2150
2125 2151 return (NULL);
2126 2152 }
2127 2153
2128 2154 static const char *
2129 2155 amd_cpubrand(const struct cpuid_info *cpi)
2130 2156 {
2131 2157 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
2132 2158 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
2133 2159 return ("i486 compatible");
2134 2160
2135 2161 switch (cpi->cpi_family) {
2136 2162 case 5:
2137 2163 switch (cpi->cpi_model) {
2138 2164 case 0:
2139 2165 case 1:
2140 2166 case 2:
2141 2167 case 3:
2142 2168 case 4:
2143 2169 case 5:
2144 2170 return ("AMD-K5(r)");
2145 2171 case 6:
2146 2172 case 7:
2147 2173 return ("AMD-K6(r)");
2148 2174 case 8:
2149 2175 return ("AMD-K6(r)-2");
2150 2176 case 9:
2151 2177 return ("AMD-K6(r)-III");
2152 2178 default:
2153 2179 return ("AMD (family 5)");
2154 2180 }
2155 2181 case 6:
2156 2182 switch (cpi->cpi_model) {
2157 2183 case 1:
2158 2184 return ("AMD-K7(tm)");
2159 2185 case 0:
2160 2186 case 2:
2161 2187 case 4:
2162 2188 return ("AMD Athlon(tm)");
2163 2189 case 3:
2164 2190 case 7:
2165 2191 return ("AMD Duron(tm)");
2166 2192 case 6:
2167 2193 case 8:
2168 2194 case 10:
2169 2195 /*
2170 2196 * Use the L2 cache size to distinguish
2171 2197 */
2172 2198 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ?
2173 2199 "AMD Athlon(tm)" : "AMD Duron(tm)");
2174 2200 default:
2175 2201 return ("AMD (family 6)");
2176 2202 }
2177 2203 default:
2178 2204 break;
2179 2205 }
2180 2206
2181 2207 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 &&
2182 2208 cpi->cpi_brandid != 0) {
2183 2209 switch (BITX(cpi->cpi_brandid, 7, 5)) {
2184 2210 case 3:
2185 2211 return ("AMD Opteron(tm) UP 1xx");
2186 2212 case 4:
2187 2213 return ("AMD Opteron(tm) DP 2xx");
2188 2214 case 5:
2189 2215 return ("AMD Opteron(tm) MP 8xx");
2190 2216 default:
2191 2217 return ("AMD Opteron(tm)");
2192 2218 }
2193 2219 }
2194 2220
2195 2221 return (NULL);
2196 2222 }
2197 2223
2198 2224 static const char *
2199 2225 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type)
2200 2226 {
2201 2227 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
2202 2228 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 ||
2203 2229 type == X86_TYPE_CYRIX_486)
2204 2230 return ("i486 compatible");
2205 2231
2206 2232 switch (type) {
2207 2233 case X86_TYPE_CYRIX_6x86:
2208 2234 return ("Cyrix 6x86");
2209 2235 case X86_TYPE_CYRIX_6x86L:
2210 2236 return ("Cyrix 6x86L");
2211 2237 case X86_TYPE_CYRIX_6x86MX:
2212 2238 return ("Cyrix 6x86MX");
2213 2239 case X86_TYPE_CYRIX_GXm:
2214 2240 return ("Cyrix GXm");
2215 2241 case X86_TYPE_CYRIX_MediaGX:
2216 2242 return ("Cyrix MediaGX");
2217 2243 case X86_TYPE_CYRIX_MII:
2218 2244 return ("Cyrix M2");
2219 2245 case X86_TYPE_VIA_CYRIX_III:
2220 2246 return ("VIA Cyrix M3");
2221 2247 default:
2222 2248 /*
2223 2249 * Have another wild guess ..
2224 2250 */
2225 2251 if (cpi->cpi_family == 4 && cpi->cpi_model == 9)
2226 2252 return ("Cyrix 5x86");
2227 2253 else if (cpi->cpi_family == 5) {
2228 2254 switch (cpi->cpi_model) {
2229 2255 case 2:
2230 2256 return ("Cyrix 6x86"); /* Cyrix M1 */
2231 2257 case 4:
2232 2258 return ("Cyrix MediaGX");
2233 2259 default:
2234 2260 break;
2235 2261 }
2236 2262 } else if (cpi->cpi_family == 6) {
2237 2263 switch (cpi->cpi_model) {
2238 2264 case 0:
2239 2265 return ("Cyrix 6x86MX"); /* Cyrix M2? */
2240 2266 case 5:
2241 2267 case 6:
2242 2268 case 7:
2243 2269 case 8:
2244 2270 case 9:
2245 2271 return ("VIA C3");
2246 2272 default:
2247 2273 break;
2248 2274 }
2249 2275 }
2250 2276 break;
2251 2277 }
2252 2278 return (NULL);
2253 2279 }
2254 2280
2255 2281 /*
2256 2282 * This only gets called in the case that the CPU extended
2257 2283  * feature brand string leaves (0x80000002, 0x80000003, 0x80000004)
2258 2284 * aren't available, or contain null bytes for some reason.
2259 2285 */
2260 2286 static void
2261 2287 fabricate_brandstr(struct cpuid_info *cpi)
2262 2288 {
2263 2289 const char *brand = NULL;
2264 2290
2265 2291 switch (cpi->cpi_vendor) {
2266 2292 case X86_VENDOR_Intel:
2267 2293 brand = intel_cpubrand(cpi);
2268 2294 break;
2269 2295 case X86_VENDOR_AMD:
2270 2296 brand = amd_cpubrand(cpi);
2271 2297 break;
2272 2298 case X86_VENDOR_Cyrix:
2273 2299 brand = cyrix_cpubrand(cpi, x86_type);
2274 2300 break;
2275 2301 case X86_VENDOR_NexGen:
2276 2302 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
2277 2303 brand = "NexGen Nx586";
2278 2304 break;
2279 2305 case X86_VENDOR_Centaur:
2280 2306 if (cpi->cpi_family == 5)
2281 2307 switch (cpi->cpi_model) {
2282 2308 case 4:
2283 2309 brand = "Centaur C6";
2284 2310 break;
2285 2311 case 8:
2286 2312 brand = "Centaur C2";
2287 2313 break;
2288 2314 case 9:
2289 2315 brand = "Centaur C3";
2290 2316 break;
2291 2317 default:
2292 2318 break;
2293 2319 }
2294 2320 break;
2295 2321 case X86_VENDOR_Rise:
2296 2322 if (cpi->cpi_family == 5 &&
2297 2323 (cpi->cpi_model == 0 || cpi->cpi_model == 2))
2298 2324 brand = "Rise mP6";
2299 2325 break;
2300 2326 case X86_VENDOR_SiS:
2301 2327 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
2302 2328 brand = "SiS 55x";
2303 2329 break;
2304 2330 case X86_VENDOR_TM:
2305 2331 if (cpi->cpi_family == 5 && cpi->cpi_model == 4)
2306 2332 brand = "Transmeta Crusoe TM3x00 or TM5x00";
2307 2333 break;
2308 2334 case X86_VENDOR_NSC:
2309 2335 case X86_VENDOR_UMC:
2310 2336 default:
2311 2337 break;
2312 2338 }
2313 2339 if (brand) {
2314 2340 (void) strcpy((char *)cpi->cpi_brandstr, brand);
2315 2341 return;
2316 2342 }
2317 2343
2318 2344 /*
2319 2345 * If all else fails ...
2320 2346 */
2321 2347 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr),
2322 2348 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family,
2323 2349 cpi->cpi_model, cpi->cpi_step);
2324 2350 }
2325 2351
2326 2352 /*
2327 2353 * This routine is called just after kernel memory allocation
2328 2354 * becomes available on cpu0, and as part of mp_startup() on
2329 2355 * the other cpus.
2330 2356 *
2331 2357 * Fixup the brand string, and collect any information from cpuid
2332 2358  * that requires dynamically allocated storage to represent.
2333 2359 */
2334 2360 /*ARGSUSED*/
2335 2361 void
2336 2362 cpuid_pass3(cpu_t *cpu)
2337 2363 {
2338 2364 int i, max, shft, level, size;
2339 2365 struct cpuid_regs regs;
2340 2366 struct cpuid_regs *cp;
2341 2367 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2342 2368
2343 2369 ASSERT(cpi->cpi_pass == 2);
2344 2370
2345 2371 /*
2346 2372 * Function 4: Deterministic cache parameters
2347 2373 *
2348 2374 * Take this opportunity to detect the number of threads
2349 2375 * sharing the last level cache, and construct a corresponding
2350 2376 * cache id. The respective cpuid_info members are initialized
2351 2377 * to the default case of "no last level cache sharing".
2352 2378 */
2353 2379 cpi->cpi_ncpu_shr_last_cache = 1;
2354 2380 cpi->cpi_last_lvl_cacheid = cpu->cpu_id;
2355 2381
2356 2382 if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) {
2357 2383
2358 2384 /*
2359 2385 * Find the # of elements (size) returned by fn 4, and along
2360 2386 * the way detect last level cache sharing details.
2361 2387 */
2362 2388 		bzero(&regs, sizeof (regs));
2363 2389 		cp = &regs;
2364 2390 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) {
2365 2391 cp->cp_eax = 4;
2366 2392 cp->cp_ecx = i;
2367 2393
2368 2394 (void) __cpuid_insn(cp);
2369 2395
2370 2396 if (CPI_CACHE_TYPE(cp) == 0)
2371 2397 break;
2372 2398 level = CPI_CACHE_LVL(cp);
2373 2399 if (level > max) {
2374 2400 max = level;
2375 2401 cpi->cpi_ncpu_shr_last_cache =
2376 2402 CPI_NTHR_SHR_CACHE(cp) + 1;
2377 2403 }
2378 2404 }
2379 2405 cpi->cpi_std_4_size = size = i;
2380 2406
2381 2407 /*
2382 2408 * Allocate the cpi_std_4 array. The first element
2383 2409 * references the regs for fn 4, %ecx == 0, which
2384 2410 * cpuid_pass2() stashed in cpi->cpi_std[4].
2385 2411 */
2386 2412 if (size > 0) {
2387 2413 cpi->cpi_std_4 =
2388 2414 kmem_alloc(size * sizeof (cp), KM_SLEEP);
2389 2415 cpi->cpi_std_4[0] = &cpi->cpi_std[4];
2390 2416
2391 2417 /*
2392 2418 * Allocate storage to hold the additional regs
2393 2419 * for function 4, %ecx == 1 .. cpi_std_4_size.
2394 2420 *
2395 2421 			 * The regs for fn 4, %ecx == 0 have already
2396 2422 * been allocated as indicated above.
2397 2423 */
2398 2424 for (i = 1; i < size; i++) {
2399 2425 cp = cpi->cpi_std_4[i] =
2400 2426 kmem_zalloc(sizeof (regs), KM_SLEEP);
2401 2427 cp->cp_eax = 4;
2402 2428 cp->cp_ecx = i;
2403 2429
2404 2430 (void) __cpuid_insn(cp);
2405 2431 }
2406 2432 }
2407 2433 /*
2408 2434 * Determine the number of bits needed to represent
2409 2435 * the number of CPUs sharing the last level cache.
2410 2436 *
2411 2437 * Shift off that number of bits from the APIC id to
2412 2438 * derive the cache id.
2413 2439 */
2414 2440 shft = 0;
2415 2441 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1)
2416 2442 shft++;
2417 2443 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft;
2418 2444 }
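	/*
	 * Worked example with hypothetical values: if fn 4 reports that
	 * 8 threads share the last level cache, the loop above yields
	 * shft == 3, so APIC ids 0x10 through 0x17 all shift down to the
	 * same cache id (2) and are grouped onto one last level cache.
	 */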
2419 2445
2420 2446 /*
2421 2447 * Now fixup the brand string
2422 2448 */
2423 2449 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) {
2424 2450 fabricate_brandstr(cpi);
2425 2451 } else {
2426 2452
2427 2453 /*
2428 2454 * If we successfully extracted a brand string from the cpuid
2429 2455 * instruction, clean it up by removing leading spaces and
2430 2456 * similar junk.
2431 2457 */
2432 2458 if (cpi->cpi_brandstr[0]) {
2433 2459 size_t maxlen = sizeof (cpi->cpi_brandstr);
2434 2460 char *src, *dst;
2435 2461
2436 2462 dst = src = (char *)cpi->cpi_brandstr;
2437 2463 src[maxlen - 1] = '\0';
2438 2464 /*
2439 2465 * strip leading spaces
2440 2466 */
2441 2467 while (*src == ' ')
2442 2468 src++;
2443 2469 /*
2444 2470 			 * Remove any "Genuine" or "Authentic" prefixes
2445 2471 */
2446 2472 if (strncmp(src, "Genuine ", 8) == 0)
2447 2473 src += 8;
2448 2474 if (strncmp(src, "Authentic ", 10) == 0)
2449 2475 src += 10;
2450 2476
2451 2477 /*
2452 2478 * Now do an in-place copy.
2453 2479 * Map (R) to (r) and (TM) to (tm).
2454 2480 * The era of teletypes is long gone, and there's
2455 2481 * -really- no need to shout.
2456 2482 */
2457 2483 while (*src != '\0') {
2458 2484 if (src[0] == '(') {
2459 2485 if (strncmp(src + 1, "R)", 2) == 0) {
2460 2486 (void) strncpy(dst, "(r)", 3);
2461 2487 src += 3;
2462 2488 dst += 3;
2463 2489 continue;
2464 2490 }
2465 2491 if (strncmp(src + 1, "TM)", 3) == 0) {
2466 2492 (void) strncpy(dst, "(tm)", 4);
2467 2493 src += 4;
2468 2494 dst += 4;
2469 2495 continue;
2470 2496 }
2471 2497 }
2472 2498 *dst++ = *src++;
2473 2499 }
2474 2500 *dst = '\0';
2475 2501
2476 2502 /*
2477 2503 * Finally, remove any trailing spaces
2478 2504 */
2479 2505 while (--dst > cpi->cpi_brandstr)
2480 2506 if (*dst == ' ')
2481 2507 *dst = '\0';
2482 2508 else
2483 2509 break;
2484 2510 } else
2485 2511 fabricate_brandstr(cpi);
2486 2512 }
2487 2513 cpi->cpi_pass = 3;
2488 2514 }
2489 2515
2490 2516 /*
2491 2517 * This routine is called out of bind_hwcap() much later in the life
2492 2518 * of the kernel (post_startup()). The job of this routine is to resolve
2493 2519 * the hardware feature support and kernel support for those features into
2494 2520 * what we're actually going to tell applications via the aux vector.
2495 2521 */
2496 2522 uint_t
2497 2523 cpuid_pass4(cpu_t *cpu)
2498 2524 {
2499 2525 struct cpuid_info *cpi;
2500 2526 uint_t hwcap_flags = 0;
2501 2527
2502 2528 if (cpu == NULL)
2503 2529 cpu = CPU;
2504 2530 cpi = cpu->cpu_m.mcpu_cpi;
2505 2531
2506 2532 ASSERT(cpi->cpi_pass == 3);
2507 2533
2508 2534 if (cpi->cpi_maxeax >= 1) {
2509 2535 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES];
2510 2536 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES];
2511 2537
2512 2538 *edx = CPI_FEATURES_EDX(cpi);
2513 2539 *ecx = CPI_FEATURES_ECX(cpi);
2514 2540
2515 2541 /*
2516 2542 * [these require explicit kernel support]
2517 2543 */
2518 2544 if (!is_x86_feature(x86_featureset, X86FSET_SEP))
2519 2545 *edx &= ~CPUID_INTC_EDX_SEP;
2520 2546
2521 2547 if (!is_x86_feature(x86_featureset, X86FSET_SSE))
2522 2548 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE);
2523 2549 if (!is_x86_feature(x86_featureset, X86FSET_SSE2))
2524 2550 *edx &= ~CPUID_INTC_EDX_SSE2;
2525 2551
2526 2552 if (!is_x86_feature(x86_featureset, X86FSET_HTT))
2527 2553 *edx &= ~CPUID_INTC_EDX_HTT;
2528 2554
2529 2555 if (!is_x86_feature(x86_featureset, X86FSET_SSE3))
2530 2556 *ecx &= ~CPUID_INTC_ECX_SSE3;
2531 2557
2532 2558 if (!is_x86_feature(x86_featureset, X86FSET_SSSE3))
2533 2559 *ecx &= ~CPUID_INTC_ECX_SSSE3;
2534 2560 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_1))
2535 2561 *ecx &= ~CPUID_INTC_ECX_SSE4_1;
2536 2562 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_2))
2537 2563 *ecx &= ~CPUID_INTC_ECX_SSE4_2;
2538 2564 if (!is_x86_feature(x86_featureset, X86FSET_AES))
2539 2565 *ecx &= ~CPUID_INTC_ECX_AES;
2540 2566 if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ))
2541 2567 *ecx &= ~CPUID_INTC_ECX_PCLMULQDQ;
2542 2568 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE))
2543 2569 *ecx &= ~(CPUID_INTC_ECX_XSAVE |
2544 2570 CPUID_INTC_ECX_OSXSAVE);
2545 2571 if (!is_x86_feature(x86_featureset, X86FSET_AVX))
2546 2572 *ecx &= ~CPUID_INTC_ECX_AVX;
2547 2573
2548 2574 /*
2549 2575 * [no explicit support required beyond x87 fp context]
2550 2576 */
2551 2577 if (!fpu_exists)
2552 2578 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX);
2553 2579
2554 2580 /*
2555 2581 * Now map the supported feature vector to things that we
2556 2582 * think userland will care about.
2557 2583 */
2558 2584 if (*edx & CPUID_INTC_EDX_SEP)
2559 2585 hwcap_flags |= AV_386_SEP;
2560 2586 if (*edx & CPUID_INTC_EDX_SSE)
2561 2587 hwcap_flags |= AV_386_FXSR | AV_386_SSE;
2562 2588 if (*edx & CPUID_INTC_EDX_SSE2)
2563 2589 hwcap_flags |= AV_386_SSE2;
2564 2590 if (*ecx & CPUID_INTC_ECX_SSE3)
2565 2591 hwcap_flags |= AV_386_SSE3;
2566 2592 if (*ecx & CPUID_INTC_ECX_SSSE3)
2567 2593 hwcap_flags |= AV_386_SSSE3;
2568 2594 if (*ecx & CPUID_INTC_ECX_SSE4_1)
2569 2595 hwcap_flags |= AV_386_SSE4_1;
2570 2596 if (*ecx & CPUID_INTC_ECX_SSE4_2)
2571 2597 hwcap_flags |= AV_386_SSE4_2;
2572 2598 if (*ecx & CPUID_INTC_ECX_MOVBE)
2573 2599 hwcap_flags |= AV_386_MOVBE;
2574 2600 if (*ecx & CPUID_INTC_ECX_AES)
2575 2601 hwcap_flags |= AV_386_AES;
2576 2602 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ)
2577 2603 hwcap_flags |= AV_386_PCLMULQDQ;
2578 2604 if ((*ecx & CPUID_INTC_ECX_XSAVE) &&
2579 2605 (*ecx & CPUID_INTC_ECX_OSXSAVE))
2580 2606 hwcap_flags |= AV_386_XSAVE;
2581 2607 if (*ecx & CPUID_INTC_ECX_VMX)
2582 2608 hwcap_flags |= AV_386_VMX;
2583 2609 if (*ecx & CPUID_INTC_ECX_POPCNT)
2584 2610 hwcap_flags |= AV_386_POPCNT;
2585 2611 if (*edx & CPUID_INTC_EDX_FPU)
2586 2612 hwcap_flags |= AV_386_FPU;
2587 2613 if (*edx & CPUID_INTC_EDX_MMX)
2588 2614 hwcap_flags |= AV_386_MMX;
2589 2615
2590 2616 if (*edx & CPUID_INTC_EDX_TSC)
2591 2617 hwcap_flags |= AV_386_TSC;
2592 2618 if (*edx & CPUID_INTC_EDX_CX8)
2593 2619 hwcap_flags |= AV_386_CX8;
2594 2620 if (*edx & CPUID_INTC_EDX_CMOV)
2595 2621 hwcap_flags |= AV_386_CMOV;
2596 2622 if (*ecx & CPUID_INTC_ECX_CX16)
2597 2623 hwcap_flags |= AV_386_CX16;
2598 2624 }
2599 2625
2600 2626 if (cpi->cpi_xmaxeax < 0x80000001)
2601 2627 goto pass4_done;
2602 2628
2603 2629 switch (cpi->cpi_vendor) {
2604 2630 struct cpuid_regs cp;
2605 2631 uint32_t *edx, *ecx;
2606 2632
2607 2633 case X86_VENDOR_Intel:
2608 2634 /*
2609 2635 		 * Seems like Intel duplicated what was necessary
2610 2636 		 * here to make the initial crop of 64-bit OSes work.
2611 2637 * Hopefully, those are the only "extended" bits
2612 2638 * they'll add.
2613 2639 */
2614 2640 /*FALLTHROUGH*/
2615 2641
2616 2642 case X86_VENDOR_AMD:
2617 2643 edx = &cpi->cpi_support[AMD_EDX_FEATURES];
2618 2644 ecx = &cpi->cpi_support[AMD_ECX_FEATURES];
2619 2645
2620 2646 *edx = CPI_FEATURES_XTD_EDX(cpi);
2621 2647 *ecx = CPI_FEATURES_XTD_ECX(cpi);
2622 2648
2623 2649 /*
2624 2650 * [these features require explicit kernel support]
2625 2651 */
2626 2652 switch (cpi->cpi_vendor) {
2627 2653 case X86_VENDOR_Intel:
2628 2654 if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
2629 2655 *edx &= ~CPUID_AMD_EDX_TSCP;
2630 2656 break;
2631 2657
2632 2658 case X86_VENDOR_AMD:
2633 2659 if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
2634 2660 *edx &= ~CPUID_AMD_EDX_TSCP;
2635 2661 if (!is_x86_feature(x86_featureset, X86FSET_SSE4A))
2636 2662 *ecx &= ~CPUID_AMD_ECX_SSE4A;
2637 2663 break;
2638 2664
2639 2665 default:
2640 2666 break;
2641 2667 }
2642 2668
2643 2669 /*
2644 2670 * [no explicit support required beyond
2645 2671 * x87 fp context and exception handlers]
2646 2672 */
2647 2673 if (!fpu_exists)
2648 2674 *edx &= ~(CPUID_AMD_EDX_MMXamd |
2649 2675 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx);
2650 2676
2651 2677 if (!is_x86_feature(x86_featureset, X86FSET_NX))
2652 2678 *edx &= ~CPUID_AMD_EDX_NX;
2653 2679 #if !defined(__amd64)
2654 2680 *edx &= ~CPUID_AMD_EDX_LM;
2655 2681 #endif
2656 2682 /*
2657 2683 * Now map the supported feature vector to
2658 2684 * things that we think userland will care about.
2659 2685 */
2660 2686 #if defined(__amd64)
2661 2687 if (*edx & CPUID_AMD_EDX_SYSC)
2662 2688 hwcap_flags |= AV_386_AMD_SYSC;
2663 2689 #endif
2664 2690 if (*edx & CPUID_AMD_EDX_MMXamd)
2665 2691 hwcap_flags |= AV_386_AMD_MMX;
2666 2692 if (*edx & CPUID_AMD_EDX_3DNow)
2667 2693 hwcap_flags |= AV_386_AMD_3DNow;
2668 2694 if (*edx & CPUID_AMD_EDX_3DNowx)
2669 2695 hwcap_flags |= AV_386_AMD_3DNowx;
2670 2696 if (*ecx & CPUID_AMD_ECX_SVM)
2671 2697 hwcap_flags |= AV_386_AMD_SVM;
2672 2698
2673 2699 switch (cpi->cpi_vendor) {
2674 2700 case X86_VENDOR_AMD:
2675 2701 if (*edx & CPUID_AMD_EDX_TSCP)
2676 2702 hwcap_flags |= AV_386_TSCP;
2677 2703 if (*ecx & CPUID_AMD_ECX_AHF64)
2678 2704 hwcap_flags |= AV_386_AHF;
2679 2705 if (*ecx & CPUID_AMD_ECX_SSE4A)
2680 2706 hwcap_flags |= AV_386_AMD_SSE4A;
2681 2707 if (*ecx & CPUID_AMD_ECX_LZCNT)
2682 2708 hwcap_flags |= AV_386_AMD_LZCNT;
2683 2709 break;
2684 2710
2685 2711 case X86_VENDOR_Intel:
2686 2712 if (*edx & CPUID_AMD_EDX_TSCP)
2687 2713 hwcap_flags |= AV_386_TSCP;
2688 2714 /*
2689 2715 * Aarrgh.
2690 2716 * Intel uses a different bit in the same word.
2691 2717 */
2692 2718 if (*ecx & CPUID_INTC_ECX_AHF64)
2693 2719 hwcap_flags |= AV_386_AHF;
2694 2720 break;
2695 2721
2696 2722 default:
2697 2723 break;
2698 2724 }
2699 2725 break;
2700 2726
2701 2727 case X86_VENDOR_TM:
2702 2728 cp.cp_eax = 0x80860001;
2703 2729 (void) __cpuid_insn(&cp);
2704 2730 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx;
2705 2731 break;
2706 2732
2707 2733 default:
2708 2734 break;
2709 2735 }
2710 2736
2711 2737 pass4_done:
2712 2738 cpi->cpi_pass = 4;
2713 2739 return (hwcap_flags);
2714 2740 }
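The hwcap bits assembled by cpuid_pass4() reach applications through the AT_SUN_HWCAP aux vector entry. Below is a minimal user-space sketch, not part of this file, assuming the getisax(2) interface and the AV_386_* definitions from <sys/auxv_386.h>.

#include <sys/auxv.h>
#include <sys/auxv_386.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t ui = 0;

	(void) getisax(&ui, 1);		/* first word of AT_SUN_HWCAP */
	(void) printf("SSE2: %s\n", (ui & AV_386_SSE2) ? "yes" : "no");
	(void) printf("AES:  %s\n", (ui & AV_386_AES) ? "yes" : "no");
	return (0);
}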
2715 2741
2716 2742
2717 2743 /*
2718 2744 * Simulate the cpuid instruction using the data we previously
2719 2745 * captured about this CPU. We try our best to return the truth
2720 2746 * about the hardware, independently of kernel support.
2721 2747 */
2722 2748 uint32_t
2723 2749 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp)
2724 2750 {
2725 2751 struct cpuid_info *cpi;
2726 2752 struct cpuid_regs *xcp;
2727 2753
2728 2754 if (cpu == NULL)
2729 2755 cpu = CPU;
2730 2756 cpi = cpu->cpu_m.mcpu_cpi;
2731 2757
2732 2758 ASSERT(cpuid_checkpass(cpu, 3));
2733 2759
2734 2760 /*
2735 2761 * CPUID data is cached in two separate places: cpi_std for standard
2736 2762 * CPUID functions, and cpi_extd for extended CPUID functions.
2737 2763 */
2738 2764 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD)
2739 2765 xcp = &cpi->cpi_std[cp->cp_eax];
2740 2766 else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax &&
2741 2767 cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD)
2742 2768 xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000];
2743 2769 else
2744 2770 /*
2745 2771 * The caller is asking for data from an input parameter which
2746 2772 * the kernel has not cached. In this case we go fetch from
2747 2773 * the hardware and return the data directly to the user.
2748 2774 */
2749 2775 return (__cpuid_insn(cp));
2750 2776
2751 2777 cp->cp_eax = xcp->cp_eax;
2752 2778 cp->cp_ebx = xcp->cp_ebx;
2753 2779 cp->cp_ecx = xcp->cp_ecx;
2754 2780 cp->cp_edx = xcp->cp_edx;
2755 2781 return (cp->cp_eax);
2756 2782 }
2757 2783
2758 2784 int
2759 2785 cpuid_checkpass(cpu_t *cpu, int pass)
2760 2786 {
2761 2787 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL &&
2762 2788 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass);
2763 2789 }
2764 2790
2765 2791 int
2766 2792 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n)
2767 2793 {
2768 2794 ASSERT(cpuid_checkpass(cpu, 3));
2769 2795
2770 2796 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr));
2771 2797 }
2772 2798
2773 2799 int
2774 2800 cpuid_is_cmt(cpu_t *cpu)
2775 2801 {
2776 2802 if (cpu == NULL)
2777 2803 cpu = CPU;
2778 2804
2779 2805 ASSERT(cpuid_checkpass(cpu, 1));
2780 2806
2781 2807 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0);
2782 2808 }
2783 2809
2784 2810 /*
2785 2811 * AMD and Intel both implement the 64-bit variant of the syscall
2786 2812 * instruction (syscallq), so if there's -any- support for syscall,
2787 2813 * cpuid currently says "yes, we support this".
2788 2814 *
2789 2815 * However, Intel decided to -not- implement the 32-bit variant of the
2790 2816 * syscall instruction, so we provide a predicate to allow our caller
2791 2817 * to test that subtlety here.
2792 2818 *
2793 2819 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor,
2794 2820 * even in the case where the hardware would in fact support it.
2795 2821 */
2796 2822 /*ARGSUSED*/
2797 2823 int
2798 2824 cpuid_syscall32_insn(cpu_t *cpu)
2799 2825 {
2800 2826 ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1));
2801 2827
2802 2828 #if !defined(__xpv)
2803 2829 if (cpu == NULL)
2804 2830 cpu = CPU;
2805 2831
2806 2832 /*CSTYLED*/
2807 2833 {
2808 2834 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2809 2835
2810 2836 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
2811 2837 cpi->cpi_xmaxeax >= 0x80000001 &&
2812 2838 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC))
2813 2839 return (1);
2814 2840 }
2815 2841 #endif
2816 2842 return (0);
2817 2843 }
2818 2844
2819 2845 int
2820 2846 cpuid_getidstr(cpu_t *cpu, char *s, size_t n)
2821 2847 {
2822 2848 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2823 2849
2824 2850 static const char fmt[] =
2825 2851 "x86 (%s %X family %d model %d step %d clock %d MHz)";
2826 2852 static const char fmt_ht[] =
2827 2853 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)";
2828 2854
2829 2855 ASSERT(cpuid_checkpass(cpu, 1));
2830 2856
2831 2857 if (cpuid_is_cmt(cpu))
2832 2858 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid,
2833 2859 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
2834 2860 cpi->cpi_family, cpi->cpi_model,
2835 2861 cpi->cpi_step, cpu->cpu_type_info.pi_clock));
2836 2862 return (snprintf(s, n, fmt,
2837 2863 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
2838 2864 cpi->cpi_family, cpi->cpi_model,
2839 2865 cpi->cpi_step, cpu->cpu_type_info.pi_clock));
2840 2866 }
2841 2867
2842 2868 const char *
2843 2869 cpuid_getvendorstr(cpu_t *cpu)
2844 2870 {
2845 2871 ASSERT(cpuid_checkpass(cpu, 1));
2846 2872 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr);
2847 2873 }
2848 2874
2849 2875 uint_t
2850 2876 cpuid_getvendor(cpu_t *cpu)
2851 2877 {
2852 2878 ASSERT(cpuid_checkpass(cpu, 1));
2853 2879 return (cpu->cpu_m.mcpu_cpi->cpi_vendor);
2854 2880 }
2855 2881
2856 2882 uint_t
2857 2883 cpuid_getfamily(cpu_t *cpu)
2858 2884 {
2859 2885 ASSERT(cpuid_checkpass(cpu, 1));
2860 2886 return (cpu->cpu_m.mcpu_cpi->cpi_family);
2861 2887 }
2862 2888
2863 2889 uint_t
2864 2890 cpuid_getmodel(cpu_t *cpu)
2865 2891 {
2866 2892 ASSERT(cpuid_checkpass(cpu, 1));
2867 2893 return (cpu->cpu_m.mcpu_cpi->cpi_model);
2868 2894 }
2869 2895
2870 2896 uint_t
2871 2897 cpuid_get_ncpu_per_chip(cpu_t *cpu)
2872 2898 {
2873 2899 ASSERT(cpuid_checkpass(cpu, 1));
2874 2900 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip);
2875 2901 }
2876 2902
2877 2903 uint_t
2878 2904 cpuid_get_ncore_per_chip(cpu_t *cpu)
2879 2905 {
2880 2906 ASSERT(cpuid_checkpass(cpu, 1));
2881 2907 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip);
2882 2908 }
2883 2909
2884 2910 uint_t
2885 2911 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu)
2886 2912 {
2887 2913 ASSERT(cpuid_checkpass(cpu, 2));
2888 2914 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache);
2889 2915 }
2890 2916
2891 2917 id_t
2892 2918 cpuid_get_last_lvl_cacheid(cpu_t *cpu)
2893 2919 {
2894 2920 ASSERT(cpuid_checkpass(cpu, 2));
2895 2921 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
2896 2922 }
2897 2923
2898 2924 uint_t
2899 2925 cpuid_getstep(cpu_t *cpu)
2900 2926 {
2901 2927 ASSERT(cpuid_checkpass(cpu, 1));
2902 2928 return (cpu->cpu_m.mcpu_cpi->cpi_step);
2903 2929 }
2904 2930
2905 2931 uint_t
2906 2932 cpuid_getsig(struct cpu *cpu)
2907 2933 {
2908 2934 ASSERT(cpuid_checkpass(cpu, 1));
2909 2935 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax);
2910 2936 }
2911 2937
2912 2938 uint32_t
2913 2939 cpuid_getchiprev(struct cpu *cpu)
2914 2940 {
2915 2941 ASSERT(cpuid_checkpass(cpu, 1));
2916 2942 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev);
2917 2943 }
2918 2944
2919 2945 const char *
2920 2946 cpuid_getchiprevstr(struct cpu *cpu)
2921 2947 {
2922 2948 ASSERT(cpuid_checkpass(cpu, 1));
2923 2949 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr);
2924 2950 }
2925 2951
2926 2952 uint32_t
2927 2953 cpuid_getsockettype(struct cpu *cpu)
2928 2954 {
2929 2955 ASSERT(cpuid_checkpass(cpu, 1));
2930 2956 return (cpu->cpu_m.mcpu_cpi->cpi_socket);
2931 2957 }
2932 2958
2933 2959 const char *
2934 2960 cpuid_getsocketstr(cpu_t *cpu)
2935 2961 {
2936 2962 static const char *socketstr = NULL;
2937 2963 struct cpuid_info *cpi;
2938 2964
2939 2965 ASSERT(cpuid_checkpass(cpu, 1));
2940 2966 cpi = cpu->cpu_m.mcpu_cpi;
2941 2967
2942 2968 /* Assume that socket types are the same across the system */
2943 2969 if (socketstr == NULL)
2944 2970 socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family,
2945 2971 cpi->cpi_model, cpi->cpi_step);
2946 2972
2947 2973
2948 2974 return (socketstr);
2949 2975 }
2950 2976
2951 2977 int
2952 2978 cpuid_get_chipid(cpu_t *cpu)
2953 2979 {
2954 2980 ASSERT(cpuid_checkpass(cpu, 1));
2955 2981
2956 2982 if (cpuid_is_cmt(cpu))
2957 2983 return (cpu->cpu_m.mcpu_cpi->cpi_chipid);
2958 2984 return (cpu->cpu_id);
2959 2985 }
2960 2986
2961 2987 id_t
2962 2988 cpuid_get_coreid(cpu_t *cpu)
2963 2989 {
2964 2990 ASSERT(cpuid_checkpass(cpu, 1));
2965 2991 return (cpu->cpu_m.mcpu_cpi->cpi_coreid);
2966 2992 }
2967 2993
2968 2994 int
2969 2995 cpuid_get_pkgcoreid(cpu_t *cpu)
2970 2996 {
2971 2997 ASSERT(cpuid_checkpass(cpu, 1));
2972 2998 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid);
2973 2999 }
2974 3000
2975 3001 int
2976 3002 cpuid_get_clogid(cpu_t *cpu)
2977 3003 {
2978 3004 ASSERT(cpuid_checkpass(cpu, 1));
2979 3005 return (cpu->cpu_m.mcpu_cpi->cpi_clogid);
2980 3006 }
2981 3007
2982 3008 int
2983 3009 cpuid_get_cacheid(cpu_t *cpu)
2984 3010 {
2985 3011 ASSERT(cpuid_checkpass(cpu, 1));
2986 3012 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
2987 3013 }
2988 3014
2989 3015 uint_t
2990 3016 cpuid_get_procnodeid(cpu_t *cpu)
2991 3017 {
2992 3018 ASSERT(cpuid_checkpass(cpu, 1));
2993 3019 return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid);
2994 3020 }
2995 3021
2996 3022 uint_t
2997 3023 cpuid_get_procnodes_per_pkg(cpu_t *cpu)
2998 3024 {
2999 3025 ASSERT(cpuid_checkpass(cpu, 1));
3000 3026 return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg);
3001 3027 }
3002 3028
3029 +uint_t
3030 +cpuid_get_compunitid(cpu_t *cpu)
3031 +{
3032 + ASSERT(cpuid_checkpass(cpu, 1));
3033 + return (cpu->cpu_m.mcpu_cpi->cpi_compunitid);
3034 +}
3035 +
3036 +uint_t
3037 +cpuid_get_cores_per_compunit(cpu_t *cpu)
3038 +{
3039 + ASSERT(cpuid_checkpass(cpu, 1));
3040 + return (cpu->cpu_m.mcpu_cpi->cpi_cores_per_compunit);
3041 +}
3042 +
3003 3043 /*ARGSUSED*/
3004 3044 int
3005 3045 cpuid_have_cr8access(cpu_t *cpu)
3006 3046 {
3007 3047 #if defined(__amd64)
3008 3048 return (1);
3009 3049 #else
3010 3050 struct cpuid_info *cpi;
3011 3051
3012 3052 ASSERT(cpu != NULL);
3013 3053 cpi = cpu->cpu_m.mcpu_cpi;
3014 3054 if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 &&
3015 3055 (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0)
3016 3056 return (1);
3017 3057 return (0);
3018 3058 #endif
3019 3059 }
3020 3060
3021 3061 uint32_t
3022 3062 cpuid_get_apicid(cpu_t *cpu)
3023 3063 {
3024 3064 ASSERT(cpuid_checkpass(cpu, 1));
3025 3065 if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) {
3026 3066 return (UINT32_MAX);
3027 3067 } else {
3028 3068 return (cpu->cpu_m.mcpu_cpi->cpi_apicid);
3029 3069 }
3030 3070 }
3031 3071
3032 3072 void
3033 3073 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits)
3034 3074 {
3035 3075 struct cpuid_info *cpi;
3036 3076
3037 3077 if (cpu == NULL)
3038 3078 cpu = CPU;
3039 3079 cpi = cpu->cpu_m.mcpu_cpi;
3040 3080
3041 3081 ASSERT(cpuid_checkpass(cpu, 1));
3042 3082
3043 3083 if (pabits)
3044 3084 *pabits = cpi->cpi_pabits;
3045 3085 if (vabits)
3046 3086 *vabits = cpi->cpi_vabits;
3047 3087 }
3048 3088
3049 3089 /*
3050 3090 * Returns the number of data TLB entries for a corresponding
3051 3091 * pagesize. If it can't be computed, or isn't known, the
3052 3092 * routine returns zero. If you ask about an architecturally
3053 3093 * impossible pagesize, the routine will panic (so that the
3054 3094  * hat implementor knows that things are inconsistent).
3055 3095 */
3056 3096 uint_t
3057 3097 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize)
3058 3098 {
3059 3099 struct cpuid_info *cpi;
3060 3100 uint_t dtlb_nent = 0;
3061 3101
3062 3102 if (cpu == NULL)
3063 3103 cpu = CPU;
3064 3104 cpi = cpu->cpu_m.mcpu_cpi;
3065 3105
3066 3106 ASSERT(cpuid_checkpass(cpu, 1));
3067 3107
3068 3108 /*
3069 3109 * Check the L2 TLB info
3070 3110 */
3071 3111 if (cpi->cpi_xmaxeax >= 0x80000006) {
3072 3112 struct cpuid_regs *cp = &cpi->cpi_extd[6];
3073 3113
3074 3114 switch (pagesize) {
3075 3115
3076 3116 case 4 * 1024:
3077 3117 /*
3078 3118 * All zero in the top 16 bits of the register
3079 3119 * indicates a unified TLB. Size is in low 16 bits.
3080 3120 */
3081 3121 if ((cp->cp_ebx & 0xffff0000) == 0)
3082 3122 dtlb_nent = cp->cp_ebx & 0x0000ffff;
3083 3123 else
3084 3124 dtlb_nent = BITX(cp->cp_ebx, 27, 16);
3085 3125 break;
3086 3126
3087 3127 case 2 * 1024 * 1024:
3088 3128 if ((cp->cp_eax & 0xffff0000) == 0)
3089 3129 dtlb_nent = cp->cp_eax & 0x0000ffff;
3090 3130 else
3091 3131 dtlb_nent = BITX(cp->cp_eax, 27, 16);
3092 3132 break;
3093 3133
3094 3134 default:
3095 3135 panic("unknown L2 pagesize");
3096 3136 /*NOTREACHED*/
3097 3137 }
3098 3138 }
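	/*
	 * Worked example (hypothetical register value): if the L2 TLB
	 * leaf returned %ebx == 0x04004020, the upper 16 bits are
	 * non-zero, so the TLB is split and the 4K data-TLB entry count
	 * is BITX(0x04004020, 27, 16) == 0x400, i.e. 1024 entries.
	 */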
3099 3139
3100 3140 if (dtlb_nent != 0)
3101 3141 return (dtlb_nent);
3102 3142
3103 3143 /*
3104 3144 * No L2 TLB support for this size, try L1.
3105 3145 */
3106 3146 if (cpi->cpi_xmaxeax >= 0x80000005) {
3107 3147 struct cpuid_regs *cp = &cpi->cpi_extd[5];
3108 3148
3109 3149 switch (pagesize) {
3110 3150 case 4 * 1024:
3111 3151 dtlb_nent = BITX(cp->cp_ebx, 23, 16);
3112 3152 break;
3113 3153 case 2 * 1024 * 1024:
3114 3154 dtlb_nent = BITX(cp->cp_eax, 23, 16);
3115 3155 break;
3116 3156 default:
3117 3157 panic("unknown L1 d-TLB pagesize");
3118 3158 /*NOTREACHED*/
3119 3159 }
3120 3160 }
3121 3161
3122 3162 return (dtlb_nent);
3123 3163 }
3124 3164
3125 3165 /*
3126 3166 * Return 0 if the erratum is not present or not applicable, positive
3127 3167 * if it is, and negative if the status of the erratum is unknown.
3128 3168 *
3129 3169 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm)
3130 3170 * Processors" #25759, Rev 3.57, August 2005
3131 3171 */
3132 3172 int
3133 3173 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum)
3134 3174 {
3135 3175 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
3136 3176 uint_t eax;
3137 3177
3138 3178 /*
3139 3179 * Bail out if this CPU isn't an AMD CPU, or if it's
3140 3180 * a legacy (32-bit) AMD CPU.
3141 3181 */
3142 3182 if (cpi->cpi_vendor != X86_VENDOR_AMD ||
3143 3183 cpi->cpi_family == 4 || cpi->cpi_family == 5 ||
3144 3184 cpi->cpi_family == 6)
3145 3185
3146 3186 return (0);
3147 3187
3148 3188 eax = cpi->cpi_std[1].cp_eax;
3149 3189
3150 3190 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50)
3151 3191 #define SH_B3(eax) (eax == 0xf51)
3152 3192 #define B(eax) (SH_B0(eax) || SH_B3(eax))
3153 3193
3154 3194 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58)
3155 3195
3156 3196 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a)
3157 3197 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0)
3158 3198 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2)
3159 3199 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax))
3160 3200
3161 3201 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70)
3162 3202 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0)
3163 3203 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0)
3164 3204 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax))
3165 3205
3166 3206 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70)
3167 3207 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */
3168 3208 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0)
3169 3209 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71)
3170 3210 #define BH_E4(eax) (eax == 0x20fb1)
3171 3211 #define SH_E5(eax) (eax == 0x20f42)
3172 3212 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2)
3173 3213 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32)
3174 3214 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \
3175 3215 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \
3176 3216 DH_E6(eax) || JH_E6(eax))
3177 3217
3178 3218 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02)
3179 3219 #define DR_B0(eax) (eax == 0x100f20)
3180 3220 #define DR_B1(eax) (eax == 0x100f21)
3181 3221 #define DR_BA(eax) (eax == 0x100f2a)
3182 3222 #define DR_B2(eax) (eax == 0x100f22)
3183 3223 #define DR_B3(eax) (eax == 0x100f23)
3184 3224 #define RB_C0(eax) (eax == 0x100f40)
3185 3225
3186 3226 switch (erratum) {
3187 3227 case 1:
3188 3228 return (cpi->cpi_family < 0x10);
3189 3229 case 51: /* what does the asterisk mean? */
3190 3230 return (B(eax) || SH_C0(eax) || CG(eax));
3191 3231 case 52:
3192 3232 return (B(eax));
3193 3233 case 57:
3194 3234 return (cpi->cpi_family <= 0x11);
3195 3235 case 58:
3196 3236 return (B(eax));
3197 3237 case 60:
3198 3238 return (cpi->cpi_family <= 0x11);
3199 3239 case 61:
3200 3240 case 62:
3201 3241 case 63:
3202 3242 case 64:
3203 3243 case 65:
3204 3244 case 66:
3205 3245 case 68:
3206 3246 case 69:
3207 3247 case 70:
3208 3248 case 71:
3209 3249 return (B(eax));
3210 3250 case 72:
3211 3251 return (SH_B0(eax));
3212 3252 case 74:
3213 3253 return (B(eax));
3214 3254 case 75:
3215 3255 return (cpi->cpi_family < 0x10);
3216 3256 case 76:
3217 3257 return (B(eax));
3218 3258 case 77:
3219 3259 return (cpi->cpi_family <= 0x11);
3220 3260 case 78:
3221 3261 return (B(eax) || SH_C0(eax));
3222 3262 case 79:
3223 3263 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3224 3264 case 80:
3225 3265 case 81:
3226 3266 case 82:
3227 3267 return (B(eax));
3228 3268 case 83:
3229 3269 return (B(eax) || SH_C0(eax) || CG(eax));
3230 3270 case 85:
3231 3271 return (cpi->cpi_family < 0x10);
3232 3272 case 86:
3233 3273 return (SH_C0(eax) || CG(eax));
3234 3274 case 88:
3235 3275 #if !defined(__amd64)
3236 3276 return (0);
3237 3277 #else
3238 3278 return (B(eax) || SH_C0(eax));
3239 3279 #endif
3240 3280 case 89:
3241 3281 return (cpi->cpi_family < 0x10);
3242 3282 case 90:
3243 3283 return (B(eax) || SH_C0(eax) || CG(eax));
3244 3284 case 91:
3245 3285 case 92:
3246 3286 return (B(eax) || SH_C0(eax));
3247 3287 case 93:
3248 3288 return (SH_C0(eax));
3249 3289 case 94:
3250 3290 return (B(eax) || SH_C0(eax) || CG(eax));
3251 3291 case 95:
3252 3292 #if !defined(__amd64)
3253 3293 return (0);
3254 3294 #else
3255 3295 return (B(eax) || SH_C0(eax));
3256 3296 #endif
3257 3297 case 96:
3258 3298 return (B(eax) || SH_C0(eax) || CG(eax));
3259 3299 case 97:
3260 3300 case 98:
3261 3301 return (SH_C0(eax) || CG(eax));
3262 3302 case 99:
3263 3303 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3264 3304 case 100:
3265 3305 return (B(eax) || SH_C0(eax));
3266 3306 case 101:
3267 3307 case 103:
3268 3308 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3269 3309 case 104:
3270 3310 return (SH_C0(eax) || CG(eax) || D0(eax));
3271 3311 case 105:
3272 3312 case 106:
3273 3313 case 107:
3274 3314 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3275 3315 case 108:
3276 3316 return (DH_CG(eax));
3277 3317 case 109:
3278 3318 return (SH_C0(eax) || CG(eax) || D0(eax));
3279 3319 case 110:
3280 3320 return (D0(eax) || EX(eax));
3281 3321 case 111:
3282 3322 return (CG(eax));
3283 3323 case 112:
3284 3324 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3285 3325 case 113:
3286 3326 return (eax == 0x20fc0);
3287 3327 case 114:
3288 3328 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
3289 3329 case 115:
3290 3330 return (SH_E0(eax) || JH_E1(eax));
3291 3331 case 116:
3292 3332 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
3293 3333 case 117:
3294 3334 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3295 3335 case 118:
3296 3336 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) ||
3297 3337 JH_E6(eax));
3298 3338 case 121:
3299 3339 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3300 3340 case 122:
3301 3341 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11);
3302 3342 case 123:
3303 3343 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax));
3304 3344 case 131:
3305 3345 return (cpi->cpi_family < 0x10);
3306 3346 case 6336786:
3307 3347 /*
3308 3348 * Test for AdvPowerMgmtInfo.TscPStateInvariant
3309 3349 * if this is a K8 family or newer processor
3310 3350 */
3311 3351 if (CPI_FAMILY(cpi) == 0xf) {
3312 3352 struct cpuid_regs regs;
3313 3353 regs.cp_eax = 0x80000007;
3314 3354 			(void) __cpuid_insn(&regs);
3315 3355 return (!(regs.cp_edx & 0x100));
3316 3356 }
3317 3357 return (0);
3318 3358 case 6323525:
3319 3359 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) |
3320 3360 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40);
3321 3361
3322 3362 case 6671130:
3323 3363 /*
3324 3364 * check for processors (pre-Shanghai) that do not provide
3325 3365 * optimal management of 1gb ptes in its tlb.
3326 3366 		 * optimal management of 1GB PTEs in their TLBs.
3327 3367 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4);
3328 3368
3329 3369 case 298:
3330 3370 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) ||
3331 3371 DR_B2(eax) || RB_C0(eax));
3332 3372
3333 3373 case 721:
3334 3374 #if defined(__amd64)
3335 3375 return (cpi->cpi_family == 0x10 || cpi->cpi_family == 0x12);
3336 3376 #else
3337 3377 return (0);
3338 3378 #endif
3339 3379
3340 3380 default:
3341 3381 return (-1);
3342 3382
3343 3383 }
3344 3384 }
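A hypothetical caller (the pattern below is illustrative, not lifted from this file) gates its workaround on the tri-state return described above:

	int e = cpuid_opteron_erratum(cpu, 122);	/* erratum # is illustrative */

	if (e > 0) {
		/* erratum applies to this chip revision: apply workaround */
	} else if (e < 0) {
		/* status unknown: decide whether to be conservative */
	}
	/* e == 0: not present or not applicable; nothing to do */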
3345 3385
3346 3386 /*
3347 3387 * Determine if specified erratum is present via OSVW (OS Visible Workaround).
3348 3388 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate.
3349 3389 */
3350 3390 int
3351 3391 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum)
3352 3392 {
3353 3393 struct cpuid_info *cpi;
3354 3394 uint_t osvwid;
3355 3395 static int osvwfeature = -1;
3356 3396 uint64_t osvwlength;
3357 3397
3358 3398
3359 3399 cpi = cpu->cpu_m.mcpu_cpi;
3360 3400
3361 3401 /* confirm OSVW supported */
3362 3402 if (osvwfeature == -1) {
3363 3403 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW;
3364 3404 } else {
3365 3405 /* assert that osvw feature setting is consistent on all cpus */
3366 3406 ASSERT(osvwfeature ==
3367 3407 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW));
3368 3408 }
3369 3409 if (!osvwfeature)
3370 3410 return (-1);
3371 3411
3372 3412 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK;
3373 3413
3374 3414 switch (erratum) {
3375 3415 case 298: /* osvwid is 0 */
3376 3416 osvwid = 0;
3377 3417 if (osvwlength <= (uint64_t)osvwid) {
3378 3418 /* osvwid 0 is unknown */
3379 3419 return (-1);
3380 3420 }
3381 3421
3382 3422 /*
3383 3423 * Check the OSVW STATUS MSR to determine the state
3384 3424 * of the erratum where:
3385 3425 * 0 - fixed by HW
3386 3426 * 1 - BIOS has applied the workaround when BIOS
3387 3427 * workaround is available. (Or for other errata,
3388 3428 * OS workaround is required.)
3389 3429 * For a value of 1, caller will confirm that the
3390 3430 * erratum 298 workaround has indeed been applied by BIOS.
3391 3431 *
3392 3432 * In a mixed-cpu system, a 1 may also be set on cpus that
3393 3433 * have a HW fix. Regarding erratum 298:
3394 3434 * In a multiprocessor platform, the workaround above
3395 3435 * should be applied to all processors regardless of
3396 3436 * silicon revision when an affected processor is
3397 3437 * present.
3398 3438 */
3399 3439
3400 3440 return (rdmsr(MSR_AMD_OSVW_STATUS +
3401 3441 (osvwid / OSVW_ID_CNT_PER_MSR)) &
3402 3442 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR)));
3403 3443
3404 3444 default:
3405 3445 return (-1);
3406 3446 }
3407 3447 }
3408 3448
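A minimal caller sketch, not part of the reviewed change: it shows how the OSVW query above would typically be combined with the revision-matching switch earlier in this file (assumed here to be cpuid_opteron_erratum()). The helper name opteron_has_erratum_298() is invented for illustration.

static int
opteron_has_erratum_298(cpu_t *cpu)
{
	int osvw = osvw_opteron_erratum(cpu, 298);

	if (osvw >= 0) {
		/*
		 * Definite OSVW answer: 0 means fixed in HW; nonzero
		 * means the caller still has to confirm that the BIOS
		 * workaround was applied, per the comment above.
		 */
		return (osvw != 0);
	}

	/* OSVW indeterminate: fall back to silicon-revision matching. */
	return (cpuid_opteron_erratum(cpu, 298) > 0);
}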
3409 3449 static const char assoc_str[] = "associativity";
3410 3450 static const char line_str[] = "line-size";
3411 3451 static const char size_str[] = "size";
3412 3452
3413 3453 static void
3414 3454 add_cache_prop(dev_info_t *devi, const char *label, const char *type,
3415 3455 uint32_t val)
3416 3456 {
3417 3457 char buf[128];
3418 3458
3419 3459 /*
3420 3460 * ndi_prop_update_int() is used because it is desirable for
3421 3461 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set.
3422 3462 */
3423 3463 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf))
3424 3464 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val);
3425 3465 }
3426 3466
3427 3467 /*
3428 3468 * Intel-style cache/tlb description
3429 3469 *
3430 3470 * Standard cpuid level 2 gives a randomly ordered
3431 3471 * selection of tags that index into a table that describes
3432 3472 * cache and tlb properties.
3433 3473 */
3434 3474
3435 3475 static const char l1_icache_str[] = "l1-icache";
3436 3476 static const char l1_dcache_str[] = "l1-dcache";
3437 3477 static const char l2_cache_str[] = "l2-cache";
3438 3478 static const char l3_cache_str[] = "l3-cache";
3439 3479 static const char itlb4k_str[] = "itlb-4K";
3440 3480 static const char dtlb4k_str[] = "dtlb-4K";
3441 3481 static const char itlb2M_str[] = "itlb-2M";
3442 3482 static const char itlb4M_str[] = "itlb-4M";
3443 3483 static const char dtlb4M_str[] = "dtlb-4M";
3444 3484 static const char dtlb24_str[] = "dtlb0-2M-4M";
3445 3485 static const char itlb424_str[] = "itlb-4K-2M-4M";
3446 3486 static const char itlb24_str[] = "itlb-2M-4M";
3447 3487 static const char dtlb44_str[] = "dtlb-4K-4M";
3448 3488 static const char sl1_dcache_str[] = "sectored-l1-dcache";
3449 3489 static const char sl2_cache_str[] = "sectored-l2-cache";
3450 3490 static const char itrace_str[] = "itrace-cache";
3451 3491 static const char sl3_cache_str[] = "sectored-l3-cache";
3452 3492 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k";
3453 3493
3454 3494 static const struct cachetab {
3455 3495 uint8_t ct_code;
3456 3496 uint8_t ct_assoc;
3457 3497 uint16_t ct_line_size;
3458 3498 size_t ct_size;
3459 3499 const char *ct_label;
3460 3500 } intel_ctab[] = {
3461 3501 /*
3462 3502 * maintain descending order!
3463 3503 *
3464 3504 * Codes ignored - Reason
3465 3505 * ----------------------
3466 3506 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache
3467 3507 * f0H/f1H - Currently we do not interpret prefetch size by design
3468 3508 */
3469 3509 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str},
3470 3510 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str},
3471 3511 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str},
3472 3512 { 0xde, 12, 64, 6*1024*1024, l3_cache_str},
3473 3513 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str},
3474 3514 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str},
3475 3515 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str},
3476 3516 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str},
3477 3517 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str},
3478 3518 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str},
3479 3519 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str},
3480 3520 { 0xd0, 4, 64, 512*1024, l3_cache_str},
3481 3521 { 0xca, 4, 0, 512, sh_l2_tlb4k_str},
3482 3522 { 0xc0, 4, 0, 8, dtlb44_str },
3483 3523 { 0xba, 4, 0, 64, dtlb4k_str },
3484 3524 { 0xb4, 4, 0, 256, dtlb4k_str },
3485 3525 { 0xb3, 4, 0, 128, dtlb4k_str },
3486 3526 { 0xb2, 4, 0, 64, itlb4k_str },
3487 3527 { 0xb0, 4, 0, 128, itlb4k_str },
3488 3528 { 0x87, 8, 64, 1024*1024, l2_cache_str},
3489 3529 { 0x86, 4, 64, 512*1024, l2_cache_str},
3490 3530 { 0x85, 8, 32, 2*1024*1024, l2_cache_str},
3491 3531 { 0x84, 8, 32, 1024*1024, l2_cache_str},
3492 3532 { 0x83, 8, 32, 512*1024, l2_cache_str},
3493 3533 { 0x82, 8, 32, 256*1024, l2_cache_str},
3494 3534 { 0x80, 8, 64, 512*1024, l2_cache_str},
3495 3535 { 0x7f, 2, 64, 512*1024, l2_cache_str},
3496 3536 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str},
3497 3537 { 0x7c, 8, 64, 1024*1024, sl2_cache_str},
3498 3538 { 0x7b, 8, 64, 512*1024, sl2_cache_str},
3499 3539 { 0x7a, 8, 64, 256*1024, sl2_cache_str},
3500 3540 { 0x79, 8, 64, 128*1024, sl2_cache_str},
3501 3541 { 0x78, 8, 64, 1024*1024, l2_cache_str},
3502 3542 { 0x73, 8, 0, 64*1024, itrace_str},
3503 3543 { 0x72, 8, 0, 32*1024, itrace_str},
3504 3544 { 0x71, 8, 0, 16*1024, itrace_str},
3505 3545 { 0x70, 8, 0, 12*1024, itrace_str},
3506 3546 { 0x68, 4, 64, 32*1024, sl1_dcache_str},
3507 3547 { 0x67, 4, 64, 16*1024, sl1_dcache_str},
3508 3548 { 0x66, 4, 64, 8*1024, sl1_dcache_str},
3509 3549 { 0x60, 8, 64, 16*1024, sl1_dcache_str},
3510 3550 { 0x5d, 0, 0, 256, dtlb44_str},
3511 3551 { 0x5c, 0, 0, 128, dtlb44_str},
3512 3552 { 0x5b, 0, 0, 64, dtlb44_str},
3513 3553 { 0x5a, 4, 0, 32, dtlb24_str},
3514 3554 { 0x59, 0, 0, 16, dtlb4k_str},
3515 3555 { 0x57, 4, 0, 16, dtlb4k_str},
3516 3556 { 0x56, 4, 0, 16, dtlb4M_str},
3517 3557 { 0x55, 0, 0, 7, itlb24_str},
3518 3558 { 0x52, 0, 0, 256, itlb424_str},
3519 3559 { 0x51, 0, 0, 128, itlb424_str},
3520 3560 { 0x50, 0, 0, 64, itlb424_str},
3521 3561 { 0x4f, 0, 0, 32, itlb4k_str},
3522 3562 { 0x4e, 24, 64, 6*1024*1024, l2_cache_str},
3523 3563 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str},
3524 3564 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str},
3525 3565 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str},
3526 3566 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str},
3527 3567 { 0x49, 16, 64, 4*1024*1024, l3_cache_str},
3528 3568 { 0x48, 12, 64, 3*1024*1024, l2_cache_str},
3529 3569 { 0x47, 8, 64, 8*1024*1024, l3_cache_str},
3530 3570 { 0x46, 4, 64, 4*1024*1024, l3_cache_str},
3531 3571 { 0x45, 4, 32, 2*1024*1024, l2_cache_str},
3532 3572 { 0x44, 4, 32, 1024*1024, l2_cache_str},
3533 3573 { 0x43, 4, 32, 512*1024, l2_cache_str},
3534 3574 { 0x42, 4, 32, 256*1024, l2_cache_str},
3535 3575 { 0x41, 4, 32, 128*1024, l2_cache_str},
3536 3576 { 0x3e, 4, 64, 512*1024, sl2_cache_str},
3537 3577 { 0x3d, 6, 64, 384*1024, sl2_cache_str},
3538 3578 { 0x3c, 4, 64, 256*1024, sl2_cache_str},
3539 3579 { 0x3b, 2, 64, 128*1024, sl2_cache_str},
3540 3580 { 0x3a, 6, 64, 192*1024, sl2_cache_str},
3541 3581 { 0x39, 4, 64, 128*1024, sl2_cache_str},
3542 3582 { 0x30, 8, 64, 32*1024, l1_icache_str},
3543 3583 { 0x2c, 8, 64, 32*1024, l1_dcache_str},
3544 3584 { 0x29, 8, 64, 4096*1024, sl3_cache_str},
3545 3585 { 0x25, 8, 64, 2048*1024, sl3_cache_str},
3546 3586 { 0x23, 8, 64, 1024*1024, sl3_cache_str},
3547 3587 { 0x22, 4, 64, 512*1024, sl3_cache_str},
3548 3588 { 0x0e, 6, 64, 24*1024, l1_dcache_str},
3549 3589 { 0x0d, 4, 32, 16*1024, l1_dcache_str},
3550 3590 { 0x0c, 4, 32, 16*1024, l1_dcache_str},
3551 3591 { 0x0b, 4, 0, 4, itlb4M_str},
3552 3592 { 0x0a, 2, 32, 8*1024, l1_dcache_str},
3553 3593 { 0x08, 4, 32, 16*1024, l1_icache_str},
3554 3594 { 0x06, 4, 32, 8*1024, l1_icache_str},
3555 3595 { 0x05, 4, 0, 32, dtlb4M_str},
3556 3596 { 0x04, 4, 0, 8, dtlb4M_str},
3557 3597 { 0x03, 4, 0, 64, dtlb4k_str},
3558 3598 { 0x02, 4, 0, 2, itlb4M_str},
3559 3599 { 0x01, 4, 0, 32, itlb4k_str},
3560 3600 { 0 }
3561 3601 };
3562 3602
3563 3603 static const struct cachetab cyrix_ctab[] = {
3564 3604 { 0x70, 4, 0, 32, "tlb-4K" },
3565 3605 { 0x80, 4, 16, 16*1024, "l1-cache" },
3566 3606 { 0 }
3567 3607 };
3568 3608
3569 3609 /*
3570 3610 * Search a cache table for a matching entry
3571 3611 */
3572 3612 static const struct cachetab *
3573 3613 find_cacheent(const struct cachetab *ct, uint_t code)
3574 3614 {
3575 3615 if (code != 0) {
3576 3616 for (; ct->ct_code != 0; ct++)
3577 3617 if (ct->ct_code <= code)
3578 3618 break;
3579 3619 if (ct->ct_code == code)
3580 3620 return (ct);
3581 3621 }
3582 3622 return (NULL);
3583 3623 }
3584 3624
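A small illustration, not part of the reviewed change, of how the descending-order scan in find_cacheent() behaves against the intel_ctab above; the descriptor values come from that table.

static void
find_cacheent_example(void)
{
	const struct cachetab *ct;

	/* 0x2c is in intel_ctab: the scan stops exactly on it. */
	ct = find_cacheent(intel_ctab, 0x2c);
	ASSERT(ct != NULL && ct->ct_size == 32 * 1024);

	/* 0x2b is absent: the scan stops at 0x29 (<= 0x2b), no match. */
	ct = find_cacheent(intel_ctab, 0x2b);
	ASSERT(ct == NULL);
}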
3585 3625 /*
3586 3626 * Populate cachetab entry with L2 or L3 cache-information using
3587 3627 * cpuid function 4. This function is called from intel_walk_cacheinfo()
3588 3628 * when descriptor 0x49 is encountered. It returns 0 if no such cache
3589 3629 * information is found.
3590 3630 */
3591 3631 static int
3592 3632 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi)
3593 3633 {
3594 3634 uint32_t level, i;
3595 3635 int ret = 0;
3596 3636
3597 3637 for (i = 0; i < cpi->cpi_std_4_size; i++) {
3598 3638 level = CPI_CACHE_LVL(cpi->cpi_std_4[i]);
3599 3639
3600 3640 if (level == 2 || level == 3) {
3601 3641 ct->ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1;
3602 3642 ct->ct_line_size =
3603 3643 CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1;
3604 3644 ct->ct_size = ct->ct_assoc *
3605 3645 (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) *
3606 3646 ct->ct_line_size *
3607 3647 (cpi->cpi_std_4[i]->cp_ecx + 1);
3608 3648
3609 3649 if (level == 2) {
3610 3650 ct->ct_label = l2_cache_str;
3611 3651 } else if (level == 3) {
3612 3652 ct->ct_label = l3_cache_str;
3613 3653 }
3614 3654 ret = 1;
3615 3655 }
3616 3656 }
3617 3657
3618 3658 return (ret);
3619 3659 }
3620 3660
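A standalone sketch, not part of the reviewed change, of the leaf-4 size arithmetic used above. It assumes the usual CPUID.4 field layout (EBX[31:22] = ways-1, EBX[21:12] = partitions-1, EBX[11:0] = line size-1, ECX = sets-1), which is what the CPI_CACHE_* macros extract.

#include <stdint.h>

static uint64_t
leaf4_cache_size(uint32_t ebx, uint32_t ecx)
{
	uint64_t ways = (ebx >> 22) + 1;		/* CPI_CACHE_WAYS + 1 */
	uint64_t parts = ((ebx >> 12) & 0x3ff) + 1;	/* CPI_CACHE_PARTS + 1 */
	uint64_t line = (ebx & 0xfff) + 1;		/* coherency line size + 1 */
	uint64_t sets = (uint64_t)ecx + 1;		/* cp_ecx + 1 */

	return (ways * parts * line * sets);
}

/*
 * e.g. ways = 8, partitions = 1, line = 64, sets = 512
 *	=> 8 * 1 * 64 * 512 = 262144 bytes, i.e. a 256K L2.
 */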
3621 3661 /*
3622 3662 * Walk the cacheinfo descriptor, applying 'func' to every valid element
3623 3663 * The walk is terminated if the walker returns non-zero.
3624 3664 */
3625 3665 static void
3626 3666 intel_walk_cacheinfo(struct cpuid_info *cpi,
3627 3667 void *arg, int (*func)(void *, const struct cachetab *))
3628 3668 {
3629 3669 const struct cachetab *ct;
3630 3670 struct cachetab des_49_ct, des_b1_ct;
3631 3671 uint8_t *dp;
3632 3672 int i;
3633 3673
3634 3674 if ((dp = cpi->cpi_cacheinfo) == NULL)
3635 3675 return;
3636 3676 for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
3637 3677 /*
3638 3678 * For overloaded descriptor 0x49 we use cpuid function 4
3639 3679 * if supported by the current processor, to create
3640 3680 * cache information.
3641 3681 * For overloaded descriptor 0xb1 we use X86_PAE flag
3642 3682 * to disambiguate the cache information.
3643 3683 */
3644 3684 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 &&
3645 3685 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) {
3646 3686 ct = &des_49_ct;
3647 3687 } else if (*dp == 0xb1) {
3648 3688 des_b1_ct.ct_code = 0xb1;
3649 3689 des_b1_ct.ct_assoc = 4;
3650 3690 des_b1_ct.ct_line_size = 0;
3651 3691 if (is_x86_feature(x86_featureset, X86FSET_PAE)) {
3652 3692 des_b1_ct.ct_size = 8;
3653 3693 des_b1_ct.ct_label = itlb2M_str;
3654 3694 } else {
3655 3695 des_b1_ct.ct_size = 4;
3656 3696 des_b1_ct.ct_label = itlb4M_str;
3657 3697 }
3658 3698 ct = &des_b1_ct;
3659 3699 } else {
3660 3700 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) {
3661 3701 continue;
3662 3702 }
3663 3703 }
3664 3704
3665 3705 if (func(arg, ct) != 0) {
3666 3706 break;
3667 3707 }
3668 3708 }
3669 3709 }
3670 3710
3671 3711 /*
3672 3712 * (Like the Intel one, except for Cyrix CPUs)
3673 3713 */
3674 3714 static void
3675 3715 cyrix_walk_cacheinfo(struct cpuid_info *cpi,
3676 3716 void *arg, int (*func)(void *, const struct cachetab *))
3677 3717 {
3678 3718 const struct cachetab *ct;
3679 3719 uint8_t *dp;
3680 3720 int i;
3681 3721
3682 3722 if ((dp = cpi->cpi_cacheinfo) == NULL)
3683 3723 return;
3684 3724 for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
3685 3725 /*
3686 3726 * Search Cyrix-specific descriptor table first ..
3687 3727 */
3688 3728 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) {
3689 3729 if (func(arg, ct) != 0)
3690 3730 break;
3691 3731 continue;
3692 3732 }
3693 3733 /*
3694 3734 * .. else fall back to the Intel one
3695 3735 */
3696 3736 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) {
3697 3737 if (func(arg, ct) != 0)
3698 3738 break;
3699 3739 continue;
3700 3740 }
3701 3741 }
3702 3742 }
3703 3743
3704 3744 /*
3705 3745 * A cacheinfo walker that adds associativity, line-size, and size properties
3706 3746 * to the devinfo node it is passed as an argument.
3707 3747 */
3708 3748 static int
3709 3749 add_cacheent_props(void *arg, const struct cachetab *ct)
3710 3750 {
3711 3751 dev_info_t *devi = arg;
3712 3752
3713 3753 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc);
3714 3754 if (ct->ct_line_size != 0)
3715 3755 add_cache_prop(devi, ct->ct_label, line_str,
3716 3756 ct->ct_line_size);
3717 3757 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size);
3718 3758 return (0);
3719 3759 }
3720 3760
3721 3761
3722 3762 static const char fully_assoc[] = "fully-associative?";
3723 3763
3724 3764 /*
3725 3765 * AMD style cache/tlb description
3726 3766 *
3727 3767 * Extended functions 5 and 6 directly describe properties of
3728 3768 * tlbs and various cache levels.
3729 3769 */
3730 3770 static void
3731 3771 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc)
3732 3772 {
3733 3773 switch (assoc) {
3734 3774 case 0: /* reserved; ignore */
3735 3775 break;
3736 3776 default:
3737 3777 add_cache_prop(devi, label, assoc_str, assoc);
3738 3778 break;
3739 3779 case 0xff:
3740 3780 add_cache_prop(devi, label, fully_assoc, 1);
3741 3781 break;
3742 3782 }
3743 3783 }
3744 3784
3745 3785 static void
3746 3786 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
3747 3787 {
3748 3788 if (size == 0)
3749 3789 return;
3750 3790 add_cache_prop(devi, label, size_str, size);
3751 3791 add_amd_assoc(devi, label, assoc);
3752 3792 }
3753 3793
3754 3794 static void
3755 3795 add_amd_cache(dev_info_t *devi, const char *label,
3756 3796 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
3757 3797 {
3758 3798 if (size == 0 || line_size == 0)
3759 3799 return;
3760 3800 add_amd_assoc(devi, label, assoc);
3761 3801 /*
3762 3802 * Most AMD parts have a sectored cache. Multiple cache lines are
3763 3803 * associated with each tag. A sector consists of all cache lines
3764 3804 * associated with a tag. For example, the AMD K6-III has a sector
3765 3805 * size of 2 cache lines per tag.
3766 3806 */
3767 3807 if (lines_per_tag != 0)
3768 3808 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
3769 3809 add_cache_prop(devi, label, line_str, line_size);
3770 3810 add_cache_prop(devi, label, size_str, size * 1024);
3771 3811 }
3772 3812
3773 3813 static void
3774 3814 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc)
3775 3815 {
3776 3816 switch (assoc) {
3777 3817 case 0: /* off */
3778 3818 break;
3779 3819 case 1:
3780 3820 case 2:
3781 3821 case 4:
3782 3822 add_cache_prop(devi, label, assoc_str, assoc);
3783 3823 break;
3784 3824 case 6:
3785 3825 add_cache_prop(devi, label, assoc_str, 8);
3786 3826 break;
3787 3827 case 8:
3788 3828 add_cache_prop(devi, label, assoc_str, 16);
3789 3829 break;
3790 3830 case 0xf:
3791 3831 add_cache_prop(devi, label, fully_assoc, 1);
3792 3832 break;
3793 3833 default: /* reserved; ignore */
3794 3834 break;
3795 3835 }
3796 3836 }
3797 3837
3798 3838 static void
3799 3839 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
3800 3840 {
3801 3841 if (size == 0 || assoc == 0)
3802 3842 return;
3803 3843 add_amd_l2_assoc(devi, label, assoc);
3804 3844 add_cache_prop(devi, label, size_str, size);
3805 3845 }
3806 3846
3807 3847 static void
3808 3848 add_amd_l2_cache(dev_info_t *devi, const char *label,
3809 3849 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
3810 3850 {
3811 3851 if (size == 0 || assoc == 0 || line_size == 0)
3812 3852 return;
3813 3853 add_amd_l2_assoc(devi, label, assoc);
3814 3854 if (lines_per_tag != 0)
3815 3855 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
3816 3856 add_cache_prop(devi, label, line_str, line_size);
3817 3857 add_cache_prop(devi, label, size_str, size * 1024);
3818 3858 }
3819 3859
3820 3860 static void
3821 3861 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi)
3822 3862 {
3823 3863 struct cpuid_regs *cp;
3824 3864
3825 3865 if (cpi->cpi_xmaxeax < 0x80000005)
3826 3866 return;
3827 3867 cp = &cpi->cpi_extd[5];
3828 3868
3829 3869 /*
3830 3870 * 4M/2M L1 TLB configuration
3831 3871 *
3832 3872 * We report the size for 2M pages because AMD uses two
3833 3873 * TLB entries for one 4M page.
3834 3874 */
3835 3875 add_amd_tlb(devi, "dtlb-2M",
3836 3876 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16));
3837 3877 add_amd_tlb(devi, "itlb-2M",
3838 3878 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0));
3839 3879
3840 3880 /*
3841 3881 * 4K L1 TLB configuration
3842 3882 */
3843 3883
3844 3884 switch (cpi->cpi_vendor) {
3845 3885 uint_t nentries;
3846 3886 case X86_VENDOR_TM:
3847 3887 if (cpi->cpi_family >= 5) {
3848 3888 /*
3849 3889 * Crusoe processors have 256 TLB entries, but
3850 3890 * the cpuid data format constrains them to
3851 3891 * reporting only 255 of them.
3852 3892 */
3853 3893 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255)
3854 3894 nentries = 256;
3855 3895 /*
3856 3896 * Crusoe processors also have a unified TLB
3857 3897 */
3858 3898 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24),
3859 3899 nentries);
3860 3900 break;
3861 3901 }
3862 3902 /*FALLTHROUGH*/
3863 3903 default:
3864 3904 add_amd_tlb(devi, itlb4k_str,
3865 3905 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16));
3866 3906 add_amd_tlb(devi, dtlb4k_str,
3867 3907 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0));
3868 3908 break;
3869 3909 }
3870 3910
3871 3911 /*
3872 3912 * data L1 cache configuration
3873 3913 */
3874 3914
3875 3915 add_amd_cache(devi, l1_dcache_str,
3876 3916 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16),
3877 3917 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0));
3878 3918
3879 3919 /*
3880 3920 * code L1 cache configuration
3881 3921 */
3882 3922
3883 3923 add_amd_cache(devi, l1_icache_str,
3884 3924 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16),
3885 3925 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0));
3886 3926
3887 3927 if (cpi->cpi_xmaxeax < 0x80000006)
3888 3928 return;
3889 3929 cp = &cpi->cpi_extd[6];
3890 3930
3891 3931 /* Check for a unified L2 TLB for large pages */
3892 3932
3893 3933 if (BITX(cp->cp_eax, 31, 16) == 0)
3894 3934 add_amd_l2_tlb(devi, "l2-tlb-2M",
3895 3935 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
3896 3936 else {
3897 3937 add_amd_l2_tlb(devi, "l2-dtlb-2M",
3898 3938 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
3899 3939 add_amd_l2_tlb(devi, "l2-itlb-2M",
3900 3940 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
3901 3941 }
3902 3942
3903 3943 /* Check for a unified L2 TLB for 4K pages */
3904 3944
3905 3945 if (BITX(cp->cp_ebx, 31, 16) == 0) {
3906 3946 add_amd_l2_tlb(devi, "l2-tlb-4K",
3907 3947 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
3908 3948 } else {
3909 3949 add_amd_l2_tlb(devi, "l2-dtlb-4K",
3910 3950 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
3911 3951 add_amd_l2_tlb(devi, "l2-itlb-4K",
3912 3952 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
3913 3953 }
3914 3954
3915 3955 add_amd_l2_cache(devi, l2_cache_str,
3916 3956 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12),
3917 3957 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0));
3918 3958 }
3919 3959
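A user-land style sketch, not part of the reviewed change, of the extended function 5 ECX decoding performed by amd_cache_info() above. The sample value 0x40020140 is a hypothetical but typical K8-style L1 dcache encoding; FIELD() mimics the kernel's BITX() for the bit ranges used here.

#include <stdio.h>
#include <stdint.h>

#define	FIELD(x, high, low) \
	(((x) >> (low)) & ((1u << ((high) - (low) + 1)) - 1))

static void
decode_l1_dcache(uint32_t ecx)
{
	printf("L1 dcache: %u KB, %u-way, %u lines/tag, %u-byte lines\n",
	    FIELD(ecx, 31, 24),		/* size in KB */
	    FIELD(ecx, 23, 16),		/* associativity */
	    FIELD(ecx, 15, 8),		/* lines per tag */
	    FIELD(ecx, 7, 0));		/* line size in bytes */
}

/* decode_l1_dcache(0x40020140) prints: L1 dcache: 64 KB, 2-way, 1 lines/tag, 64-byte lines */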
3920 3960 /*
3921 3961 * There are two basic ways that the x86 world describes its cache
3922 3962 * and tlb architecture - Intel's way and AMD's way.
3923 3963 *
3924 3964 * Return which flavor of cache architecture we should use
3925 3965 */
3926 3966 static int
3927 3967 x86_which_cacheinfo(struct cpuid_info *cpi)
3928 3968 {
3929 3969 switch (cpi->cpi_vendor) {
3930 3970 case X86_VENDOR_Intel:
3931 3971 if (cpi->cpi_maxeax >= 2)
3932 3972 return (X86_VENDOR_Intel);
3933 3973 break;
3934 3974 case X86_VENDOR_AMD:
3935 3975 /*
3936 3976 * The K5 model 1 was the first part from AMD that reported
3937 3977 * cache sizes via extended cpuid functions.
3938 3978 */
3939 3979 if (cpi->cpi_family > 5 ||
3940 3980 (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
3941 3981 return (X86_VENDOR_AMD);
3942 3982 break;
3943 3983 case X86_VENDOR_TM:
3944 3984 if (cpi->cpi_family >= 5)
3945 3985 return (X86_VENDOR_AMD);
3946 3986 /*FALLTHROUGH*/
3947 3987 default:
3948 3988 /*
3949 3989 * If they have extended CPU data for 0x80000005
3950 3990 * then we assume they have AMD-format cache
3951 3991 * information.
3952 3992 *
3953 3993 * If not, and the vendor happens to be Cyrix,
3954 3994 * then try our Cyrix-specific handler.
3955 3995 *
3956 3996 * If we're not Cyrix, then assume we're using Intel's
3957 3997 * table-driven format instead.
3958 3998 */
3959 3999 if (cpi->cpi_xmaxeax >= 0x80000005)
3960 4000 return (X86_VENDOR_AMD);
3961 4001 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix)
3962 4002 return (X86_VENDOR_Cyrix);
3963 4003 else if (cpi->cpi_maxeax >= 2)
3964 4004 return (X86_VENDOR_Intel);
3965 4005 break;
3966 4006 }
3967 4007 return (-1);
3968 4008 }
3969 4009
3970 4010 void
3971 4011 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
3972 4012 struct cpuid_info *cpi)
3973 4013 {
3974 4014 dev_info_t *cpu_devi;
3975 4015 int create;
3976 4016
3977 4017 cpu_devi = (dev_info_t *)dip;
3978 4018
3979 4019 /* device_type */
3980 4020 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
3981 4021 "device_type", "cpu");
3982 4022
3983 4023 /* reg */
3984 4024 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3985 4025 "reg", cpu_id);
3986 4026
3987 4027 /* cpu-mhz, and clock-frequency */
3988 4028 if (cpu_freq > 0) {
3989 4029 long long mul;
3990 4030
3991 4031 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3992 4032 "cpu-mhz", cpu_freq);
3993 4033 if ((mul = cpu_freq * 1000000LL) <= INT_MAX)
3994 4034 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3995 4035 "clock-frequency", (int)mul);
3996 4036 }
3997 4037
3998 4038 if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) {
3999 4039 return;
4000 4040 }
4001 4041
4002 4042 /* vendor-id */
4003 4043 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
4004 4044 "vendor-id", cpi->cpi_vendorstr);
4005 4045
4006 4046 if (cpi->cpi_maxeax == 0) {
4007 4047 return;
4008 4048 }
4009 4049
4010 4050 /*
4011 4051 * family, model, and step
4012 4052 */
4013 4053 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4014 4054 "family", CPI_FAMILY(cpi));
4015 4055 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4016 4056 "cpu-model", CPI_MODEL(cpi));
4017 4057 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4018 4058 "stepping-id", CPI_STEP(cpi));
4019 4059
4020 4060 /* type */
4021 4061 switch (cpi->cpi_vendor) {
4022 4062 case X86_VENDOR_Intel:
4023 4063 create = 1;
4024 4064 break;
4025 4065 default:
4026 4066 create = 0;
4027 4067 break;
4028 4068 }
4029 4069 if (create)
4030 4070 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4031 4071 "type", CPI_TYPE(cpi));
4032 4072
4033 4073 /* ext-family */
4034 4074 switch (cpi->cpi_vendor) {
4035 4075 case X86_VENDOR_Intel:
4036 4076 case X86_VENDOR_AMD:
4037 4077 create = cpi->cpi_family >= 0xf;
4038 4078 break;
4039 4079 default:
4040 4080 create = 0;
4041 4081 break;
4042 4082 }
4043 4083 if (create)
4044 4084 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4045 4085 "ext-family", CPI_FAMILY_XTD(cpi));
4046 4086
4047 4087 /* ext-model */
4048 4088 switch (cpi->cpi_vendor) {
4049 4089 case X86_VENDOR_Intel:
4050 4090 create = IS_EXTENDED_MODEL_INTEL(cpi);
4051 4091 break;
4052 4092 case X86_VENDOR_AMD:
4053 4093 create = CPI_FAMILY(cpi) == 0xf;
4054 4094 break;
4055 4095 default:
4056 4096 create = 0;
4057 4097 break;
4058 4098 }
4059 4099 if (create)
4060 4100 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4061 4101 "ext-model", CPI_MODEL_XTD(cpi));
4062 4102
4063 4103 /* generation */
4064 4104 switch (cpi->cpi_vendor) {
4065 4105 case X86_VENDOR_AMD:
4066 4106 /*
4067 4107 * AMD K5 model 1 was the first part to support this
4068 4108 */
4069 4109 create = cpi->cpi_xmaxeax >= 0x80000001;
4070 4110 break;
4071 4111 default:
4072 4112 create = 0;
4073 4113 break;
4074 4114 }
4075 4115 if (create)
4076 4116 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4077 4117 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8));
4078 4118
4079 4119 /* brand-id */
4080 4120 switch (cpi->cpi_vendor) {
4081 4121 case X86_VENDOR_Intel:
4082 4122 /*
4083 4123 * brand id first appeared on Pentium III Xeon model 8,
4084 4124 * Celeron model 8, and Opteron processors
4085 4125 */
4086 4126 create = cpi->cpi_family > 6 ||
4087 4127 (cpi->cpi_family == 6 && cpi->cpi_model >= 8);
4088 4128 break;
4089 4129 case X86_VENDOR_AMD:
4090 4130 create = cpi->cpi_family >= 0xf;
4091 4131 break;
4092 4132 default:
4093 4133 create = 0;
4094 4134 break;
4095 4135 }
4096 4136 if (create && cpi->cpi_brandid != 0) {
4097 4137 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4098 4138 "brand-id", cpi->cpi_brandid);
4099 4139 }
4100 4140
4101 4141 /* chunks, and apic-id */
4102 4142 switch (cpi->cpi_vendor) {
4103 4143 /*
4104 4144 * first available on Pentium IV and Opteron (K8)
4105 4145 */
4106 4146 case X86_VENDOR_Intel:
4107 4147 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
4108 4148 break;
4109 4149 case X86_VENDOR_AMD:
4110 4150 create = cpi->cpi_family >= 0xf;
4111 4151 break;
4112 4152 default:
4113 4153 create = 0;
4114 4154 break;
4115 4155 }
4116 4156 if (create) {
4117 4157 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4118 4158 "chunks", CPI_CHUNKS(cpi));
4119 4159 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4120 4160 "apic-id", cpi->cpi_apicid);
4121 4161 if (cpi->cpi_chipid >= 0) {
4122 4162 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4123 4163 "chip#", cpi->cpi_chipid);
4124 4164 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4125 4165 "clog#", cpi->cpi_clogid);
4126 4166 }
4127 4167 }
4128 4168
4129 4169 /* cpuid-features */
4130 4170 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4131 4171 "cpuid-features", CPI_FEATURES_EDX(cpi));
4132 4172
4133 4173
4134 4174 /* cpuid-features-ecx */
4135 4175 switch (cpi->cpi_vendor) {
4136 4176 case X86_VENDOR_Intel:
4137 4177 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
4138 4178 break;
4139 4179 case X86_VENDOR_AMD:
4140 4180 create = cpi->cpi_family >= 0xf;
4141 4181 break;
4142 4182 default:
4143 4183 create = 0;
4144 4184 break;
4145 4185 }
4146 4186 if (create)
4147 4187 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4148 4188 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi));
4149 4189
4150 4190 /* ext-cpuid-features */
4151 4191 switch (cpi->cpi_vendor) {
4152 4192 case X86_VENDOR_Intel:
4153 4193 case X86_VENDOR_AMD:
4154 4194 case X86_VENDOR_Cyrix:
4155 4195 case X86_VENDOR_TM:
4156 4196 case X86_VENDOR_Centaur:
4157 4197 create = cpi->cpi_xmaxeax >= 0x80000001;
4158 4198 break;
4159 4199 default:
4160 4200 create = 0;
4161 4201 break;
4162 4202 }
4163 4203 if (create) {
4164 4204 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4165 4205 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi));
4166 4206 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4167 4207 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi));
4168 4208 }
4169 4209
4170 4210 /*
4171 4211 * Brand String first appeared in Intel Pentium IV, AMD K5
4172 4212 * model 1, and Cyrix GXm. On earlier models we try to
4173 4213 * simulate something similar .. so this string should always
4174 4214 * say -something- about the processor, however lame.
4175 4215 */
4176 4216 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
4177 4217 "brand-string", cpi->cpi_brandstr);
4178 4218
4179 4219 /*
4180 4220 * Finally, cache and tlb information
4181 4221 */
4182 4222 switch (x86_which_cacheinfo(cpi)) {
4183 4223 case X86_VENDOR_Intel:
4184 4224 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
4185 4225 break;
4186 4226 case X86_VENDOR_Cyrix:
4187 4227 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
4188 4228 break;
4189 4229 case X86_VENDOR_AMD:
4190 4230 amd_cache_info(cpi, cpu_devi);
4191 4231 break;
4192 4232 default:
4193 4233 break;
4194 4234 }
4195 4235 }
4196 4236
4197 4237 struct l2info {
4198 4238 int *l2i_csz;
4199 4239 int *l2i_lsz;
4200 4240 int *l2i_assoc;
4201 4241 int l2i_ret;
4202 4242 };
4203 4243
4204 4244 /*
4205 4245 * A cacheinfo walker that fetches the size, line-size and associativity
4206 4246 * of the L2 cache
4207 4247 */
4208 4248 static int
4209 4249 intel_l2cinfo(void *arg, const struct cachetab *ct)
4210 4250 {
4211 4251 struct l2info *l2i = arg;
4212 4252 int *ip;
4213 4253
4214 4254 if (ct->ct_label != l2_cache_str &&
4215 4255 ct->ct_label != sl2_cache_str)
4216 4256 return (0); /* not an L2 -- keep walking */
4217 4257
4218 4258 if ((ip = l2i->l2i_csz) != NULL)
4219 4259 *ip = ct->ct_size;
4220 4260 if ((ip = l2i->l2i_lsz) != NULL)
4221 4261 *ip = ct->ct_line_size;
4222 4262 if ((ip = l2i->l2i_assoc) != NULL)
4223 4263 *ip = ct->ct_assoc;
4224 4264 l2i->l2i_ret = ct->ct_size;
4225 4265 return (1); /* was an L2 -- terminate walk */
4226 4266 }
4227 4267
4228 4268 /*
4229 4269 * AMD L2/L3 Cache and TLB Associativity Field Definition:
4230 4270 *
4231 4271 * Unlike the associativity for the L1 cache and tlb where the 8 bit
4232 4272 * value is the associativity, the associativity for the L2 cache and
4233 4273 * tlb is encoded in the following table. The 4 bit L2 value serves as
4234 4274 * an index into the amd_afd[] array to determine the associativity.
4235 4275 * -1 is undefined. 0 is fully associative.
4236 4276 */
4237 4277
4238 4278 static int amd_afd[] =
4239 4279 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0};
4240 4280
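A brief sketch, not part of the reviewed change, of how the 4-bit field from extended function 6 indexes amd_afd[]; amd_l2cacheinfo() below does exactly this for the L2 cache.

static int
amd_l2_assoc_decode(uint32_t ecx)
{
	/* ECX[15:12] is the encoded L2 associativity field. */
	return (amd_afd[(ecx >> 12) & 0xf]);
}

/*
 * e.g. ECX[15:12] == 6   -> amd_afd[6] == 8   (8-way set associative)
 *	ECX[15:12] == 0xf -> amd_afd[15] == 0  (fully associative)
 *	reserved encodings map to -1.
 */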
4241 4281 static void
4242 4282 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i)
4243 4283 {
4244 4284 struct cpuid_regs *cp;
4245 4285 uint_t size, assoc;
4246 4286 int i;
4247 4287 int *ip;
4248 4288
4249 4289 if (cpi->cpi_xmaxeax < 0x80000006)
4250 4290 return;
4251 4291 cp = &cpi->cpi_extd[6];
4252 4292
4253 4293 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 &&
4254 4294 (size = BITX(cp->cp_ecx, 31, 16)) != 0) {
4255 4295 uint_t cachesz = size * 1024;
4256 4296 assoc = amd_afd[i];
4257 4297
4258 4298 ASSERT(assoc != -1);
4259 4299
4260 4300 if ((ip = l2i->l2i_csz) != NULL)
4261 4301 *ip = cachesz;
4262 4302 if ((ip = l2i->l2i_lsz) != NULL)
4263 4303 *ip = BITX(cp->cp_ecx, 7, 0);
4264 4304 if ((ip = l2i->l2i_assoc) != NULL)
4265 4305 *ip = assoc;
4266 4306 l2i->l2i_ret = cachesz;
4267 4307 }
4268 4308 }
4269 4309
4270 4310 int
4271 4311 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc)
4272 4312 {
4273 4313 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4274 4314 struct l2info __l2info, *l2i = &__l2info;
4275 4315
4276 4316 l2i->l2i_csz = csz;
4277 4317 l2i->l2i_lsz = lsz;
4278 4318 l2i->l2i_assoc = assoc;
4279 4319 l2i->l2i_ret = -1;
4280 4320
4281 4321 switch (x86_which_cacheinfo(cpi)) {
4282 4322 case X86_VENDOR_Intel:
4283 4323 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
4284 4324 break;
4285 4325 case X86_VENDOR_Cyrix:
4286 4326 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
4287 4327 break;
4288 4328 case X86_VENDOR_AMD:
4289 4329 amd_l2cacheinfo(cpi, l2i);
4290 4330 break;
4291 4331 default:
4292 4332 break;
4293 4333 }
4294 4334 return (l2i->l2i_ret);
4295 4335 }
4296 4336
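A hypothetical caller sketch, not part of the reviewed change, for getl2cacheinfo(); a positive return value is the L2 size in bytes, and -1 means no L2 information could be determined.

static void
report_l2(cpu_t *cpu)
{
	int csz, lsz, assoc;

	if (getl2cacheinfo(cpu, &csz, &lsz, &assoc) > 0) {
		cmn_err(CE_CONT, "L2 cache: %d bytes, %d-byte lines, "
		    "%d-way\n", csz, lsz, assoc);
	}
}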
4297 4337 #if !defined(__xpv)
4298 4338
4299 4339 uint32_t *
4300 4340 cpuid_mwait_alloc(cpu_t *cpu)
4301 4341 {
4302 4342 uint32_t *ret;
4303 4343 size_t mwait_size;
4304 4344
4305 4345 ASSERT(cpuid_checkpass(CPU, 2));
4306 4346
4307 4347 mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max;
4308 4348 if (mwait_size == 0)
4309 4349 return (NULL);
4310 4350
4311 4351 /*
4312 4352 * kmem_alloc() returns cache line size aligned data for mwait_size
4313 4353 * allocations. mwait_size is currently cache line sized. Neither
4314 4354 * of these implementation details is guaranteed to be true in the
4315 4355 * future.
4316 4356 *
4317 4357 * First try allocating mwait_size as kmem_alloc() currently returns
4318 4358 * correctly aligned memory. If kmem_alloc() does not return
4319 4359 * mwait_size aligned memory, then use mwait_size ROUNDUP.
4320 4360 *
4321 4361 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we
4322 4362 * decide to free this memory.
4323 4363 */
4324 4364 ret = kmem_zalloc(mwait_size, KM_SLEEP);
4325 4365 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) {
4326 4366 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
4327 4367 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size;
4328 4368 *ret = MWAIT_RUNNING;
4329 4369 return (ret);
4330 4370 } else {
4331 4371 kmem_free(ret, mwait_size);
4332 4372 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP);
4333 4373 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
4334 4374 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2;
4335 4375 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size);
4336 4376 *ret = MWAIT_RUNNING;
4337 4377 return (ret);
4338 4378 }
4339 4379 }
4340 4380
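A quick illustration, not part of the reviewed change, of why the 2x fallback allocation above always contains an mwait_size-aligned sub-buffer of mwait_size bytes. MY_P2ROUNDUP() is a common power-of-two round-up form standing in for the kernel's P2ROUNDUP(); it assumes align is a power of two.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define	MY_P2ROUNDUP(x, align) \
	((((uintptr_t)(x)) + ((uintptr_t)(align) - 1)) & ~((uintptr_t)(align) - 1))

static void
check_double_alloc(uintptr_t raw, size_t align)
{
	uintptr_t aligned = MY_P2ROUNDUP(raw, align);

	/* The rounded-up start is at most (align - 1) bytes past raw ... */
	assert(aligned >= raw && aligned - raw < align);
	/* ... so an align-sized region at 'aligned' fits within 2 * align. */
	assert(aligned + align <= raw + 2 * align);
}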
4341 4381 void
4342 4382 cpuid_mwait_free(cpu_t *cpu)
4343 4383 {
4344 4384 if (cpu->cpu_m.mcpu_cpi == NULL) {
4345 4385 return;
4346 4386 }
4347 4387
4348 4388 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL &&
4349 4389 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) {
4350 4390 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual,
4351 4391 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual);
4352 4392 }
4353 4393
4354 4394 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL;
4355 4395 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0;
4356 4396 }
4357 4397
4358 4398 void
4359 4399 patch_tsc_read(int flag)
4360 4400 {
4361 4401 size_t cnt;
4362 4402
4363 4403 switch (flag) {
4364 4404 case X86_NO_TSC:
4365 4405 cnt = &_no_rdtsc_end - &_no_rdtsc_start;
4366 4406 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt);
4367 4407 break;
4368 4408 case X86_HAVE_TSCP:
4369 4409 cnt = &_tscp_end - &_tscp_start;
4370 4410 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt);
4371 4411 break;
4372 4412 case X86_TSC_MFENCE:
4373 4413 cnt = &_tsc_mfence_end - &_tsc_mfence_start;
4374 4414 (void) memcpy((void *)tsc_read,
4375 4415 (void *)&_tsc_mfence_start, cnt);
4376 4416 break;
4377 4417 case X86_TSC_LFENCE:
4378 4418 cnt = &_tsc_lfence_end - &_tsc_lfence_start;
4379 4419 (void) memcpy((void *)tsc_read,
4380 4420 (void *)&_tsc_lfence_start, cnt);
4381 4421 break;
4382 4422 default:
4383 4423 break;
4384 4424 }
4385 4425 }
4386 4426
4387 4427 int
4388 4428 cpuid_deep_cstates_supported(void)
4389 4429 {
4390 4430 struct cpuid_info *cpi;
4391 4431 struct cpuid_regs regs;
4392 4432
4393 4433 ASSERT(cpuid_checkpass(CPU, 1));
4394 4434
4395 4435 cpi = CPU->cpu_m.mcpu_cpi;
4396 4436
4397 4437 if (!is_x86_feature(x86_featureset, X86FSET_CPUID))
4398 4438 return (0);
4399 4439
4400 4440 switch (cpi->cpi_vendor) {
4401 4441 case X86_VENDOR_Intel:
4402 4442 if (cpi->cpi_xmaxeax < 0x80000007)
4403 4443 return (0);
4404 4444
4405 4445 /*
4406 4446 * Does the TSC run at a constant rate in all ACPI C-states?
4407 4447 */
4408 4448 regs.cp_eax = 0x80000007;
4409 4449 (void) __cpuid_insn(&regs);
4410 4450 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE);
4411 4451
4412 4452 default:
4413 4453 return (0);
4414 4454 }
4415 4455 }
4416 4456
4417 4457 #endif /* !__xpv */
4418 4458
4419 4459 void
4420 4460 post_startup_cpu_fixups(void)
4421 4461 {
4422 4462 #ifndef __xpv
4423 4463 /*
4424 4464 * Some AMD processors support C1E state. Entering this state will
4425 4465 * cause the local APIC timer to stop, which we can't deal with at
4426 4466 * this time.
4427 4467 */
4428 4468 if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) {
4429 4469 on_trap_data_t otd;
4430 4470 uint64_t reg;
4431 4471
4432 4472 if (!on_trap(&otd, OT_DATA_ACCESS)) {
4433 4473 reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT);
4434 4474 /* Disable C1E state if it is enabled by BIOS */
4435 4475 if ((reg >> AMD_ACTONCMPHALT_SHIFT) &
4436 4476 AMD_ACTONCMPHALT_MASK) {
4437 4477 reg &= ~(AMD_ACTONCMPHALT_MASK <<
4438 4478 AMD_ACTONCMPHALT_SHIFT);
4439 4479 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg);
4440 4480 }
4441 4481 }
4442 4482 no_trap();
4443 4483 }
4444 4484 #endif /* !__xpv */
4445 4485 }
4446 4486
4447 4487 /*
4448 4488 * Setup necessary registers to enable XSAVE feature on this processor.
4449 4489 * This function needs to be called early enough, so that no xsave/xrstor
4450 4490 * ops will execute on the processor before the MSRs are properly set up.
4451 4491 *
4452 4492 * Current implementation has the following assumption:
4453 4493 * - cpuid_pass1() is done, so that X86 features are known.
4454 4494 * - fpu_probe() is done, so that fp_save_mech is chosen.
4455 4495 */
4456 4496 void
4457 4497 xsave_setup_msr(cpu_t *cpu)
4458 4498 {
4459 4499 ASSERT(fp_save_mech == FP_XSAVE);
4460 4500 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
4461 4501
4462 4502 /* Enable OSXSAVE in CR4. */
4463 4503 setcr4(getcr4() | CR4_OSXSAVE);
4464 4504 /*
4465 4505 * Update SW copy of ECX, so that /dev/cpu/self/cpuid will report
4466 4506 * correct value.
4467 4507 */
4468 4508 cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE;
4469 4509 setup_xfem();
4470 4510 }
4471 4511
4472 4512 /*
4473 4513 * Starting with the Westmere processor, the local
4474 4514 * APIC timer will continue running in all C-states,
4475 4515 * including the deepest C-states.
4476 4516 */
4477 4517 int
4478 4518 cpuid_arat_supported(void)
4479 4519 {
4480 4520 struct cpuid_info *cpi;
4481 4521 struct cpuid_regs regs;
4482 4522
4483 4523 ASSERT(cpuid_checkpass(CPU, 1));
4484 4524 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
4485 4525
4486 4526 cpi = CPU->cpu_m.mcpu_cpi;
4487 4527
4488 4528 switch (cpi->cpi_vendor) {
4489 4529 case X86_VENDOR_Intel:
4490 4530 /*
4491 4531 * Always-running Local APIC Timer is
4492 4532 * indicated by CPUID.6.EAX[2].
4493 4533 */
4494 4534 if (cpi->cpi_maxeax >= 6) {
4495 4535 regs.cp_eax = 6;
4496 4536 (void) cpuid_insn(NULL, &regs);
4497 4537 return (regs.cp_eax & CPUID_CSTATE_ARAT);
4498 4538 } else {
4499 4539 return (0);
4500 4540 }
4501 4541 default:
4502 4542 return (0);
4503 4543 }
4504 4544 }
4505 4545
4506 4546 /*
4507 4547 * Check support for Intel ENERGY_PERF_BIAS feature
4508 4548 */
4509 4549 int
4510 4550 cpuid_iepb_supported(struct cpu *cp)
4511 4551 {
4512 4552 struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi;
4513 4553 struct cpuid_regs regs;
4514 4554
4515 4555 ASSERT(cpuid_checkpass(cp, 1));
4516 4556
4517 4557 if (!(is_x86_feature(x86_featureset, X86FSET_CPUID)) ||
4518 4558 !(is_x86_feature(x86_featureset, X86FSET_MSR))) {
4519 4559 return (0);
4520 4560 }
4521 4561
4522 4562 /*
4523 4563 * Intel ENERGY_PERF_BIAS MSR is indicated by
4524 4564 * capability bit CPUID.6.ECX.3
4525 4565 */
4526 4566 if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6))
4527 4567 return (0);
4528 4568
4529 4569 regs.cp_eax = 0x6;
4530 4570 (void) cpuid_insn(NULL, &regs);
4531 4571 return (regs.cp_ecx & CPUID_EPB_SUPPORT);
4532 4572 }
4533 4573
4534 4574 /*
4535 4575 * Check support for TSC deadline timer
4536 4576 *
4537 4577 * The TSC deadline timer provides a superior software programming
4538 4578 * model over the local APIC timer, one that eliminates "time drift".
4539 4579 * Instead of specifying a relative time, software specifies an
4540 4580 * absolute time as the target at which the processor should
4541 4581 * generate a timer event.
4542 4582 */
4543 4583 int
4544 4584 cpuid_deadline_tsc_supported(void)
4545 4585 {
4546 4586 struct cpuid_info *cpi = CPU->cpu_m.mcpu_cpi;
4547 4587 struct cpuid_regs regs;
4548 4588
4549 4589 ASSERT(cpuid_checkpass(CPU, 1));
4550 4590 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
4551 4591
4552 4592 switch (cpi->cpi_vendor) {
4553 4593 case X86_VENDOR_Intel:
4554 4594 if (cpi->cpi_maxeax >= 1) {
4555 4595 regs.cp_eax = 1;
4556 4596 (void) cpuid_insn(NULL, &regs);
4557 4597 return (regs.cp_ecx & CPUID_DEADLINE_TSC);
4558 4598 } else {
4559 4599 return (0);
4560 4600 }
4561 4601 default:
4562 4602 return (0);
4563 4603 }
4564 4604 }
4565 4605
4566 4606 #if defined(__amd64) && !defined(__xpv)
4567 4607 /*
4568 4608 * Patch in versions of bcopy for high-performance Intel Nehalem (Nhm)
4569 4609 * processors and later...
4570 4610 */
4571 4611 void
4572 4612 patch_memops(uint_t vendor)
4573 4613 {
4574 4614 size_t cnt, i;
4575 4615 caddr_t to, from;
4576 4616
4577 4617 if ((vendor == X86_VENDOR_Intel) &&
4578 4618 is_x86_feature(x86_featureset, X86FSET_SSE4_2)) {
4579 4619 cnt = &bcopy_patch_end - &bcopy_patch_start;
4580 4620 to = &bcopy_ck_size;
4581 4621 from = &bcopy_patch_start;
4582 4622 for (i = 0; i < cnt; i++) {
4583 4623 *to++ = *from++;
4584 4624 }
4585 4625 }
4586 4626 }
4587 4627 #endif /* __amd64 && !__xpv */
4588 4628
4589 4629 /*
4590 4630 * This function finds the number of bits to represent the number of cores per
4591 4631 * chip and the number of strands per core on Intel platforms.
4592 4632 * It re-uses the x2APIC cpuid code from cpuid_pass2().
4593 4633 */
4594 4634 void
4595 4635 cpuid_get_ext_topo(uint_t vendor, uint_t *core_nbits, uint_t *strand_nbits)
4596 4636 {
4597 4637 struct cpuid_regs regs;
4598 4638 struct cpuid_regs *cp = &regs;
4599 4639
4600 4640 if (vendor != X86_VENDOR_Intel) {
4601 4641 return;
4602 4642 }
4603 4643
4604 4644 /* if the cpuid level is 0xB, extended topo is available. */
4605 4645 cp->cp_eax = 0;
4606 4646 if (__cpuid_insn(cp) >= 0xB) {
4607 4647
4608 4648 cp->cp_eax = 0xB;
4609 4649 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
4610 4650 (void) __cpuid_insn(cp);
4611 4651
4612 4652 /*
4613 4653 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
4614 4654 * indicates that the extended topology enumeration leaf is
4615 4655 * available.
4616 4656 */
4617 4657 if (cp->cp_ebx) {
4618 4658 uint_t coreid_shift = 0;
4619 4659 uint_t chipid_shift = 0;
4620 4660 uint_t i;
4621 4661 uint_t level;
4622 4662
4623 4663 for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
4624 4664 cp->cp_eax = 0xB;
4625 4665 cp->cp_ecx = i;
4626 4666
4627 4667 (void) __cpuid_insn(cp);
4628 4668 level = CPI_CPU_LEVEL_TYPE(cp);
4629 4669
4630 4670 if (level == 1) {
4631 4671 /*
4632 4672 * Thread level processor topology
4633 4673 * Number of bits shift right APIC ID
4634 4674 * to get the coreid.
4635 4675 */
4636 4676 coreid_shift = BITX(cp->cp_eax, 4, 0);
4637 4677 } else if (level == 2) {
4638 4678 /*
4639 4679 * Core level processor topology
4640 4680 * Number of bits shift right APIC ID
4641 4681 * to get the chipid.
4642 4682 */
4643 4683 chipid_shift = BITX(cp->cp_eax, 4, 0);
4644 4684 }
4645 4685 }
4646 4686
4647 4687 if (coreid_shift > 0 && chipid_shift > coreid_shift) {
4648 4688 *strand_nbits = coreid_shift;
4649 4689 *core_nbits = chipid_shift - coreid_shift;
4650 4690 }
4651 4691 }
4652 4692 }
4653 4693 }
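A self-contained sketch, not part of the reviewed change, showing how the shift widths derived above split an x2APIC ID into chip, core and strand components. The helper name and the example widths (2 strands per core, 16 cores per chip) are hypothetical.

#include <stdint.h>

static void
apicid_decompose(uint32_t apicid, unsigned strand_nbits, unsigned core_nbits,
    uint32_t *chip, uint32_t *core, uint32_t *strand)
{
	*strand = apicid & ((1u << strand_nbits) - 1);
	*core = (apicid >> strand_nbits) & ((1u << core_nbits) - 1);
	*chip = apicid >> (strand_nbits + core_nbits);
}

/*
 * With strand_nbits == 1 and core_nbits == 4 (coreid_shift == 1,
 * chipid_shift == 5), APIC ID 0x2b decodes to chip 1, core 5, strand 1.
 */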
1641 lines elided