2650 AMD family 0x15 PG support
--- old/usr/src/uts/i86pc/os/cpuid.c
+++ new/usr/src/uts/i86pc/os/cpuid.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright (c) 2011 by Delphix. All rights reserved.
24 24 */
25 25 /*
26 26 * Copyright (c) 2010, Intel Corporation.
27 27 * All rights reserved.
28 28 */
29 29 /*
30 30 * Portions Copyright 2009 Advanced Micro Devices, Inc.
31 31 */
32 32 /*
33 33 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
34 34 */
35 35 /*
36 36 * Various routines to handle identification
37 37 * and classification of x86 processors.
38 38 */
39 39
40 40 #include <sys/types.h>
41 41 #include <sys/archsystm.h>
42 42 #include <sys/x86_archext.h>
43 43 #include <sys/kmem.h>
44 44 #include <sys/systm.h>
45 45 #include <sys/cmn_err.h>
46 46 #include <sys/sunddi.h>
47 47 #include <sys/sunndi.h>
48 48 #include <sys/cpuvar.h>
49 49 #include <sys/processor.h>
50 50 #include <sys/sysmacros.h>
51 51 #include <sys/pg.h>
52 52 #include <sys/fp.h>
53 53 #include <sys/controlregs.h>
54 54 #include <sys/bitmap.h>
55 55 #include <sys/auxv_386.h>
56 56 #include <sys/memnode.h>
57 57 #include <sys/pci_cfgspace.h>
58 58
59 59 #ifdef __xpv
60 60 #include <sys/hypervisor.h>
61 61 #else
62 62 #include <sys/ontrap.h>
63 63 #endif
64 64
65 65 /*
66 66 * Pass 0 of cpuid feature analysis happens in locore. It contains special code
67 67 * to recognize Cyrix processors that are not cpuid-compliant, and to deal with
68 68 * them accordingly. For most modern processors, feature detection occurs here
69 69 * in pass 1.
70 70 *
71 71 * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup()
72 72 * for the boot CPU and does the basic analysis that the early kernel needs.
73 73 * x86_featureset is set based on the return value of cpuid_pass1() of the boot
74 74 * CPU.
75 75 *
76 76 * Pass 1 includes:
77 77 *
78 78 * o Determining vendor/model/family/stepping and setting x86_type and
79 79 * x86_vendor accordingly.
80 80 * o Processing the feature flags returned by the cpuid instruction while
81 81 * applying any workarounds or tricks for the specific processor.
82 82 * o Mapping the feature flags into Solaris feature bits (X86_*).
83 83 * o Processing extended feature flags if supported by the processor,
84 84 * again while applying specific processor knowledge.
85 85 * o Determining the CMT characteristics of the system.
86 86 *
87 87 * Pass 1 is done on non-boot CPUs during their initialization and the results
88 88 * are used only as a meager attempt at ensuring that all processors within the
89 89 * system support the same features.
90 90 *
91 91 * Pass 2 of cpuid feature analysis happens just at the beginning
92 92 * of startup(). It just copies in and corrects the remainder
93 93 * of the cpuid data we depend on: standard cpuid functions that we didn't
94 94 * need for pass1 feature analysis, and extended cpuid functions beyond the
95 95 * simple feature processing done in pass1.
96 96 *
97 97 * Pass 3 of cpuid analysis is invoked after basic kernel services; in
98 98 * particular kernel memory allocation has been made available. It creates a
99 99 * readable brand string based on the data collected in the first two passes.
100 100 *
101 101 * Pass 4 of cpuid analysis is invoked after post_startup() when all
102 102 * the support infrastructure for various hardware features has been
103 103 * initialized. It determines which processor features will be reported
104 104 * to userland via the aux vector.
105 105 *
106 106 * All passes are executed on all CPUs, but only the boot CPU determines what
107 107 * features the kernel will use.
108 108 *
109 109 * Much of the worst junk in this file is for the support of processors
110 110 * that didn't really implement the cpuid instruction properly.
111 111 *
112 112 * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon,
113 113 * the pass numbers. Accordingly, changes to the pass code may require changes
114 114 * to the accessor code.
115 115 */
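/*
 * A minimal sketch of the accessor/pass contract the NOTE above describes.
 * The accessor name is hypothetical; it relies on the per-CPU cpuid_info
 * state (cpi_pass, cpi_family) declared further down in this file.
 */
static uint_t
example_cpuid_getfamily(cpu_t *cpu)
{
        struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;

        ASSERT(cpi->cpi_pass >= 1);     /* pass 1 must have completed */
        return (cpi->cpi_family);
}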
116 116
117 117 uint_t x86_vendor = X86_VENDOR_IntelClone;
118 118 uint_t x86_type = X86_TYPE_OTHER;
119 119 uint_t x86_clflush_size = 0;
120 120
121 121 uint_t pentiumpro_bug4046376;
122 122 uint_t pentiumpro_bug4064495;
123 123
124 124 uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)];
125 125
126 126 static char *x86_feature_names[NUM_X86_FEATURES] = {
127 127 "lgpg",
128 128 "tsc",
129 129 "msr",
130 130 "mtrr",
131 131 "pge",
132 132 "de",
133 133 "cmov",
134 134 "mmx",
135 135 "mca",
136 136 "pae",
137 137 "cv8",
138 138 "pat",
139 139 "sep",
140 140 "sse",
141 141 "sse2",
142 142 "htt",
143 143 "asysc",
144 144 "nx",
145 145 "sse3",
146 146 "cx16",
147 147 "cmp",
148 148 "tscp",
149 149 "mwait",
150 150 "sse4a",
151 151 "cpuid",
152 152 "ssse3",
153 153 "sse4_1",
154 154 "sse4_2",
155 155 "1gpg",
156 156 "clfsh",
157 157 "64",
158 158 "aes",
159 159 "pclmulqdq",
160 160 "xsave",
161 161 "avx",
162 162 "vmx",
163 - "svm"
163 + "svm",
164 + "topoext"
164 165 };
165 166
166 167 boolean_t
167 168 is_x86_feature(void *featureset, uint_t feature)
168 169 {
169 170 ASSERT(feature < NUM_X86_FEATURES);
170 171 return (BT_TEST((ulong_t *)featureset, feature));
171 172 }
172 173
173 174 void
174 175 add_x86_feature(void *featureset, uint_t feature)
175 176 {
176 177 ASSERT(feature < NUM_X86_FEATURES);
177 178 BT_SET((ulong_t *)featureset, feature);
178 179 }
179 180
180 181 void
181 182 remove_x86_feature(void *featureset, uint_t feature)
182 183 {
183 184 ASSERT(feature < NUM_X86_FEATURES);
184 185 BT_CLEAR((ulong_t *)featureset, feature);
185 186 }
186 187
187 188 boolean_t
188 189 compare_x86_featureset(void *setA, void *setB)
189 190 {
190 191 /*
191 192 * We assume that the unused bits of the bitmap are always zero.
192 193 */
193 194 if (memcmp(setA, setB, BT_SIZEOFMAP(NUM_X86_FEATURES)) == 0) {
194 195 return (B_TRUE);
195 196 } else {
196 197 return (B_FALSE);
197 198 }
198 199 }
199 200
200 201 void
201 202 print_x86_featureset(void *featureset)
202 203 {
203 204 uint_t i;
204 205
205 206 for (i = 0; i < NUM_X86_FEATURES; i++) {
206 207 if (is_x86_feature(featureset, i)) {
207 208 cmn_err(CE_CONT, "?x86_feature: %s\n",
208 209 x86_feature_names[i]);
209 210 }
210 211 }
211 212 }
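/*
 * A minimal usage sketch for the featureset helpers above, operating on a
 * locally declared bitmap.  The wrapper function is hypothetical.
 */
static void
example_featureset_usage(void)
{
        uchar_t fs[BT_SIZEOFMAP(NUM_X86_FEATURES)] = { 0 };

        add_x86_feature(fs, X86FSET_SSE2);
        if (is_x86_feature(fs, X86FSET_SSE2))
                cmn_err(CE_CONT, "?sse2 set in local featureset\n");
        remove_x86_feature(fs, X86FSET_SSE2);
        if (!compare_x86_featureset(fs, x86_featureset))
                cmn_err(CE_CONT, "?featureset differs from the boot cpu\n");
}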
212 213
213 214 uint_t enable486;
214 215
215 216 static size_t xsave_state_size = 0;
216 217 uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE);
217 218 boolean_t xsave_force_disable = B_FALSE;
218 219
219 220 /*
220 221 * This is set to platform type Solaris is running on.
221 222 */
222 223 static int platform_type = -1;
223 224
224 225 #if !defined(__xpv)
225 226 /*
226 227 * Variable to patch if hypervisor platform detection needs to be
227 228 * disabled (e.g. platform_type will always be HW_NATIVE if this is 0).
228 229 */
229 230 int enable_platform_detection = 1;
230 231 #endif
231 232
232 233 /*
233 234 * monitor/mwait info.
234 235 *
235 236 * buf_actual and size_actual are the real address and size allocated to get
236 237 * proper mwait_buf alignment. buf_actual and size_actual should be passed
237 238 * to kmem_free(). Currently kmem_alloc() and mwait happen to both use
238 239 * processor cache-line alignment, but this is not guaranteed in the future.
239 240 */
240 241 struct mwait_info {
241 242 size_t mon_min; /* min size to avoid missed wakeups */
242 243 size_t mon_max; /* size to avoid false wakeups */
243 244 size_t size_actual; /* size actually allocated */
244 245 void *buf_actual; /* memory actually allocated */
245 246 uint32_t support; /* processor support of monitor/mwait */
246 247 };
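/*
 * A sketch of the teardown contract described above: free buf_actual with
 * size_actual rather than the aligned pointer handed to the idle loop.  The
 * helper name is hypothetical.
 */
static void
example_mwait_buf_free(struct mwait_info *mi)
{
        if (mi->buf_actual != NULL) {
                kmem_free(mi->buf_actual, mi->size_actual);
                mi->buf_actual = NULL;
                mi->size_actual = 0;
        }
}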
247 248
248 249 /*
249 250 * xsave/xrestor info.
250 251 *
251 252 * This structure contains HW feature bits and size of the xsave save area.
252 253 * Note: the kernel will use the maximum size required for all hardware
253 254 * features. It is not optimized for potential memory savings if features at
254 255 * the end of the save area are not enabled.
255 256 */
256 257 struct xsave_info {
257 258 uint32_t xsav_hw_features_low; /* Supported HW features */
258 259 uint32_t xsav_hw_features_high; /* Supported HW features */
259 260 size_t xsav_max_size; /* max size save area for HW features */
260 261 size_t ymm_size; /* AVX: size of ymm save area */
261 262 size_t ymm_offset; /* AVX: offset for ymm save area */
262 263 };
263 264
264 265
265 266 /*
266 267 * These constants determine how many of the elements of the
267 268 * cpuid we cache in the cpuid_info data structure; the
268 269 * remaining elements are accessible via the cpuid instruction.
269 270 */
270 271
271 272 #define NMAX_CPI_STD 6 /* eax = 0 .. 5 */
272 -#define NMAX_CPI_EXTD 0x1c /* eax = 0x80000000 .. 0x8000001b */
273 +#define NMAX_CPI_EXTD 0x1f /* eax = 0x80000000 .. 0x8000001e */
273 274
274 275 /*
275 276 * Some terminology needs to be explained:
276 277 * - Socket: Something that can be plugged into a motherboard.
277 278 * - Package: Same as socket
278 279 * - Chip: Same as socket. Note that AMD's documentation uses term "chip"
279 280 * differently: there, chip is the same as processor node (below)
280 281 * - Processor node: Some AMD processors have more than one
281 282 * "subprocessor" embedded in a package. These subprocessors (nodes)
282 283 * are fully-functional processors themselves with cores, caches,
283 284 * memory controllers, PCI configuration spaces. They are connected
284 285 * inside the package with Hypertransport links. On single-node
285 286 * processors, processor node is equivalent to chip/socket/package.
287 + * - Compute Unit: Some AMD processors pair cores in "compute units" that
288 + * share the FPU and the I$ and L2 caches.
286 289 */
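/*
 * Worked example with illustrative numbers: a hypothetical two-node family
 * 0x15 package with 8 cores per node and 2 cores per compute unit presents
 * one socket/package, 2 processor nodes, 2 x 4 = 8 compute units and
 * 2 x 8 = 16 cores, each compute unit sharing its FPU, I$ and L2 between
 * its two cores.
 */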
287 290
288 291 struct cpuid_info {
289 292 uint_t cpi_pass; /* last pass completed */
290 293 /*
291 294 * standard function information
292 295 */
293 296 uint_t cpi_maxeax; /* fn 0: %eax */
294 297 char cpi_vendorstr[13]; /* fn 0: %ebx:%ecx:%edx */
295 298 uint_t cpi_vendor; /* enum of cpi_vendorstr */
296 299
297 300 uint_t cpi_family; /* fn 1: extended family */
298 301 uint_t cpi_model; /* fn 1: extended model */
299 302 uint_t cpi_step; /* fn 1: stepping */
300 303 chipid_t cpi_chipid; /* fn 1: %ebx: Intel: chip # */
301 304 /* AMD: package/socket # */
302 305 uint_t cpi_brandid; /* fn 1: %ebx: brand ID */
303 306 int cpi_clogid; /* fn 1: %ebx: thread # */
304 307 uint_t cpi_ncpu_per_chip; /* fn 1: %ebx: logical cpu count */
305 308 uint8_t cpi_cacheinfo[16]; /* fn 2: intel-style cache desc */
306 309 uint_t cpi_ncache; /* fn 2: number of elements */
307 310 uint_t cpi_ncpu_shr_last_cache; /* fn 4: %eax: ncpus sharing cache */
308 311 id_t cpi_last_lvl_cacheid; /* fn 4: %eax: derived cache id */
309 312 uint_t cpi_std_4_size; /* fn 4: number of fn 4 elements */
310 313 struct cpuid_regs **cpi_std_4; /* fn 4: %ecx == 0 .. fn4_size */
311 314 struct cpuid_regs cpi_std[NMAX_CPI_STD]; /* 0 .. 5 */
312 315 /*
313 316 * extended function information
314 317 */
315 318 uint_t cpi_xmaxeax; /* fn 0x80000000: %eax */
316 319 char cpi_brandstr[49]; /* fn 0x8000000[234] */
317 320 uint8_t cpi_pabits; /* fn 0x80000006: %eax */
318 321 uint8_t cpi_vabits; /* fn 0x80000006: %eax */
319 322 struct cpuid_regs cpi_extd[NMAX_CPI_EXTD]; /* 0x800000XX */
320 323
321 324 id_t cpi_coreid; /* same coreid => strands share core */
322 325 int cpi_pkgcoreid; /* core number within single package */
323 326 uint_t cpi_ncore_per_chip; /* AMD: fn 0x80000008: %ecx[7-0] */
324 327 /* Intel: fn 4: %eax[31-26] */
325 328 /*
326 329 * supported feature information
327 330 */
328 331 uint32_t cpi_support[5];
329 332 #define STD_EDX_FEATURES 0
330 333 #define AMD_EDX_FEATURES 1
331 334 #define TM_EDX_FEATURES 2
332 335 #define STD_ECX_FEATURES 3
333 336 #define AMD_ECX_FEATURES 4
334 337 /*
335 338 * Synthesized information, where known.
336 339 */
337 340 uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */
338 341 const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */
339 342 uint32_t cpi_socket; /* Chip package/socket type */
340 343
341 344 struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */
342 345 uint32_t cpi_apicid;
343 346 uint_t cpi_procnodeid; /* AMD: nodeID on HT, Intel: chipid */
344 347 uint_t cpi_procnodes_per_pkg; /* AMD: # of nodes in the package */
345 348 /* Intel: 1 */
349 + uint_t cpi_compunitid; /* AMD: ComputeUnit ID, Intel: coreid */
350 + uint_t cpi_cores_per_compunit; /* AMD: # of cores in the ComputeUnit */
346 351
347 352 struct xsave_info cpi_xsave; /* fn D: xsave/xrestor info */
348 353 };
349 354
350 355
351 356 static struct cpuid_info cpuid_info0;
352 357
353 358 /*
354 359 * These bit fields are defined by the Intel Application Note AP-485
355 360 * "Intel Processor Identification and the CPUID Instruction"
356 361 */
357 362 #define CPI_FAMILY_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 27, 20)
358 363 #define CPI_MODEL_XTD(cpi) BITX((cpi)->cpi_std[1].cp_eax, 19, 16)
359 364 #define CPI_TYPE(cpi) BITX((cpi)->cpi_std[1].cp_eax, 13, 12)
360 365 #define CPI_FAMILY(cpi) BITX((cpi)->cpi_std[1].cp_eax, 11, 8)
361 366 #define CPI_STEP(cpi) BITX((cpi)->cpi_std[1].cp_eax, 3, 0)
362 367 #define CPI_MODEL(cpi) BITX((cpi)->cpi_std[1].cp_eax, 7, 4)
363 368
364 369 #define CPI_FEATURES_EDX(cpi) ((cpi)->cpi_std[1].cp_edx)
365 370 #define CPI_FEATURES_ECX(cpi) ((cpi)->cpi_std[1].cp_ecx)
366 371 #define CPI_FEATURES_XTD_EDX(cpi) ((cpi)->cpi_extd[1].cp_edx)
367 372 #define CPI_FEATURES_XTD_ECX(cpi) ((cpi)->cpi_extd[1].cp_ecx)
368 373
369 374 #define CPI_BRANDID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 7, 0)
370 375 #define CPI_CHUNKS(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 15, 7)
371 376 #define CPI_CPU_COUNT(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 23, 16)
372 377 #define CPI_APIC_ID(cpi) BITX((cpi)->cpi_std[1].cp_ebx, 31, 24)
373 378
374 379 #define CPI_MAXEAX_MAX 0x100 /* sanity control */
375 380 #define CPI_XMAXEAX_MAX 0x80000100
376 381 #define CPI_FN4_ECX_MAX 0x20 /* sanity: max fn 4 levels */
377 382 #define CPI_FNB_ECX_MAX 0x20 /* sanity: max fn B levels */
378 383
379 384 /*
380 385 * Function 4 (Deterministic Cache Parameters) macros
381 386 * Defined by Intel Application Note AP-485
382 387 */
383 388 #define CPI_NUM_CORES(regs) BITX((regs)->cp_eax, 31, 26)
384 389 #define CPI_NTHR_SHR_CACHE(regs) BITX((regs)->cp_eax, 25, 14)
385 390 #define CPI_FULL_ASSOC_CACHE(regs) BITX((regs)->cp_eax, 9, 9)
386 391 #define CPI_SELF_INIT_CACHE(regs) BITX((regs)->cp_eax, 8, 8)
387 392 #define CPI_CACHE_LVL(regs) BITX((regs)->cp_eax, 7, 5)
388 393 #define CPI_CACHE_TYPE(regs) BITX((regs)->cp_eax, 4, 0)
389 394 #define CPI_CPU_LEVEL_TYPE(regs) BITX((regs)->cp_ecx, 15, 8)
390 395
391 396 #define CPI_CACHE_WAYS(regs) BITX((regs)->cp_ebx, 31, 22)
392 397 #define CPI_CACHE_PARTS(regs) BITX((regs)->cp_ebx, 21, 12)
393 398 #define CPI_CACHE_COH_LN_SZ(regs) BITX((regs)->cp_ebx, 11, 0)
394 399
395 400 #define CPI_CACHE_SETS(regs) BITX((regs)->cp_ecx, 31, 0)
396 401
397 402 #define CPI_PREFCH_STRIDE(regs) BITX((regs)->cp_edx, 9, 0)
398 403
399 404
400 405 /*
401 406 * A couple of shorthand macros to identify "later" P6-family chips
402 407 * like the Pentium M and Core. First, the "older" P6-based stuff
403 408 * (loosely defined as "pre-Pentium-4"):
404 409 * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon
405 410 */
406 411
407 412 #define IS_LEGACY_P6(cpi) ( \
408 413 cpi->cpi_family == 6 && \
409 414 (cpi->cpi_model == 1 || \
410 415 cpi->cpi_model == 3 || \
411 416 cpi->cpi_model == 5 || \
412 417 cpi->cpi_model == 6 || \
413 418 cpi->cpi_model == 7 || \
414 419 cpi->cpi_model == 8 || \
415 420 cpi->cpi_model == 0xA || \
416 421 cpi->cpi_model == 0xB) \
417 422 )
418 423
419 424 /* A "new F6" is everything with family 6 that's not the above */
420 425 #define IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi))
421 426
422 427 /* Extended family/model support */
423 428 #define IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \
424 429 cpi->cpi_family >= 0xf)
425 430
426 431 /*
427 432 * Info for monitor/mwait idle loop.
428 433 *
429 434 * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's
430 435 * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November
431 436 * 2006.
432 437 * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual
433 438 * Documentation Updates" #33633, Rev 2.05, December 2006.
434 439 */
435 440 #define MWAIT_SUPPORT (0x00000001) /* mwait supported */
436 441 #define MWAIT_EXTENSIONS (0x00000002) /* extension supported */
437 442 #define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */
438 443 #define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON)
439 444 #define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2)
440 445 #define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1)
441 446 #define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0)
442 447 #define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0)
443 448 /*
444 449 * Number of sub-cstates for a given c-state.
445 450 */
446 451 #define MWAIT_NUM_SUBC_STATES(cpi, c_state) \
447 452 BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)
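/*
 * Example: with c_state == 4 the macro above evaluates bits 7:4 of the
 * leaf 5 %edx value, i.e. the number of C1 sub C-states the processor
 * advertises for MWAIT.
 */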
448 453
449 454 /*
450 455 * XSAVE leaf 0xD enumeration
451 456 */
452 457 #define CPUID_LEAFD_2_YMM_OFFSET 576
453 458 #define CPUID_LEAFD_2_YMM_SIZE 256
454 459
455 460 /*
456 461 * Functions we consume from cpuid_subr.c; don't publish these in a header
457 462 * file to try and keep people using the expected cpuid_* interfaces.
458 463 */
459 464 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t);
460 465 extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t);
461 466 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t);
462 467 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t);
463 468 extern uint_t _cpuid_vendorstr_to_vendorcode(char *);
464 469
465 470 /*
466 471 * Apply up various platform-dependent restrictions where the
467 472 * underlying platform restrictions mean the CPU can be marked
468 473 * as less capable than its cpuid instruction would imply.
469 474 */
470 475 #if defined(__xpv)
471 476 static void
472 477 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp)
473 478 {
474 479 switch (eax) {
475 480 case 1: {
476 481 uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ?
477 482 0 : CPUID_INTC_EDX_MCA;
478 483 cp->cp_edx &=
479 484 ~(mcamask |
480 485 CPUID_INTC_EDX_PSE |
481 486 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
482 487 CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR |
483 488 CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT |
484 489 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
485 490 CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT);
486 491 break;
487 492 }
488 493
489 494 case 0x80000001:
490 495 cp->cp_edx &=
491 496 ~(CPUID_AMD_EDX_PSE |
492 497 CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
493 498 CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE |
494 499 CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 |
495 500 CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
496 501 CPUID_AMD_EDX_TSCP);
497 502 cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY;
498 503 break;
499 504 default:
500 505 break;
501 506 }
502 507
503 508 switch (vendor) {
504 509 case X86_VENDOR_Intel:
505 510 switch (eax) {
506 511 case 4:
507 512 /*
508 513 * Zero out the (ncores-per-chip - 1) field
509 514 */
510 515 cp->cp_eax &= 0x03fffffff;
511 516 break;
512 517 default:
513 518 break;
514 519 }
515 520 break;
516 521 case X86_VENDOR_AMD:
517 522 switch (eax) {
518 523
519 524 case 0x80000001:
520 525 cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D;
521 526 break;
522 527
523 528 case 0x80000008:
524 529 /*
525 530 * Zero out the (ncores-per-chip - 1) field
526 531 */
527 532 cp->cp_ecx &= 0xffffff00;
528 533 break;
529 534 default:
530 535 break;
531 536 }
532 537 break;
533 538 default:
534 539 break;
535 540 }
536 541 }
537 542 #else
538 543 #define platform_cpuid_mangle(vendor, eax, cp) /* nothing */
539 544 #endif
540 545
541 546 /*
542 547 * Some undocumented ways of patching the results of the cpuid
543 548 * instruction to permit running Solaris 10 on future cpus that
544 549 * we don't currently support. Could be set to non-zero values
545 550 * via settings in eeprom.
546 551 */
547 552
548 553 uint32_t cpuid_feature_ecx_include;
549 554 uint32_t cpuid_feature_ecx_exclude;
550 555 uint32_t cpuid_feature_edx_include;
551 556 uint32_t cpuid_feature_edx_exclude;
552 557
553 558 /*
554 559 * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs.
555 560 */
556 561 void
557 562 cpuid_alloc_space(cpu_t *cpu)
558 563 {
559 564 /*
560 565 * By convention, cpu0 is the boot cpu, which is set up
561 566 * before memory allocation is available. All other cpus get
562 567 * their cpuid_info struct allocated here.
563 568 */
564 569 ASSERT(cpu->cpu_id != 0);
565 570 ASSERT(cpu->cpu_m.mcpu_cpi == NULL);
566 571 cpu->cpu_m.mcpu_cpi =
567 572 kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP);
568 573 }
569 574
570 575 void
571 576 cpuid_free_space(cpu_t *cpu)
572 577 {
573 578 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
574 579 int i;
575 580
576 581 ASSERT(cpi != NULL);
577 582 ASSERT(cpi != &cpuid_info0);
578 583
579 584 /*
580 585 * Free up any function 4 related dynamic storage
581 586 */
582 587 for (i = 1; i < cpi->cpi_std_4_size; i++)
583 588 kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs));
584 589 if (cpi->cpi_std_4_size > 0)
585 590 kmem_free(cpi->cpi_std_4,
586 591 cpi->cpi_std_4_size * sizeof (struct cpuid_regs *));
587 592
588 593 kmem_free(cpi, sizeof (*cpi));
589 594 cpu->cpu_m.mcpu_cpi = NULL;
590 595 }
591 596
592 597 #if !defined(__xpv)
593 598
594 599 /*
595 600 * Determine the type of the underlying platform. This is used to customize
596 601 * initialization of various subsystems (e.g. TSC). determine_platform() must
597 602 * only ever be called once to prevent two processors from seeing different
598 603 * values of platform_type; it must be called before cpuid_pass1(), the
599 604 * earliest consumer to execute.
600 605 */
601 606 void
602 607 determine_platform(void)
603 608 {
604 609 struct cpuid_regs cp;
605 610 char *xen_str;
606 611 uint32_t xen_signature[4], base;
607 612
608 613 ASSERT(platform_type == -1);
609 614
610 615 platform_type = HW_NATIVE;
611 616
612 617 if (!enable_platform_detection)
613 618 return;
614 619
615 620 /*
616 621 * In a fully virtualized domain, Xen's pseudo-cpuid function
617 622 * returns a string representing the Xen signature in %ebx, %ecx,
618 623 * and %edx. %eax contains the maximum supported cpuid function.
619 624 * We need at least a (base + 2) leaf value to do what we want
620 625 * to do. Try different base values, since the hypervisor might
621 626 * use a different one depending on whether hyper-v emulation
622 627 * is switched on by default or not.
623 628 */
624 629 for (base = 0x40000000; base < 0x40010000; base += 0x100) {
625 630 cp.cp_eax = base;
626 631 (void) __cpuid_insn(&cp);
627 632 xen_signature[0] = cp.cp_ebx;
628 633 xen_signature[1] = cp.cp_ecx;
629 634 xen_signature[2] = cp.cp_edx;
630 635 xen_signature[3] = 0;
631 636 xen_str = (char *)xen_signature;
632 637 if (strcmp("XenVMMXenVMM", xen_str) == 0 &&
633 638 cp.cp_eax >= (base + 2)) {
634 639 platform_type = HW_XEN_HVM;
635 640 return;
636 641 }
637 642 }
638 643
639 644 if (vmware_platform()) /* running under vmware hypervisor? */
640 645 platform_type = HW_VMWARE;
641 646 }
642 647
643 648 int
644 649 get_hwenv(void)
645 650 {
646 651 ASSERT(platform_type != -1);
647 652 return (platform_type);
648 653 }
649 654
650 655 int
651 656 is_controldom(void)
652 657 {
653 658 return (0);
654 659 }
655 660
656 661 #else
657 662
658 663 int
659 664 get_hwenv(void)
660 665 {
661 666 return (HW_XEN_PV);
662 667 }
663 668
664 669 int
665 670 is_controldom(void)
666 671 {
667 672 return (DOMAIN_IS_INITDOMAIN(xen_info));
668 673 }
669 674
670 675 #endif /* __xpv */
671 676
672 677 static void
673 678 cpuid_intel_getids(cpu_t *cpu, void *feature)
674 679 {
675 680 uint_t i;
676 681 uint_t chipid_shift = 0;
677 682 uint_t coreid_shift = 0;
678 683 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
679 684
680 685 for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1)
681 686 chipid_shift++;
682 687
683 688 cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift;
684 689 cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1);
685 690
686 691 if (is_x86_feature(feature, X86FSET_CMP)) {
687 692 /*
688 693 * Multi-core (and possibly multi-threaded)
689 694 * processors.
690 695 */
691 696 uint_t ncpu_per_core;
692 697 if (cpi->cpi_ncore_per_chip == 1)
693 698 ncpu_per_core = cpi->cpi_ncpu_per_chip;
694 699 else if (cpi->cpi_ncore_per_chip > 1)
695 700 ncpu_per_core = cpi->cpi_ncpu_per_chip /
696 701 cpi->cpi_ncore_per_chip;
697 702 /*
698 703 * 8bit APIC IDs on dual core Pentiums
699 704 * look like this:
700 705 *
701 706 * +-----------------------+------+------+
702 707 * | Physical Package ID | MC | HT |
703 708 * +-----------------------+------+------+
704 709 * <------- chipid -------->
705 710 * <------- coreid --------------->
706 711 * <--- clogid -->
707 712 * <------>
708 713 * pkgcoreid
709 714 *
710 715 * Where the number of bits necessary to
711 716 * represent MC and HT fields together equals
712 717 * to the minimum number of bits necessary to
713 718 * store the value of cpi->cpi_ncpu_per_chip.
714 719 * Of those bits, the MC part uses the number
715 720 * of bits necessary to store the value of
716 721 * cpi->cpi_ncore_per_chip.
717 722 */
718 723 for (i = 1; i < ncpu_per_core; i <<= 1)
719 724 coreid_shift++;
720 725 cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift;
721 726 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
722 727 } else if (is_x86_feature(feature, X86FSET_HTT)) {
723 728 /*
724 729 * Single-core multi-threaded processors.
725 730 */
726 731 cpi->cpi_coreid = cpi->cpi_chipid;
727 732 cpi->cpi_pkgcoreid = 0;
728 733 }
729 734 cpi->cpi_procnodeid = cpi->cpi_chipid;
735 + cpi->cpi_compunitid = cpi->cpi_coreid;
730 736 }
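/*
 * Worked example for the derivation above, using illustrative values: with
 * cpi_ncpu_per_chip = 4 and cpi_ncore_per_chip = 2, chipid_shift = 2 and
 * coreid_shift = 1, so APIC ID 0x6 yields chipid = 0x6 >> 2 = 1,
 * clogid = 0x6 & 0x3 = 2, coreid = 0x6 >> 1 = 3, pkgcoreid = 2 >> 1 = 1,
 * and compunitid = coreid = 3.
 */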
731 737
732 738 static void
733 739 cpuid_amd_getids(cpu_t *cpu)
734 740 {
735 741 int i, first_half, coreidsz;
736 742 uint32_t nb_caps_reg;
737 743 uint_t node2_1;
738 744 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
745 + struct cpuid_regs *cp;
739 746
740 747 /*
741 748 * AMD CMP chips currently have a single thread per core.
742 749 *
743 750 * Since no two cpus share a core we must assign a distinct coreid
744 751 * per cpu, and we do this by using the cpu_id. This scheme does not,
745 752 * however, guarantee that sibling cores of a chip will have sequential
746 753 * coreids starting at a multiple of the number of cores per chip -
747 754 * that is usually the case, but if the ACPI MADT table is presented
748 755 * in a different order then we need to perform a few more gymnastics
749 756 * for the pkgcoreid.
750 757 *
751 758 * All processors in the system have the same number of enabled
752 759 * cores. Cores within a processor are always numbered sequentially
753 760 * from 0 regardless of how many or which are disabled, and there
754 761 * is no way for the operating system to discover the real core id when some
755 762 * are disabled.
763 + *
764 + * In family 0x15, the cores come in pairs called compute units. They
765 + * share I$ and L2 caches and the FPU. Enumeration of this feature is
766 + * simplified by the new topology extensions CPUID leaf, indicated by
767 + * the X86 feature X86FSET_TOPOEXT.
756 768 */
757 769
758 770 cpi->cpi_coreid = cpu->cpu_id;
771 + cpi->cpi_compunitid = cpu->cpu_id;
759 772
760 773 if (cpi->cpi_xmaxeax >= 0x80000008) {
761 774
762 775 coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12);
763 776
764 777 /*
765 778 * In AMD parlance chip is really a node while Solaris
766 779 * sees chip as equivalent to socket/package.
767 780 */
768 781 cpi->cpi_ncore_per_chip =
769 782 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1;
770 783 if (coreidsz == 0) {
771 784 /* Use legacy method */
772 785 for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1)
773 786 coreidsz++;
774 787 if (coreidsz == 0)
775 788 coreidsz = 1;
776 789 }
777 790 } else {
778 791 /* Assume single-core part */
779 792 cpi->cpi_ncore_per_chip = 1;
780 793 coreidsz = 1;
781 794 }
782 795
783 796 cpi->cpi_clogid = cpi->cpi_pkgcoreid =
784 797 cpi->cpi_apicid & ((1<<coreidsz) - 1);
785 798 cpi->cpi_ncpu_per_chip = cpi->cpi_ncore_per_chip;
786 799
787 - /* Get nodeID */
788 - if (cpi->cpi_family == 0xf) {
800 + /* Get node ID, compute unit ID */
801 + if (is_x86_feature(x86_featureset, X86FSET_TOPOEXT) &&
802 + cpi->cpi_xmaxeax >= 0x8000001e) {
803 + cp = &cpi->cpi_extd[0x1e];
804 + cp->cp_eax = 0x8000001e;
805 + (void) __cpuid_insn(cp);
806 +
807 + cpi->cpi_procnodes_per_pkg = BITX(cp->cp_ecx, 10, 8) + 1;
808 + cpi->cpi_procnodeid = BITX(cp->cp_ecx, 7, 0);
809 + cpi->cpi_cores_per_compunit = BITX(cp->cp_ebx, 15, 8) + 1;
810 + cpi->cpi_compunitid = BITX(cp->cp_ebx, 7, 0)
811 + + (cpi->cpi_ncore_per_chip / cpi->cpi_cores_per_compunit)
812 + * (cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg);
813 + } else if (cpi->cpi_family == 0xf || cpi->cpi_family >= 0x11) {
789 814 cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
790 - cpi->cpi_chipid = cpi->cpi_procnodeid;
791 815 } else if (cpi->cpi_family == 0x10) {
792 816 /*
793 817 * See if we are a multi-node processor.
794 818 * All processors in the system have the same number of nodes
795 819 */
796 820 nb_caps_reg = pci_getl_func(0, 24, 3, 0xe8);
797 821 if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) {
798 822 /* Single-node */
799 823 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5,
800 824 coreidsz);
801 - cpi->cpi_chipid = cpi->cpi_procnodeid;
802 825 } else {
803 826
804 827 /*
805 828 * Multi-node revision D (2 nodes per package
806 829 * are supported)
807 830 */
808 831 cpi->cpi_procnodes_per_pkg = 2;
809 832
810 833 first_half = (cpi->cpi_pkgcoreid <=
811 834 (cpi->cpi_ncore_per_chip/2 - 1));
812 835
813 836 if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) {
814 837 /* We are BSP */
815 838 cpi->cpi_procnodeid = (first_half ? 0 : 1);
816 - cpi->cpi_chipid = cpi->cpi_procnodeid >> 1;
817 839 } else {
818 840
819 841 /* We are AP */
820 842 /* NodeId[2:1] bits to use for reading F3xe8 */
821 843 node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1;
822 844
823 845 nb_caps_reg =
824 846 pci_getl_func(0, 24 + node2_1, 3, 0xe8);
825 847
826 848 /*
827 849 * Check IntNodeNum bit (31:30, but bit 31 is
828 850 * always 0 on dual-node processors)
829 851 */
830 852 if (BITX(nb_caps_reg, 30, 30) == 0)
831 853 cpi->cpi_procnodeid = node2_1 +
832 854 !first_half;
833 855 else
834 856 cpi->cpi_procnodeid = node2_1 +
835 857 first_half;
836 -
837 - cpi->cpi_chipid = cpi->cpi_procnodeid >> 1;
838 858 }
839 859 }
840 - } else if (cpi->cpi_family >= 0x11) {
841 - cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
842 - cpi->cpi_chipid = cpi->cpi_procnodeid;
843 860 } else {
844 861 cpi->cpi_procnodeid = 0;
845 - cpi->cpi_chipid = cpi->cpi_procnodeid;
846 862 }
863 +
864 + cpi->cpi_chipid =
865 + cpi->cpi_procnodeid / cpi->cpi_procnodes_per_pkg;
847 866 }
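/*
 * Worked example for the leaf 0x8000001e path above, applying the formula
 * mechanically to hypothetical register values: with
 * cpi_procnodes_per_pkg = 2, cpi_procnodeid = 3, cpi_cores_per_compunit = 2,
 * cpi_ncore_per_chip = 8 and EBX[7:0] = 1, the code computes
 * cpi_compunitid = 1 + (8 / 2) * (3 / 2) = 5 and cpi_chipid = 3 / 2 = 1.
 */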
848 867
849 868 /*
850 869 * Setup XFeature_Enabled_Mask register. Required by xsave feature.
851 870 */
852 871 void
853 872 setup_xfem(void)
854 873 {
855 874 uint64_t flags = XFEATURE_LEGACY_FP;
856 875
857 876 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
858 877
859 878 if (is_x86_feature(x86_featureset, X86FSET_SSE))
860 879 flags |= XFEATURE_SSE;
861 880
862 881 if (is_x86_feature(x86_featureset, X86FSET_AVX))
863 882 flags |= XFEATURE_AVX;
864 883
865 884 set_xcr(XFEATURE_ENABLED_MASK, flags);
866 885
867 886 xsave_bv_all = flags;
868 887 }
869 888
870 889 void
871 890 cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
872 891 {
873 892 uint32_t mask_ecx, mask_edx;
874 893 struct cpuid_info *cpi;
875 894 struct cpuid_regs *cp;
876 895 int xcpuid;
877 896 #if !defined(__xpv)
878 897 extern int idle_cpu_prefer_mwait;
879 898 #endif
880 899
881 900 /*
882 901 * Space statically allocated for BSP, ensure pointer is set
883 902 */
884 903 if (cpu->cpu_id == 0) {
885 904 if (cpu->cpu_m.mcpu_cpi == NULL)
886 905 cpu->cpu_m.mcpu_cpi = &cpuid_info0;
887 906 }
888 907
889 908 add_x86_feature(featureset, X86FSET_CPUID);
890 909
891 910 cpi = cpu->cpu_m.mcpu_cpi;
892 911 ASSERT(cpi != NULL);
893 912 cp = &cpi->cpi_std[0];
894 913 cp->cp_eax = 0;
895 914 cpi->cpi_maxeax = __cpuid_insn(cp);
896 915 {
897 916 uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr;
898 917 *iptr++ = cp->cp_ebx;
899 918 *iptr++ = cp->cp_edx;
900 919 *iptr++ = cp->cp_ecx;
901 920 *(char *)&cpi->cpi_vendorstr[12] = '\0';
902 921 }
903 922
904 923 cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr);
905 924 x86_vendor = cpi->cpi_vendor; /* for compatibility */
906 925
907 926 /*
908 927 * Limit the range in case of weird hardware
909 928 */
910 929 if (cpi->cpi_maxeax > CPI_MAXEAX_MAX)
911 930 cpi->cpi_maxeax = CPI_MAXEAX_MAX;
912 931 if (cpi->cpi_maxeax < 1)
913 932 goto pass1_done;
914 933
915 934 cp = &cpi->cpi_std[1];
916 935 cp->cp_eax = 1;
917 936 (void) __cpuid_insn(cp);
918 937
919 938 /*
920 939 * Extract identifying constants for easy access.
921 940 */
922 941 cpi->cpi_model = CPI_MODEL(cpi);
923 942 cpi->cpi_family = CPI_FAMILY(cpi);
924 943
925 944 if (cpi->cpi_family == 0xf)
926 945 cpi->cpi_family += CPI_FAMILY_XTD(cpi);
927 946
928 947 /*
929 948 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf.
930 949 * Intel, and presumably everyone else, uses model == 0xf, as
931 950 * one would expect (max value means possible overflow). Sigh.
932 951 */
933 952
934 953 switch (cpi->cpi_vendor) {
935 954 case X86_VENDOR_Intel:
936 955 if (IS_EXTENDED_MODEL_INTEL(cpi))
937 956 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
938 957 break;
939 958 case X86_VENDOR_AMD:
940 959 if (CPI_FAMILY(cpi) == 0xf)
941 960 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
942 961 break;
943 962 default:
944 963 if (cpi->cpi_model == 0xf)
945 964 cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
946 965 break;
947 966 }
948 967
949 968 cpi->cpi_step = CPI_STEP(cpi);
950 969 cpi->cpi_brandid = CPI_BRANDID(cpi);
951 970
952 971 /*
953 972 * *default* assumptions:
954 973 * - believe %edx feature word
955 974 * - ignore %ecx feature word
956 975 * - 32-bit virtual and physical addressing
957 976 */
958 977 mask_edx = 0xffffffff;
959 978 mask_ecx = 0;
960 979
961 980 cpi->cpi_pabits = cpi->cpi_vabits = 32;
962 981
963 982 switch (cpi->cpi_vendor) {
964 983 case X86_VENDOR_Intel:
965 984 if (cpi->cpi_family == 5)
966 985 x86_type = X86_TYPE_P5;
967 986 else if (IS_LEGACY_P6(cpi)) {
968 987 x86_type = X86_TYPE_P6;
969 988 pentiumpro_bug4046376 = 1;
970 989 pentiumpro_bug4064495 = 1;
971 990 /*
972 991 * Clear the SEP bit when it was set erroneously
973 992 */
974 993 if (cpi->cpi_model < 3 && cpi->cpi_step < 3)
975 994 cp->cp_edx &= ~CPUID_INTC_EDX_SEP;
976 995 } else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) {
977 996 x86_type = X86_TYPE_P4;
978 997 /*
979 998 * We don't currently depend on any of the %ecx
980 999 * features until Prescott, so we'll only check
981 1000 * this from P4 onwards. We might want to revisit
982 1001 * that idea later.
983 1002 */
984 1003 mask_ecx = 0xffffffff;
985 1004 } else if (cpi->cpi_family > 0xf)
986 1005 mask_ecx = 0xffffffff;
987 1006 /*
988 1007 * We don't support MONITOR/MWAIT if leaf 5 is not available
989 1008 * to obtain the monitor linesize.
990 1009 */
991 1010 if (cpi->cpi_maxeax < 5)
992 1011 mask_ecx &= ~CPUID_INTC_ECX_MON;
993 1012 break;
994 1013 case X86_VENDOR_IntelClone:
995 1014 default:
996 1015 break;
997 1016 case X86_VENDOR_AMD:
998 1017 #if defined(OPTERON_ERRATUM_108)
999 1018 if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) {
1000 1019 cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0;
1001 1020 cpi->cpi_model = 0xc;
1002 1021 } else
1003 1022 #endif
1004 1023 if (cpi->cpi_family == 5) {
1005 1024 /*
1006 1025 * AMD K5 and K6
1007 1026 *
1008 1027 * These CPUs have an incomplete implementation
1009 1028 * of MCA/MCE which we mask away.
1010 1029 */
1011 1030 mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA);
1012 1031
1013 1032 /*
1014 1033 * Model 0 uses the wrong (APIC) bit
1015 1034 * to indicate PGE. Fix it here.
1016 1035 */
1017 1036 if (cpi->cpi_model == 0) {
1018 1037 if (cp->cp_edx & 0x200) {
1019 1038 cp->cp_edx &= ~0x200;
1020 1039 cp->cp_edx |= CPUID_INTC_EDX_PGE;
1021 1040 }
1022 1041 }
1023 1042
1024 1043 /*
1025 1044 * Early models had problems w/ MMX; disable.
1026 1045 */
1027 1046 if (cpi->cpi_model < 6)
1028 1047 mask_edx &= ~CPUID_INTC_EDX_MMX;
1029 1048 }
1030 1049
1031 1050 /*
1032 1051 * For newer families, SSE3 and CX16, at least, are valid;
1033 1052 * enable all
1034 1053 */
1035 1054 if (cpi->cpi_family >= 0xf)
1036 1055 mask_ecx = 0xffffffff;
1037 1056 /*
1038 1057 * We don't support MONITOR/MWAIT if leaf 5 is not available
1039 1058 * to obtain the monitor linesize.
1040 1059 */
1041 1060 if (cpi->cpi_maxeax < 5)
1042 1061 mask_ecx &= ~CPUID_INTC_ECX_MON;
1043 1062
1044 1063 #if !defined(__xpv)
1045 1064 /*
1046 1065 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD
1047 1066 * processors. AMD does not intend MWAIT to be used in the cpu
1048 1067 * idle loop on current and future processors. 10h and future
1049 1068 * AMD processors use more power in MWAIT than HLT.
1050 1069 * Pre-family-10h Opterons do not have the MWAIT instruction.
1051 1070 */
1052 1071 idle_cpu_prefer_mwait = 0;
1053 1072 #endif
1054 1073
1055 1074 break;
1056 1075 case X86_VENDOR_TM:
1057 1076 /*
1058 1077 * workaround the NT workaround in CMS 4.1
1059 1078 */
1060 1079 if (cpi->cpi_family == 5 && cpi->cpi_model == 4 &&
1061 1080 (cpi->cpi_step == 2 || cpi->cpi_step == 3))
1062 1081 cp->cp_edx |= CPUID_INTC_EDX_CX8;
1063 1082 break;
1064 1083 case X86_VENDOR_Centaur:
1065 1084 /*
1066 1085 * workaround the NT workarounds again
1067 1086 */
1068 1087 if (cpi->cpi_family == 6)
1069 1088 cp->cp_edx |= CPUID_INTC_EDX_CX8;
1070 1089 break;
1071 1090 case X86_VENDOR_Cyrix:
1072 1091 /*
1073 1092 * We rely heavily on the probing in locore
1074 1093 * to actually figure out what parts, if any,
1075 1094 * of the Cyrix cpuid instruction to believe.
1076 1095 */
1077 1096 switch (x86_type) {
1078 1097 case X86_TYPE_CYRIX_486:
1079 1098 mask_edx = 0;
1080 1099 break;
1081 1100 case X86_TYPE_CYRIX_6x86:
1082 1101 mask_edx = 0;
1083 1102 break;
1084 1103 case X86_TYPE_CYRIX_6x86L:
1085 1104 mask_edx =
1086 1105 CPUID_INTC_EDX_DE |
1087 1106 CPUID_INTC_EDX_CX8;
1088 1107 break;
1089 1108 case X86_TYPE_CYRIX_6x86MX:
1090 1109 mask_edx =
1091 1110 CPUID_INTC_EDX_DE |
1092 1111 CPUID_INTC_EDX_MSR |
1093 1112 CPUID_INTC_EDX_CX8 |
1094 1113 CPUID_INTC_EDX_PGE |
1095 1114 CPUID_INTC_EDX_CMOV |
1096 1115 CPUID_INTC_EDX_MMX;
1097 1116 break;
1098 1117 case X86_TYPE_CYRIX_GXm:
1099 1118 mask_edx =
1100 1119 CPUID_INTC_EDX_MSR |
1101 1120 CPUID_INTC_EDX_CX8 |
1102 1121 CPUID_INTC_EDX_CMOV |
1103 1122 CPUID_INTC_EDX_MMX;
1104 1123 break;
1105 1124 case X86_TYPE_CYRIX_MediaGX:
1106 1125 break;
1107 1126 case X86_TYPE_CYRIX_MII:
1108 1127 case X86_TYPE_VIA_CYRIX_III:
1109 1128 mask_edx =
1110 1129 CPUID_INTC_EDX_DE |
1111 1130 CPUID_INTC_EDX_TSC |
1112 1131 CPUID_INTC_EDX_MSR |
1113 1132 CPUID_INTC_EDX_CX8 |
1114 1133 CPUID_INTC_EDX_PGE |
1115 1134 CPUID_INTC_EDX_CMOV |
1116 1135 CPUID_INTC_EDX_MMX;
1117 1136 break;
1118 1137 default:
1119 1138 break;
1120 1139 }
1121 1140 break;
1122 1141 }
1123 1142
1124 1143 #if defined(__xpv)
1125 1144 /*
1126 1145 * Do not support MONITOR/MWAIT under a hypervisor
1127 1146 */
1128 1147 mask_ecx &= ~CPUID_INTC_ECX_MON;
1129 1148 /*
1130 1149 * Do not support XSAVE under a hypervisor for now
1131 1150 */
1132 1151 xsave_force_disable = B_TRUE;
1133 1152
1134 1153 #endif /* __xpv */
1135 1154
1136 1155 if (xsave_force_disable) {
1137 1156 mask_ecx &= ~CPUID_INTC_ECX_XSAVE;
1138 1157 mask_ecx &= ~CPUID_INTC_ECX_AVX;
1139 1158 }
1140 1159
1141 1160 /*
1142 1161 * Now we've figured out the masks that determine
1143 1162 * which bits we choose to believe, apply the masks
1144 1163 * to the feature words, then map the kernel's view
1145 1164 * of these feature words into its feature word.
1146 1165 */
1147 1166 cp->cp_edx &= mask_edx;
1148 1167 cp->cp_ecx &= mask_ecx;
1149 1168
1150 1169 /*
1151 1170 * apply any platform restrictions (we don't call this
1152 1171 * immediately after __cpuid_insn here, because we need the
1153 1172 * workarounds applied above first)
1154 1173 */
1155 1174 platform_cpuid_mangle(cpi->cpi_vendor, 1, cp);
1156 1175
1157 1176 /*
1158 1177 * fold in overrides from the "eeprom" mechanism
1159 1178 */
1160 1179 cp->cp_edx |= cpuid_feature_edx_include;
1161 1180 cp->cp_edx &= ~cpuid_feature_edx_exclude;
1162 1181
1163 1182 cp->cp_ecx |= cpuid_feature_ecx_include;
1164 1183 cp->cp_ecx &= ~cpuid_feature_ecx_exclude;
1165 1184
1166 1185 if (cp->cp_edx & CPUID_INTC_EDX_PSE) {
1167 1186 add_x86_feature(featureset, X86FSET_LARGEPAGE);
1168 1187 }
1169 1188 if (cp->cp_edx & CPUID_INTC_EDX_TSC) {
1170 1189 add_x86_feature(featureset, X86FSET_TSC);
1171 1190 }
1172 1191 if (cp->cp_edx & CPUID_INTC_EDX_MSR) {
1173 1192 add_x86_feature(featureset, X86FSET_MSR);
1174 1193 }
1175 1194 if (cp->cp_edx & CPUID_INTC_EDX_MTRR) {
1176 1195 add_x86_feature(featureset, X86FSET_MTRR);
1177 1196 }
1178 1197 if (cp->cp_edx & CPUID_INTC_EDX_PGE) {
1179 1198 add_x86_feature(featureset, X86FSET_PGE);
1180 1199 }
1181 1200 if (cp->cp_edx & CPUID_INTC_EDX_CMOV) {
1182 1201 add_x86_feature(featureset, X86FSET_CMOV);
1183 1202 }
1184 1203 if (cp->cp_edx & CPUID_INTC_EDX_MMX) {
1185 1204 add_x86_feature(featureset, X86FSET_MMX);
1186 1205 }
1187 1206 if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 &&
1188 1207 (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) {
1189 1208 add_x86_feature(featureset, X86FSET_MCA);
1190 1209 }
1191 1210 if (cp->cp_edx & CPUID_INTC_EDX_PAE) {
1192 1211 add_x86_feature(featureset, X86FSET_PAE);
1193 1212 }
1194 1213 if (cp->cp_edx & CPUID_INTC_EDX_CX8) {
1195 1214 add_x86_feature(featureset, X86FSET_CX8);
1196 1215 }
1197 1216 if (cp->cp_ecx & CPUID_INTC_ECX_CX16) {
1198 1217 add_x86_feature(featureset, X86FSET_CX16);
1199 1218 }
1200 1219 if (cp->cp_edx & CPUID_INTC_EDX_PAT) {
1201 1220 add_x86_feature(featureset, X86FSET_PAT);
1202 1221 }
1203 1222 if (cp->cp_edx & CPUID_INTC_EDX_SEP) {
1204 1223 add_x86_feature(featureset, X86FSET_SEP);
1205 1224 }
1206 1225 if (cp->cp_edx & CPUID_INTC_EDX_FXSR) {
1207 1226 /*
1208 1227 * In our implementation, fxsave/fxrstor
1209 1228 * are prerequisites before we'll even
1210 1229 * try and do SSE things.
1211 1230 */
1212 1231 if (cp->cp_edx & CPUID_INTC_EDX_SSE) {
1213 1232 add_x86_feature(featureset, X86FSET_SSE);
1214 1233 }
1215 1234 if (cp->cp_edx & CPUID_INTC_EDX_SSE2) {
1216 1235 add_x86_feature(featureset, X86FSET_SSE2);
1217 1236 }
1218 1237 if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) {
1219 1238 add_x86_feature(featureset, X86FSET_SSE3);
1220 1239 }
1221 1240 if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) {
1222 1241 add_x86_feature(featureset, X86FSET_SSSE3);
1223 1242 }
1224 1243 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) {
1225 1244 add_x86_feature(featureset, X86FSET_SSE4_1);
1226 1245 }
1227 1246 if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) {
1228 1247 add_x86_feature(featureset, X86FSET_SSE4_2);
1229 1248 }
1230 1249 if (cp->cp_ecx & CPUID_INTC_ECX_AES) {
1231 1250 add_x86_feature(featureset, X86FSET_AES);
1232 1251 }
1233 1252 if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) {
1234 1253 add_x86_feature(featureset, X86FSET_PCLMULQDQ);
1235 1254 }
1236 1255
1237 1256 if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) {
1238 1257 add_x86_feature(featureset, X86FSET_XSAVE);
1239 1258 /* We only test AVX when there is XSAVE */
1240 1259 if (cp->cp_ecx & CPUID_INTC_ECX_AVX) {
1241 1260 add_x86_feature(featureset,
1242 1261 X86FSET_AVX);
1243 1262 }
1244 1263 }
1245 1264 }
1246 1265 if (cp->cp_edx & CPUID_INTC_EDX_DE) {
1247 1266 add_x86_feature(featureset, X86FSET_DE);
1248 1267 }
1249 1268 #if !defined(__xpv)
1250 1269 if (cp->cp_ecx & CPUID_INTC_ECX_MON) {
1251 1270
1252 1271 /*
1253 1272 * We require the CLFLUSH instruction for erratum workaround
1254 1273 * to use MONITOR/MWAIT.
1255 1274 */
1256 1275 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
1257 1276 cpi->cpi_mwait.support |= MWAIT_SUPPORT;
1258 1277 add_x86_feature(featureset, X86FSET_MWAIT);
1259 1278 } else {
1260 1279 extern int idle_cpu_assert_cflush_monitor;
1261 1280
1262 1281 /*
1263 1282 * All processors we are aware of which have
1264 1283 * MONITOR/MWAIT also have CLFLUSH.
1265 1284 */
1266 1285 if (idle_cpu_assert_cflush_monitor) {
1267 1286 ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) &&
1268 1287 (cp->cp_edx & CPUID_INTC_EDX_CLFSH));
1269 1288 }
1270 1289 }
1271 1290 }
1272 1291 #endif /* __xpv */
1273 1292
1274 1293 if (cp->cp_ecx & CPUID_INTC_ECX_VMX) {
1275 1294 add_x86_feature(featureset, X86FSET_VMX);
1276 1295 }
1277 1296
1278 1297 /*
1279 1298 * Only need it the first time; the rest of the cpus would follow suit.
1280 1299 * We only capture this for the boot cpu.
1281 1300 */
1282 1301 if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
1283 1302 add_x86_feature(featureset, X86FSET_CLFSH);
1284 1303 x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8);
1285 1304 }
1286 1305 if (is_x86_feature(featureset, X86FSET_PAE))
1287 1306 cpi->cpi_pabits = 36;
1288 1307
1289 1308 /*
1290 1309 * Hyperthreading configuration is slightly tricky on Intel
1291 1310 * and pure clones, and even trickier on AMD.
1292 1311 *
1293 1312 * (AMD chose to set the HTT bit on their CMP processors,
1294 1313 * even though they're not actually hyperthreaded. Thus it
1295 1314 * takes a bit more work to figure out what's really going
1296 1315 * on ... see the handling of the CMP_LGCY bit below)
1297 1316 */
1298 1317 if (cp->cp_edx & CPUID_INTC_EDX_HTT) {
1299 1318 cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi);
1300 1319 if (cpi->cpi_ncpu_per_chip > 1)
1301 1320 add_x86_feature(featureset, X86FSET_HTT);
1302 1321 } else {
1303 1322 cpi->cpi_ncpu_per_chip = 1;
1304 1323 }
1305 1324
1306 1325 /*
1307 1326 * Work on the "extended" feature information, doing
1308 1327 * some basic initialization for cpuid_pass2()
1309 1328 */
1310 1329 xcpuid = 0;
1311 1330 switch (cpi->cpi_vendor) {
1312 1331 case X86_VENDOR_Intel:
1313 1332 if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf)
1314 1333 xcpuid++;
1315 1334 break;
1316 1335 case X86_VENDOR_AMD:
1317 1336 if (cpi->cpi_family > 5 ||
1318 1337 (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
1319 1338 xcpuid++;
1320 1339 break;
1321 1340 case X86_VENDOR_Cyrix:
1322 1341 /*
1323 1342 * Only these Cyrix CPUs are -known- to support
1324 1343 * extended cpuid operations.
1325 1344 */
1326 1345 if (x86_type == X86_TYPE_VIA_CYRIX_III ||
1327 1346 x86_type == X86_TYPE_CYRIX_GXm)
1328 1347 xcpuid++;
1329 1348 break;
1330 1349 case X86_VENDOR_Centaur:
1331 1350 case X86_VENDOR_TM:
1332 1351 default:
1333 1352 xcpuid++;
1334 1353 break;
1335 1354 }
1336 1355
1337 1356 if (xcpuid) {
1338 1357 cp = &cpi->cpi_extd[0];
1339 1358 cp->cp_eax = 0x80000000;
1340 1359 cpi->cpi_xmaxeax = __cpuid_insn(cp);
1341 1360 }
1342 1361
1343 1362 if (cpi->cpi_xmaxeax & 0x80000000) {
1344 1363
1345 1364 if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX)
1346 1365 cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX;
1347 1366
1348 1367 switch (cpi->cpi_vendor) {
1349 1368 case X86_VENDOR_Intel:
1350 1369 case X86_VENDOR_AMD:
1351 1370 if (cpi->cpi_xmaxeax < 0x80000001)
1352 1371 break;
1353 1372 cp = &cpi->cpi_extd[1];
1354 1373 cp->cp_eax = 0x80000001;
1355 1374 (void) __cpuid_insn(cp);
1356 1375
1357 1376 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1358 1377 cpi->cpi_family == 5 &&
1359 1378 cpi->cpi_model == 6 &&
1360 1379 cpi->cpi_step == 6) {
1361 1380 /*
1362 1381 * K6 model 6 uses bit 10 to indicate SYSC
1363 1382 * Later models use bit 11. Fix it here.
1364 1383 */
1365 1384 if (cp->cp_edx & 0x400) {
1366 1385 cp->cp_edx &= ~0x400;
1367 1386 cp->cp_edx |= CPUID_AMD_EDX_SYSC;
1368 1387 }
1369 1388 }
1370 1389
1371 1390 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp);
1372 1391
1373 1392 /*
1374 1393 * Compute the additions to the kernel's feature word.
1375 1394 */
1376 1395 if (cp->cp_edx & CPUID_AMD_EDX_NX) {
1377 1396 add_x86_feature(featureset, X86FSET_NX);
1378 1397 }
1379 1398
1380 1399 /*
1381 1400 * Regardless whether or not we boot 64-bit,
1382 1401 * we should have a way to identify whether
1383 1402 * the CPU is capable of running 64-bit.
1384 1403 */
1385 1404 if (cp->cp_edx & CPUID_AMD_EDX_LM) {
1386 1405 add_x86_feature(featureset, X86FSET_64);
1387 1406 }
1388 1407
1389 1408 #if defined(__amd64)
1390 1409 /* 1 GB large page - enable only for 64 bit kernel */
1391 1410 if (cp->cp_edx & CPUID_AMD_EDX_1GPG) {
1392 1411 add_x86_feature(featureset, X86FSET_1GPG);
1393 1412 }
1394 1413 #endif
1395 1414
1396 1415 if ((cpi->cpi_vendor == X86_VENDOR_AMD) &&
1397 1416 (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) &&
1398 1417 (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) {
1399 1418 add_x86_feature(featureset, X86FSET_SSE4A);
1400 1419 }
1401 1420
1402 1421 /*
1403 1422 * If both the HTT and CMP_LGCY bits are set,
1404 1423 * then we're not actually HyperThreaded. Read
1405 1424 * "AMD CPUID Specification" for more details.
1406 1425 */
1407 1426 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1408 1427 is_x86_feature(featureset, X86FSET_HTT) &&
1409 1428 (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) {
1410 1429 remove_x86_feature(featureset, X86FSET_HTT);
1411 1430 add_x86_feature(featureset, X86FSET_CMP);
1412 1431 }
1413 1432 #if defined(__amd64)
1414 1433 /*
1415 1434 * It's really tricky to support syscall/sysret in
1416 1435 * the i386 kernel; we rely on sysenter/sysexit
1417 1436 * instead. In the amd64 kernel, things are -way-
1418 1437 * better.
1419 1438 */
1420 1439 if (cp->cp_edx & CPUID_AMD_EDX_SYSC) {
1421 1440 add_x86_feature(featureset, X86FSET_ASYSC);
1422 1441 }
1423 1442
1424 1443 /*
1425 1444 * While we're thinking about system calls, note
1426 1445 * that AMD processors don't support sysenter
1427 1446 * in long mode at all, so don't try to program them.
1428 1447 */
1429 1448 if (x86_vendor == X86_VENDOR_AMD) {
1430 1449 remove_x86_feature(featureset, X86FSET_SEP);
1431 1450 }
1432 1451 #endif
1433 1452 if (cp->cp_edx & CPUID_AMD_EDX_TSCP) {
1434 1453 add_x86_feature(featureset, X86FSET_TSCP);
1435 1454 }
1436 1455
1437 1456 if (cp->cp_ecx & CPUID_AMD_ECX_SVM) {
1438 1457 add_x86_feature(featureset, X86FSET_SVM);
1439 1458 }
1459 +
1460 + if (cp->cp_ecx & CPUID_AMD_ECX_TOPOEXT) {
1461 + add_x86_feature(featureset, X86FSET_TOPOEXT);
1462 + }
1440 1463 break;
1441 1464 default:
1442 1465 break;
1443 1466 }
1444 1467
1445 1468 /*
1446 1469 * Get CPUID data about processor cores and hyperthreads.
1447 1470 */
1448 1471 switch (cpi->cpi_vendor) {
1449 1472 case X86_VENDOR_Intel:
1450 1473 if (cpi->cpi_maxeax >= 4) {
1451 1474 cp = &cpi->cpi_std[4];
1452 1475 cp->cp_eax = 4;
1453 1476 cp->cp_ecx = 0;
1454 1477 (void) __cpuid_insn(cp);
1455 1478 platform_cpuid_mangle(cpi->cpi_vendor, 4, cp);
1456 1479 }
1457 1480 /*FALLTHROUGH*/
1458 1481 case X86_VENDOR_AMD:
1459 1482 if (cpi->cpi_xmaxeax < 0x80000008)
1460 1483 break;
1461 1484 cp = &cpi->cpi_extd[8];
1462 1485 cp->cp_eax = 0x80000008;
1463 1486 (void) __cpuid_insn(cp);
1464 1487 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp);
1465 1488
1466 1489 /*
1467 1490 * Virtual and physical address limits from
1468 1491 * cpuid override previously guessed values.
1469 1492 */
1470 1493 cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0);
1471 1494 cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8);
1472 1495 break;
1473 1496 default:
1474 1497 break;
1475 1498 }
1476 1499
1477 1500 /*
1478 1501 * Derive the number of cores per chip
1479 1502 */
1480 1503 switch (cpi->cpi_vendor) {
1481 1504 case X86_VENDOR_Intel:
1482 1505 if (cpi->cpi_maxeax < 4) {
1483 1506 cpi->cpi_ncore_per_chip = 1;
1484 1507 break;
1485 1508 } else {
1486 1509 cpi->cpi_ncore_per_chip =
1487 1510 BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1;
1488 1511 }
1489 1512 break;
1490 1513 case X86_VENDOR_AMD:
1491 1514 if (cpi->cpi_xmaxeax < 0x80000008) {
1492 1515 cpi->cpi_ncore_per_chip = 1;
1493 1516 break;
1494 1517 } else {
1495 1518 /*
1496 1519 * On family 0xf cpuid fn 0x80000008 ECX[7:0] "NC" is
1497 1520 * 1 less than the number of physical cores on
1498 1521 * the chip. In family 0x10 this value can
1499 1522 * be affected by "downcoring" - it reflects
1500 1523 * 1 less than the number of cores actually
1501 1524 * enabled on this node.
1502 1525 */
1503 1526 cpi->cpi_ncore_per_chip =
1504 1527 BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1;
1505 1528 }
1506 1529 break;
1507 1530 default:
1508 1531 cpi->cpi_ncore_per_chip = 1;
1509 1532 break;
1510 1533 }
1511 1534
1512 1535 /*
1513 1536 * Get CPUID data about TSC Invariance in Deep C-State.
1514 1537 */
1515 1538 switch (cpi->cpi_vendor) {
1516 1539 case X86_VENDOR_Intel:
1517 1540 if (cpi->cpi_maxeax >= 7) {
1518 1541 cp = &cpi->cpi_extd[7];
1519 1542 cp->cp_eax = 0x80000007;
1520 1543 cp->cp_ecx = 0;
1521 1544 (void) __cpuid_insn(cp);
1522 1545 }
1523 1546 break;
1524 1547 default:
1525 1548 break;
1526 1549 }
1527 1550 } else {
1528 1551 cpi->cpi_ncore_per_chip = 1;
1529 1552 }
1530 1553
1531 1554 /*
1532 1555 * If more than one core, then this processor is CMP.
1533 1556 */
1534 1557 if (cpi->cpi_ncore_per_chip > 1) {
1535 1558 add_x86_feature(featureset, X86FSET_CMP);
1536 1559 }
1537 1560
1538 1561 /*
1539 1562 * If the number of cores is the same as the number
1540 1563 * of CPUs, then we cannot have HyperThreading.
1541 1564 */
1542 1565 if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) {
1543 1566 remove_x86_feature(featureset, X86FSET_HTT);
1544 1567 }
1545 1568
1546 1569 cpi->cpi_apicid = CPI_APIC_ID(cpi);
1547 1570 cpi->cpi_procnodes_per_pkg = 1;
1571 + cpi->cpi_cores_per_compunit = 1;
1548 1572 if (is_x86_feature(featureset, X86FSET_HTT) == B_FALSE &&
1549 1573 is_x86_feature(featureset, X86FSET_CMP) == B_FALSE) {
1550 1574 /*
1551 1575 * Single-core single-threaded processors.
1552 1576 */
1553 1577 cpi->cpi_chipid = -1;
1554 1578 cpi->cpi_clogid = 0;
1555 1579 cpi->cpi_coreid = cpu->cpu_id;
1556 1580 cpi->cpi_pkgcoreid = 0;
1557 1581 if (cpi->cpi_vendor == X86_VENDOR_AMD)
1558 1582 cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0);
1559 1583 else
1560 1584 cpi->cpi_procnodeid = cpi->cpi_chipid;
1561 1585 } else if (cpi->cpi_ncpu_per_chip > 1) {
1562 1586 if (cpi->cpi_vendor == X86_VENDOR_Intel)
1563 1587 cpuid_intel_getids(cpu, featureset);
1564 1588 else if (cpi->cpi_vendor == X86_VENDOR_AMD)
1565 1589 cpuid_amd_getids(cpu);
1566 1590 else {
1567 1591 /*
1568 1592 * All other processors are currently
1569 1593 * assumed to have single cores.
1570 1594 */
1571 1595 cpi->cpi_coreid = cpi->cpi_chipid;
1572 1596 cpi->cpi_pkgcoreid = 0;
1573 1597 cpi->cpi_procnodeid = cpi->cpi_chipid;
1598 + cpi->cpi_compunitid = cpi->cpi_chipid;
1574 1599 }
1575 1600 }
1576 1601
1577 1602 /*
1578 1603 * Synthesize chip "revision" and socket type
1579 1604 */
1580 1605 cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family,
1581 1606 cpi->cpi_model, cpi->cpi_step);
1582 1607 cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor,
1583 1608 cpi->cpi_family, cpi->cpi_model, cpi->cpi_step);
1584 1609 cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family,
1585 1610 cpi->cpi_model, cpi->cpi_step);
1586 1611
1587 1612 pass1_done:
1588 1613 cpi->cpi_pass = 1;
1589 1614 }
1590 1615
1591 1616 /*
1592 1617 * Make copies of the cpuid table entries we depend on, in
1593 1618 * part for ease of parsing now, in part so that we have only
1594 1619 * one place to correct any of it, in part for ease of
1595 1620 * later export to userland, and in part so we can look at
1596 1621 * this stuff in a crash dump.
1597 1622 */
1598 1623
1599 1624 /*ARGSUSED*/
1600 1625 void
1601 1626 cpuid_pass2(cpu_t *cpu)
1602 1627 {
1603 1628 uint_t n, nmax;
1604 1629 int i;
1605 1630 struct cpuid_regs *cp;
1606 1631 uint8_t *dp;
1607 1632 uint32_t *iptr;
1608 1633 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1609 1634
1610 1635 ASSERT(cpi->cpi_pass == 1);
1611 1636
1612 1637 if (cpi->cpi_maxeax < 1)
1613 1638 goto pass2_done;
1614 1639
1615 1640 if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD)
1616 1641 nmax = NMAX_CPI_STD;
1617 1642 /*
1618 1643 * (We already handled n == 0 and n == 1 in pass 1)
1619 1644 */
1620 1645 for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) {
1621 1646 cp->cp_eax = n;
1622 1647
1623 1648 /*
1624 1649 * CPUID function 4 expects %ecx to be initialized
1625 1650 * with an index which indicates which cache to return
1626 1651 * information about. The OS is expected to call function 4
1627 1652 * with %ecx set to 0, 1, 2, ... until it returns with
1628 1653 * EAX[4:0] set to 0, which indicates there are no more
1629 1654 * caches.
1630 1655 *
1631 1656 * Here, populate cpi_std[4] with the information returned by
1632 1657 * function 4 when %ecx == 0, and do the rest in cpuid_pass3()
1633 1658 * when dynamic memory allocation becomes available.
1634 1659 *
1635 1660 * Note: we need to explicitly initialize %ecx here, since
1636 1661 * function 4 may have been previously invoked.
1637 1662 */
1638 1663 if (n == 4)
1639 1664 cp->cp_ecx = 0;
1640 1665
1641 1666 (void) __cpuid_insn(cp);
1642 1667 platform_cpuid_mangle(cpi->cpi_vendor, n, cp);
1643 1668 switch (n) {
1644 1669 case 2:
1645 1670 /*
1646 1671 * "the lower 8 bits of the %eax register
1647 1672 * contain a value that identifies the number
1648 1673 * of times the cpuid [instruction] has to be
1649 1674 * executed to obtain a complete image of the
1650 1675 * processor's caching systems."
1651 1676 *
1652 1677 * How *do* they make this stuff up?
1653 1678 */
1654 1679 cpi->cpi_ncache = sizeof (*cp) *
1655 1680 BITX(cp->cp_eax, 7, 0);
1656 1681 if (cpi->cpi_ncache == 0)
1657 1682 break;
1658 1683 cpi->cpi_ncache--; /* skip count byte */
1659 1684
1660 1685 /*
1661 1686 * Well, for now, rather than attempt to implement
1662 1687 * this slightly dubious algorithm, we just look
1663 1688 * at the first 15 ..
1664 1689 */
1665 1690 if (cpi->cpi_ncache > (sizeof (*cp) - 1))
1666 1691 cpi->cpi_ncache = sizeof (*cp) - 1;
1667 1692
1668 1693 dp = cpi->cpi_cacheinfo;
1669 1694 if (BITX(cp->cp_eax, 31, 31) == 0) {
1670 1695 uint8_t *p = (void *)&cp->cp_eax;
1671 1696 for (i = 1; i < 4; i++)
1672 1697 if (p[i] != 0)
1673 1698 *dp++ = p[i];
1674 1699 }
1675 1700 if (BITX(cp->cp_ebx, 31, 31) == 0) {
1676 1701 uint8_t *p = (void *)&cp->cp_ebx;
1677 1702 for (i = 0; i < 4; i++)
1678 1703 if (p[i] != 0)
1679 1704 *dp++ = p[i];
1680 1705 }
1681 1706 if (BITX(cp->cp_ecx, 31, 31) == 0) {
1682 1707 uint8_t *p = (void *)&cp->cp_ecx;
1683 1708 for (i = 0; i < 4; i++)
1684 1709 if (p[i] != 0)
1685 1710 *dp++ = p[i];
1686 1711 }
1687 1712 if (BITX(cp->cp_edx, 31, 31) == 0) {
1688 1713 uint8_t *p = (void *)&cp->cp_edx;
1689 1714 for (i = 0; i < 4; i++)
1690 1715 if (p[i] != 0)
1691 1716 *dp++ = p[i];
1692 1717 }
1693 1718 break;
1694 1719
1695 1720 case 3: /* Processor serial number, if PSN supported */
1696 1721 break;
1697 1722
1698 1723 case 4: /* Deterministic cache parameters */
1699 1724 break;
1700 1725
1701 1726 case 5: /* Monitor/Mwait parameters */
1702 1727 {
1703 1728 size_t mwait_size;
1704 1729
1705 1730 /*
1706 1731 * check cpi_mwait.support which was set in cpuid_pass1
1707 1732 */
1708 1733 if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT))
1709 1734 break;
1710 1735
1711 1736 /*
1712 1737 * Protect ourselves from insane mwait line size.
1713 1738 * Workaround for incomplete hardware emulator(s).
1714 1739 */
1715 1740 mwait_size = (size_t)MWAIT_SIZE_MAX(cpi);
1716 1741 if (mwait_size < sizeof (uint32_t) ||
1717 1742 !ISP2(mwait_size)) {
1718 1743 #if DEBUG
1719 1744 cmn_err(CE_NOTE, "Cannot handle cpu %d mwait "
1720 1745 "size %ld", cpu->cpu_id, (long)mwait_size);
1721 1746 #endif
1722 1747 break;
1723 1748 }
1724 1749
1725 1750 cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi);
1726 1751 cpi->cpi_mwait.mon_max = mwait_size;
1727 1752 if (MWAIT_EXTENSION(cpi)) {
1728 1753 cpi->cpi_mwait.support |= MWAIT_EXTENSIONS;
1729 1754 if (MWAIT_INT_ENABLE(cpi))
1730 1755 cpi->cpi_mwait.support |=
1731 1756 MWAIT_ECX_INT_ENABLE;
1732 1757 }
1733 1758 break;
1734 1759 }
1735 1760 default:
1736 1761 break;
1737 1762 }
1738 1763 }
1739 1764
1740 1765 if (cpi->cpi_maxeax >= 0xB && cpi->cpi_vendor == X86_VENDOR_Intel) {
1741 1766 struct cpuid_regs regs;
1742 1767
1743 1768 cp = &regs;
1744 1769 cp->cp_eax = 0xB;
1745 1770 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1746 1771
1747 1772 (void) __cpuid_insn(cp);
1748 1773
1749 1774 /*
1750 1775 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
1751 1776 * indicates that the extended topology enumeration leaf is
1752 1777 * available.
1753 1778 */
1754 1779 if (cp->cp_ebx) {
1755 1780 uint32_t x2apic_id;
1756 1781 uint_t coreid_shift = 0;
1757 1782 uint_t ncpu_per_core = 1;
1758 1783 uint_t chipid_shift = 0;
1759 1784 uint_t ncpu_per_chip = 1;
1760 1785 uint_t i;
1761 1786 uint_t level;
1762 1787
1763 1788 for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
1764 1789 cp->cp_eax = 0xB;
1765 1790 cp->cp_ecx = i;
1766 1791
1767 1792 (void) __cpuid_insn(cp);
1768 1793 level = CPI_CPU_LEVEL_TYPE(cp);
1769 1794
1770 1795 if (level == 1) {
1771 1796 x2apic_id = cp->cp_edx;
1772 1797 coreid_shift = BITX(cp->cp_eax, 4, 0);
1773 1798 ncpu_per_core = BITX(cp->cp_ebx, 15, 0);
1774 1799 } else if (level == 2) {
1775 1800 x2apic_id = cp->cp_edx;
1776 1801 chipid_shift = BITX(cp->cp_eax, 4, 0);
1777 1802 ncpu_per_chip = BITX(cp->cp_ebx, 15, 0);
1778 1803 }
1779 1804 }
1780 1805
1781 1806 cpi->cpi_apicid = x2apic_id;
1782 1807 cpi->cpi_ncpu_per_chip = ncpu_per_chip;
1783 1808 cpi->cpi_ncore_per_chip = ncpu_per_chip /
1784 1809 ncpu_per_core;
1785 1810 cpi->cpi_chipid = x2apic_id >> chipid_shift;
1786 1811 cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1);
1787 1812 cpi->cpi_coreid = x2apic_id >> coreid_shift;
1788 1813 cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
1789 1814 }
1790 1815
1791 1816 /* Make cp NULL so that we don't stumble on others */
1792 1817 cp = NULL;
1793 1818 }
1794 1819
1795 1820 /*
1796 1821 * XSAVE enumeration
1797 1822 */
1798 1823 if (cpi->cpi_maxeax >= 0xD) {
1799 1824 struct cpuid_regs regs;
1800 1825 boolean_t cpuid_d_valid = B_TRUE;
1801 1826
1802 1827 cp = &regs;
1803 1828 cp->cp_eax = 0xD;
1804 1829 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1805 1830
1806 1831 (void) __cpuid_insn(cp);
1807 1832
1808 1833 /*
1809 1834 * Sanity checks for debug
1810 1835 */
1811 1836 if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 ||
1812 1837 (cp->cp_eax & XFEATURE_SSE) == 0) {
1813 1838 cpuid_d_valid = B_FALSE;
1814 1839 }
1815 1840
1816 1841 cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax;
1817 1842 cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx;
1818 1843 cpi->cpi_xsave.xsav_max_size = cp->cp_ecx;
1819 1844
1820 1845 /*
1821 1846 * If the hw supports AVX, get the size and offset in the save
1822 1847 * area for the ymm state.
1823 1848 */
1824 1849 if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) {
1825 1850 cp->cp_eax = 0xD;
1826 1851 cp->cp_ecx = 2;
1827 1852 cp->cp_edx = cp->cp_ebx = 0;
1828 1853
1829 1854 (void) __cpuid_insn(cp);
1830 1855
1831 1856 if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET ||
1832 1857 cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) {
1833 1858 cpuid_d_valid = B_FALSE;
1834 1859 }
1835 1860
1836 1861 cpi->cpi_xsave.ymm_size = cp->cp_eax;
1837 1862 cpi->cpi_xsave.ymm_offset = cp->cp_ebx;
1838 1863 }
1839 1864
1840 1865 if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
1841 1866 xsave_state_size = 0;
1842 1867 } else if (cpuid_d_valid) {
1843 1868 xsave_state_size = cpi->cpi_xsave.xsav_max_size;
1844 1869 } else {
1845 1870 /* Broken CPUID 0xD, probably in HVM */
1846 1871 cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid "
1847 1872 "value: hw_low = %d, hw_high = %d, xsave_size = %d"
1848 1873 ", ymm_size = %d, ymm_offset = %d\n",
1849 1874 cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low,
1850 1875 cpi->cpi_xsave.xsav_hw_features_high,
1851 1876 (int)cpi->cpi_xsave.xsav_max_size,
1852 1877 (int)cpi->cpi_xsave.ymm_size,
1853 1878 (int)cpi->cpi_xsave.ymm_offset);
1854 1879
1855 1880 if (xsave_state_size != 0) {
1856 1881 /*
1857 1882 * This must be a non-boot CPU. We cannot
1858 1883 * continue, because boot cpu has already
1859 1884 * enabled XSAVE.
1860 1885 */
1861 1886 ASSERT(cpu->cpu_id != 0);
1862 1887 cmn_err(CE_PANIC, "cpu%d: we have already "
1863 1888 "enabled XSAVE on boot cpu, cannot "
1864 1889 "continue.", cpu->cpu_id);
1865 1890 } else {
1866 1891 /*
1867 1892 * Must be from boot CPU, OK to disable XSAVE.
1868 1893 */
1869 1894 ASSERT(cpu->cpu_id == 0);
1870 1895 remove_x86_feature(x86_featureset,
1871 1896 X86FSET_XSAVE);
1872 1897 remove_x86_feature(x86_featureset, X86FSET_AVX);
1873 1898 CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_XSAVE;
1874 1899 CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_AVX;
1875 1900 xsave_force_disable = B_TRUE;
1876 1901 }
1877 1902 }
1878 1903 }
1879 1904
1880 1905
1881 1906 if ((cpi->cpi_xmaxeax & 0x80000000) == 0)
1882 1907 goto pass2_done;
1883 1908
1884 1909 if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD)
1885 1910 nmax = NMAX_CPI_EXTD;
1886 1911 /*
1887 1912 * Copy the extended properties, fixing them as we go.
1888 1913 * (We already handled n == 0 and n == 1 in pass 1)
1889 1914 */
1890 1915 iptr = (void *)cpi->cpi_brandstr;
1891 1916 for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) {
1892 1917 cp->cp_eax = 0x80000000 + n;
1893 1918 (void) __cpuid_insn(cp);
1894 1919 platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp);
1895 1920 switch (n) {
1896 1921 case 2:
1897 1922 case 3:
1898 1923 case 4:
1899 1924 /*
1900 1925 * Extract the brand string
1901 1926 */
1902 1927 *iptr++ = cp->cp_eax;
1903 1928 *iptr++ = cp->cp_ebx;
1904 1929 *iptr++ = cp->cp_ecx;
1905 1930 *iptr++ = cp->cp_edx;
1906 1931 break;
1907 1932 case 5:
1908 1933 switch (cpi->cpi_vendor) {
1909 1934 case X86_VENDOR_AMD:
1910 1935 /*
1911 1936 * The Athlon and Duron were the first
1912 1937 * parts to report the sizes of the
1913 1938 * TLB for large pages. Before then,
1914 1939 * we don't trust the data.
1915 1940 */
1916 1941 if (cpi->cpi_family < 6 ||
1917 1942 (cpi->cpi_family == 6 &&
1918 1943 cpi->cpi_model < 1))
1919 1944 cp->cp_eax = 0;
1920 1945 break;
1921 1946 default:
1922 1947 break;
1923 1948 }
1924 1949 break;
1925 1950 case 6:
1926 1951 switch (cpi->cpi_vendor) {
1927 1952 case X86_VENDOR_AMD:
1928 1953 /*
1929 1954 * The Athlon and Duron were the first
1930 1955 * AMD parts with L2 TLB's.
1931 1956 * Before then, don't trust the data.
1932 1957 */
1933 1958 if (cpi->cpi_family < 6 ||
1934 1959 cpi->cpi_family == 6 &&
1935 1960 cpi->cpi_model < 1)
1936 1961 cp->cp_eax = cp->cp_ebx = 0;
1937 1962 /*
1938 1963 * AMD Duron rev A0 reports L2
1939 1964 * cache size incorrectly as 1K
1940 1965 * when it is really 64K
1941 1966 */
1942 1967 if (cpi->cpi_family == 6 &&
1943 1968 cpi->cpi_model == 3 &&
1944 1969 cpi->cpi_step == 0) {
1945 1970 cp->cp_ecx &= 0xffff;
1946 1971 cp->cp_ecx |= 0x400000;
1947 1972 }
1948 1973 break;
1949 1974 case X86_VENDOR_Cyrix: /* VIA C3 */
1950 1975 /*
1951 1976 * VIA C3 processors are a bit messed
1952 1977 * up w.r.t. encoding cache sizes in %ecx
1953 1978 */
1954 1979 if (cpi->cpi_family != 6)
1955 1980 break;
1956 1981 /*
1957 1982 * model 7 and 8 were incorrectly encoded
1958 1983 *
1959 1984 * xxx is model 8 really broken?
1960 1985 */
1961 1986 if (cpi->cpi_model == 7 ||
1962 1987 cpi->cpi_model == 8)
1963 1988 cp->cp_ecx =
1964 1989 BITX(cp->cp_ecx, 31, 24) << 16 |
1965 1990 BITX(cp->cp_ecx, 23, 16) << 12 |
1966 1991 BITX(cp->cp_ecx, 15, 8) << 8 |
1967 1992 BITX(cp->cp_ecx, 7, 0);
1968 1993 /*
1969 1994 * model 9 stepping 1 has wrong associativity
1970 1995 */
1971 1996 if (cpi->cpi_model == 9 && cpi->cpi_step == 1)
1972 1997 cp->cp_ecx |= 8 << 12;
1973 1998 break;
1974 1999 case X86_VENDOR_Intel:
1975 2000 /*
1976 2001 * Extended L2 Cache features function.
1977 2002 * First appeared on Prescott.
1978 2003 */
1979 2004 default:
1980 2005 break;
1981 2006 }
1982 2007 break;
1983 2008 default:
1984 2009 break;
1985 2010 }
1986 2011 }
1987 2012
1988 2013 pass2_done:
1989 2014 cpi->cpi_pass = 2;
1990 2015 }
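
The leaf 0xB handling in cpuid_pass2() boils down to a pair of shifts and one mask applied to the x2APIC id. A standalone sketch of that decomposition, using made-up shift values (the real ones come from EAX[4:0] of each 0xB sub-leaf):

    #include <stdio.h>

    int
    main(void)
    {
            /* example values; the real ones come from CPUID.EAX=0xB */
            unsigned int x2apic_id = 0x35;  /* this CPU's x2APIC id */
            unsigned int coreid_shift = 1;  /* EAX[4:0] at the SMT level */
            unsigned int chipid_shift = 4;  /* EAX[4:0] at the core level */

            unsigned int chipid = x2apic_id >> chipid_shift;
            unsigned int clogid = x2apic_id & ((1u << chipid_shift) - 1);
            unsigned int coreid = x2apic_id >> coreid_shift;
            unsigned int pkgcoreid = clogid >> coreid_shift;

            /* prints: chipid=3 clogid=5 coreid=26 pkgcoreid=2 */
            printf("chipid=%u clogid=%u coreid=%u pkgcoreid=%u\n",
                chipid, clogid, coreid, pkgcoreid);
            return (0);
    }
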
1991 2016
1992 2017 static const char *
1993 2018 intel_cpubrand(const struct cpuid_info *cpi)
1994 2019 {
1995 2020 int i;
1996 2021
1997 2022 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
1998 2023 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
1999 2024 return ("i486");
2000 2025
2001 2026 switch (cpi->cpi_family) {
2002 2027 case 5:
2003 2028 return ("Intel Pentium(r)");
2004 2029 case 6:
2005 2030 switch (cpi->cpi_model) {
2006 2031 uint_t celeron, xeon;
2007 2032 const struct cpuid_regs *cp;
2008 2033 case 0:
2009 2034 case 1:
2010 2035 case 2:
2011 2036 return ("Intel Pentium(r) Pro");
2012 2037 case 3:
2013 2038 case 4:
2014 2039 return ("Intel Pentium(r) II");
2015 2040 case 6:
2016 2041 return ("Intel Celeron(r)");
2017 2042 case 5:
2018 2043 case 7:
2019 2044 celeron = xeon = 0;
2020 2045 cp = &cpi->cpi_std[2]; /* cache info */
2021 2046
2022 2047 for (i = 1; i < 4; i++) {
2023 2048 uint_t tmp;
2024 2049
2025 2050 tmp = (cp->cp_eax >> (8 * i)) & 0xff;
2026 2051 if (tmp == 0x40)
2027 2052 celeron++;
2028 2053 if (tmp >= 0x44 && tmp <= 0x45)
2029 2054 xeon++;
2030 2055 }
2031 2056
2032 2057 for (i = 0; i < 2; i++) {
2033 2058 uint_t tmp;
2034 2059
2035 2060 tmp = (cp->cp_ebx >> (8 * i)) & 0xff;
2036 2061 if (tmp == 0x40)
2037 2062 celeron++;
2038 2063 else if (tmp >= 0x44 && tmp <= 0x45)
2039 2064 xeon++;
2040 2065 }
2041 2066
2042 2067 for (i = 0; i < 4; i++) {
2043 2068 uint_t tmp;
2044 2069
2045 2070 tmp = (cp->cp_ecx >> (8 * i)) & 0xff;
2046 2071 if (tmp == 0x40)
2047 2072 celeron++;
2048 2073 else if (tmp >= 0x44 && tmp <= 0x45)
2049 2074 xeon++;
2050 2075 }
2051 2076
2052 2077 for (i = 0; i < 4; i++) {
2053 2078 uint_t tmp;
2054 2079
2055 2080 tmp = (cp->cp_edx >> (8 * i)) & 0xff;
2056 2081 if (tmp == 0x40)
2057 2082 celeron++;
2058 2083 else if (tmp >= 0x44 && tmp <= 0x45)
2059 2084 xeon++;
2060 2085 }
2061 2086
2062 2087 if (celeron)
2063 2088 return ("Intel Celeron(r)");
2064 2089 if (xeon)
2065 2090 return (cpi->cpi_model == 5 ?
2066 2091 "Intel Pentium(r) II Xeon(tm)" :
2067 2092 "Intel Pentium(r) III Xeon(tm)");
2068 2093 return (cpi->cpi_model == 5 ?
2069 2094 "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" :
2070 2095 "Intel Pentium(r) III or Pentium(r) III Xeon(tm)");
2071 2096 default:
2072 2097 break;
2073 2098 }
2074 2099 default:
2075 2100 break;
2076 2101 }
2077 2102
2078 2103 /* BrandID is present if the field is nonzero */
2079 2104 if (cpi->cpi_brandid != 0) {
2080 2105 static const struct {
2081 2106 uint_t bt_bid;
2082 2107 const char *bt_str;
2083 2108 } brand_tbl[] = {
2084 2109 { 0x1, "Intel(r) Celeron(r)" },
2085 2110 { 0x2, "Intel(r) Pentium(r) III" },
2086 2111 { 0x3, "Intel(r) Pentium(r) III Xeon(tm)" },
2087 2112 { 0x4, "Intel(r) Pentium(r) III" },
2088 2113 { 0x6, "Mobile Intel(r) Pentium(r) III" },
2089 2114 { 0x7, "Mobile Intel(r) Celeron(r)" },
2090 2115 { 0x8, "Intel(r) Pentium(r) 4" },
2091 2116 { 0x9, "Intel(r) Pentium(r) 4" },
2092 2117 { 0xa, "Intel(r) Celeron(r)" },
2093 2118 { 0xb, "Intel(r) Xeon(tm)" },
2094 2119 { 0xc, "Intel(r) Xeon(tm) MP" },
2095 2120 { 0xe, "Mobile Intel(r) Pentium(r) 4" },
2096 2121 { 0xf, "Mobile Intel(r) Celeron(r)" },
2097 2122 { 0x11, "Mobile Genuine Intel(r)" },
2098 2123 { 0x12, "Intel(r) Celeron(r) M" },
2099 2124 { 0x13, "Mobile Intel(r) Celeron(r)" },
2100 2125 { 0x14, "Intel(r) Celeron(r)" },
2101 2126 { 0x15, "Mobile Genuine Intel(r)" },
2102 2127 { 0x16, "Intel(r) Pentium(r) M" },
2103 2128 { 0x17, "Mobile Intel(r) Celeron(r)" }
2104 2129 };
2105 2130 uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]);
2106 2131 uint_t sgn;
2107 2132
2108 2133 sgn = (cpi->cpi_family << 8) |
2109 2134 (cpi->cpi_model << 4) | cpi->cpi_step;
2110 2135
2111 2136 for (i = 0; i < btblmax; i++)
2112 2137 if (brand_tbl[i].bt_bid == cpi->cpi_brandid)
2113 2138 break;
2114 2139 if (i < btblmax) {
2115 2140 if (sgn == 0x6b1 && cpi->cpi_brandid == 3)
2116 2141 return ("Intel(r) Celeron(r)");
2117 2142 if (sgn < 0xf13 && cpi->cpi_brandid == 0xb)
2118 2143 return ("Intel(r) Xeon(tm) MP");
2119 2144 if (sgn < 0xf13 && cpi->cpi_brandid == 0xe)
2120 2145 return ("Intel(r) Xeon(tm)");
2121 2146 return (brand_tbl[i].bt_str);
2122 2147 }
2123 2148 }
2124 2149
2125 2150 return (NULL);
2126 2151 }
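
The BrandID fallback in intel_cpubrand() keys its special cases off a signature that packs family, model and stepping into twelve bits. A worked example with an assumed 6/0xb/1 part:

    #include <stdio.h>

    int
    main(void)
    {
            unsigned int family = 6, model = 0xb, step = 1;
            unsigned int sgn = (family << 8) | (model << 4) | step;

            /* 6/0xb/1 packs to 0x6b1, the Celeron special case above */
            printf("sgn = 0x%x\n", sgn);
            return (0);
    }
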
2127 2152
2128 2153 static const char *
2129 2154 amd_cpubrand(const struct cpuid_info *cpi)
2130 2155 {
2131 2156 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
2132 2157 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
2133 2158 return ("i486 compatible");
2134 2159
2135 2160 switch (cpi->cpi_family) {
2136 2161 case 5:
2137 2162 switch (cpi->cpi_model) {
2138 2163 case 0:
2139 2164 case 1:
2140 2165 case 2:
2141 2166 case 3:
2142 2167 case 4:
2143 2168 case 5:
2144 2169 return ("AMD-K5(r)");
2145 2170 case 6:
2146 2171 case 7:
2147 2172 return ("AMD-K6(r)");
2148 2173 case 8:
2149 2174 return ("AMD-K6(r)-2");
2150 2175 case 9:
2151 2176 return ("AMD-K6(r)-III");
2152 2177 default:
2153 2178 return ("AMD (family 5)");
2154 2179 }
2155 2180 case 6:
2156 2181 switch (cpi->cpi_model) {
2157 2182 case 1:
2158 2183 return ("AMD-K7(tm)");
2159 2184 case 0:
2160 2185 case 2:
2161 2186 case 4:
2162 2187 return ("AMD Athlon(tm)");
2163 2188 case 3:
2164 2189 case 7:
2165 2190 return ("AMD Duron(tm)");
2166 2191 case 6:
2167 2192 case 8:
2168 2193 case 10:
2169 2194 /*
2170 2195 * Use the L2 cache size to distinguish
2171 2196 */
2172 2197 return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ?
2173 2198 "AMD Athlon(tm)" : "AMD Duron(tm)");
2174 2199 default:
2175 2200 return ("AMD (family 6)");
2176 2201 }
2177 2202 default:
2178 2203 break;
2179 2204 }
2180 2205
2181 2206 if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 &&
2182 2207 cpi->cpi_brandid != 0) {
2183 2208 switch (BITX(cpi->cpi_brandid, 7, 5)) {
2184 2209 case 3:
2185 2210 return ("AMD Opteron(tm) UP 1xx");
2186 2211 case 4:
2187 2212 return ("AMD Opteron(tm) DP 2xx");
2188 2213 case 5:
2189 2214 return ("AMD Opteron(tm) MP 8xx");
2190 2215 default:
2191 2216 return ("AMD Opteron(tm)");
2192 2217 }
2193 2218 }
2194 2219
2195 2220 return (NULL);
2196 2221 }
2197 2222
2198 2223 static const char *
2199 2224 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type)
2200 2225 {
2201 2226 if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
2202 2227 cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 ||
2203 2228 type == X86_TYPE_CYRIX_486)
2204 2229 return ("i486 compatible");
2205 2230
2206 2231 switch (type) {
2207 2232 case X86_TYPE_CYRIX_6x86:
2208 2233 return ("Cyrix 6x86");
2209 2234 case X86_TYPE_CYRIX_6x86L:
2210 2235 return ("Cyrix 6x86L");
2211 2236 case X86_TYPE_CYRIX_6x86MX:
2212 2237 return ("Cyrix 6x86MX");
2213 2238 case X86_TYPE_CYRIX_GXm:
2214 2239 return ("Cyrix GXm");
2215 2240 case X86_TYPE_CYRIX_MediaGX:
2216 2241 return ("Cyrix MediaGX");
2217 2242 case X86_TYPE_CYRIX_MII:
2218 2243 return ("Cyrix M2");
2219 2244 case X86_TYPE_VIA_CYRIX_III:
2220 2245 return ("VIA Cyrix M3");
2221 2246 default:
2222 2247 /*
2223 2248 * Have another wild guess ..
2224 2249 */
2225 2250 if (cpi->cpi_family == 4 && cpi->cpi_model == 9)
2226 2251 return ("Cyrix 5x86");
2227 2252 else if (cpi->cpi_family == 5) {
2228 2253 switch (cpi->cpi_model) {
2229 2254 case 2:
2230 2255 return ("Cyrix 6x86"); /* Cyrix M1 */
2231 2256 case 4:
2232 2257 return ("Cyrix MediaGX");
2233 2258 default:
2234 2259 break;
2235 2260 }
2236 2261 } else if (cpi->cpi_family == 6) {
2237 2262 switch (cpi->cpi_model) {
2238 2263 case 0:
2239 2264 return ("Cyrix 6x86MX"); /* Cyrix M2? */
2240 2265 case 5:
2241 2266 case 6:
2242 2267 case 7:
2243 2268 case 8:
2244 2269 case 9:
2245 2270 return ("VIA C3");
2246 2271 default:
2247 2272 break;
2248 2273 }
2249 2274 }
2250 2275 break;
2251 2276 }
2252 2277 return (NULL);
2253 2278 }
2254 2279
2255 2280 /*
2256 2281 * This only gets called in the case that the CPU extended
2257 2282 * feature brand strings (0x80000002, 0x80000003, 0x80000004)
2258 2283 * aren't available, or contain null bytes for some reason.
2259 2284 */
2260 2285 static void
2261 2286 fabricate_brandstr(struct cpuid_info *cpi)
2262 2287 {
2263 2288 const char *brand = NULL;
2264 2289
2265 2290 switch (cpi->cpi_vendor) {
2266 2291 case X86_VENDOR_Intel:
2267 2292 brand = intel_cpubrand(cpi);
2268 2293 break;
2269 2294 case X86_VENDOR_AMD:
2270 2295 brand = amd_cpubrand(cpi);
2271 2296 break;
2272 2297 case X86_VENDOR_Cyrix:
2273 2298 brand = cyrix_cpubrand(cpi, x86_type);
2274 2299 break;
2275 2300 case X86_VENDOR_NexGen:
2276 2301 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
2277 2302 brand = "NexGen Nx586";
2278 2303 break;
2279 2304 case X86_VENDOR_Centaur:
2280 2305 if (cpi->cpi_family == 5)
2281 2306 switch (cpi->cpi_model) {
2282 2307 case 4:
2283 2308 brand = "Centaur C6";
2284 2309 break;
2285 2310 case 8:
2286 2311 brand = "Centaur C2";
2287 2312 break;
2288 2313 case 9:
2289 2314 brand = "Centaur C3";
2290 2315 break;
2291 2316 default:
2292 2317 break;
2293 2318 }
2294 2319 break;
2295 2320 case X86_VENDOR_Rise:
2296 2321 if (cpi->cpi_family == 5 &&
2297 2322 (cpi->cpi_model == 0 || cpi->cpi_model == 2))
2298 2323 brand = "Rise mP6";
2299 2324 break;
2300 2325 case X86_VENDOR_SiS:
2301 2326 if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
2302 2327 brand = "SiS 55x";
2303 2328 break;
2304 2329 case X86_VENDOR_TM:
2305 2330 if (cpi->cpi_family == 5 && cpi->cpi_model == 4)
2306 2331 brand = "Transmeta Crusoe TM3x00 or TM5x00";
2307 2332 break;
2308 2333 case X86_VENDOR_NSC:
2309 2334 case X86_VENDOR_UMC:
2310 2335 default:
2311 2336 break;
2312 2337 }
2313 2338 if (brand) {
2314 2339 (void) strcpy((char *)cpi->cpi_brandstr, brand);
2315 2340 return;
2316 2341 }
2317 2342
2318 2343 /*
2319 2344 * If all else fails ...
2320 2345 */
2321 2346 (void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr),
2322 2347 "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family,
2323 2348 cpi->cpi_model, cpi->cpi_step);
2324 2349 }
2325 2350
2326 2351 /*
2327 2352 * This routine is called just after kernel memory allocation
2328 2353 * becomes available on cpu0, and as part of mp_startup() on
2329 2354 * the other cpus.
2330 2355 *
2331 2356 * Fixup the brand string, and collect any information from cpuid
2332 2357 * that requires dynamically allocated storage to represent.
2333 2358 */
2334 2359 /*ARGSUSED*/
2335 2360 void
2336 2361 cpuid_pass3(cpu_t *cpu)
2337 2362 {
2338 2363 int i, max, shft, level, size;
2339 2364 struct cpuid_regs regs;
2340 2365 struct cpuid_regs *cp;
2341 2366 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2342 2367
2343 2368 ASSERT(cpi->cpi_pass == 2);
2344 2369
2345 2370 /*
2346 2371 * Function 4: Deterministic cache parameters
2347 2372 *
2348 2373 * Take this opportunity to detect the number of threads
2349 2374 * sharing the last level cache, and construct a corresponding
2350 2375 * cache id. The respective cpuid_info members are initialized
2351 2376 * to the default case of "no last level cache sharing".
2352 2377 */
2353 2378 cpi->cpi_ncpu_shr_last_cache = 1;
2354 2379 cpi->cpi_last_lvl_cacheid = cpu->cpu_id;
2355 2380
2356 2381 if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) {
2357 2382
2358 2383 /*
2359 2384 * Find the # of elements (size) returned by fn 4, and along
2360 2385 * the way detect last level cache sharing details.
2361 2386 */
2362 2387 bzero(&regs, sizeof (regs));
2363 2388 cp = &regs;
2364 2389 for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) {
2365 2390 cp->cp_eax = 4;
2366 2391 cp->cp_ecx = i;
2367 2392
2368 2393 (void) __cpuid_insn(cp);
2369 2394
2370 2395 if (CPI_CACHE_TYPE(cp) == 0)
2371 2396 break;
2372 2397 level = CPI_CACHE_LVL(cp);
2373 2398 if (level > max) {
2374 2399 max = level;
2375 2400 cpi->cpi_ncpu_shr_last_cache =
2376 2401 CPI_NTHR_SHR_CACHE(cp) + 1;
2377 2402 }
2378 2403 }
2379 2404 cpi->cpi_std_4_size = size = i;
2380 2405
2381 2406 /*
2382 2407 * Allocate the cpi_std_4 array. The first element
2383 2408 * references the regs for fn 4, %ecx == 0, which
2384 2409 * cpuid_pass2() stashed in cpi->cpi_std[4].
2385 2410 */
2386 2411 if (size > 0) {
2387 2412 cpi->cpi_std_4 =
2388 2413 kmem_alloc(size * sizeof (cp), KM_SLEEP);
2389 2414 cpi->cpi_std_4[0] = &cpi->cpi_std[4];
2390 2415
2391 2416 /*
2392 2417 * Allocate storage to hold the additional regs
2393 2418 * for function 4, %ecx == 1 .. cpi_std_4_size.
2394 2419 *
2395 2420 * The regs for fn 4, %ecx == 0 has already
2396 2421 * been allocated as indicated above.
2397 2422 */
2398 2423 for (i = 1; i < size; i++) {
2399 2424 cp = cpi->cpi_std_4[i] =
2400 2425 kmem_zalloc(sizeof (regs), KM_SLEEP);
2401 2426 cp->cp_eax = 4;
2402 2427 cp->cp_ecx = i;
2403 2428
2404 2429 (void) __cpuid_insn(cp);
2405 2430 }
2406 2431 }
2407 2432 /*
2408 2433 * Determine the number of bits needed to represent
2409 2434 * the number of CPUs sharing the last level cache.
2410 2435 *
2411 2436 * Shift off that number of bits from the APIC id to
2412 2437 * derive the cache id.
2413 2438 */
2414 2439 shft = 0;
2415 2440 for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1)
2416 2441 shft++;
2417 2442 cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft;
2418 2443 }
2419 2444
2420 2445 /*
2421 2446 * Now fixup the brand string
2422 2447 */
2423 2448 if ((cpi->cpi_xmaxeax & 0x80000000) == 0) {
2424 2449 fabricate_brandstr(cpi);
2425 2450 } else {
2426 2451
2427 2452 /*
2428 2453 * If we successfully extracted a brand string from the cpuid
2429 2454 * instruction, clean it up by removing leading spaces and
2430 2455 * similar junk.
2431 2456 */
2432 2457 if (cpi->cpi_brandstr[0]) {
2433 2458 size_t maxlen = sizeof (cpi->cpi_brandstr);
2434 2459 char *src, *dst;
2435 2460
2436 2461 dst = src = (char *)cpi->cpi_brandstr;
2437 2462 src[maxlen - 1] = '\0';
2438 2463 /*
2439 2464 * strip leading spaces
2440 2465 */
2441 2466 while (*src == ' ')
2442 2467 src++;
2443 2468 /*
2444 2469 * Remove any 'Genuine' or "Authentic" prefixes
2445 2470 */
2446 2471 if (strncmp(src, "Genuine ", 8) == 0)
2447 2472 src += 8;
2448 2473 if (strncmp(src, "Authentic ", 10) == 0)
2449 2474 src += 10;
2450 2475
2451 2476 /*
2452 2477 * Now do an in-place copy.
2453 2478 * Map (R) to (r) and (TM) to (tm).
2454 2479 * The era of teletypes is long gone, and there's
2455 2480 * -really- no need to shout.
2456 2481 */
2457 2482 while (*src != '\0') {
2458 2483 if (src[0] == '(') {
2459 2484 if (strncmp(src + 1, "R)", 2) == 0) {
2460 2485 (void) strncpy(dst, "(r)", 3);
2461 2486 src += 3;
2462 2487 dst += 3;
2463 2488 continue;
2464 2489 }
2465 2490 if (strncmp(src + 1, "TM)", 3) == 0) {
2466 2491 (void) strncpy(dst, "(tm)", 4);
2467 2492 src += 4;
2468 2493 dst += 4;
2469 2494 continue;
2470 2495 }
2471 2496 }
2472 2497 *dst++ = *src++;
2473 2498 }
2474 2499 *dst = '\0';
2475 2500
2476 2501 /*
2477 2502 * Finally, remove any trailing spaces
2478 2503 */
2479 2504 while (--dst > cpi->cpi_brandstr)
2480 2505 if (*dst == ' ')
2481 2506 *dst = '\0';
2482 2507 else
2483 2508 break;
2484 2509 } else
2485 2510 fabricate_brandstr(cpi);
2486 2511 }
2487 2512 cpi->cpi_pass = 3;
2488 2513 }
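
The cache-id derivation at the top of cpuid_pass3() rounds the sharing count up to a power of two and then shifts the APIC id by that many bits. A standalone sketch with an assumed sharing count of 6 and an assumed APIC id:

    #include <stdio.h>

    int
    main(void)
    {
            unsigned int apicid = 0x1d;             /* example APIC id */
            unsigned int ncpu_shr_last_cache = 6;   /* example sharing count */
            unsigned int i, shft = 0;

            /* bits needed to represent the sharing count */
            for (i = 1; i < ncpu_shr_last_cache; i <<= 1)
                    shft++;

            /* 6 rounds up to 8, so shft = 3 and 0x1d >> 3 = 3 */
            printf("last_lvl_cacheid = %u\n", apicid >> shft);
            return (0);
    }
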
2489 2514
2490 2515 /*
2491 2516 * This routine is called out of bind_hwcap() much later in the life
2492 2517 * of the kernel (post_startup()). The job of this routine is to resolve
2493 2518 * the hardware feature support and kernel support for those features into
2494 2519 * what we're actually going to tell applications via the aux vector.
2495 2520 */
2496 2521 uint_t
2497 2522 cpuid_pass4(cpu_t *cpu)
2498 2523 {
2499 2524 struct cpuid_info *cpi;
2500 2525 uint_t hwcap_flags = 0;
2501 2526
2502 2527 if (cpu == NULL)
2503 2528 cpu = CPU;
2504 2529 cpi = cpu->cpu_m.mcpu_cpi;
2505 2530
2506 2531 ASSERT(cpi->cpi_pass == 3);
2507 2532
2508 2533 if (cpi->cpi_maxeax >= 1) {
2509 2534 uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES];
2510 2535 uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES];
2511 2536
2512 2537 *edx = CPI_FEATURES_EDX(cpi);
2513 2538 *ecx = CPI_FEATURES_ECX(cpi);
2514 2539
2515 2540 /*
2516 2541 * [these require explicit kernel support]
2517 2542 */
2518 2543 if (!is_x86_feature(x86_featureset, X86FSET_SEP))
2519 2544 *edx &= ~CPUID_INTC_EDX_SEP;
2520 2545
2521 2546 if (!is_x86_feature(x86_featureset, X86FSET_SSE))
2522 2547 *edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE);
2523 2548 if (!is_x86_feature(x86_featureset, X86FSET_SSE2))
2524 2549 *edx &= ~CPUID_INTC_EDX_SSE2;
2525 2550
2526 2551 if (!is_x86_feature(x86_featureset, X86FSET_HTT))
2527 2552 *edx &= ~CPUID_INTC_EDX_HTT;
2528 2553
2529 2554 if (!is_x86_feature(x86_featureset, X86FSET_SSE3))
2530 2555 *ecx &= ~CPUID_INTC_ECX_SSE3;
2531 2556
2532 2557 if (!is_x86_feature(x86_featureset, X86FSET_SSSE3))
2533 2558 *ecx &= ~CPUID_INTC_ECX_SSSE3;
2534 2559 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_1))
2535 2560 *ecx &= ~CPUID_INTC_ECX_SSE4_1;
2536 2561 if (!is_x86_feature(x86_featureset, X86FSET_SSE4_2))
2537 2562 *ecx &= ~CPUID_INTC_ECX_SSE4_2;
2538 2563 if (!is_x86_feature(x86_featureset, X86FSET_AES))
2539 2564 *ecx &= ~CPUID_INTC_ECX_AES;
2540 2565 if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ))
2541 2566 *ecx &= ~CPUID_INTC_ECX_PCLMULQDQ;
2542 2567 if (!is_x86_feature(x86_featureset, X86FSET_XSAVE))
2543 2568 *ecx &= ~(CPUID_INTC_ECX_XSAVE |
2544 2569 CPUID_INTC_ECX_OSXSAVE);
2545 2570 if (!is_x86_feature(x86_featureset, X86FSET_AVX))
2546 2571 *ecx &= ~CPUID_INTC_ECX_AVX;
2547 2572
2548 2573 /*
2549 2574 * [no explicit support required beyond x87 fp context]
2550 2575 */
2551 2576 if (!fpu_exists)
2552 2577 *edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX);
2553 2578
2554 2579 /*
2555 2580 * Now map the supported feature vector to things that we
2556 2581 * think userland will care about.
2557 2582 */
2558 2583 if (*edx & CPUID_INTC_EDX_SEP)
2559 2584 hwcap_flags |= AV_386_SEP;
2560 2585 if (*edx & CPUID_INTC_EDX_SSE)
2561 2586 hwcap_flags |= AV_386_FXSR | AV_386_SSE;
2562 2587 if (*edx & CPUID_INTC_EDX_SSE2)
2563 2588 hwcap_flags |= AV_386_SSE2;
2564 2589 if (*ecx & CPUID_INTC_ECX_SSE3)
2565 2590 hwcap_flags |= AV_386_SSE3;
2566 2591 if (*ecx & CPUID_INTC_ECX_SSSE3)
2567 2592 hwcap_flags |= AV_386_SSSE3;
2568 2593 if (*ecx & CPUID_INTC_ECX_SSE4_1)
2569 2594 hwcap_flags |= AV_386_SSE4_1;
2570 2595 if (*ecx & CPUID_INTC_ECX_SSE4_2)
2571 2596 hwcap_flags |= AV_386_SSE4_2;
2572 2597 if (*ecx & CPUID_INTC_ECX_MOVBE)
2573 2598 hwcap_flags |= AV_386_MOVBE;
2574 2599 if (*ecx & CPUID_INTC_ECX_AES)
2575 2600 hwcap_flags |= AV_386_AES;
2576 2601 if (*ecx & CPUID_INTC_ECX_PCLMULQDQ)
2577 2602 hwcap_flags |= AV_386_PCLMULQDQ;
2578 2603 if ((*ecx & CPUID_INTC_ECX_XSAVE) &&
2579 2604 (*ecx & CPUID_INTC_ECX_OSXSAVE)) {
2580 2605 hwcap_flags |= AV_386_XSAVE;
2581 2606
2582 2607 if (*ecx & CPUID_INTC_ECX_AVX)
2583 2608 hwcap_flags |= AV_386_AVX;
2584 2609 }
2585 2610 if (*ecx & CPUID_INTC_ECX_VMX)
2586 2611 hwcap_flags |= AV_386_VMX;
2587 2612 if (*ecx & CPUID_INTC_ECX_POPCNT)
2588 2613 hwcap_flags |= AV_386_POPCNT;
2589 2614 if (*edx & CPUID_INTC_EDX_FPU)
2590 2615 hwcap_flags |= AV_386_FPU;
2591 2616 if (*edx & CPUID_INTC_EDX_MMX)
2592 2617 hwcap_flags |= AV_386_MMX;
2593 2618
2594 2619 if (*edx & CPUID_INTC_EDX_TSC)
2595 2620 hwcap_flags |= AV_386_TSC;
2596 2621 if (*edx & CPUID_INTC_EDX_CX8)
2597 2622 hwcap_flags |= AV_386_CX8;
2598 2623 if (*edx & CPUID_INTC_EDX_CMOV)
2599 2624 hwcap_flags |= AV_386_CMOV;
2600 2625 if (*ecx & CPUID_INTC_ECX_CX16)
2601 2626 hwcap_flags |= AV_386_CX16;
2602 2627 }
2603 2628
2604 2629 if (cpi->cpi_xmaxeax < 0x80000001)
2605 2630 goto pass4_done;
2606 2631
2607 2632 switch (cpi->cpi_vendor) {
2608 2633 struct cpuid_regs cp;
2609 2634 uint32_t *edx, *ecx;
2610 2635
2611 2636 case X86_VENDOR_Intel:
2612 2637 /*
2613 2638 * Seems like Intel duplicated what was necessary
2614 2639 * here to make the initial crop of 64-bit OS's work.
2615 2640 * Hopefully, those are the only "extended" bits
2616 2641 * they'll add.
2617 2642 */
2618 2643 /*FALLTHROUGH*/
2619 2644
2620 2645 case X86_VENDOR_AMD:
2621 2646 edx = &cpi->cpi_support[AMD_EDX_FEATURES];
2622 2647 ecx = &cpi->cpi_support[AMD_ECX_FEATURES];
2623 2648
2624 2649 *edx = CPI_FEATURES_XTD_EDX(cpi);
2625 2650 *ecx = CPI_FEATURES_XTD_ECX(cpi);
2626 2651
2627 2652 /*
2628 2653 * [these features require explicit kernel support]
2629 2654 */
2630 2655 switch (cpi->cpi_vendor) {
2631 2656 case X86_VENDOR_Intel:
2632 2657 if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
2633 2658 *edx &= ~CPUID_AMD_EDX_TSCP;
2634 2659 break;
2635 2660
2636 2661 case X86_VENDOR_AMD:
2637 2662 if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
2638 2663 *edx &= ~CPUID_AMD_EDX_TSCP;
2639 2664 if (!is_x86_feature(x86_featureset, X86FSET_SSE4A))
2640 2665 *ecx &= ~CPUID_AMD_ECX_SSE4A;
2641 2666 break;
2642 2667
2643 2668 default:
2644 2669 break;
2645 2670 }
2646 2671
2647 2672 /*
2648 2673 * [no explicit support required beyond
2649 2674 * x87 fp context and exception handlers]
2650 2675 */
2651 2676 if (!fpu_exists)
2652 2677 *edx &= ~(CPUID_AMD_EDX_MMXamd |
2653 2678 CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx);
2654 2679
2655 2680 if (!is_x86_feature(x86_featureset, X86FSET_NX))
2656 2681 *edx &= ~CPUID_AMD_EDX_NX;
2657 2682 #if !defined(__amd64)
2658 2683 *edx &= ~CPUID_AMD_EDX_LM;
2659 2684 #endif
2660 2685 /*
2661 2686 * Now map the supported feature vector to
2662 2687 * things that we think userland will care about.
2663 2688 */
2664 2689 #if defined(__amd64)
2665 2690 if (*edx & CPUID_AMD_EDX_SYSC)
2666 2691 hwcap_flags |= AV_386_AMD_SYSC;
2667 2692 #endif
2668 2693 if (*edx & CPUID_AMD_EDX_MMXamd)
2669 2694 hwcap_flags |= AV_386_AMD_MMX;
2670 2695 if (*edx & CPUID_AMD_EDX_3DNow)
2671 2696 hwcap_flags |= AV_386_AMD_3DNow;
2672 2697 if (*edx & CPUID_AMD_EDX_3DNowx)
2673 2698 hwcap_flags |= AV_386_AMD_3DNowx;
2674 2699 if (*ecx & CPUID_AMD_ECX_SVM)
2675 2700 hwcap_flags |= AV_386_AMD_SVM;
2676 2701
2677 2702 switch (cpi->cpi_vendor) {
2678 2703 case X86_VENDOR_AMD:
2679 2704 if (*edx & CPUID_AMD_EDX_TSCP)
2680 2705 hwcap_flags |= AV_386_TSCP;
2681 2706 if (*ecx & CPUID_AMD_ECX_AHF64)
2682 2707 hwcap_flags |= AV_386_AHF;
2683 2708 if (*ecx & CPUID_AMD_ECX_SSE4A)
2684 2709 hwcap_flags |= AV_386_AMD_SSE4A;
2685 2710 if (*ecx & CPUID_AMD_ECX_LZCNT)
2686 2711 hwcap_flags |= AV_386_AMD_LZCNT;
2687 2712 break;
2688 2713
2689 2714 case X86_VENDOR_Intel:
2690 2715 if (*edx & CPUID_AMD_EDX_TSCP)
2691 2716 hwcap_flags |= AV_386_TSCP;
2692 2717 /*
2693 2718 * Aarrgh.
2694 2719 * Intel uses a different bit in the same word.
2695 2720 */
2696 2721 if (*ecx & CPUID_INTC_ECX_AHF64)
2697 2722 hwcap_flags |= AV_386_AHF;
2698 2723 break;
2699 2724
2700 2725 default:
2701 2726 break;
2702 2727 }
2703 2728 break;
2704 2729
2705 2730 case X86_VENDOR_TM:
2706 2731 cp.cp_eax = 0x80860001;
2707 2732 (void) __cpuid_insn(&cp);
2708 2733 cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx;
2709 2734 break;
2710 2735
2711 2736 default:
2712 2737 break;
2713 2738 }
2714 2739
2715 2740 pass4_done:
2716 2741 cpi->cpi_pass = 4;
2717 2742 return (hwcap_flags);
2718 2743 }
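
The hwcap flags assembled by cpuid_pass4() eventually reach applications through the aux vector. A user-level sketch of testing a couple of them, assuming the getisax(3C) interface and the AV_386_* bits that ship in <sys/auxv_386.h>:

    #include <stdio.h>
    #include <inttypes.h>
    #include <sys/auxv.h>           /* getisax(3C) */
    #include <sys/auxv_386.h>       /* AV_386_* definitions (x86 only) */

    int
    main(void)
    {
            uint32_t av[1] = { 0 };

            (void) getisax(av, 1);

            printf("SSE2: %s\n", (av[0] & AV_386_SSE2) ? "yes" : "no");
            printf("AES:  %s\n", (av[0] & AV_386_AES) ? "yes" : "no");
            return (0);
    }
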
2719 2744
2720 2745
2721 2746 /*
2722 2747 * Simulate the cpuid instruction using the data we previously
2723 2748 * captured about this CPU. We try our best to return the truth
2724 2749 * about the hardware, independently of kernel support.
2725 2750 */
2726 2751 uint32_t
2727 2752 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp)
2728 2753 {
2729 2754 struct cpuid_info *cpi;
2730 2755 struct cpuid_regs *xcp;
2731 2756
2732 2757 if (cpu == NULL)
2733 2758 cpu = CPU;
2734 2759 cpi = cpu->cpu_m.mcpu_cpi;
2735 2760
2736 2761 ASSERT(cpuid_checkpass(cpu, 3));
2737 2762
2738 2763 /*
2739 2764 * CPUID data is cached in two separate places: cpi_std for standard
2740 2765 * CPUID functions, and cpi_extd for extended CPUID functions.
2741 2766 */
2742 2767 if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD)
2743 2768 xcp = &cpi->cpi_std[cp->cp_eax];
2744 2769 else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax &&
2745 2770 cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD)
2746 2771 xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000];
2747 2772 else
2748 2773 /*
2749 2774 * The caller is asking for data from an input parameter which
2750 2775 * the kernel has not cached. In this case we go fetch from
2751 2776 * the hardware and return the data directly to the user.
2752 2777 */
2753 2778 return (__cpuid_insn(cp));
2754 2779
2755 2780 cp->cp_eax = xcp->cp_eax;
2756 2781 cp->cp_ebx = xcp->cp_ebx;
2757 2782 cp->cp_ecx = xcp->cp_ecx;
2758 2783 cp->cp_edx = xcp->cp_edx;
2759 2784 return (cp->cp_eax);
2760 2785 }
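
For comparison with the cached lookup above, a hypothetical user-level equivalent of a raw cpuid query on x86, using GCC-style inline assembly and a structure shaped like cpuid_regs (a sketch, not the kernel's __cpuid_insn()):

    #include <stdio.h>
    #include <stdint.h>

    struct regs {
            uint32_t eax, ebx, ecx, edx;
    };

    static void
    raw_cpuid(struct regs *rp)
    {
            /* %eax selects the function, %ecx the sub-function (if any) */
            __asm__ __volatile__("cpuid"
                : "=a" (rp->eax), "=b" (rp->ebx),
                  "=c" (rp->ecx), "=d" (rp->edx)
                : "a" (rp->eax), "c" (rp->ecx));
    }

    int
    main(void)
    {
            struct regs r = { 0x80000000, 0, 0, 0 };

            raw_cpuid(&r);
            printf("max extended cpuid function: 0x%x\n", r.eax);
            return (0);
    }
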
2761 2786
2762 2787 int
2763 2788 cpuid_checkpass(cpu_t *cpu, int pass)
2764 2789 {
2765 2790 return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL &&
2766 2791 cpu->cpu_m.mcpu_cpi->cpi_pass >= pass);
2767 2792 }
2768 2793
2769 2794 int
2770 2795 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n)
2771 2796 {
2772 2797 ASSERT(cpuid_checkpass(cpu, 3));
2773 2798
2774 2799 return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr));
2775 2800 }
2776 2801
2777 2802 int
2778 2803 cpuid_is_cmt(cpu_t *cpu)
2779 2804 {
2780 2805 if (cpu == NULL)
2781 2806 cpu = CPU;
2782 2807
2783 2808 ASSERT(cpuid_checkpass(cpu, 1));
2784 2809
2785 2810 return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0);
2786 2811 }
2787 2812
2788 2813 /*
2789 2814 * AMD and Intel both implement the 64-bit variant of the syscall
2790 2815 * instruction (syscallq), so if there's -any- support for syscall,
2791 2816 * cpuid currently says "yes, we support this".
2792 2817 *
2793 2818 * However, Intel decided to -not- implement the 32-bit variant of the
2794 2819 * syscall instruction, so we provide a predicate to allow our caller
2795 2820 * to test that subtlety here.
2796 2821 *
2797 2822 * XXPV Currently, 32-bit syscall instructions don't work via the hypervisor,
2798 2823 * even in the case where the hardware would in fact support it.
2799 2824 */
2800 2825 /*ARGSUSED*/
2801 2826 int
2802 2827 cpuid_syscall32_insn(cpu_t *cpu)
2803 2828 {
2804 2829 ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1));
2805 2830
2806 2831 #if !defined(__xpv)
2807 2832 if (cpu == NULL)
2808 2833 cpu = CPU;
2809 2834
2810 2835 /*CSTYLED*/
2811 2836 {
2812 2837 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2813 2838
2814 2839 if (cpi->cpi_vendor == X86_VENDOR_AMD &&
2815 2840 cpi->cpi_xmaxeax >= 0x80000001 &&
2816 2841 (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC))
2817 2842 return (1);
2818 2843 }
2819 2844 #endif
2820 2845 return (0);
2821 2846 }
2822 2847
2823 2848 int
2824 2849 cpuid_getidstr(cpu_t *cpu, char *s, size_t n)
2825 2850 {
2826 2851 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2827 2852
2828 2853 static const char fmt[] =
2829 2854 "x86 (%s %X family %d model %d step %d clock %d MHz)";
2830 2855 static const char fmt_ht[] =
2831 2856 "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)";
2832 2857
2833 2858 ASSERT(cpuid_checkpass(cpu, 1));
2834 2859
2835 2860 if (cpuid_is_cmt(cpu))
2836 2861 return (snprintf(s, n, fmt_ht, cpi->cpi_chipid,
2837 2862 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
2838 2863 cpi->cpi_family, cpi->cpi_model,
2839 2864 cpi->cpi_step, cpu->cpu_type_info.pi_clock));
2840 2865 return (snprintf(s, n, fmt,
2841 2866 cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
2842 2867 cpi->cpi_family, cpi->cpi_model,
2843 2868 cpi->cpi_step, cpu->cpu_type_info.pi_clock));
2844 2869 }
2845 2870
2846 2871 const char *
2847 2872 cpuid_getvendorstr(cpu_t *cpu)
2848 2873 {
2849 2874 ASSERT(cpuid_checkpass(cpu, 1));
2850 2875 return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr);
2851 2876 }
2852 2877
2853 2878 uint_t
2854 2879 cpuid_getvendor(cpu_t *cpu)
2855 2880 {
2856 2881 ASSERT(cpuid_checkpass(cpu, 1));
2857 2882 return (cpu->cpu_m.mcpu_cpi->cpi_vendor);
2858 2883 }
2859 2884
2860 2885 uint_t
2861 2886 cpuid_getfamily(cpu_t *cpu)
2862 2887 {
2863 2888 ASSERT(cpuid_checkpass(cpu, 1));
2864 2889 return (cpu->cpu_m.mcpu_cpi->cpi_family);
2865 2890 }
2866 2891
2867 2892 uint_t
2868 2893 cpuid_getmodel(cpu_t *cpu)
2869 2894 {
2870 2895 ASSERT(cpuid_checkpass(cpu, 1));
2871 2896 return (cpu->cpu_m.mcpu_cpi->cpi_model);
2872 2897 }
2873 2898
2874 2899 uint_t
2875 2900 cpuid_get_ncpu_per_chip(cpu_t *cpu)
2876 2901 {
2877 2902 ASSERT(cpuid_checkpass(cpu, 1));
2878 2903 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip);
2879 2904 }
2880 2905
2881 2906 uint_t
2882 2907 cpuid_get_ncore_per_chip(cpu_t *cpu)
2883 2908 {
2884 2909 ASSERT(cpuid_checkpass(cpu, 1));
2885 2910 return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip);
2886 2911 }
2887 2912
2888 2913 uint_t
2889 2914 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu)
2890 2915 {
2891 2916 ASSERT(cpuid_checkpass(cpu, 2));
2892 2917 return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache);
2893 2918 }
2894 2919
2895 2920 id_t
2896 2921 cpuid_get_last_lvl_cacheid(cpu_t *cpu)
2897 2922 {
2898 2923 ASSERT(cpuid_checkpass(cpu, 2));
2899 2924 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
2900 2925 }
2901 2926
2902 2927 uint_t
2903 2928 cpuid_getstep(cpu_t *cpu)
2904 2929 {
2905 2930 ASSERT(cpuid_checkpass(cpu, 1));
2906 2931 return (cpu->cpu_m.mcpu_cpi->cpi_step);
2907 2932 }
2908 2933
2909 2934 uint_t
2910 2935 cpuid_getsig(struct cpu *cpu)
2911 2936 {
2912 2937 ASSERT(cpuid_checkpass(cpu, 1));
2913 2938 return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax);
2914 2939 }
2915 2940
2916 2941 uint32_t
2917 2942 cpuid_getchiprev(struct cpu *cpu)
2918 2943 {
2919 2944 ASSERT(cpuid_checkpass(cpu, 1));
2920 2945 return (cpu->cpu_m.mcpu_cpi->cpi_chiprev);
2921 2946 }
2922 2947
2923 2948 const char *
2924 2949 cpuid_getchiprevstr(struct cpu *cpu)
2925 2950 {
2926 2951 ASSERT(cpuid_checkpass(cpu, 1));
2927 2952 return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr);
2928 2953 }
2929 2954
2930 2955 uint32_t
2931 2956 cpuid_getsockettype(struct cpu *cpu)
2932 2957 {
2933 2958 ASSERT(cpuid_checkpass(cpu, 1));
2934 2959 return (cpu->cpu_m.mcpu_cpi->cpi_socket);
2935 2960 }
2936 2961
2937 2962 const char *
2938 2963 cpuid_getsocketstr(cpu_t *cpu)
2939 2964 {
2940 2965 static const char *socketstr = NULL;
2941 2966 struct cpuid_info *cpi;
2942 2967
2943 2968 ASSERT(cpuid_checkpass(cpu, 1));
2944 2969 cpi = cpu->cpu_m.mcpu_cpi;
2945 2970
2946 2971 /* Assume that socket types are the same across the system */
2947 2972 if (socketstr == NULL)
2948 2973 socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family,
2949 2974 cpi->cpi_model, cpi->cpi_step);
2950 2975
2951 2976
2952 2977 return (socketstr);
2953 2978 }
2954 2979
2955 2980 int
2956 2981 cpuid_get_chipid(cpu_t *cpu)
2957 2982 {
2958 2983 ASSERT(cpuid_checkpass(cpu, 1));
2959 2984
2960 2985 if (cpuid_is_cmt(cpu))
2961 2986 return (cpu->cpu_m.mcpu_cpi->cpi_chipid);
2962 2987 return (cpu->cpu_id);
2963 2988 }
2964 2989
2965 2990 id_t
2966 2991 cpuid_get_coreid(cpu_t *cpu)
2967 2992 {
2968 2993 ASSERT(cpuid_checkpass(cpu, 1));
2969 2994 return (cpu->cpu_m.mcpu_cpi->cpi_coreid);
2970 2995 }
2971 2996
2972 2997 int
2973 2998 cpuid_get_pkgcoreid(cpu_t *cpu)
2974 2999 {
2975 3000 ASSERT(cpuid_checkpass(cpu, 1));
2976 3001 return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid);
2977 3002 }
2978 3003
2979 3004 int
2980 3005 cpuid_get_clogid(cpu_t *cpu)
2981 3006 {
2982 3007 ASSERT(cpuid_checkpass(cpu, 1));
2983 3008 return (cpu->cpu_m.mcpu_cpi->cpi_clogid);
2984 3009 }
2985 3010
2986 3011 int
2987 3012 cpuid_get_cacheid(cpu_t *cpu)
2988 3013 {
2989 3014 ASSERT(cpuid_checkpass(cpu, 1));
2990 3015 return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
2991 3016 }
2992 3017
2993 3018 uint_t
2994 3019 cpuid_get_procnodeid(cpu_t *cpu)
2995 3020 {
2996 3021 ASSERT(cpuid_checkpass(cpu, 1));
2997 3022 return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid);
2998 3023 }
2999 3024
3000 3025 uint_t
3001 3026 cpuid_get_procnodes_per_pkg(cpu_t *cpu)
3002 3027 {
3003 3028 ASSERT(cpuid_checkpass(cpu, 1));
3004 3029 return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg);
3005 3030 }
3006 3031
3032 +uint_t
3033 +cpuid_get_compunitid(cpu_t *cpu)
3034 +{
3035 + ASSERT(cpuid_checkpass(cpu, 1));
3036 + return (cpu->cpu_m.mcpu_cpi->cpi_compunitid);
3037 +}
3038 +
3039 +uint_t
3040 +cpuid_get_cores_per_compunit(cpu_t *cpu)
3041 +{
3042 + ASSERT(cpuid_checkpass(cpu, 1));
3043 + return (cpu->cpu_m.mcpu_cpi->cpi_cores_per_compunit);
3044 +}
3045 +
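
The two accessors added above expose AMD's compute-unit topology: on family 0x15 parts, two integer cores share one compute unit (and its FPU). A user-level sketch of decoding a canned topology-extension value follows; the field layout assumed here (ComputeUnitId in EBX[7:0], CoresPerComputeUnit minus one in EBX[15:8] of CPUID Fn8000_001E) is an assumption about the AMD leaf, not something shown in this change:

    #include <stdio.h>

    /* hypothetical stand-in for the kernel's BITX(x, hi, lo) extractor */
    #define BITX(x, hi, lo) \
            (((x) >> (lo)) & ((1u << ((hi) - (lo) + 1)) - 1))

    int
    main(void)
    {
            /* canned Fn8000_001E %ebx: CoresPerComputeUnit-1 = 1, id = 3 */
            unsigned int ebx = 0x00000103;

            unsigned int compunitid = BITX(ebx, 7, 0);
            unsigned int cores_per_compunit = BITX(ebx, 15, 8) + 1;

            /* prints: compute unit 3, 2 cores per compute unit */
            printf("compute unit %u, %u cores per compute unit\n",
                compunitid, cores_per_compunit);
            return (0);
    }
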
3007 3046 /*ARGSUSED*/
3008 3047 int
3009 3048 cpuid_have_cr8access(cpu_t *cpu)
3010 3049 {
3011 3050 #if defined(__amd64)
3012 3051 return (1);
3013 3052 #else
3014 3053 struct cpuid_info *cpi;
3015 3054
3016 3055 ASSERT(cpu != NULL);
3017 3056 cpi = cpu->cpu_m.mcpu_cpi;
3018 3057 if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 &&
3019 3058 (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0)
3020 3059 return (1);
3021 3060 return (0);
3022 3061 #endif
3023 3062 }
3024 3063
3025 3064 uint32_t
3026 3065 cpuid_get_apicid(cpu_t *cpu)
3027 3066 {
3028 3067 ASSERT(cpuid_checkpass(cpu, 1));
3029 3068 if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) {
3030 3069 return (UINT32_MAX);
3031 3070 } else {
3032 3071 return (cpu->cpu_m.mcpu_cpi->cpi_apicid);
3033 3072 }
3034 3073 }
3035 3074
3036 3075 void
3037 3076 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits)
3038 3077 {
3039 3078 struct cpuid_info *cpi;
3040 3079
3041 3080 if (cpu == NULL)
3042 3081 cpu = CPU;
3043 3082 cpi = cpu->cpu_m.mcpu_cpi;
3044 3083
3045 3084 ASSERT(cpuid_checkpass(cpu, 1));
3046 3085
3047 3086 if (pabits)
3048 3087 *pabits = cpi->cpi_pabits;
3049 3088 if (vabits)
3050 3089 *vabits = cpi->cpi_vabits;
3051 3090 }
3052 3091
3053 3092 /*
3054 3093 * Returns the number of data TLB entries for a corresponding
3055 3094 * pagesize. If it can't be computed, or isn't known, the
3056 3095 * routine returns zero. If you ask about an architecturally
3057 3096 * impossible pagesize, the routine will panic (so that the
3058 3097 * hat implementor knows that things are inconsistent.)
3059 3098 */
3060 3099 uint_t
3061 3100 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize)
3062 3101 {
3063 3102 struct cpuid_info *cpi;
3064 3103 uint_t dtlb_nent = 0;
3065 3104
3066 3105 if (cpu == NULL)
3067 3106 cpu = CPU;
3068 3107 cpi = cpu->cpu_m.mcpu_cpi;
3069 3108
3070 3109 ASSERT(cpuid_checkpass(cpu, 1));
3071 3110
3072 3111 /*
3073 3112 * Check the L2 TLB info
3074 3113 */
3075 3114 if (cpi->cpi_xmaxeax >= 0x80000006) {
3076 3115 struct cpuid_regs *cp = &cpi->cpi_extd[6];
3077 3116
3078 3117 switch (pagesize) {
3079 3118
3080 3119 case 4 * 1024:
3081 3120 /*
3082 3121 * All zero in the top 16 bits of the register
3083 3122 * indicates a unified TLB. Size is in low 16 bits.
3084 3123 */
3085 3124 if ((cp->cp_ebx & 0xffff0000) == 0)
3086 3125 dtlb_nent = cp->cp_ebx & 0x0000ffff;
3087 3126 else
3088 3127 dtlb_nent = BITX(cp->cp_ebx, 27, 16);
3089 3128 break;
3090 3129
3091 3130 case 2 * 1024 * 1024:
3092 3131 if ((cp->cp_eax & 0xffff0000) == 0)
3093 3132 dtlb_nent = cp->cp_eax & 0x0000ffff;
3094 3133 else
3095 3134 dtlb_nent = BITX(cp->cp_eax, 27, 16);
3096 3135 break;
3097 3136
3098 3137 default:
3099 3138 panic("unknown L2 pagesize");
3100 3139 /*NOTREACHED*/
3101 3140 }
3102 3141 }
3103 3142
3104 3143 if (dtlb_nent != 0)
3105 3144 return (dtlb_nent);
3106 3145
3107 3146 /*
3108 3147 * No L2 TLB support for this size, try L1.
3109 3148 */
3110 3149 if (cpi->cpi_xmaxeax >= 0x80000005) {
3111 3150 struct cpuid_regs *cp = &cpi->cpi_extd[5];
3112 3151
3113 3152 switch (pagesize) {
3114 3153 case 4 * 1024:
3115 3154 dtlb_nent = BITX(cp->cp_ebx, 23, 16);
3116 3155 break;
3117 3156 case 2 * 1024 * 1024:
3118 3157 dtlb_nent = BITX(cp->cp_eax, 23, 16);
3119 3158 break;
3120 3159 default:
3121 3160 panic("unknown L1 d-TLB pagesize");
3122 3161 /*NOTREACHED*/
3123 3162 }
3124 3163 }
3125 3164
3126 3165 return (dtlb_nent);
3127 3166 }
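
The Fn8000_0006 decode above rests on one convention: an all-zero upper half of the register means a unified L2 TLB whose entry count fills the low 16 bits; otherwise the data-TLB count sits in bits 27:16. A worked sketch with a canned %ebx and a hypothetical BITX() stand-in:

    #include <stdio.h>

    /* hypothetical stand-in for the kernel's BITX(x, hi, lo) extractor */
    #define BITX(x, hi, lo) \
            (((x) >> (lo)) & ((1u << ((hi) - (lo) + 1)) - 1))

    int
    main(void)
    {
            unsigned int ebx = 0x02000200;  /* example Fn8000_0006 %ebx */
            unsigned int dtlb_nent;

            if ((ebx & 0xffff0000) == 0)
                    dtlb_nent = ebx & 0x0000ffff;   /* unified L2 TLB */
            else
                    dtlb_nent = BITX(ebx, 27, 16);  /* split: data TLB */

            /* the upper half is nonzero here, so this prints 512 */
            printf("4K data TLB entries: %u\n", dtlb_nent);
            return (0);
    }
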
3128 3167
3129 3168 /*
3130 3169 * Return 0 if the erratum is not present or not applicable, positive
3131 3170 * if it is, and negative if the status of the erratum is unknown.
3132 3171 *
3133 3172 * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm)
3134 3173 * Processors" #25759, Rev 3.57, August 2005
3135 3174 */
3136 3175 int
3137 3176 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum)
3138 3177 {
3139 3178 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
3140 3179 uint_t eax;
3141 3180
3142 3181 /*
3143 3182 * Bail out if this CPU isn't an AMD CPU, or if it's
3144 3183 * a legacy (32-bit) AMD CPU.
3145 3184 */
3146 3185 if (cpi->cpi_vendor != X86_VENDOR_AMD ||
3147 3186 cpi->cpi_family == 4 || cpi->cpi_family == 5 ||
3148 3187 cpi->cpi_family == 6)
3149 3188
3150 3189 return (0);
3151 3190
3152 3191 eax = cpi->cpi_std[1].cp_eax;
3153 3192
3154 3193 #define SH_B0(eax) (eax == 0xf40 || eax == 0xf50)
3155 3194 #define SH_B3(eax) (eax == 0xf51)
3156 3195 #define B(eax) (SH_B0(eax) || SH_B3(eax))
3157 3196
3158 3197 #define SH_C0(eax) (eax == 0xf48 || eax == 0xf58)
3159 3198
3160 3199 #define SH_CG(eax) (eax == 0xf4a || eax == 0xf5a || eax == 0xf7a)
3161 3200 #define DH_CG(eax) (eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0)
3162 3201 #define CH_CG(eax) (eax == 0xf82 || eax == 0xfb2)
3163 3202 #define CG(eax) (SH_CG(eax) || DH_CG(eax) || CH_CG(eax))
3164 3203
3165 3204 #define SH_D0(eax) (eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70)
3166 3205 #define DH_D0(eax) (eax == 0x10fc0 || eax == 0x10ff0)
3167 3206 #define CH_D0(eax) (eax == 0x10f80 || eax == 0x10fb0)
3168 3207 #define D0(eax) (SH_D0(eax) || DH_D0(eax) || CH_D0(eax))
3169 3208
3170 3209 #define SH_E0(eax) (eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70)
3171 3210 #define JH_E1(eax) (eax == 0x20f10) /* JH8_E0 had 0x20f30 */
3172 3211 #define DH_E3(eax) (eax == 0x20fc0 || eax == 0x20ff0)
3173 3212 #define SH_E4(eax) (eax == 0x20f51 || eax == 0x20f71)
3174 3213 #define BH_E4(eax) (eax == 0x20fb1)
3175 3214 #define SH_E5(eax) (eax == 0x20f42)
3176 3215 #define DH_E6(eax) (eax == 0x20ff2 || eax == 0x20fc2)
3177 3216 #define JH_E6(eax) (eax == 0x20f12 || eax == 0x20f32)
3178 3217 #define EX(eax) (SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \
3179 3218 SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \
3180 3219 DH_E6(eax) || JH_E6(eax))
3181 3220
3182 3221 #define DR_AX(eax) (eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02)
3183 3222 #define DR_B0(eax) (eax == 0x100f20)
3184 3223 #define DR_B1(eax) (eax == 0x100f21)
3185 3224 #define DR_BA(eax) (eax == 0x100f2a)
3186 3225 #define DR_B2(eax) (eax == 0x100f22)
3187 3226 #define DR_B3(eax) (eax == 0x100f23)
3188 3227 #define RB_C0(eax) (eax == 0x100f40)
3189 3228
3190 3229 switch (erratum) {
3191 3230 case 1:
3192 3231 return (cpi->cpi_family < 0x10);
3193 3232 case 51: /* what does the asterisk mean? */
3194 3233 return (B(eax) || SH_C0(eax) || CG(eax));
3195 3234 case 52:
3196 3235 return (B(eax));
3197 3236 case 57:
3198 3237 return (cpi->cpi_family <= 0x11);
3199 3238 case 58:
3200 3239 return (B(eax));
3201 3240 case 60:
3202 3241 return (cpi->cpi_family <= 0x11);
3203 3242 case 61:
3204 3243 case 62:
3205 3244 case 63:
3206 3245 case 64:
3207 3246 case 65:
3208 3247 case 66:
3209 3248 case 68:
3210 3249 case 69:
3211 3250 case 70:
3212 3251 case 71:
3213 3252 return (B(eax));
3214 3253 case 72:
3215 3254 return (SH_B0(eax));
3216 3255 case 74:
3217 3256 return (B(eax));
3218 3257 case 75:
3219 3258 return (cpi->cpi_family < 0x10);
3220 3259 case 76:
3221 3260 return (B(eax));
3222 3261 case 77:
3223 3262 return (cpi->cpi_family <= 0x11);
3224 3263 case 78:
3225 3264 return (B(eax) || SH_C0(eax));
3226 3265 case 79:
3227 3266 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3228 3267 case 80:
3229 3268 case 81:
3230 3269 case 82:
3231 3270 return (B(eax));
3232 3271 case 83:
3233 3272 return (B(eax) || SH_C0(eax) || CG(eax));
3234 3273 case 85:
3235 3274 return (cpi->cpi_family < 0x10);
3236 3275 case 86:
3237 3276 return (SH_C0(eax) || CG(eax));
3238 3277 case 88:
3239 3278 #if !defined(__amd64)
3240 3279 return (0);
3241 3280 #else
3242 3281 return (B(eax) || SH_C0(eax));
3243 3282 #endif
3244 3283 case 89:
3245 3284 return (cpi->cpi_family < 0x10);
3246 3285 case 90:
3247 3286 return (B(eax) || SH_C0(eax) || CG(eax));
3248 3287 case 91:
3249 3288 case 92:
3250 3289 return (B(eax) || SH_C0(eax));
3251 3290 case 93:
3252 3291 return (SH_C0(eax));
3253 3292 case 94:
3254 3293 return (B(eax) || SH_C0(eax) || CG(eax));
3255 3294 case 95:
3256 3295 #if !defined(__amd64)
3257 3296 return (0);
3258 3297 #else
3259 3298 return (B(eax) || SH_C0(eax));
3260 3299 #endif
3261 3300 case 96:
3262 3301 return (B(eax) || SH_C0(eax) || CG(eax));
3263 3302 case 97:
3264 3303 case 98:
3265 3304 return (SH_C0(eax) || CG(eax));
3266 3305 case 99:
3267 3306 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3268 3307 case 100:
3269 3308 return (B(eax) || SH_C0(eax));
3270 3309 case 101:
3271 3310 case 103:
3272 3311 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3273 3312 case 104:
3274 3313 return (SH_C0(eax) || CG(eax) || D0(eax));
3275 3314 case 105:
3276 3315 case 106:
3277 3316 case 107:
3278 3317 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3279 3318 case 108:
3280 3319 return (DH_CG(eax));
3281 3320 case 109:
3282 3321 return (SH_C0(eax) || CG(eax) || D0(eax));
3283 3322 case 110:
3284 3323 return (D0(eax) || EX(eax));
3285 3324 case 111:
3286 3325 return (CG(eax));
3287 3326 case 112:
3288 3327 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3289 3328 case 113:
3290 3329 return (eax == 0x20fc0);
3291 3330 case 114:
3292 3331 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
3293 3332 case 115:
3294 3333 return (SH_E0(eax) || JH_E1(eax));
3295 3334 case 116:
3296 3335 return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
3297 3336 case 117:
3298 3337 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3299 3338 case 118:
3300 3339 return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) ||
3301 3340 JH_E6(eax));
3302 3341 case 121:
3303 3342 return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3304 3343 case 122:
3305 3344 return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11);
3306 3345 case 123:
3307 3346 return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax));
3308 3347 case 131:
3309 3348 return (cpi->cpi_family < 0x10);
3310 3349 case 6336786:
3311 3350 /*
3312 3351 * Test for AdvPowerMgmtInfo.TscPStateInvariant
3313 3352 * if this is a K8 family or newer processor
3314 3353 */
3315 3354 if (CPI_FAMILY(cpi) == 0xf) {
3316 3355 struct cpuid_regs regs;
3317 3356 regs.cp_eax = 0x80000007;
3318 3357 (void) __cpuid_insn(&regs);
3319 3358 return (!(regs.cp_edx & 0x100));
3320 3359 }
3321 3360 return (0);
3322 3361 case 6323525:
3323 3362 return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) |
3324 3363 (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40);
3325 3364
3326 3365 case 6671130:
3327 3366 /*
3328 3367 * check for processors (pre-Shanghai) that do not provide
3329 3368 * optimal management of 1gb ptes in its tlb.
3330 3369 */
3331 3370 return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4);
3332 3371
3333 3372 case 298:
3334 3373 return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) ||
3335 3374 DR_B2(eax) || RB_C0(eax));
3336 3375
3337 3376 case 721:
3338 3377 #if defined(__amd64)
3339 3378 return (cpi->cpi_family == 0x10 || cpi->cpi_family == 0x12);
3340 3379 #else
3341 3380 return (0);
3342 3381 #endif
3343 3382
3344 3383 default:
3345 3384 return (-1);
3346 3385
3347 3386 }
3348 3387 }
3349 3388
3350 3389 /*
3351 3390 * Determine if specified erratum is present via OSVW (OS Visible Workaround).
3352 3391 * Return 1 if erratum is present, 0 if not present and -1 if indeterminate.
3353 3392 */
3354 3393 int
3355 3394 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum)
3356 3395 {
3357 3396 struct cpuid_info *cpi;
3358 3397 uint_t osvwid;
3359 3398 static int osvwfeature = -1;
3360 3399 uint64_t osvwlength;
3361 3400
3362 3401
3363 3402 cpi = cpu->cpu_m.mcpu_cpi;
3364 3403
3365 3404 /* confirm OSVW supported */
3366 3405 if (osvwfeature == -1) {
3367 3406 osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW;
3368 3407 } else {
3369 3408 /* assert that osvw feature setting is consistent on all cpus */
3370 3409 ASSERT(osvwfeature ==
3371 3410 (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW));
3372 3411 }
3373 3412 if (!osvwfeature)
3374 3413 return (-1);
3375 3414
3376 3415 osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK;
3377 3416
3378 3417 switch (erratum) {
3379 3418 case 298: /* osvwid is 0 */
3380 3419 osvwid = 0;
3381 3420 if (osvwlength <= (uint64_t)osvwid) {
3382 3421 /* osvwid 0 is unknown */
3383 3422 return (-1);
3384 3423 }
3385 3424
3386 3425 /*
3387 3426 * Check the OSVW STATUS MSR to determine the state
3388 3427 * of the erratum where:
3389 3428 * 0 - fixed by HW
3390 3429 * 1 - BIOS has applied the workaround when BIOS
3391 3430 * workaround is available. (Or for other errata,
3392 3431 * OS workaround is required.)
3393 3432 * For a value of 1, caller will confirm that the
3394 3433 * erratum 298 workaround has indeed been applied by BIOS.
3395 3434 *
3396 3435 * A 1 may also be set on cpus that already have a HW fix,
3397 3436 * e.g. in a mixed-revision cpu system. Regarding erratum 298:
3398 3437 * In a multiprocessor platform, the workaround above
3399 3438 * should be applied to all processors regardless of
3400 3439 * silicon revision when an affected processor is
3401 3440 * present.
3402 3441 */
3403 3442
3404 3443 return (rdmsr(MSR_AMD_OSVW_STATUS +
3405 3444 (osvwid / OSVW_ID_CNT_PER_MSR)) &
3406 3445 (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR)));
3407 3446
3408 3447 default:
3409 3448 return (-1);
3410 3449 }
3411 3450 }
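/*
 * Illustrative sketch (hypothetical caller, not taken from this file): one
 * way a consumer might combine the OSVW result with the revision-table check
 * for erratum 298, falling back to the table when OSVW is indeterminate (-1).
 * The revision-table routine is assumed to be opteron_erratum() and the
 * handler name is made up for the example.
 */
#if 0
	int osvw = osvw_opteron_erratum(cpu, 298);

	if (osvw > 0 || (osvw == -1 && opteron_erratum(cpu, 298)))
		apply_erratum_298_workaround(cpu);	/* hypothetical */
#endif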
3412 3451
3413 3452 static const char assoc_str[] = "associativity";
3414 3453 static const char line_str[] = "line-size";
3415 3454 static const char size_str[] = "size";
3416 3455
3417 3456 static void
3418 3457 add_cache_prop(dev_info_t *devi, const char *label, const char *type,
3419 3458 uint32_t val)
3420 3459 {
3421 3460 char buf[128];
3422 3461
3423 3462 /*
3424 3463 * ndi_prop_update_int() is used because it is desirable for
3425 3464 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set.
3426 3465 */
3427 3466 if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf))
3428 3467 (void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val);
3429 3468 }
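/*
 * Note: add_cache_prop() joins its label and type arguments with a dash, so
 * the devinfo properties created by the cacheinfo walkers below end up with
 * names such as "l1-icache-size", "l2-cache-line-size" and
 * "l3-cache-associativity".
 */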
3430 3469
3431 3470 /*
3432 3471 * Intel-style cache/tlb description
3433 3472 *
3434 3473 * Standard cpuid level 2 gives a randomly ordered
3435 3474 * selection of tags that index into a table that describes
3436 3475 * cache and tlb properties.
3437 3476 */
3438 3477
3439 3478 static const char l1_icache_str[] = "l1-icache";
3440 3479 static const char l1_dcache_str[] = "l1-dcache";
3441 3480 static const char l2_cache_str[] = "l2-cache";
3442 3481 static const char l3_cache_str[] = "l3-cache";
3443 3482 static const char itlb4k_str[] = "itlb-4K";
3444 3483 static const char dtlb4k_str[] = "dtlb-4K";
3445 3484 static const char itlb2M_str[] = "itlb-2M";
3446 3485 static const char itlb4M_str[] = "itlb-4M";
3447 3486 static const char dtlb4M_str[] = "dtlb-4M";
3448 3487 static const char dtlb24_str[] = "dtlb0-2M-4M";
3449 3488 static const char itlb424_str[] = "itlb-4K-2M-4M";
3450 3489 static const char itlb24_str[] = "itlb-2M-4M";
3451 3490 static const char dtlb44_str[] = "dtlb-4K-4M";
3452 3491 static const char sl1_dcache_str[] = "sectored-l1-dcache";
3453 3492 static const char sl2_cache_str[] = "sectored-l2-cache";
3454 3493 static const char itrace_str[] = "itrace-cache";
3455 3494 static const char sl3_cache_str[] = "sectored-l3-cache";
3456 3495 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k";
3457 3496
3458 3497 static const struct cachetab {
3459 3498 uint8_t ct_code;
3460 3499 uint8_t ct_assoc;
3461 3500 uint16_t ct_line_size;
3462 3501 size_t ct_size;
3463 3502 const char *ct_label;
3464 3503 } intel_ctab[] = {
3465 3504 /*
3466 3505 * maintain descending order!
3467 3506 *
3468 3507 * Codes ignored - Reason
3469 3508 * ----------------------
3470 3509 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache
3471 3510 * f0H/f1H - Currently we do not interpret prefetch size by design
3472 3511 */
3473 3512 { 0xe4, 16, 64, 8*1024*1024, l3_cache_str},
3474 3513 { 0xe3, 16, 64, 4*1024*1024, l3_cache_str},
3475 3514 { 0xe2, 16, 64, 2*1024*1024, l3_cache_str},
3476 3515 { 0xde, 12, 64, 6*1024*1024, l3_cache_str},
3477 3516 { 0xdd, 12, 64, 3*1024*1024, l3_cache_str},
3478 3517 { 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str},
3479 3518 { 0xd8, 8, 64, 4*1024*1024, l3_cache_str},
3480 3519 { 0xd7, 8, 64, 2*1024*1024, l3_cache_str},
3481 3520 { 0xd6, 8, 64, 1*1024*1024, l3_cache_str},
3482 3521 { 0xd2, 4, 64, 2*1024*1024, l3_cache_str},
3483 3522 { 0xd1, 4, 64, 1*1024*1024, l3_cache_str},
3484 3523 { 0xd0, 4, 64, 512*1024, l3_cache_str},
3485 3524 { 0xca, 4, 0, 512, sh_l2_tlb4k_str},
3486 3525 { 0xc0, 4, 0, 8, dtlb44_str },
3487 3526 { 0xba, 4, 0, 64, dtlb4k_str },
3488 3527 { 0xb4, 4, 0, 256, dtlb4k_str },
3489 3528 { 0xb3, 4, 0, 128, dtlb4k_str },
3490 3529 { 0xb2, 4, 0, 64, itlb4k_str },
3491 3530 { 0xb0, 4, 0, 128, itlb4k_str },
3492 3531 { 0x87, 8, 64, 1024*1024, l2_cache_str},
3493 3532 { 0x86, 4, 64, 512*1024, l2_cache_str},
3494 3533 { 0x85, 8, 32, 2*1024*1024, l2_cache_str},
3495 3534 { 0x84, 8, 32, 1024*1024, l2_cache_str},
3496 3535 { 0x83, 8, 32, 512*1024, l2_cache_str},
3497 3536 { 0x82, 8, 32, 256*1024, l2_cache_str},
3498 3537 { 0x80, 8, 64, 512*1024, l2_cache_str},
3499 3538 { 0x7f, 2, 64, 512*1024, l2_cache_str},
3500 3539 { 0x7d, 8, 64, 2*1024*1024, sl2_cache_str},
3501 3540 { 0x7c, 8, 64, 1024*1024, sl2_cache_str},
3502 3541 { 0x7b, 8, 64, 512*1024, sl2_cache_str},
3503 3542 { 0x7a, 8, 64, 256*1024, sl2_cache_str},
3504 3543 { 0x79, 8, 64, 128*1024, sl2_cache_str},
3505 3544 { 0x78, 8, 64, 1024*1024, l2_cache_str},
3506 3545 { 0x73, 8, 0, 64*1024, itrace_str},
3507 3546 { 0x72, 8, 0, 32*1024, itrace_str},
3508 3547 { 0x71, 8, 0, 16*1024, itrace_str},
3509 3548 { 0x70, 8, 0, 12*1024, itrace_str},
3510 3549 { 0x68, 4, 64, 32*1024, sl1_dcache_str},
3511 3550 { 0x67, 4, 64, 16*1024, sl1_dcache_str},
3512 3551 { 0x66, 4, 64, 8*1024, sl1_dcache_str},
3513 3552 { 0x60, 8, 64, 16*1024, sl1_dcache_str},
3514 3553 { 0x5d, 0, 0, 256, dtlb44_str},
3515 3554 { 0x5c, 0, 0, 128, dtlb44_str},
3516 3555 { 0x5b, 0, 0, 64, dtlb44_str},
3517 3556 { 0x5a, 4, 0, 32, dtlb24_str},
3518 3557 { 0x59, 0, 0, 16, dtlb4k_str},
3519 3558 { 0x57, 4, 0, 16, dtlb4k_str},
3520 3559 { 0x56, 4, 0, 16, dtlb4M_str},
3521 3560 { 0x55, 0, 0, 7, itlb24_str},
3522 3561 { 0x52, 0, 0, 256, itlb424_str},
3523 3562 { 0x51, 0, 0, 128, itlb424_str},
3524 3563 { 0x50, 0, 0, 64, itlb424_str},
3525 3564 { 0x4f, 0, 0, 32, itlb4k_str},
3526 3565 { 0x4e, 24, 64, 6*1024*1024, l2_cache_str},
3527 3566 { 0x4d, 16, 64, 16*1024*1024, l3_cache_str},
3528 3567 { 0x4c, 12, 64, 12*1024*1024, l3_cache_str},
3529 3568 { 0x4b, 16, 64, 8*1024*1024, l3_cache_str},
3530 3569 { 0x4a, 12, 64, 6*1024*1024, l3_cache_str},
3531 3570 { 0x49, 16, 64, 4*1024*1024, l3_cache_str},
3532 3571 { 0x48, 12, 64, 3*1024*1024, l2_cache_str},
3533 3572 { 0x47, 8, 64, 8*1024*1024, l3_cache_str},
3534 3573 { 0x46, 4, 64, 4*1024*1024, l3_cache_str},
3535 3574 { 0x45, 4, 32, 2*1024*1024, l2_cache_str},
3536 3575 { 0x44, 4, 32, 1024*1024, l2_cache_str},
3537 3576 { 0x43, 4, 32, 512*1024, l2_cache_str},
3538 3577 { 0x42, 4, 32, 256*1024, l2_cache_str},
3539 3578 { 0x41, 4, 32, 128*1024, l2_cache_str},
3540 3579 { 0x3e, 4, 64, 512*1024, sl2_cache_str},
3541 3580 { 0x3d, 6, 64, 384*1024, sl2_cache_str},
3542 3581 { 0x3c, 4, 64, 256*1024, sl2_cache_str},
3543 3582 { 0x3b, 2, 64, 128*1024, sl2_cache_str},
3544 3583 { 0x3a, 6, 64, 192*1024, sl2_cache_str},
3545 3584 { 0x39, 4, 64, 128*1024, sl2_cache_str},
3546 3585 { 0x30, 8, 64, 32*1024, l1_icache_str},
3547 3586 { 0x2c, 8, 64, 32*1024, l1_dcache_str},
3548 3587 { 0x29, 8, 64, 4096*1024, sl3_cache_str},
3549 3588 { 0x25, 8, 64, 2048*1024, sl3_cache_str},
3550 3589 { 0x23, 8, 64, 1024*1024, sl3_cache_str},
3551 3590 { 0x22, 4, 64, 512*1024, sl3_cache_str},
3552 3591 { 0x0e, 6, 64, 24*1024, l1_dcache_str},
3553 3592 { 0x0d, 4, 32, 16*1024, l1_dcache_str},
3554 3593 { 0x0c, 4, 32, 16*1024, l1_dcache_str},
3555 3594 { 0x0b, 4, 0, 4, itlb4M_str},
3556 3595 { 0x0a, 2, 32, 8*1024, l1_dcache_str},
3557 3596 { 0x08, 4, 32, 16*1024, l1_icache_str},
3558 3597 { 0x06, 4, 32, 8*1024, l1_icache_str},
3559 3598 { 0x05, 4, 0, 32, dtlb4M_str},
3560 3599 { 0x04, 4, 0, 8, dtlb4M_str},
3561 3600 { 0x03, 4, 0, 64, dtlb4k_str},
3562 3601 { 0x02, 4, 0, 2, itlb4M_str},
3563 3602 { 0x01, 4, 0, 32, itlb4k_str},
3564 3603 { 0 }
3565 3604 };
3566 3605
3567 3606 static const struct cachetab cyrix_ctab[] = {
3568 3607 { 0x70, 4, 0, 32, "tlb-4K" },
3569 3608 { 0x80, 4, 16, 16*1024, "l1-cache" },
3570 3609 { 0 }
3571 3610 };
3572 3611
3573 3612 /*
3574 3613 * Search a cache table for a matching entry
3575 3614 */
3576 3615 static const struct cachetab *
3577 3616 find_cacheent(const struct cachetab *ct, uint_t code)
3578 3617 {
3579 3618 if (code != 0) {
3580 3619 for (; ct->ct_code != 0; ct++)
3581 3620 if (ct->ct_code <= code)
3582 3621 break;
3583 3622 if (ct->ct_code == code)
3584 3623 return (ct);
3585 3624 }
3586 3625 return (NULL);
3587 3626 }
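/*
 * Note: because intel_ctab[] is kept in descending ct_code order, the scan
 * above stops at the first entry whose code is <= the descriptor byte and
 * then matches only on equality. For example, descriptor 0x2c resolves to
 * the 32KB 8-way "l1-dcache" entry, while a descriptor absent from the table
 * (say 0x31) falls through and find_cacheent() returns NULL.
 */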
3588 3627
3589 3628 /*
3590 3629 * Populate cachetab entry with L2 or L3 cache-information using
3591 3630 * cpuid function 4. This function is called from intel_walk_cacheinfo()
3592 3631 * when descriptor 0x49 is encountered. It returns 0 if no such cache
3593 3632 * information is found.
3594 3633 */
3595 3634 static int
3596 3635 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi)
3597 3636 {
3598 3637 uint32_t level, i;
3599 3638 int ret = 0;
3600 3639
3601 3640 for (i = 0; i < cpi->cpi_std_4_size; i++) {
3602 3641 level = CPI_CACHE_LVL(cpi->cpi_std_4[i]);
3603 3642
3604 3643 if (level == 2 || level == 3) {
3605 3644 ct->ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1;
3606 3645 ct->ct_line_size =
3607 3646 CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1;
3608 3647 ct->ct_size = ct->ct_assoc *
3609 3648 (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) *
3610 3649 ct->ct_line_size *
3611 3650 (cpi->cpi_std_4[i]->cp_ecx + 1);
3612 3651
3613 3652 if (level == 2) {
3614 3653 ct->ct_label = l2_cache_str;
3615 3654 } else if (level == 3) {
3616 3655 ct->ct_label = l3_cache_str;
3617 3656 }
3618 3657 ret = 1;
3619 3658 }
3620 3659 }
3621 3660
3622 3661 return (ret);
3623 3662 }
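/*
 * Worked example of the leaf 4 size computation above:
 *
 *	size = associativity * partitions * line size * sets (ECX + 1)
 *
 * so a hypothetical 8-way level-2 cache with 1 partition, 64-byte lines and
 * ECX = 511 (512 sets) is reported as 8 * 1 * 64 * 512 = 256KB.
 */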
3624 3663
3625 3664 /*
3626 3665 * Walk the cacheinfo descriptor, applying 'func' to every valid element
3627 3666 * The walk is terminated if the walker returns non-zero.
3628 3667 */
3629 3668 static void
3630 3669 intel_walk_cacheinfo(struct cpuid_info *cpi,
3631 3670 void *arg, int (*func)(void *, const struct cachetab *))
3632 3671 {
3633 3672 const struct cachetab *ct;
3634 3673 struct cachetab des_49_ct, des_b1_ct;
3635 3674 uint8_t *dp;
3636 3675 int i;
3637 3676
3638 3677 if ((dp = cpi->cpi_cacheinfo) == NULL)
3639 3678 return;
3640 3679 for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
3641 3680 /*
3642 3681 * For overloaded descriptor 0x49 we use cpuid function 4
3643 3682 * if supported by the current processor, to create
3644 3683 * cache information.
3645 3684 * For overloaded descriptor 0xb1 we use X86_PAE flag
3646 3685 * to disambiguate the cache information.
3647 3686 */
3648 3687 if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 &&
3649 3688 intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) {
3650 3689 ct = &des_49_ct;
3651 3690 } else if (*dp == 0xb1) {
3652 3691 des_b1_ct.ct_code = 0xb1;
3653 3692 des_b1_ct.ct_assoc = 4;
3654 3693 des_b1_ct.ct_line_size = 0;
3655 3694 if (is_x86_feature(x86_featureset, X86FSET_PAE)) {
3656 3695 des_b1_ct.ct_size = 8;
3657 3696 des_b1_ct.ct_label = itlb2M_str;
3658 3697 } else {
3659 3698 des_b1_ct.ct_size = 4;
3660 3699 des_b1_ct.ct_label = itlb4M_str;
3661 3700 }
3662 3701 ct = &des_b1_ct;
3663 3702 } else {
3664 3703 if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) {
3665 3704 continue;
3666 3705 }
3667 3706 }
3668 3707
3669 3708 if (func(arg, ct) != 0) {
3670 3709 break;
3671 3710 }
3672 3711 }
3673 3712 }
3674 3713
3675 3714 /*
3676 3715 * (Like the Intel one, except for Cyrix CPUs)
3677 3716 */
3678 3717 static void
3679 3718 cyrix_walk_cacheinfo(struct cpuid_info *cpi,
3680 3719 void *arg, int (*func)(void *, const struct cachetab *))
3681 3720 {
3682 3721 const struct cachetab *ct;
3683 3722 uint8_t *dp;
3684 3723 int i;
3685 3724
3686 3725 if ((dp = cpi->cpi_cacheinfo) == NULL)
3687 3726 return;
3688 3727 for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
3689 3728 /*
3690 3729 * Search Cyrix-specific descriptor table first ..
3691 3730 */
3692 3731 if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) {
3693 3732 if (func(arg, ct) != 0)
3694 3733 break;
3695 3734 continue;
3696 3735 }
3697 3736 /*
3698 3737 * .. else fall back to the Intel one
3699 3738 */
3700 3739 if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) {
3701 3740 if (func(arg, ct) != 0)
3702 3741 break;
3703 3742 continue;
3704 3743 }
3705 3744 }
3706 3745 }
3707 3746
3708 3747 /*
3709 3748 * A cacheinfo walker that adds associativity, line-size, and size properties
3710 3749 * to the devinfo node it is passed as an argument.
3711 3750 */
3712 3751 static int
3713 3752 add_cacheent_props(void *arg, const struct cachetab *ct)
3714 3753 {
3715 3754 dev_info_t *devi = arg;
3716 3755
3717 3756 add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc);
3718 3757 if (ct->ct_line_size != 0)
3719 3758 add_cache_prop(devi, ct->ct_label, line_str,
3720 3759 ct->ct_line_size);
3721 3760 add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size);
3722 3761 return (0);
3723 3762 }
3724 3763
3725 3764
3726 3765 static const char fully_assoc[] = "fully-associative?";
3727 3766
3728 3767 /*
3729 3768 * AMD style cache/tlb description
3730 3769 *
3731 3770 * Extended functions 5 and 6 directly describe properties of
3732 3771 * tlbs and various cache levels.
3733 3772 */
3734 3773 static void
3735 3774 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc)
3736 3775 {
3737 3776 switch (assoc) {
3738 3777 case 0: /* reserved; ignore */
3739 3778 break;
3740 3779 default:
3741 3780 add_cache_prop(devi, label, assoc_str, assoc);
3742 3781 break;
3743 3782 case 0xff:
3744 3783 add_cache_prop(devi, label, fully_assoc, 1);
3745 3784 break;
3746 3785 }
3747 3786 }
3748 3787
3749 3788 static void
3750 3789 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
3751 3790 {
3752 3791 if (size == 0)
3753 3792 return;
3754 3793 add_cache_prop(devi, label, size_str, size);
3755 3794 add_amd_assoc(devi, label, assoc);
3756 3795 }
3757 3796
3758 3797 static void
3759 3798 add_amd_cache(dev_info_t *devi, const char *label,
3760 3799 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
3761 3800 {
3762 3801 if (size == 0 || line_size == 0)
3763 3802 return;
3764 3803 add_amd_assoc(devi, label, assoc);
3765 3804 /*
3766 3805 * Most AMD parts have a sectored cache. Multiple cache lines are
3767 3806 * associated with each tag. A sector consists of all cache lines
3768 3807 * associated with a tag. For example, the AMD K6-III has a sector
3769 3808 * size of 2 cache lines per tag.
3770 3809 */
3771 3810 if (lines_per_tag != 0)
3772 3811 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
3773 3812 add_cache_prop(devi, label, line_str, line_size);
3774 3813 add_cache_prop(devi, label, size_str, size * 1024);
3775 3814 }
3776 3815
3777 3816 static void
3778 3817 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc)
3779 3818 {
3780 3819 switch (assoc) {
3781 3820 case 0: /* off */
3782 3821 break;
3783 3822 case 1:
3784 3823 case 2:
3785 3824 case 4:
3786 3825 add_cache_prop(devi, label, assoc_str, assoc);
3787 3826 break;
3788 3827 case 6:
3789 3828 add_cache_prop(devi, label, assoc_str, 8);
3790 3829 break;
3791 3830 case 8:
3792 3831 add_cache_prop(devi, label, assoc_str, 16);
3793 3832 break;
3794 3833 case 0xf:
3795 3834 add_cache_prop(devi, label, fully_assoc, 1);
3796 3835 break;
3797 3836 default: /* reserved; ignore */
3798 3837 break;
3799 3838 }
3800 3839 }
3801 3840
3802 3841 static void
3803 3842 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
3804 3843 {
3805 3844 if (size == 0 || assoc == 0)
3806 3845 return;
3807 3846 add_amd_l2_assoc(devi, label, assoc);
3808 3847 add_cache_prop(devi, label, size_str, size);
3809 3848 }
3810 3849
3811 3850 static void
3812 3851 add_amd_l2_cache(dev_info_t *devi, const char *label,
3813 3852 uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
3814 3853 {
3815 3854 if (size == 0 || assoc == 0 || line_size == 0)
3816 3855 return;
3817 3856 add_amd_l2_assoc(devi, label, assoc);
3818 3857 if (lines_per_tag != 0)
3819 3858 add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
3820 3859 add_cache_prop(devi, label, line_str, line_size);
3821 3860 add_cache_prop(devi, label, size_str, size * 1024);
3822 3861 }
3823 3862
3824 3863 static void
3825 3864 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi)
3826 3865 {
3827 3866 struct cpuid_regs *cp;
3828 3867
3829 3868 if (cpi->cpi_xmaxeax < 0x80000005)
3830 3869 return;
3831 3870 cp = &cpi->cpi_extd[5];
3832 3871
3833 3872 /*
3834 3873 * 4M/2M L1 TLB configuration
3835 3874 *
3836 3875 * We report the size for 2M pages because AMD uses two
3837 3876 * TLB entries for one 4M page.
3838 3877 */
3839 3878 add_amd_tlb(devi, "dtlb-2M",
3840 3879 BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16));
3841 3880 add_amd_tlb(devi, "itlb-2M",
3842 3881 BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0));
3843 3882
3844 3883 /*
3845 3884 * 4K L1 TLB configuration
3846 3885 */
3847 3886
3848 3887 switch (cpi->cpi_vendor) {
3849 3888 uint_t nentries;
3850 3889 case X86_VENDOR_TM:
3851 3890 if (cpi->cpi_family >= 5) {
3852 3891 /*
3853 3892 * Crusoe processors have 256 TLB entries, but
3854 3893 * cpuid data format constrains them to only
3855 3894 * reporting 255 of them.
3856 3895 */
3857 3896 if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255)
3858 3897 nentries = 256;
3859 3898 /*
3860 3899 * Crusoe processors also have a unified TLB
3861 3900 */
3862 3901 add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24),
3863 3902 nentries);
3864 3903 break;
3865 3904 }
3866 3905 /*FALLTHROUGH*/
3867 3906 default:
3868 3907 add_amd_tlb(devi, itlb4k_str,
3869 3908 BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16));
3870 3909 add_amd_tlb(devi, dtlb4k_str,
3871 3910 BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0));
3872 3911 break;
3873 3912 }
3874 3913
3875 3914 /*
3876 3915 * data L1 cache configuration
3877 3916 */
3878 3917
3879 3918 add_amd_cache(devi, l1_dcache_str,
3880 3919 BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16),
3881 3920 BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0));
3882 3921
3883 3922 /*
3884 3923 * code L1 cache configuration
3885 3924 */
3886 3925
3887 3926 add_amd_cache(devi, l1_icache_str,
3888 3927 BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16),
3889 3928 BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0));
3890 3929
3891 3930 if (cpi->cpi_xmaxeax < 0x80000006)
3892 3931 return;
3893 3932 cp = &cpi->cpi_extd[6];
3894 3933
3895 3934 /* Check for a unified L2 TLB for large pages */
3896 3935
3897 3936 if (BITX(cp->cp_eax, 31, 16) == 0)
3898 3937 add_amd_l2_tlb(devi, "l2-tlb-2M",
3899 3938 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
3900 3939 else {
3901 3940 add_amd_l2_tlb(devi, "l2-dtlb-2M",
3902 3941 BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
3903 3942 add_amd_l2_tlb(devi, "l2-itlb-2M",
3904 3943 BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
3905 3944 }
3906 3945
3907 3946 /* Check for a unified L2 TLB for 4K pages */
3908 3947
3909 3948 if (BITX(cp->cp_ebx, 31, 16) == 0) {
3910 3949 add_amd_l2_tlb(devi, "l2-tlb-4K",
3911 3950 BITX(cp->cp_ebx, 15, 12), BITX(cp->cp_ebx, 11, 0));
3912 3951 } else {
3913 3952 add_amd_l2_tlb(devi, "l2-dtlb-4K",
3914 3953 BITX(cp->cp_ebx, 31, 28), BITX(cp->cp_ebx, 27, 16));
3915 3954 add_amd_l2_tlb(devi, "l2-itlb-4K",
3916 3955 BITX(cp->cp_ebx, 15, 12), BITX(cp->cp_ebx, 11, 0));
3917 3956 }
3918 3957
3919 3958 add_amd_l2_cache(devi, l2_cache_str,
3920 3959 BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12),
3921 3960 BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0));
3922 3961 }
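/*
 * Worked example (hypothetical register value): the L1 data cache fields of
 * extended function 5 are unpacked above as ECX[31:24] size in KB,
 * ECX[23:16] associativity, ECX[15:8] lines per tag and ECX[7:0] line size,
 * so a raw ECX of 0x40020140 would be reported as a 64KB, 2-way "l1-dcache"
 * with 1 line per tag and 64-byte lines.
 */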
3923 3962
3924 3963 /*
3925 3964 * There are two basic ways that the x86 world describes its cache
3926 3965 * and tlb architecture - Intel's way and AMD's way.
3927 3966 *
3928 3967 * Return which flavor of cache architecture we should use
3929 3968 */
3930 3969 static int
3931 3970 x86_which_cacheinfo(struct cpuid_info *cpi)
3932 3971 {
3933 3972 switch (cpi->cpi_vendor) {
3934 3973 case X86_VENDOR_Intel:
3935 3974 if (cpi->cpi_maxeax >= 2)
3936 3975 return (X86_VENDOR_Intel);
3937 3976 break;
3938 3977 case X86_VENDOR_AMD:
3939 3978 /*
3940 3979 * The K5 model 1 was the first part from AMD that reported
3941 3980 * cache sizes via extended cpuid functions.
3942 3981 */
3943 3982 if (cpi->cpi_family > 5 ||
3944 3983 (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
3945 3984 return (X86_VENDOR_AMD);
3946 3985 break;
3947 3986 case X86_VENDOR_TM:
3948 3987 if (cpi->cpi_family >= 5)
3949 3988 return (X86_VENDOR_AMD);
3950 3989 /*FALLTHROUGH*/
3951 3990 default:
3952 3991 /*
3953 3992 * If they have extended CPU data for 0x80000005
3954 3993 * then we assume they have AMD-format cache
3955 3994 * information.
3956 3995 *
3957 3996 * If not, and the vendor happens to be Cyrix,
3958 3997 * then try our Cyrix-specific handler.
3959 3998 *
3960 3999 * If we're not Cyrix, then assume we're using Intel's
3961 4000 * table-driven format instead.
3962 4001 */
3963 4002 if (cpi->cpi_xmaxeax >= 0x80000005)
3964 4003 return (X86_VENDOR_AMD);
3965 4004 else if (cpi->cpi_vendor == X86_VENDOR_Cyrix)
3966 4005 return (X86_VENDOR_Cyrix);
3967 4006 else if (cpi->cpi_maxeax >= 2)
3968 4007 return (X86_VENDOR_Intel);
3969 4008 break;
3970 4009 }
3971 4010 return (-1);
3972 4011 }
3973 4012
3974 4013 void
3975 4014 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
3976 4015 struct cpuid_info *cpi)
3977 4016 {
3978 4017 dev_info_t *cpu_devi;
3979 4018 int create;
3980 4019
3981 4020 cpu_devi = (dev_info_t *)dip;
3982 4021
3983 4022 /* device_type */
3984 4023 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
3985 4024 "device_type", "cpu");
3986 4025
3987 4026 /* reg */
3988 4027 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3989 4028 "reg", cpu_id);
3990 4029
3991 4030 /* cpu-mhz, and clock-frequency */
3992 4031 if (cpu_freq > 0) {
3993 4032 long long mul;
3994 4033
3995 4034 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3996 4035 "cpu-mhz", cpu_freq);
3997 4036 if ((mul = cpu_freq * 1000000LL) <= INT_MAX)
3998 4037 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3999 4038 "clock-frequency", (int)mul);
4000 4039 }
4001 4040
4002 4041 if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) {
4003 4042 return;
4004 4043 }
4005 4044
4006 4045 /* vendor-id */
4007 4046 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
4008 4047 "vendor-id", cpi->cpi_vendorstr);
4009 4048
4010 4049 if (cpi->cpi_maxeax == 0) {
4011 4050 return;
4012 4051 }
4013 4052
4014 4053 /*
4015 4054 * family, model, and step
4016 4055 */
4017 4056 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4018 4057 "family", CPI_FAMILY(cpi));
4019 4058 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4020 4059 "cpu-model", CPI_MODEL(cpi));
4021 4060 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4022 4061 "stepping-id", CPI_STEP(cpi));
4023 4062
4024 4063 /* type */
4025 4064 switch (cpi->cpi_vendor) {
4026 4065 case X86_VENDOR_Intel:
4027 4066 create = 1;
4028 4067 break;
4029 4068 default:
4030 4069 create = 0;
4031 4070 break;
4032 4071 }
4033 4072 if (create)
4034 4073 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4035 4074 "type", CPI_TYPE(cpi));
4036 4075
4037 4076 /* ext-family */
4038 4077 switch (cpi->cpi_vendor) {
4039 4078 case X86_VENDOR_Intel:
4040 4079 case X86_VENDOR_AMD:
4041 4080 create = cpi->cpi_family >= 0xf;
4042 4081 break;
4043 4082 default:
4044 4083 create = 0;
4045 4084 break;
4046 4085 }
4047 4086 if (create)
4048 4087 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4049 4088 "ext-family", CPI_FAMILY_XTD(cpi));
4050 4089
4051 4090 /* ext-model */
4052 4091 switch (cpi->cpi_vendor) {
4053 4092 case X86_VENDOR_Intel:
4054 4093 create = IS_EXTENDED_MODEL_INTEL(cpi);
4055 4094 break;
4056 4095 case X86_VENDOR_AMD:
4057 4096 create = CPI_FAMILY(cpi) == 0xf;
4058 4097 break;
4059 4098 default:
4060 4099 create = 0;
4061 4100 break;
4062 4101 }
4063 4102 if (create)
4064 4103 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4065 4104 "ext-model", CPI_MODEL_XTD(cpi));
4066 4105
4067 4106 /* generation */
4068 4107 switch (cpi->cpi_vendor) {
4069 4108 case X86_VENDOR_AMD:
4070 4109 /*
4071 4110 * AMD K5 model 1 was the first part to support this
4072 4111 */
4073 4112 create = cpi->cpi_xmaxeax >= 0x80000001;
4074 4113 break;
4075 4114 default:
4076 4115 create = 0;
4077 4116 break;
4078 4117 }
4079 4118 if (create)
4080 4119 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4081 4120 "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8));
4082 4121
4083 4122 /* brand-id */
4084 4123 switch (cpi->cpi_vendor) {
4085 4124 case X86_VENDOR_Intel:
4086 4125 /*
4087 4126 * brand id first appeared on Pentium III Xeon model 8 and
4088 4127 * Celeron model 8 processors, and on Opteron
4089 4128 */
4090 4129 create = cpi->cpi_family > 6 ||
4091 4130 (cpi->cpi_family == 6 && cpi->cpi_model >= 8);
4092 4131 break;
4093 4132 case X86_VENDOR_AMD:
4094 4133 create = cpi->cpi_family >= 0xf;
4095 4134 break;
4096 4135 default:
4097 4136 create = 0;
4098 4137 break;
4099 4138 }
4100 4139 if (create && cpi->cpi_brandid != 0) {
4101 4140 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4102 4141 "brand-id", cpi->cpi_brandid);
4103 4142 }
4104 4143
4105 4144 /* chunks, and apic-id */
4106 4145 switch (cpi->cpi_vendor) {
4107 4146 /*
4108 4147 * first available on Pentium IV and Opteron (K8)
4109 4148 */
4110 4149 case X86_VENDOR_Intel:
4111 4150 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
4112 4151 break;
4113 4152 case X86_VENDOR_AMD:
4114 4153 create = cpi->cpi_family >= 0xf;
4115 4154 break;
4116 4155 default:
4117 4156 create = 0;
4118 4157 break;
4119 4158 }
4120 4159 if (create) {
4121 4160 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4122 4161 "chunks", CPI_CHUNKS(cpi));
4123 4162 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4124 4163 "apic-id", cpi->cpi_apicid);
4125 4164 if (cpi->cpi_chipid >= 0) {
4126 4165 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4127 4166 "chip#", cpi->cpi_chipid);
4128 4167 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4129 4168 "clog#", cpi->cpi_clogid);
4130 4169 }
4131 4170 }
4132 4171
4133 4172 /* cpuid-features */
4134 4173 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4135 4174 "cpuid-features", CPI_FEATURES_EDX(cpi));
4136 4175
4137 4176
4138 4177 /* cpuid-features-ecx */
4139 4178 switch (cpi->cpi_vendor) {
4140 4179 case X86_VENDOR_Intel:
4141 4180 create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
4142 4181 break;
4143 4182 case X86_VENDOR_AMD:
4144 4183 create = cpi->cpi_family >= 0xf;
4145 4184 break;
4146 4185 default:
4147 4186 create = 0;
4148 4187 break;
4149 4188 }
4150 4189 if (create)
4151 4190 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4152 4191 "cpuid-features-ecx", CPI_FEATURES_ECX(cpi));
4153 4192
4154 4193 /* ext-cpuid-features */
4155 4194 switch (cpi->cpi_vendor) {
4156 4195 case X86_VENDOR_Intel:
4157 4196 case X86_VENDOR_AMD:
4158 4197 case X86_VENDOR_Cyrix:
4159 4198 case X86_VENDOR_TM:
4160 4199 case X86_VENDOR_Centaur:
4161 4200 create = cpi->cpi_xmaxeax >= 0x80000001;
4162 4201 break;
4163 4202 default:
4164 4203 create = 0;
4165 4204 break;
4166 4205 }
4167 4206 if (create) {
4168 4207 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4169 4208 "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi));
4170 4209 (void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4171 4210 "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi));
4172 4211 }
4173 4212
4174 4213 /*
4175 4214 * Brand String first appeared in Intel Pentium IV, AMD K5
4176 4215 * model 1, and Cyrix GXm. On earlier models we try and
4177 4216 * simulate something similar .. so this string should always
4178 4217 * say -something- about the processor, however lame.
4179 4218 */
4180 4219 (void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
4181 4220 "brand-string", cpi->cpi_brandstr);
4182 4221
4183 4222 /*
4184 4223 * Finally, cache and tlb information
4185 4224 */
4186 4225 switch (x86_which_cacheinfo(cpi)) {
4187 4226 case X86_VENDOR_Intel:
4188 4227 intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
4189 4228 break;
4190 4229 case X86_VENDOR_Cyrix:
4191 4230 cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
4192 4231 break;
4193 4232 case X86_VENDOR_AMD:
4194 4233 amd_cache_info(cpi, cpu_devi);
4195 4234 break;
4196 4235 default:
4197 4236 break;
4198 4237 }
4199 4238 }
4200 4239
4201 4240 struct l2info {
4202 4241 int *l2i_csz;
4203 4242 int *l2i_lsz;
4204 4243 int *l2i_assoc;
4205 4244 int l2i_ret;
4206 4245 };
4207 4246
4208 4247 /*
4209 4248 * A cacheinfo walker that fetches the size, line-size and associativity
4210 4249 * of the L2 cache
4211 4250 */
4212 4251 static int
4213 4252 intel_l2cinfo(void *arg, const struct cachetab *ct)
4214 4253 {
4215 4254 struct l2info *l2i = arg;
4216 4255 int *ip;
4217 4256
4218 4257 if (ct->ct_label != l2_cache_str &&
4219 4258 ct->ct_label != sl2_cache_str)
4220 4259 return (0); /* not an L2 -- keep walking */
4221 4260
4222 4261 if ((ip = l2i->l2i_csz) != NULL)
4223 4262 *ip = ct->ct_size;
4224 4263 if ((ip = l2i->l2i_lsz) != NULL)
4225 4264 *ip = ct->ct_line_size;
4226 4265 if ((ip = l2i->l2i_assoc) != NULL)
4227 4266 *ip = ct->ct_assoc;
4228 4267 l2i->l2i_ret = ct->ct_size;
4229 4268 return (1); /* was an L2 -- terminate walk */
4230 4269 }
4231 4270
4232 4271 /*
4233 4272 * AMD L2/L3 Cache and TLB Associativity Field Definition:
4234 4273 *
4235 4274 * Unlike the associativity for the L1 cache and tlb where the 8 bit
4236 4275 * value is the associativity, the associativity for the L2 cache and
4237 4276 * tlb is encoded in the following table. The 4 bit L2 value serves as
4238 4277 * an index into the amd_afd[] array to determine the associativity.
4239 4278 * -1 is undefined. 0 is fully associative.
4240 4279 */
4241 4280
4242 4281 static int amd_afd[] =
4243 4282 {-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0};
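/*
 * Worked example (hypothetical register value): with the table above, an
 * extended function 6 ECX of 0x02006140 decodes as ECX[31:16] = 0x0200
 * (512KB), ECX[15:12] = 6 -> amd_afd[6] = 8-way, ECX[11:8] = 1 line per tag
 * and ECX[7:0] = 0x40 (64-byte lines).
 */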
4244 4283
4245 4284 static void
4246 4285 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i)
4247 4286 {
4248 4287 struct cpuid_regs *cp;
4249 4288 uint_t size, assoc;
4250 4289 int i;
4251 4290 int *ip;
4252 4291
4253 4292 if (cpi->cpi_xmaxeax < 0x80000006)
4254 4293 return;
4255 4294 cp = &cpi->cpi_extd[6];
4256 4295
4257 4296 if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 &&
4258 4297 (size = BITX(cp->cp_ecx, 31, 16)) != 0) {
4259 4298 uint_t cachesz = size * 1024;
4260 4299 assoc = amd_afd[i];
4261 4300
4262 4301 ASSERT(assoc != -1);
4263 4302
4264 4303 if ((ip = l2i->l2i_csz) != NULL)
4265 4304 *ip = cachesz;
4266 4305 if ((ip = l2i->l2i_lsz) != NULL)
4267 4306 *ip = BITX(cp->cp_ecx, 7, 0);
4268 4307 if ((ip = l2i->l2i_assoc) != NULL)
4269 4308 *ip = assoc;
4270 4309 l2i->l2i_ret = cachesz;
4271 4310 }
4272 4311 }
4273 4312
4274 4313 int
4275 4314 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc)
4276 4315 {
4277 4316 struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4278 4317 struct l2info __l2info, *l2i = &__l2info;
4279 4318
4280 4319 l2i->l2i_csz = csz;
4281 4320 l2i->l2i_lsz = lsz;
4282 4321 l2i->l2i_assoc = assoc;
4283 4322 l2i->l2i_ret = -1;
4284 4323
4285 4324 switch (x86_which_cacheinfo(cpi)) {
4286 4325 case X86_VENDOR_Intel:
4287 4326 intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
4288 4327 break;
4289 4328 case X86_VENDOR_Cyrix:
4290 4329 cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
4291 4330 break;
4292 4331 case X86_VENDOR_AMD:
4293 4332 amd_l2cacheinfo(cpi, l2i);
4294 4333 break;
4295 4334 default:
4296 4335 break;
4297 4336 }
4298 4337 return (l2i->l2i_ret);
4299 4338 }
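/*
 * Illustrative sketch (hypothetical caller, not taken from this file): any
 * of the output pointers may be NULL if the caller only wants the return
 * value, which is the L2 size in bytes or -1 if it could not be determined.
 */
#if 0
	int csz, lsz, assoc;

	if (getl2cacheinfo(CPU, &csz, &lsz, &assoc) > 0)
		cmn_err(CE_CONT, "L2: %d bytes, %d-byte lines, %d-way\n",
		    csz, lsz, assoc);
#endif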
4300 4339
4301 4340 #if !defined(__xpv)
4302 4341
4303 4342 uint32_t *
4304 4343 cpuid_mwait_alloc(cpu_t *cpu)
4305 4344 {
4306 4345 uint32_t *ret;
4307 4346 size_t mwait_size;
4308 4347
4309 4348 ASSERT(cpuid_checkpass(CPU, 2));
4310 4349
4311 4350 mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max;
4312 4351 if (mwait_size == 0)
4313 4352 return (NULL);
4314 4353
4315 4354 /*
4316 4355 * kmem_alloc() returns cache line size aligned data for mwait_size
4317 4356 * allocations. mwait_size is currently cache line sized. Neither
4318 4357 * of these implementation details is guaranteed to be true in the
4319 4358 * future.
4320 4359 *
4321 4360 * First try allocating mwait_size as kmem_alloc() currently returns
4322 4361 * correctly aligned memory. If kmem_alloc() does not return
4323 4362 * mwait_size-aligned memory, then allocate twice the size and round the pointer up.
4324 4363 *
4325 4364 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we
4326 4365 * decide to free this memory.
4327 4366 */
4328 4367 ret = kmem_zalloc(mwait_size, KM_SLEEP);
4329 4368 if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) {
4330 4369 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
4331 4370 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size;
4332 4371 *ret = MWAIT_RUNNING;
4333 4372 return (ret);
4334 4373 } else {
4335 4374 kmem_free(ret, mwait_size);
4336 4375 ret = kmem_zalloc(mwait_size * 2, KM_SLEEP);
4337 4376 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
4338 4377 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2;
4339 4378 ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size);
4340 4379 *ret = MWAIT_RUNNING;
4341 4380 return (ret);
4342 4381 }
4343 4382 }
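/*
 * Worked example (hypothetical numbers): if mon_max is 64 and kmem_zalloc()
 * happens to return a 64-byte aligned buffer, that buffer is used directly;
 * otherwise a 128-byte buffer is allocated and the pointer handed back to
 * the caller is rounded up to the next 64-byte boundary inside it. Either
 * way buf_actual/size_actual record the raw allocation, which is what
 * cpuid_mwait_free() releases.
 */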
4344 4383
4345 4384 void
4346 4385 cpuid_mwait_free(cpu_t *cpu)
4347 4386 {
4348 4387 if (cpu->cpu_m.mcpu_cpi == NULL) {
4349 4388 return;
4350 4389 }
4351 4390
4352 4391 if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL &&
4353 4392 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) {
4354 4393 kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual,
4355 4394 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual);
4356 4395 }
4357 4396
4358 4397 cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL;
4359 4398 cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0;
4360 4399 }
4361 4400
4362 4401 void
4363 4402 patch_tsc_read(int flag)
4364 4403 {
4365 4404 size_t cnt;
4366 4405
4367 4406 switch (flag) {
4368 4407 case X86_NO_TSC:
4369 4408 cnt = &_no_rdtsc_end - &_no_rdtsc_start;
4370 4409 (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt);
4371 4410 break;
4372 4411 case X86_HAVE_TSCP:
4373 4412 cnt = &_tscp_end - &_tscp_start;
4374 4413 (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt);
4375 4414 break;
4376 4415 case X86_TSC_MFENCE:
4377 4416 cnt = &_tsc_mfence_end - &_tsc_mfence_start;
4378 4417 (void) memcpy((void *)tsc_read,
4379 4418 (void *)&_tsc_mfence_start, cnt);
4380 4419 break;
4381 4420 case X86_TSC_LFENCE:
4382 4421 cnt = &_tsc_lfence_end - &_tsc_lfence_start;
4383 4422 (void) memcpy((void *)tsc_read,
4384 4423 (void *)&_tsc_lfence_start, cnt);
4385 4424 break;
4386 4425 default:
4387 4426 break;
4388 4427 }
4389 4428 }
4390 4429
4391 4430 int
4392 4431 cpuid_deep_cstates_supported(void)
4393 4432 {
4394 4433 struct cpuid_info *cpi;
4395 4434 struct cpuid_regs regs;
4396 4435
4397 4436 ASSERT(cpuid_checkpass(CPU, 1));
4398 4437
4399 4438 cpi = CPU->cpu_m.mcpu_cpi;
4400 4439
4401 4440 if (!is_x86_feature(x86_featureset, X86FSET_CPUID))
4402 4441 return (0);
4403 4442
4404 4443 switch (cpi->cpi_vendor) {
4405 4444 case X86_VENDOR_Intel:
4406 4445 if (cpi->cpi_xmaxeax < 0x80000007)
4407 4446 return (0);
4408 4447
4409 4448 /*
4410 4449 * Does the TSC run at a constant rate in all ACPI C-states?
4411 4450 */
4412 4451 regs.cp_eax = 0x80000007;
4413 4452 (void) __cpuid_insn(&regs);
4414 4453 return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE);
4415 4454
4416 4455 default:
4417 4456 return (0);
4418 4457 }
4419 4458 }
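/*
 * Note: CPUID_TSC_CSTATE_INVARIANCE corresponds to the invariant-TSC bit in
 * CPUID.80000007H:EDX (bit 8, mask 0x100), the same bit the erratum 6336786
 * check earlier in this file tests on AMD family 0xf parts.
 */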
4420 4459
4421 4460 #endif /* !__xpv */
4422 4461
4423 4462 void
4424 4463 post_startup_cpu_fixups(void)
4425 4464 {
4426 4465 #ifndef __xpv
4427 4466 /*
4428 4467 * Some AMD processors support C1E state. Entering this state will
4429 4468 * cause the local APIC timer to stop, which we can't deal with at
4430 4469 * this time.
4431 4470 */
4432 4471 if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) {
4433 4472 on_trap_data_t otd;
4434 4473 uint64_t reg;
4435 4474
4436 4475 if (!on_trap(&otd, OT_DATA_ACCESS)) {
4437 4476 reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT);
4438 4477 /* Disable C1E state if it is enabled by BIOS */
4439 4478 if ((reg >> AMD_ACTONCMPHALT_SHIFT) &
4440 4479 AMD_ACTONCMPHALT_MASK) {
4441 4480 reg &= ~(AMD_ACTONCMPHALT_MASK <<
4442 4481 AMD_ACTONCMPHALT_SHIFT);
4443 4482 wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg);
4444 4483 }
4445 4484 }
4446 4485 no_trap();
4447 4486 }
4448 4487 #endif /* !__xpv */
4449 4488 }
4450 4489
4451 4490 /*
4452 4491 * Setup necessary registers to enable XSAVE feature on this processor.
4453 4492 * This function needs to be called early enough, so that no xsave/xrstor
4454 4493 * ops will execute on the processor before the MSRs are properly set up.
4455 4494 *
4456 4495 * Current implementation has the following assumption:
4457 4496 * - cpuid_pass1() is done, so that X86 features are known.
4458 4497 * - fpu_probe() is done, so that fp_save_mech is chosen.
4459 4498 */
4460 4499 void
4461 4500 xsave_setup_msr(cpu_t *cpu)
4462 4501 {
4463 4502 ASSERT(fp_save_mech == FP_XSAVE);
4464 4503 ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
4465 4504
4466 4505 /* Enable OSXSAVE in CR4. */
4467 4506 setcr4(getcr4() | CR4_OSXSAVE);
4468 4507 /*
4469 4508 * Update SW copy of ECX, so that /dev/cpu/self/cpuid will report
4470 4509 * correct value.
4471 4510 */
4472 4511 cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE;
4473 4512 setup_xfem();
4474 4513 }
4475 4514
4476 4515 /*
4477 4516 * Starting with the Westmere processor the local
4478 4517 * APIC timer will continue running in all C-states,
4479 4518 * including the deepest C-states.
4480 4519 */
4481 4520 int
4482 4521 cpuid_arat_supported(void)
4483 4522 {
4484 4523 struct cpuid_info *cpi;
4485 4524 struct cpuid_regs regs;
4486 4525
4487 4526 ASSERT(cpuid_checkpass(CPU, 1));
4488 4527 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
4489 4528
4490 4529 cpi = CPU->cpu_m.mcpu_cpi;
4491 4530
4492 4531 switch (cpi->cpi_vendor) {
4493 4532 case X86_VENDOR_Intel:
4494 4533 /*
4495 4534 * Always-running Local APIC Timer is
4496 4535 * indicated by CPUID.6.EAX[2].
4497 4536 */
4498 4537 if (cpi->cpi_maxeax >= 6) {
4499 4538 regs.cp_eax = 6;
4500 4539 (void) cpuid_insn(NULL, &regs);
4501 4540 return (regs.cp_eax & CPUID_CSTATE_ARAT);
4502 4541 } else {
4503 4542 return (0);
4504 4543 }
4505 4544 default:
4506 4545 return (0);
4507 4546 }
4508 4547 }
4509 4548
4510 4549 /*
4511 4550 * Check support for Intel ENERGY_PERF_BIAS feature
4512 4551 */
4513 4552 int
4514 4553 cpuid_iepb_supported(struct cpu *cp)
4515 4554 {
4516 4555 struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi;
4517 4556 struct cpuid_regs regs;
4518 4557
4519 4558 ASSERT(cpuid_checkpass(cp, 1));
4520 4559
4521 4560 if (!(is_x86_feature(x86_featureset, X86FSET_CPUID)) ||
4522 4561 !(is_x86_feature(x86_featureset, X86FSET_MSR))) {
4523 4562 return (0);
4524 4563 }
4525 4564
4526 4565 /*
4527 4566 * Intel ENERGY_PERF_BIAS MSR is indicated by
4528 4567 * capability bit CPUID.6.ECX.3
4529 4568 */
4530 4569 if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6))
4531 4570 return (0);
4532 4571
4533 4572 regs.cp_eax = 0x6;
4534 4573 (void) cpuid_insn(NULL, &regs);
4535 4574 return (regs.cp_ecx & CPUID_EPB_SUPPORT);
4536 4575 }
4537 4576
4538 4577 /*
4539 4578 * Check support for TSC deadline timer
4540 4579 *
4541 4580 * The TSC deadline timer provides a superior software programming
4542 4581 * model over the local APIC timer, one that eliminates "time drifts".
4543 4582 * Instead of specifying a relative time, software specifies an
4544 4583 * absolute time as the target at which the processor should
4545 4584 * generate a timer event.
4546 4585 */
4547 4586 int
4548 4587 cpuid_deadline_tsc_supported(void)
4549 4588 {
4550 4589 struct cpuid_info *cpi = CPU->cpu_m.mcpu_cpi;
4551 4590 struct cpuid_regs regs;
4552 4591
4553 4592 ASSERT(cpuid_checkpass(CPU, 1));
4554 4593 ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
4555 4594
4556 4595 switch (cpi->cpi_vendor) {
4557 4596 case X86_VENDOR_Intel:
4558 4597 if (cpi->cpi_maxeax >= 1) {
4559 4598 regs.cp_eax = 1;
4560 4599 (void) cpuid_insn(NULL, &regs);
4561 4600 return (regs.cp_ecx & CPUID_DEADLINE_TSC);
4562 4601 } else {
4563 4602 return (0);
4564 4603 }
4565 4604 default:
4566 4605 return (0);
4567 4606 }
4568 4607 }
4569 4608
4570 4609 #if defined(__amd64) && !defined(__xpv)
4571 4610 /*
4572 4611 * Patch in versions of bcopy for high performance Intel Nhm processors
4573 4612 * and later...
4574 4613 */
4575 4614 void
4576 4615 patch_memops(uint_t vendor)
4577 4616 {
4578 4617 size_t cnt, i;
4579 4618 caddr_t to, from;
4580 4619
4581 4620 if ((vendor == X86_VENDOR_Intel) &&
4582 4621 is_x86_feature(x86_featureset, X86FSET_SSE4_2)) {
4583 4622 cnt = &bcopy_patch_end - &bcopy_patch_start;
4584 4623 to = &bcopy_ck_size;
4585 4624 from = &bcopy_patch_start;
4586 4625 for (i = 0; i < cnt; i++) {
4587 4626 *to++ = *from++;
4588 4627 }
4589 4628 }
4590 4629 }
4591 4630 #endif /* __amd64 && !__xpv */
4592 4631
4593 4632 /*
4594 4633 * This function finds the number of bits to represent the number of cores per
4595 4634 * chip and the number of strands per core for the Intel platforms.
4596 4635 * It reuses the x2APIC cpuid code of cpuid_pass2().
4597 4636 */
4598 4637 void
4599 4638 cpuid_get_ext_topo(uint_t vendor, uint_t *core_nbits, uint_t *strand_nbits)
4600 4639 {
4601 4640 struct cpuid_regs regs;
4602 4641 struct cpuid_regs *cp = &regs;
4603 4642
4604 4643 if (vendor != X86_VENDOR_Intel) {
4605 4644 return;
4606 4645 }
4607 4646
4608 4647 /* if the maximum cpuid level is at least 0xB, extended topology may be available. */
4609 4648 cp->cp_eax = 0;
4610 4649 if (__cpuid_insn(cp) >= 0xB) {
4611 4650
4612 4651 cp->cp_eax = 0xB;
4613 4652 cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
4614 4653 (void) __cpuid_insn(cp);
4615 4654
4616 4655 /*
4617 4656 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
4618 4657 * indicates that the extended topology enumeration leaf is
4619 4658 * available.
4620 4659 */
4621 4660 if (cp->cp_ebx) {
4622 4661 uint_t coreid_shift = 0;
4623 4662 uint_t chipid_shift = 0;
4624 4663 uint_t i;
4625 4664 uint_t level;
4626 4665
4627 4666 for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
4628 4667 cp->cp_eax = 0xB;
4629 4668 cp->cp_ecx = i;
4630 4669
4631 4670 (void) __cpuid_insn(cp);
4632 4671 level = CPI_CPU_LEVEL_TYPE(cp);
4633 4672
4634 4673 if (level == 1) {
4635 4674 /*
4636 4675 * Thread level processor topology
4637 4676 * Number of bits shift right APIC ID
4638 4677 * to get the coreid.
4639 4678 */
4640 4679 coreid_shift = BITX(cp->cp_eax, 4, 0);
4641 4680 } else if (level == 2) {
4642 4681 /*
4643 4682 * Core level processor topology
4644 4683 * Number of bits shift right APIC ID
4645 4684 * to get the chipid.
4646 4685 */
4647 4686 chipid_shift = BITX(cp->cp_eax, 4, 0);
4648 4687 }
4649 4688 }
4650 4689
4651 4690 if (coreid_shift > 0 && chipid_shift > coreid_shift) {
4652 4691 *strand_nbits = coreid_shift;
4653 4692 *core_nbits = chipid_shift - coreid_shift;
4654 4693 }
4655 4694 }
4656 4695 }
4657 4696 }
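/*
 * Worked example (hypothetical leaf 0xB values): if the thread (SMT) level
 * reports a shift of 1 and the core level reports a shift of 5, then
 * strand_nbits = 1 and core_nbits = 5 - 1 = 4, i.e. up to 2 strands per core
 * and 16 cores per chip, with the chip id obtained by shifting the x2APIC id
 * right by 5 bits.
 */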
1641 lines elided