Print this page
8620 pcplusmp shouldn't support x2APIC mode
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/i86pc/io/apix/apix.c
+++ new/usr/src/uts/i86pc/io/apix/apix.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25 /*
26 26 * Copyright (c) 2010, Intel Corporation.
27 27 * All rights reserved.
28 28 */
29 29 /*
30 30 * Copyright (c) 2017, Joyent, Inc. All rights reserved.
31 31 */
32 32
33 33 /*
34 34 * To understand how the apix module interacts with the interrupt subsystem read
35 35 * the theory statement in uts/i86pc/os/intr.c.
36 36 */
37 37
38 38 /*
39 39 * PSMI 1.1 extensions are supported only in 2.6 and later versions.
40 40 * PSMI 1.2 extensions are supported only in 2.7 and later versions.
41 41 * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
42 42 * PSMI 1.5 extensions are supported in Solaris Nevada.
43 43 * PSMI 1.6 extensions are supported in Solaris Nevada.
44 44 * PSMI 1.7 extensions are supported in Solaris Nevada.
45 45 */
46 46 #define PSMI_1_7
47 47
48 48 #include <sys/processor.h>
49 49 #include <sys/time.h>
50 50 #include <sys/psm.h>
51 51 #include <sys/smp_impldefs.h>
52 52 #include <sys/cram.h>
53 53 #include <sys/acpi/acpi.h>
54 54 #include <sys/acpica.h>
55 55 #include <sys/psm_common.h>
56 56 #include <sys/pit.h>
57 57 #include <sys/ddi.h>
58 58 #include <sys/sunddi.h>
59 59 #include <sys/ddi_impldefs.h>
60 60 #include <sys/pci.h>
61 61 #include <sys/promif.h>
62 62 #include <sys/x86_archext.h>
63 63 #include <sys/cpc_impl.h>
64 64 #include <sys/uadmin.h>
65 65 #include <sys/panic.h>
66 66 #include <sys/debug.h>
67 67 #include <sys/archsystm.h>
68 68 #include <sys/trap.h>
69 69 #include <sys/machsystm.h>
70 70 #include <sys/sysmacros.h>
71 71 #include <sys/cpuvar.h>
72 72 #include <sys/rm_platter.h>
73 73 #include <sys/privregs.h>
74 74 #include <sys/note.h>
75 75 #include <sys/pci_intr_lib.h>
76 76 #include <sys/spl.h>
77 77 #include <sys/clock.h>
78 78 #include <sys/cyclic.h>
79 79 #include <sys/dditypes.h>
80 80 #include <sys/sunddi.h>
81 81 #include <sys/x_call.h>
82 82 #include <sys/reboot.h>
83 83 #include <sys/mach_intr.h>
84 84 #include <sys/apix.h>
85 85 #include <sys/apix_irm_impl.h>
86 86
87 87 static int apix_probe();
88 88 static void apix_init();
89 89 static void apix_picinit(void);
90 90 static int apix_intr_enter(int, int *);
91 91 static void apix_intr_exit(int, int);
92 92 static void apix_setspl(int);
93 93 static int apix_disable_intr(processorid_t);
94 94 static void apix_enable_intr(processorid_t);
95 95 static int apix_get_clkvect(int);
96 96 static int apix_get_ipivect(int, int);
97 97 static void apix_post_cyclic_setup(void *);
98 98 static int apix_post_cpu_start();
99 99 static int apix_intr_ops(dev_info_t *, ddi_intr_handle_impl_t *,
100 100 psm_intr_op_t, int *);
101 101
102 102 /*
103 103 * Helper functions for apix_intr_ops()
104 104 */
105 105 static void apix_redistribute_compute(void);
106 106 static int apix_get_pending(apix_vector_t *);
107 107 static apix_vector_t *apix_get_req_vector(ddi_intr_handle_impl_t *, ushort_t);
108 108 static int apix_get_intr_info(ddi_intr_handle_impl_t *, apic_get_intr_t *);
109 109 static char *apix_get_apic_type(void);
110 110 static int apix_intx_get_pending(int);
111 111 static void apix_intx_set_mask(int irqno);
112 112 static void apix_intx_clear_mask(int irqno);
113 113 static int apix_intx_get_shared(int irqno);
114 114 static void apix_intx_set_shared(int irqno, int delta);
115 115 static apix_vector_t *apix_intx_xlate_vector(dev_info_t *, int,
116 116 struct intrspec *);
117 117 static int apix_intx_alloc_vector(dev_info_t *, int, struct intrspec *);
118 118
119 119 extern int apic_clkinit(int);
120 120
121 121 /* IRM initialization for APIX PSM module */
122 122 extern void apix_irm_init(void);
123 123
124 124 extern int irm_enable;
125 125
126 126 /*
127 127 * Local static data
128 128 */
/*
 * PSM operations vector for the apix module.  The initializer is positional,
 * so the order of the entries below is significant.  Slots set to NULL carry
 * a trailing comment naming the psm_ops member that is left unimplemented.
 * _init() may replace the psm_gethrtime slot with apic_gettime when
 * apic_coarse_hrtime is set.
 */
129 129 static struct psm_ops apix_ops = {
130 130 apix_probe,
131 131
132 132 apix_init,
133 133 apix_picinit,
134 134 apix_intr_enter,
135 135 apix_intr_exit,
136 136 apix_setspl,
137 137 apix_addspl,
138 138 apix_delspl,
139 139 apix_disable_intr,
140 140 apix_enable_intr,
141 141 NULL, /* psm_softlvl_to_irq */
142 142 NULL, /* psm_set_softintr */
143 143
144 144 apic_set_idlecpu,
145 145 apic_unset_idlecpu,
146 146
147 147 apic_clkinit,
148 148 apix_get_clkvect,
149 149 NULL, /* psm_hrtimeinit */
150 150 apic_gethrtime,
151 151
152 152 apic_get_next_processorid,
153 153 apic_cpu_start,
154 154 apix_post_cpu_start,
155 155 apic_shutdown,
156 156 apix_get_ipivect,
157 157 apic_send_ipi,
158 158
159 159 NULL, /* psm_translate_irq */
160 160 NULL, /* psm_notify_error */
161 161 NULL, /* psm_notify_func */
162 162 apic_timer_reprogram,
163 163 apic_timer_enable,
164 164 apic_timer_disable,
165 165 apix_post_cyclic_setup,
166 166 apic_preshutdown,
167 167 apix_intr_ops, /* Advanced DDI Interrupt framework */
168 168 apic_state, /* save, restore apic state for S3 */
169 169 apic_cpu_ops, /* CPU control interface. */
170 170 };
171 171
/* Global pointer to the operations vector defined above. */
172 172 struct psm_ops *psmops = &apix_ops;
173 173
/*
 * Module description handed to psm_mod_init(), psm_mod_fini() and
 * psm_mod_info() by _init(), _fini() and _info().  apix_probe() passes the
 * p_mach_idstring member of this structure to apic_probe_common().
 */
174 174 static struct psm_info apix_psm_info = {
175 175 PSM_INFO_VER01_7, /* version */
176 176 PSM_OWN_EXCLUSIVE, /* ownership */
177 177 &apix_ops, /* operation */
178 178 APIX_NAME, /* machine name */
179 179 "apix MPv1.4 compatible",
180 180 };
181 181
/* Handle shared by psm_mod_init(), psm_mod_fini() and psm_mod_info(). */
182 182 static void *apix_hdlp;
183 183
/* Set to 1 by apix_probe() when apic_probe_common() succeeds, 0 otherwise. */
184 184 static int apix_is_enabled = 0;
185 185
186 186 /*
187 187 * Flag to indicate if APIX is to be enabled only for platforms
188 188 * with specific hw feature(s).
189 189 */
190 190 int apix_hw_chk_enable = 1;
191 191
192 192 /*
193 193 * Hw features that are checked for enabling APIX support.
194 194 */
195 195 #define APIX_SUPPORT_X2APIC 0x00000001
196 196 uint_t apix_supported_hw = APIX_SUPPORT_X2APIC;
197 197
198 198 /*
199 199 * apix_lock is used for cpu selection and vector re-binding
200 200 */
201 201 lock_t apix_lock;
/* Per-CPU apix state, indexed by CPU id; populated by apix_softinit(). */
202 202 apix_impl_t *apixs[NCPU];
203 203 /*
204 204 * Mapping between device interrupt and the allocated vector. Indexed
205 205 * by major number.
206 206 */
207 207 apix_dev_vector_t **apix_dev_vector;
208 208 /*
209 209 * Mapping between device major number and cpu id. It gets used
210 210 * when interrupt binding policy round robin with affinity is
211 211 * applied. With that policy, devices with the same major number
212 212 * will be bound to the same CPU.
213 213 */
↓ open down ↓ |
213 lines elided |
↑ open up ↑ |
/*
 * apix_major_to_cpu is allocated by apix_softinit() only when
 * apic_intr_policy is INTR_ROUND_ROBIN_WITH_AFFINITY; every entry starts
 * out as IRQ_UNINIT.
 */
214 214 processorid_t *apix_major_to_cpu; /* major to cpu mapping */
215 215 kmutex_t apix_mutex; /* for apix_dev_vector & apix_major_to_cpu */
216 216
217 217 int apix_nipis = 16; /* Maximum number of IPIs */
218 218 /*
219 219 * Maximum number of vectors in a CPU that can be used for interrupt
220 220 * allocation (including IPIs and the reserved vectors).
221 221 */
222 222 int apix_cpu_nvectors = APIX_NVECTOR;
223 223
224 +/* number of CPUs in power-on transition state */
225 +static int apic_poweron_cnt = 0;
226 +
224 227 /* gcpu.h */
225 228
226 229 extern void apic_do_interrupt(struct regs *rp, trap_trace_rec_t *ttp);
227 230 extern void apic_change_eoi();
228 231
229 232 /*
230 233 * This is the loadable module wrapper
231 234 */
232 235
233 236 int
234 237 _init(void)
235 238 {
236 239 if (apic_coarse_hrtime)
237 240 apix_ops.psm_gethrtime = &apic_gettime;
238 241 return (psm_mod_init(&apix_hdlp, &apix_psm_info));
239 242 }
240 243
241 244 int
242 245 _fini(void)
243 246 {
244 247 return (psm_mod_fini(&apix_hdlp, &apix_psm_info));
245 248 }
246 249
247 250 int
248 251 _info(struct modinfo *modinfop)
249 252 {
250 253 return (psm_mod_info(&apix_hdlp, &apix_psm_info, modinfop));
251 254 }
252 255
253 256 static int
254 257 apix_probe()
255 258 {
256 259 int rval;
257 260
258 261 if (apix_enable == 0)
259 262 return (PSM_FAILURE);
260 263
261 264 /*
262 265 * FIXME Temporarily disable apix module on Xen HVM platform due to
263 266 * known hang during boot (see #3605).
264 267 *
265 268 * Please remove when/if the issue is resolved.
266 269 */
267 270 if (get_hwenv() == HW_XEN_HVM)
268 271 return (PSM_FAILURE);
269 272
270 273 /* check for hw features if specified */
271 274 if (apix_hw_chk_enable) {
272 275 /* check if x2APIC mode is supported */
273 276 if ((apix_supported_hw & APIX_SUPPORT_X2APIC) ==
274 277 APIX_SUPPORT_X2APIC) {
275 278 if (apic_local_mode() == LOCAL_X2APIC) {
276 279 /* x2APIC mode activated by BIOS, switch ops */
277 280 apic_mode = LOCAL_X2APIC;
278 281 apic_change_ops();
279 282 } else if (!apic_detect_x2apic()) {
280 283 /* x2APIC mode is not supported in the hw */
281 284 apix_enable = 0;
282 285 }
283 286 }
284 287 if (apix_enable == 0)
285 288 return (PSM_FAILURE);
286 289 }
287 290
288 291 rval = apic_probe_common(apix_psm_info.p_mach_idstring);
289 292 if (rval == PSM_SUCCESS)
290 293 apix_is_enabled = 1;
291 294 else
292 295 apix_is_enabled = 0;
293 296 return (rval);
294 297 }
295 298
296 299 /*
297 300 * Initialize the data structures needed by pcplusmpx module.
298 301 * Specifically, the data structures used by addspl() and delspl()
299 302 * routines.
300 303 */
301 304 static void
302 305 apix_softinit()
303 306 {
304 307 int i, *iptr;
305 308 apix_impl_t *hdlp;
306 309 int nproc;
307 310
308 311 nproc = max(apic_nproc, apic_max_nproc);
309 312
310 313 hdlp = kmem_zalloc(nproc * sizeof (apix_impl_t), KM_SLEEP);
311 314 for (i = 0; i < nproc; i++) {
312 315 apixs[i] = &hdlp[i];
313 316 apixs[i]->x_cpuid = i;
314 317 LOCK_INIT_CLEAR(&apixs[i]->x_lock);
315 318 }
316 319
317 320 /* cpu 0 is always up (for now) */
318 321 apic_cpus[0].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE;
319 322
320 323 iptr = (int *)&apic_irq_table[0];
321 324 for (i = 0; i <= APIC_MAX_VECTOR; i++) {
322 325 apic_level_intr[i] = 0;
323 326 *iptr++ = NULL;
324 327 }
325 328 mutex_init(&airq_mutex, NULL, MUTEX_DEFAULT, NULL);
326 329
327 330 apix_dev_vector = kmem_zalloc(sizeof (apix_dev_vector_t *) * devcnt,
328 331 KM_SLEEP);
329 332
330 333 if (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) {
331 334 apix_major_to_cpu = kmem_zalloc(sizeof (int) * devcnt,
332 335 KM_SLEEP);
333 336 for (i = 0; i < devcnt; i++)
334 337 apix_major_to_cpu[i] = IRQ_UNINIT;
335 338 }
336 339
337 340 mutex_init(&apix_mutex, NULL, MUTEX_DEFAULT, NULL);
338 341 }
339 342
340 343 static int
341 344 apix_get_pending_spl(void)
342 345 {
343 346 int cpuid = CPU->cpu_id;
344 347
345 348 return (bsrw_insn(apixs[cpuid]->x_intr_pending));
346 349 }
347 350
348 351 static uintptr_t
349 352 apix_get_intr_handler(int cpu, short vec)
350 353 {
351 354 apix_vector_t *apix_vector;
352 355
353 356 ASSERT(cpu < apic_nproc && vec < APIX_NVECTOR);
354 357 if (cpu >= apic_nproc)
355 358 return (NULL);
356 359
357 360 apix_vector = apixs[cpu]->x_vectbl[vec];
358 361
359 362 return ((uintptr_t)(apix_vector->v_autovect));
360 363 }
361 364
362 365 static void
363 366 apix_init()
364 367 {
365 368 extern void (*do_interrupt_common)(struct regs *, trap_trace_rec_t *);
366 369
367 370 APIC_VERBOSE(INIT, (CE_CONT, "apix: psm_softinit\n"));
368 371
369 372 do_interrupt_common = apix_do_interrupt;
370 373 addintr = apix_add_avintr;
371 374 remintr = apix_rem_avintr;
372 375 get_pending_spl = apix_get_pending_spl;
373 376 get_intr_handler = apix_get_intr_handler;
374 377 psm_get_localapicid = apic_get_localapicid;
375 378 psm_get_ioapicid = apic_get_ioapicid;
376 379
377 380 apix_softinit();
378 381
379 382 #if !defined(__amd64)
380 383 if (cpuid_have_cr8access(CPU))
381 384 apic_have_32bit_cr8 = 1;
382 385 #endif
383 386
384 387 /*
385 388 * Initialize IRM pool parameters
386 389 */
387 390 if (irm_enable) {
388 391 int i;
389 392 int lowest_irq;
390 393 int highest_irq;
391 394
392 395 /* number of CPUs present */
393 396 apix_irminfo.apix_ncpus = apic_nproc;
394 397 /* total number of entries in all of the IOAPICs present */
395 398 lowest_irq = apic_io_vectbase[0];
396 399 highest_irq = apic_io_vectend[0];
397 400 for (i = 1; i < apic_io_max; i++) {
398 401 if (apic_io_vectbase[i] < lowest_irq)
399 402 lowest_irq = apic_io_vectbase[i];
400 403 if (apic_io_vectend[i] > highest_irq)
401 404 highest_irq = apic_io_vectend[i];
402 405 }
403 406 apix_irminfo.apix_ioapic_max_vectors =
404 407 highest_irq - lowest_irq + 1;
405 408 /*
406 409 * Number of available per-CPU vectors excluding
407 410 * reserved vectors for Dtrace, int80, system-call,
408 411 * fast-trap, etc.
409 412 */
410 413 apix_irminfo.apix_per_cpu_vectors = APIX_NAVINTR -
411 414 APIX_SW_RESERVED_VECTORS;
412 415
413 416 /* Number of vectors (pre) allocated (SCI and HPET) */
414 417 apix_irminfo.apix_vectors_allocated = 0;
415 418 if (apic_hpet_vect != -1)
416 419 apix_irminfo.apix_vectors_allocated++;
417 420 if (apic_sci_vect != -1)
418 421 apix_irminfo.apix_vectors_allocated++;
419 422 }
420 423 }
421 424
/*
 * Program the local APIC of the calling CPU (psm_get_cpu_id()).  Called from
 * apix_picinit() and from apix_post_cpu_start().
 *
 * The task priority register is set to APIC_MASK_ALL for the duration of
 * the programming and written back to 0 at the end.  In between, the
 * function:
 *  - sets the destination format and logical destination registers when
 *    apic_mode is LOCAL_APIC;
 *  - writes the spurious interrupt vector register (with broadcast EOI
 *    suppression when apic_directed_EOI_supported() is true);
 *  - masks the timer and LINT0 entries and sets LINT1 to AV_NMI;
 *  - depending on the number of LVT entries, sets up the performance
 *    counter overflow, thermal and error LVT entries;
 *  - sets up the CMCI entry when cmi_enable_cmci is set.
 *
 * The cpcovf, error and CMCI vectors are allocated and their handlers
 * registered only while the corresponding global is still 0; later calls
 * reuse the stored vector.
 */
422 425 static void
423 426 apix_init_intr()
424 427 {
425 428 processorid_t cpun = psm_get_cpu_id();
426 429 uint_t nlvt;
427 430 uint32_t svr = AV_UNIT_ENABLE | APIC_SPUR_INTR;
428 431 extern void cmi_cmci_trap(void);
429 432
430 433 apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);
431 434
432 435 if (apic_mode == LOCAL_APIC) {
433 436 /*
434 437 * We are running APIC in MMIO mode.
435 438 */
436 439 if (apic_flat_model) {
437 440 apic_reg_ops->apic_write(APIC_FORMAT_REG,
438 441 APIC_FLAT_MODEL);
439 442 } else {
440 443 apic_reg_ops->apic_write(APIC_FORMAT_REG,
441 444 APIC_CLUSTER_MODEL);
442 445 }
443 446
/* The destination value is AV_HIGH_ORDER shifted right by the CPU number. */
444 447 apic_reg_ops->apic_write(APIC_DEST_REG,
445 448 AV_HIGH_ORDER >> cpun);
446 449 }
447 450
448 451 if (apic_directed_EOI_supported()) {
449 452 /*
450 453 * Setting the 12th bit in the Spurious Interrupt Vector
451 454 * Register suppresses broadcast EOIs generated by the local
452 455 * APIC. The suppression of broadcast EOIs happens only when
453 456 * interrupts are level-triggered.
454 457 */
455 458 svr |= APIC_SVR_SUPPRESS_BROADCAST_EOI;
456 459 }
457 460
458 461 /* need to enable APIC before unmasking NMI */
459 462 apic_reg_ops->apic_write(APIC_SPUR_INT_REG, svr);
460 463
461 464 /*
462 465 * Presence of an invalid vector with delivery mode AV_FIXED can
463 466 * cause an error interrupt, even if the entry is masked...so
464 467 * write a valid vector to LVT entries along with the mask bit
465 468 */
466 469
467 470 /* All APICs have timer and LINT0/1 */
468 471 apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK|APIC_RESV_IRQ);
469 472 apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK|APIC_RESV_IRQ);
470 473 apic_reg_ops->apic_write(APIC_INT_VECT1, AV_NMI); /* enable NMI */
471 474
472 475 /*
473 476 * On integrated APICs, the number of LVT entries is
474 477 * 'Max LVT entry' + 1; on 82489DX's (non-integrated
475 478 * APICs), nlvt is "3" (LINT0, LINT1, and timer)
476 479 */
477 480
478 481 if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
479 482 nlvt = 3;
480 483 } else {
481 484 nlvt = ((apic_reg_ops->apic_read(APIC_VERS_REG) >> 16) &
482 485 0xFF) + 1;
483 486 }
484 487
485 488 if (nlvt >= 5) {
486 489 /* Enable performance counter overflow interrupt */
487 490
/* Without the MSR feature the overflow interrupt is turned off globally. */
488 491 if (!is_x86_feature(x86_featureset, X86FSET_MSR))
489 492 apic_enable_cpcovf_intr = 0;
490 493 if (apic_enable_cpcovf_intr) {
/* First caller allocates the vector and registers the handler. */
491 494 if (apic_cpcovf_vect == 0) {
492 495 int ipl = APIC_PCINT_IPL;
493 496
494 497 apic_cpcovf_vect = apix_get_ipivect(ipl, -1);
495 498 ASSERT(apic_cpcovf_vect);
496 499
497 500 (void) add_avintr(NULL, ipl,
498 501 (avfunc)kcpc_hw_overflow_intr,
499 502 "apic pcint", apic_cpcovf_vect,
500 503 NULL, NULL, NULL, NULL);
501 504 kcpc_hw_overflow_intr_installed = 1;
502 505 kcpc_hw_enable_cpc_intr =
503 506 apic_cpcovf_mask_clear;
504 507 }
505 508 apic_reg_ops->apic_write(APIC_PCINT_VECT,
506 509 apic_cpcovf_vect);
507 510 }
508 511 }
509 512
510 513 if (nlvt >= 6) {
511 514 /* Only mask TM intr if the BIOS apparently doesn't use it */
512 515
513 516 uint32_t lvtval;
514 517
/* Leave the entry alone only when it is unmasked and in AV_SMI mode. */
515 518 lvtval = apic_reg_ops->apic_read(APIC_THERM_VECT);
516 519 if (((lvtval & AV_MASK) == AV_MASK) ||
517 520 ((lvtval & AV_DELIV_MODE) != AV_SMI)) {
518 521 apic_reg_ops->apic_write(APIC_THERM_VECT,
519 522 AV_MASK|APIC_RESV_IRQ);
520 523 }
521 524 }
522 525
523 526 /* Enable error interrupt */
524 527
525 528 if (nlvt >= 4 && apic_enable_error_intr) {
526 529 if (apic_errvect == 0) {
527 530 int ipl = 0xf; /* get highest priority intr */
528 531 apic_errvect = apix_get_ipivect(ipl, -1);
529 532 ASSERT(apic_errvect);
530 533 /*
531 534 * Not PSMI compliant, but we are going to merge
532 535 * with ON anyway
533 536 */
534 537 (void) add_avintr(NULL, ipl,
535 538 (avfunc)apic_error_intr, "apic error intr",
536 539 apic_errvect, NULL, NULL, NULL, NULL);
537 540 }
538 541 apic_reg_ops->apic_write(APIC_ERR_VECT, apic_errvect);
/* The error status register is deliberately written twice. */
539 542 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
540 543 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
541 544 }
542 545
543 546 /* Enable CMCI interrupt */
544 547 if (cmi_enable_cmci) {
/* Register cmci_cpu_setup once, under cmci_cpu_setup_lock and cpu_lock. */
545 548 mutex_enter(&cmci_cpu_setup_lock);
546 549 if (cmci_cpu_setup_registered == 0) {
547 550 mutex_enter(&cpu_lock);
548 551 register_cpu_setup_func(cmci_cpu_setup, NULL);
549 552 mutex_exit(&cpu_lock);
550 553 cmci_cpu_setup_registered = 1;
551 554 }
552 555 mutex_exit(&cmci_cpu_setup_lock);
553 556
554 557 if (apic_cmci_vect == 0) {
555 558 int ipl = 0x2;
556 559 apic_cmci_vect = apix_get_ipivect(ipl, -1);
557 560 ASSERT(apic_cmci_vect);
558 561
559 562 (void) add_avintr(NULL, ipl,
560 563 (avfunc)cmi_cmci_trap, "apic cmci intr",
561 564 apic_cmci_vect, NULL, NULL, NULL, NULL);
562 565 }
563 566 apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
564 567 }
565 568
/* Programming done: drop the task priority back to 0. */
566 569 apic_reg_ops->apic_write_task_reg(0);
567 570 }
568 571
/*
 * PSM picinit entry point.  In order, it: initializes interrupt remapping,
 * clears any in-service bits left set in the local APIC, initializes the
 * module's spin locks, sets up the 8259 via picsetup(), registers the NMI
 * handler, programs the local APIC through apix_init_intr(), switches the
 * IMCR to APIC mode when apic_imcrp is set, initializes the I/O APICs and,
 * when irm_enable is set, the global IRM pool.
 */
569 572 static void
570 573 apix_picinit(void)
571 574 {
572 575 int i, j;
573 576 uint_t isr;
574 577
575 578 APIC_VERBOSE(INIT, (CE_CONT, "apix: psm_picinit\n"));
576 579
577 580 /*
578 581 * initialize interrupt remapping before apic
579 582 * hardware initialization
580 583 */
581 584 apic_intrmap_init(apic_mode);
/* NOTE(review): presumably psm_vt_ops means remapping is not in use -- confirm. */
582 585 if (apic_vt_ops == psm_vt_ops)
583 586 apix_mul_ioapic_method = APIC_MUL_IOAPIC_IIR;
584 587
585 588 /*
586 589 * On UniSys Model 6520, the BIOS leaves vector 0x20 isr
587 590 * bit on without clearing it with EOI. Since softint
588 591 * uses vector 0x20 to interrupt itself, so softint will
589 592 * not work on this machine. In order to fix this problem
590 593 * a check is made to verify all the isr bits are clear.
591 594 * If not, EOIs are issued to clear the bits.
592 595 */
/* Walks ISR words 7 down to 1; one EOI is written per bit found set. */
593 596 for (i = 7; i >= 1; i--) {
594 597 isr = apic_reg_ops->apic_read(APIC_ISR_REG + (i * 4));
595 598 if (isr != 0)
596 599 for (j = 0; ((j < 32) && (isr != 0)); j++)
597 600 if (isr & (1 << j)) {
598 601 apic_reg_ops->apic_write(
599 602 APIC_EOI_REG, 0);
600 603 isr &= ~(1 << j);
601 604 apic_error |= APIC_ERR_BOOT_EOI;
602 605 }
603 606 }
604 607
605 608 /* set a flag so we know we have run apic_picinit() */
606 609 apic_picinit_called = 1;
607 610 LOCK_INIT_CLEAR(&apic_gethrtime_lock);
608 611 LOCK_INIT_CLEAR(&apic_ioapic_lock);
609 612 LOCK_INIT_CLEAR(&apic_error_lock);
610 613 LOCK_INIT_CLEAR(&apic_mode_switch_lock);
611 614
612 615 picsetup(); /* initialise the 8259 */
613 616
614 617 /* add nmi handler - least priority nmi handler */
615 618 LOCK_INIT_CLEAR(&apic_nmi_lock);
616 619
/* A failed registration is only reported; initialization continues. */
617 620 if (!psm_add_nmintr(0, (avfunc) apic_nmi_intr,
618 621 "apix NMI handler", (caddr_t)NULL))
619 622 cmn_err(CE_WARN, "apix: Unable to add nmi handler");
620 623
621 624 apix_init_intr();
622 625
623 626 /* enable apic mode if imcr present */
624 627 if (apic_imcrp) {
625 628 outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
626 629 outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC);
627 630 }
628 631
629 632 ioapix_init_intr(IOAPIC_MASK);
630 633
631 634 /* setup global IRM pool if applicable */
632 635 if (irm_enable)
633 636 apix_irm_init();
634 637 }
635 638
636 639 static __inline__ void
637 640 apix_send_eoi(void)
638 641 {
639 642 if (apic_mode == LOCAL_APIC)
640 643 LOCAL_APIC_WRITE_REG(APIC_EOI_REG, 0);
641 644 else
642 645 X2APIC_WRITE(APIC_EOI_REG, 0);
643 646 }
644 647
645 648 /*
646 649 * platform_intr_enter
647 650 *
648 651 * Called at the beginning of the interrupt service routine, but unlike
649 652 * pcplusmp, does not mask interrupts. An EOI is given to the interrupt
650 653 * controller to enable other HW interrupts but interrupts are still
651 654 * masked by the IF flag.
652 655 *
653 656 * Return -1 for spurious interrupts
654 657 *
655 658 */
/*
 * Parameters:
 *   ipl     - priority level the CPU was running at when interrupted; only
 *             compared against the new level below.
 *   vectorp - in: vector number minus APIC_BASE_VECT; out: the real vector.
 *
 * Returns APIC_INT_SPURIOUS for the spurious vector, otherwise the priority
 * (v_pri) of the vector.  For a vector with no table entry the return value
 * is apix_rebindinfo.i_pri when APIX_IS_FAKE_INTR() holds, else -1.
 */
656 659 static int
657 660 apix_intr_enter(int ipl, int *vectorp)
658 661 {
659 662 struct cpu *cpu = CPU;
660 663 uint32_t cpuid = CPU->cpu_id;
661 664 apic_cpus_info_t *cpu_infop;
662 665 uchar_t vector;
663 666 apix_vector_t *vecp;
664 667 int nipl = -1;
665 668
666 669 /*
667 670 * The real vector delivered is (*vectorp + 0x20), but our caller
668 671 * subtracts 0x20 from the vector before passing it to us.
669 672 * (That's why APIC_BASE_VECT is 0x20.)
670 673 */
671 674 vector = *vectorp = (uchar_t)*vectorp + APIC_BASE_VECT;
672 675
673 676 cpu_infop = &apic_cpus[cpuid];
/* Spurious interrupts are only counted; no EOI is sent for them. */
674 677 if (vector == APIC_SPUR_INTR) {
675 678 cpu_infop->aci_spur_cnt++;
676 679 return (APIC_INT_SPURIOUS);
677 680 }
678 681
679 682 vecp = xv_vector(cpuid, vector);
/* No table entry for this vector: EOI it and return. */
680 683 if (vecp == NULL) {
681 684 if (APIX_IS_FAKE_INTR(vector))
682 685 nipl = apix_rebindinfo.i_pri;
683 686 apix_send_eoi();
684 687 return (nipl);
685 688 }
686 689 nipl = vecp->v_pri;
687 690
688 691 /* if interrupted by the clock, increment apic_nsec_since_boot */
689 692 if (vector == (apic_clkvect + APIC_BASE_VECT)) {
690 693 if (!apic_oneshot) {
691 694 /* NOTE: this is not MT aware */
/* apic_hrtime_stamp is bumped both before and after the update. */
692 695 apic_hrtime_stamp++;
693 696 apic_nsec_since_boot += apic_nsec_per_intr;
694 697 apic_hrtime_stamp++;
695 698 last_count_read = apic_hertz_count;
696 699 apix_redistribute_compute();
697 700 }
698 701
699 702 apix_send_eoi();
700 703
701 704 return (nipl);
702 705 }
703 706
704 707 ASSERT(vecp->v_state != APIX_STATE_OBSOLETED);
705 708
706 709 /* pre-EOI handling for level-triggered interrupts */
707 710 if (!APIX_IS_DIRECTED_EOI(apix_mul_ioapic_method) &&
708 711 (vecp->v_type & APIX_TYPE_FIXED) && apic_level_intr[vecp->v_inum])
709 712 apix_level_intr_pre_eoi(vecp->v_inum);
710 713
711 714 /* send back EOI */
712 715 apix_send_eoi();
713 716
/*
 * Record the vector for its priority level; the current-ipl bookkeeping is
 * raised only when the new level exceeds both ipl and the CPU's base spl.
 */
714 717 cpu_infop->aci_current[nipl] = vector;
715 718 if ((nipl > ipl) && (nipl > cpu->cpu_base_spl)) {
716 719 cpu_infop->aci_curipl = (uchar_t)nipl;
717 720 cpu_infop->aci_ISR_in_progress |= 1 << nipl;
718 721 }
719 722
720 723 #ifdef DEBUG
721 724 if (vector >= APIX_IPI_MIN)
722 725 return (nipl); /* skip IPI */
723 726
724 727 APIC_DEBUG_BUF_PUT(vector);
725 728 APIC_DEBUG_BUF_PUT(vecp->v_inum);
726 729 APIC_DEBUG_BUF_PUT(nipl);
727 730 APIC_DEBUG_BUF_PUT(psm_get_cpu_id());
728 731 if ((apic_stretch_interrupts) && (apic_stretch_ISR & (1 << nipl)))
729 732 drv_usecwait(apic_stretch_interrupts);
730 733 #endif /* DEBUG */
731 734
732 735 return (nipl);
733 736 }
734 737
735 738 /*
736 739 * Any changes made to this function must also change X2APIC
737 740 * version of intr_exit.
738 741 */
739 742 static void
740 743 apix_intr_exit(int prev_ipl, int arg2)
741 744 {
742 745 int cpuid = psm_get_cpu_id();
743 746 apic_cpus_info_t *cpu_infop = &apic_cpus[cpuid];
744 747 apix_impl_t *apixp = apixs[cpuid];
745 748
746 749 UNREFERENCED_1PARAMETER(arg2);
747 750
748 751 cpu_infop->aci_curipl = (uchar_t)prev_ipl;
749 752 /* ISR above current pri could not be in progress */
750 753 cpu_infop->aci_ISR_in_progress &= (2 << prev_ipl) - 1;
751 754
752 755 if (apixp->x_obsoletes != NULL) {
753 756 if (APIX_CPU_LOCK_HELD(cpuid))
754 757 return;
755 758
756 759 APIX_ENTER_CPU_LOCK(cpuid);
757 760 (void) apix_obsolete_vector(apixp->x_obsoletes);
758 761 APIX_LEAVE_CPU_LOCK(cpuid);
759 762 }
760 763 }
761 764
762 765 /*
763 766 * The pcplusmp setspl code uses the TPR to mask all interrupts at or below the
764 767 * given ipl, but apix never uses the TPR and we never mask a subset of the
765 768 * interrupts. They are either all blocked by the IF flag or all can come in.
766 769 *
767 770 * For setspl, we mask all interrupts for XC_HI_PIL (15), otherwise, interrupts
768 771 * can come in if currently enabled by the IF flag. This table shows the state
769 772 * of the IF flag when we leave this function.
770 773 *
771 774 * curr IF | ipl == 15 ipl != 15
772 775 * --------+---------------------------
773 776 * 0 | 0 0
774 777 * 1 | 0 1
775 778 */
776 779 static void
777 780 apix_setspl(int ipl)
778 781 {
779 782 /*
780 783 * Interrupts at ipl above this cannot be in progress, so the following
781 784 * mask is ok.
782 785 */
783 786 apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1;
784 787
785 788 if (ipl == XC_HI_PIL)
786 789 cli();
787 790 }
788 791
789 792 int
790 793 apix_addspl(int virtvec, int ipl, int min_ipl, int max_ipl)
791 794 {
792 795 uint32_t cpuid = APIX_VIRTVEC_CPU(virtvec);
793 796 uchar_t vector = (uchar_t)APIX_VIRTVEC_VECTOR(virtvec);
794 797 apix_vector_t *vecp = xv_vector(cpuid, vector);
795 798
796 799 UNREFERENCED_3PARAMETER(ipl, min_ipl, max_ipl);
797 800 ASSERT(vecp != NULL && LOCK_HELD(&apix_lock));
798 801
799 802 if (vecp->v_type == APIX_TYPE_FIXED)
800 803 apix_intx_set_shared(vecp->v_inum, 1);
801 804
802 805 /* There are more interrupts, so it's already been enabled */
803 806 if (vecp->v_share > 1)
804 807 return (PSM_SUCCESS);
805 808
806 809 /* return if it is not hardware interrupt */
807 810 if (vecp->v_type == APIX_TYPE_IPI)
808 811 return (PSM_SUCCESS);
809 812
810 813 /*
811 814 * if apix_picinit() has not been called yet, just return.
812 815 * At the end of apic_picinit(), we will call setup_io_intr().
813 816 */
814 817 if (!apic_picinit_called)
815 818 return (PSM_SUCCESS);
816 819
817 820 (void) apix_setup_io_intr(vecp);
818 821
819 822 return (PSM_SUCCESS);
820 823 }
821 824
822 825 int
823 826 apix_delspl(int virtvec, int ipl, int min_ipl, int max_ipl)
824 827 {
825 828 uint32_t cpuid = APIX_VIRTVEC_CPU(virtvec);
826 829 uchar_t vector = (uchar_t)APIX_VIRTVEC_VECTOR(virtvec);
827 830 apix_vector_t *vecp = xv_vector(cpuid, vector);
828 831
829 832 UNREFERENCED_3PARAMETER(ipl, min_ipl, max_ipl);
830 833 ASSERT(vecp != NULL && LOCK_HELD(&apix_lock));
831 834
832 835 if (vecp->v_type == APIX_TYPE_FIXED)
833 836 apix_intx_set_shared(vecp->v_inum, -1);
834 837
835 838 /* There are more interrupts */
836 839 if (vecp->v_share > 1)
837 840 return (PSM_SUCCESS);
838 841
839 842 /* return if it is not hardware interrupt */
840 843 if (vecp->v_type == APIX_TYPE_IPI)
841 844 return (PSM_SUCCESS);
842 845
843 846 if (!apic_picinit_called) {
844 847 cmn_err(CE_WARN, "apix: delete 0x%x before apic init",
845 848 virtvec);
846 849 return (PSM_SUCCESS);
847 850 }
848 851
849 852 apix_disable_vector(vecp);
850 853
851 854 return (PSM_SUCCESS);
852 855 }
853 856
854 857 /*
855 858 * Try and disable all interrupts. We just assign interrupts to other
856 859 * processors based on policy. If any were bound by user request, we
857 860 * let them continue and return failure. We do not bother to check
858 861 * for cache affinity while rebinding.
859 862 */
860 863 static int
861 864 apix_disable_intr(processorid_t cpun)
862 865 {
863 866 apix_impl_t *apixp = apixs[cpun];
864 867 apix_vector_t *vecp, *newp;
865 868 int bindcpu, i, hardbound = 0, errbound = 0, ret, loop, type;
866 869
867 870 lock_set(&apix_lock);
868 871
869 872 apic_cpus[cpun].aci_status &= ~APIC_CPU_INTR_ENABLE;
870 873 apic_cpus[cpun].aci_curipl = 0;
871 874
872 875 /* if this is for SUSPEND operation, skip rebinding */
873 876 if (apic_cpus[cpun].aci_status & APIC_CPU_SUSPEND) {
874 877 for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
875 878 vecp = apixp->x_vectbl[i];
876 879 if (!IS_VECT_ENABLED(vecp))
877 880 continue;
878 881
879 882 apix_disable_vector(vecp);
880 883 }
881 884 lock_clear(&apix_lock);
882 885 return (PSM_SUCCESS);
883 886 }
884 887
885 888 for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
886 889 vecp = apixp->x_vectbl[i];
887 890 if (!IS_VECT_ENABLED(vecp))
888 891 continue;
889 892
890 893 if (vecp->v_flags & APIX_VECT_USER_BOUND) {
891 894 hardbound++;
892 895 continue;
893 896 }
894 897 type = vecp->v_type;
895 898
896 899 /*
897 900 * If there are bound interrupts on this cpu, then
898 901 * rebind them to other processors.
899 902 */
900 903 loop = 0;
901 904 do {
902 905 bindcpu = apic_find_cpu(APIC_CPU_INTR_ENABLE);
903 906
904 907 if (type != APIX_TYPE_MSI)
905 908 newp = apix_set_cpu(vecp, bindcpu, &ret);
906 909 else
907 910 newp = apix_grp_set_cpu(vecp, bindcpu, &ret);
908 911 } while ((newp == NULL) && (loop++ < apic_nproc));
909 912
910 913 if (loop >= apic_nproc) {
911 914 errbound++;
912 915 cmn_err(CE_WARN, "apix: failed to rebind vector %x/%x",
913 916 vecp->v_cpuid, vecp->v_vector);
914 917 }
915 918 }
916 919
917 920 lock_clear(&apix_lock);
918 921
919 922 if (hardbound || errbound) {
920 923 cmn_err(CE_WARN, "Could not disable interrupts on %d"
921 924 "due to user bound interrupts or failed operation",
922 925 cpun);
923 926 return (PSM_FAILURE);
924 927 }
925 928
926 929 return (PSM_SUCCESS);
927 930 }
928 931
929 932 /*
930 933 * Bind interrupts to specified CPU
931 934 */
932 935 static void
933 936 apix_enable_intr(processorid_t cpun)
934 937 {
935 938 apix_vector_t *vecp;
936 939 int i, ret;
937 940 processorid_t n;
938 941
939 942 lock_set(&apix_lock);
940 943
941 944 apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE;
942 945
943 946 /* interrupt enabling for system resume */
944 947 if (apic_cpus[cpun].aci_status & APIC_CPU_SUSPEND) {
945 948 for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
946 949 vecp = xv_vector(cpun, i);
947 950 if (!IS_VECT_ENABLED(vecp))
948 951 continue;
949 952
950 953 apix_enable_vector(vecp);
951 954 }
952 955 apic_cpus[cpun].aci_status &= ~APIC_CPU_SUSPEND;
953 956 }
954 957
955 958 for (n = 0; n < apic_nproc; n++) {
956 959 if (!apic_cpu_in_range(n) || n == cpun ||
957 960 (apic_cpus[n].aci_status & APIC_CPU_INTR_ENABLE) == 0)
958 961 continue;
959 962
960 963 for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
961 964 vecp = xv_vector(n, i);
962 965 if (!IS_VECT_ENABLED(vecp) ||
963 966 vecp->v_bound_cpuid != cpun)
964 967 continue;
965 968
966 969 if (vecp->v_type != APIX_TYPE_MSI)
967 970 (void) apix_set_cpu(vecp, cpun, &ret);
968 971 else
969 972 (void) apix_grp_set_cpu(vecp, cpun, &ret);
970 973 }
971 974 }
972 975
973 976 lock_clear(&apix_lock);
974 977 }
975 978
976 979 /*
977 980 * Allocate vector for IPI
978 981 * type == -1 indicates it is an internal request. Do not change
979 982 * resv_vector for these requests.
980 983 */
981 984 static int
982 985 apix_get_ipivect(int ipl, int type)
983 986 {
984 987 uchar_t vector;
985 988
986 989 if ((vector = apix_alloc_ipi(ipl)) > 0) {
987 990 if (type != -1)
988 991 apic_resv_vector[ipl] = vector;
989 992 return (vector);
990 993 }
991 994 apic_error |= APIC_ERR_GET_IPIVECT_FAIL;
992 995 return (-1); /* shouldn't happen */
993 996 }
994 997
995 998 static int
996 999 apix_get_clkvect(int ipl)
997 1000 {
998 1001 int vector;
999 1002
1000 1003 if ((vector = apix_get_ipivect(ipl, -1)) == -1)
1001 1004 return (-1);
1002 1005
1003 1006 apic_clkvect = vector - APIC_BASE_VECT;
1004 1007 APIC_VERBOSE(IPI, (CE_CONT, "apix: clock vector = %x\n",
1005 1008 apic_clkvect));
1006 1009 return (vector);
1007 1010 }
1008 1011
1009 1012 static int
1010 1013 apix_post_cpu_start()
1011 1014 {
1012 1015 int cpun;
1013 1016 static int cpus_started = 1;
1014 1017
1015 1018 /* We know this CPU + BSP started successfully. */
1016 1019 cpus_started++;
1017 1020
1018 1021 /*
1019 1022 * On BSP we would have enabled X2APIC, if supported by processor,
1020 1023 * in acpi_probe(), but on AP we do it here.
1021 1024 *
1022 1025 * We enable X2APIC mode only if BSP is running in X2APIC & the
1023 1026 * local APIC mode of the current CPU is MMIO (xAPIC).
1024 1027 */
1025 1028 if (apic_mode == LOCAL_X2APIC && apic_detect_x2apic() &&
1026 1029 apic_local_mode() == LOCAL_APIC) {
1027 1030 apic_enable_x2apic();
1028 1031 }
1029 1032
1030 1033 /*
1031 1034 * Switch back to x2apic IPI sending method for performance when target
1032 1035 * CPU has entered x2apic mode.
1033 1036 */
1034 1037 if (apic_mode == LOCAL_X2APIC) {
1035 1038 apic_switch_ipi_callback(B_FALSE);
1036 1039 }
1037 1040
1038 1041 splx(ipltospl(LOCK_LEVEL));
1039 1042 apix_init_intr();
1040 1043
1041 1044 /*
1042 1045 * since some systems don't enable the internal cache on the non-boot
1043 1046 * cpus, so we have to enable them here
1044 1047 */
1045 1048 setcr0(getcr0() & ~(CR0_CD | CR0_NW));
1046 1049
1047 1050 #ifdef DEBUG
1048 1051 APIC_AV_PENDING_SET();
1049 1052 #else
1050 1053 if (apic_mode == LOCAL_APIC)
1051 1054 APIC_AV_PENDING_SET();
1052 1055 #endif /* DEBUG */
1053 1056
1054 1057 /*
1055 1058 * We may be booting, or resuming from suspend; aci_status will
1056 1059 * be APIC_CPU_INTR_ENABLE if coming from suspend, so we add the
1057 1060 * APIC_CPU_ONLINE flag here rather than setting aci_status completely.
1058 1061 */
1059 1062 cpun = psm_get_cpu_id();
1060 1063 apic_cpus[cpun].aci_status |= APIC_CPU_ONLINE;
1061 1064
1062 1065 apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init);
1063 1066
1064 1067 return (PSM_SUCCESS);
1065 1068 }
1066 1069
1067 1070 /*
1068 1071 * If this module needs a periodic handler for the interrupt distribution, it
1069 1072 * can be added here. The argument to the periodic handler is not currently
1070 1073 * used, but is reserved for future.
1071 1074 */
1072 1075 static void
1073 1076 apix_post_cyclic_setup(void *arg)
1074 1077 {
1075 1078 UNREFERENCED_1PARAMETER(arg);
1076 1079
1077 1080 cyc_handler_t cyh;
1078 1081 cyc_time_t cyt;
1079 1082
1080 1083 /* cpu_lock is held */
1081 1084 /* set up a periodic handler for intr redistribution */
1082 1085
1083 1086 /*
1084 1087 * In peridoc mode intr redistribution processing is done in
1085 1088 * apic_intr_enter during clk intr processing
1086 1089 */
1087 1090 if (!apic_oneshot)
1088 1091 return;
1089 1092
1090 1093 /*
1091 1094 * Register a periodical handler for the redistribution processing.
1092 1095 * Though we would generally prefer to use the DDI interface for
1093 1096 * periodic handler invocation, ddi_periodic_add(9F), we are
1094 1097 * unfortunately already holding cpu_lock, which ddi_periodic_add will
1095 1098 * attempt to take for us. Thus, we add our own cyclic directly:
1096 1099 */
1097 1100 cyh.cyh_func = (void (*)(void *))apix_redistribute_compute;
1098 1101 cyh.cyh_arg = NULL;
1099 1102 cyh.cyh_level = CY_LOW_LEVEL;
1100 1103
1101 1104 cyt.cyt_when = 0;
1102 1105 cyt.cyt_interval = apic_redistribute_sample_interval;
1103 1106
1104 1107 apic_cyclic_id = cyclic_add(&cyh, &cyt);
1105 1108 }
1106 1109
1107 1110 /*
1108 1111 * Called the first time we enable x2apic mode on this cpu.
1109 1112 * Update some of the function pointers to use x2apic routines.
1110 1113 */
1111 1114 void
1112 1115 x2apic_update_psm()
1113 1116 {
1114 1117 struct psm_ops *pops = &apix_ops;
1115 1118
1116 1119 ASSERT(pops != NULL);
1117 1120
1118 1121 /*
1119 1122 * The pcplusmp module's version of x2apic_update_psm makes additional
1120 1123 * changes that we do not have to make here. It needs to make those
1121 1124 * changes because pcplusmp relies on the TPR register and the means of
1122 1125 * addressing that changes when using the local apic versus the x2apic.
1123 1126 * It's also worth noting that the apix driver specific function end up
1124 1127 * being apix_foo as opposed to apic_foo and x2apic_foo.
1125 1128 */
1126 1129 pops->psm_send_ipi = x2apic_send_ipi;
1127 1130
1128 1131 send_dirintf = pops->psm_send_ipi;
1129 1132
1130 1133 apic_mode = LOCAL_X2APIC;
1131 1134 apic_change_ops();
1132 1135 }
1133 1136
1134 1137 /*
1135 1138 * This function provides external interface to the nexus for all
1136 1139 * functionalities related to the new DDI interrupt framework.
1137 1140 *
1138 1141 * Input:
1139 1142 * dip - pointer to the dev_info structure of the requested device
1140 1143 * hdlp - pointer to the internal interrupt handle structure for the
1141 1144 * requested interrupt
1142 1145 * intr_op - opcode for this call
1143 1146 * result - pointer to the integer that will hold the result to be
1144 1147 * passed back if return value is PSM_SUCCESS
1145 1148 *
1146 1149 * Output:
1147 1150 * return value is either PSM_SUCCESS or PSM_FAILURE
1148 1151 */
1149 1152 static int
1150 1153 apix_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
1151 1154 psm_intr_op_t intr_op, int *result)
1152 1155 {
1153 1156 int cap;
1154 1157 apix_vector_t *vecp, *newvecp;
1155 1158 struct intrspec *ispec, intr_spec;
1156 1159 processorid_t target;
1157 1160
1158 1161 ispec = &intr_spec;
1159 1162 ispec->intrspec_pri = hdlp->ih_pri;
1160 1163 ispec->intrspec_vec = hdlp->ih_inum;
1161 1164 ispec->intrspec_func = hdlp->ih_cb_func;
1162 1165
1163 1166 switch (intr_op) {
1164 1167 case PSM_INTR_OP_ALLOC_VECTORS:
1165 1168 switch (hdlp->ih_type) {
1166 1169 case DDI_INTR_TYPE_MSI:
1167 1170 /* allocate MSI vectors */
1168 1171 *result = apix_alloc_msi(dip, hdlp->ih_inum,
1169 1172 hdlp->ih_scratch1,
1170 1173 (int)(uintptr_t)hdlp->ih_scratch2);
1171 1174 break;
1172 1175 case DDI_INTR_TYPE_MSIX:
1173 1176 /* allocate MSI-X vectors */
1174 1177 *result = apix_alloc_msix(dip, hdlp->ih_inum,
1175 1178 hdlp->ih_scratch1,
1176 1179 (int)(uintptr_t)hdlp->ih_scratch2);
1177 1180 break;
1178 1181 case DDI_INTR_TYPE_FIXED:
1179 1182 /* allocate or share vector for fixed */
1180 1183 if ((ihdl_plat_t *)hdlp->ih_private == NULL) {
1181 1184 return (PSM_FAILURE);
1182 1185 }
1183 1186 ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
1184 1187 *result = apix_intx_alloc_vector(dip, hdlp->ih_inum,
1185 1188 ispec);
1186 1189 break;
1187 1190 default:
1188 1191 return (PSM_FAILURE);
1189 1192 }
1190 1193 break;
1191 1194 case PSM_INTR_OP_FREE_VECTORS:
1192 1195 apix_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1,
1193 1196 hdlp->ih_type);
1194 1197 break;
1195 1198 case PSM_INTR_OP_XLATE_VECTOR:
1196 1199 /*
1197 1200 * Vectors are allocated by ALLOC and freed by FREE.
1198 1201 * XLATE finds and returns APIX_VIRTVEC_VECTOR(cpu, vector).
1199 1202 */
1200 1203 *result = APIX_INVALID_VECT;
1201 1204 vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1202 1205 if (vecp != NULL) {
1203 1206 *result = APIX_VIRTVECTOR(vecp->v_cpuid,
1204 1207 vecp->v_vector);
1205 1208 break;
1206 1209 }
1207 1210
1208 1211 /*
1209 1212 * No vector to device mapping exists. If this is FIXED type
1210 1213 * then check if this IRQ is already mapped for another device
1211 1214 * then return the vector number for it (i.e. shared IRQ case).
1212 1215 * Otherwise, return PSM_FAILURE.
1213 1216 */
1214 1217 if (hdlp->ih_type == DDI_INTR_TYPE_FIXED) {
1215 1218 vecp = apix_intx_xlate_vector(dip, hdlp->ih_inum,
1216 1219 ispec);
1217 1220 *result = (vecp == NULL) ? APIX_INVALID_VECT :
1218 1221 APIX_VIRTVECTOR(vecp->v_cpuid, vecp->v_vector);
1219 1222 }
1220 1223 if (*result == APIX_INVALID_VECT)
1221 1224 return (PSM_FAILURE);
1222 1225 break;
1223 1226 case PSM_INTR_OP_GET_PENDING:
1224 1227 vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1225 1228 if (vecp == NULL)
1226 1229 return (PSM_FAILURE);
1227 1230
1228 1231 *result = apix_get_pending(vecp);
1229 1232 break;
1230 1233 case PSM_INTR_OP_CLEAR_MASK:
1231 1234 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1232 1235 return (PSM_FAILURE);
1233 1236
1234 1237 vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1235 1238 if (vecp == NULL)
1236 1239 return (PSM_FAILURE);
1237 1240
1238 1241 apix_intx_clear_mask(vecp->v_inum);
1239 1242 break;
1240 1243 case PSM_INTR_OP_SET_MASK:
1241 1244 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1242 1245 return (PSM_FAILURE);
1243 1246
1244 1247 vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1245 1248 if (vecp == NULL)
1246 1249 return (PSM_FAILURE);
1247 1250
1248 1251 apix_intx_set_mask(vecp->v_inum);
1249 1252 break;
1250 1253 case PSM_INTR_OP_GET_SHARED:
1251 1254 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1252 1255 return (PSM_FAILURE);
1253 1256
1254 1257 vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1255 1258 if (vecp == NULL)
1256 1259 return (PSM_FAILURE);
1257 1260
1258 1261 *result = apix_intx_get_shared(vecp->v_inum);
1259 1262 break;
1260 1263 case PSM_INTR_OP_SET_PRI:
1261 1264 /*
1262 1265 * Called prior to adding the interrupt handler or when
1263 1266 * an interrupt handler is unassigned.
1264 1267 */
1265 1268 if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
1266 1269 return (PSM_SUCCESS);
1267 1270
1268 1271 if (apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type) == NULL)
1269 1272 return (PSM_FAILURE);
1270 1273
1271 1274 break;
1272 1275 case PSM_INTR_OP_SET_CPU:
1273 1276 case PSM_INTR_OP_GRP_SET_CPU:
1274 1277 /*
1275 1278 * The interrupt handle given here has been allocated
1276 1279 * specifically for this command, and ih_private carries
1277 1280 * a CPU value.
1278 1281 */
1279 1282 *result = EINVAL;
1280 1283 target = (int)(intptr_t)hdlp->ih_private;
1281 1284 if (!apic_cpu_in_range(target)) {
1282 1285 DDI_INTR_IMPLDBG((CE_WARN,
1283 1286 "[grp_]set_cpu: cpu out of range: %d\n", target));
1284 1287 return (PSM_FAILURE);
1285 1288 }
1286 1289
1287 1290 lock_set(&apix_lock);
1288 1291
1289 1292 vecp = apix_get_req_vector(hdlp, hdlp->ih_flags);
1290 1293 if (!IS_VECT_ENABLED(vecp)) {
1291 1294 DDI_INTR_IMPLDBG((CE_WARN,
1292 1295 "[grp]_set_cpu: invalid vector 0x%x\n",
1293 1296 hdlp->ih_vector));
1294 1297 lock_clear(&apix_lock);
1295 1298 return (PSM_FAILURE);
1296 1299 }
1297 1300
1298 1301 *result = 0;
1299 1302
1300 1303 if (intr_op == PSM_INTR_OP_SET_CPU)
1301 1304 newvecp = apix_set_cpu(vecp, target, result);
1302 1305 else
1303 1306 newvecp = apix_grp_set_cpu(vecp, target, result);
1304 1307
1305 1308 lock_clear(&apix_lock);
1306 1309
1307 1310 if (newvecp == NULL) {
1308 1311 *result = EIO;
1309 1312 return (PSM_FAILURE);
1310 1313 }
1311 1314 newvecp->v_bound_cpuid = target;
1312 1315 hdlp->ih_vector = APIX_VIRTVECTOR(newvecp->v_cpuid,
1313 1316 newvecp->v_vector);
1314 1317 break;
1315 1318
1316 1319 case PSM_INTR_OP_GET_INTR:
1317 1320 /*
1318 1321 * The interrupt handle given here has been allocated
1319 1322 * specifically for this command, and ih_private carries
1320 1323 * a pointer to a apic_get_intr_t.
1321 1324 */
1322 1325 if (apix_get_intr_info(hdlp, hdlp->ih_private) != PSM_SUCCESS)
1323 1326 return (PSM_FAILURE);
1324 1327 break;
1325 1328
1326 1329 case PSM_INTR_OP_CHECK_MSI:
1327 1330 /*
1328 1331 * Check MSI/X is supported or not at APIC level and
1329 1332 * masked off the MSI/X bits in hdlp->ih_type if not
1330 1333 * supported before return. If MSI/X is supported,
1331 1334 * leave the ih_type unchanged and return.
1332 1335 *
1333 1336 * hdlp->ih_type passed in from the nexus has all the
1334 1337 * interrupt types supported by the device.
1335 1338 */
1336 1339 if (apic_support_msi == 0) { /* uninitialized */
1337 1340 /*
1338 1341 * if apic_support_msi is not set, call
1339 1342 * apic_check_msi_support() to check whether msi
1340 1343 * is supported first
1341 1344 */
1342 1345 if (apic_check_msi_support() == PSM_SUCCESS)
1343 1346 apic_support_msi = 1; /* supported */
1344 1347 else
1345 1348 apic_support_msi = -1; /* not-supported */
1346 1349 }
1347 1350 if (apic_support_msi == 1) {
1348 1351 if (apic_msix_enable)
1349 1352 *result = hdlp->ih_type;
1350 1353 else
1351 1354 *result = hdlp->ih_type & ~DDI_INTR_TYPE_MSIX;
1352 1355 } else
1353 1356 *result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
1354 1357 DDI_INTR_TYPE_MSIX);
1355 1358 break;
1356 1359 case PSM_INTR_OP_GET_CAP:
1357 1360 cap = DDI_INTR_FLAG_PENDING;
1358 1361 if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
1359 1362 cap |= DDI_INTR_FLAG_MASKABLE;
1360 1363 *result = cap;
1361 1364 break;
1362 1365 case PSM_INTR_OP_APIC_TYPE:
1363 1366 ((apic_get_type_t *)(hdlp->ih_private))->avgi_type =
1364 1367 apix_get_apic_type();
1365 1368 ((apic_get_type_t *)(hdlp->ih_private))->avgi_num_intr =
1366 1369 APIX_IPI_MIN;
1367 1370 ((apic_get_type_t *)(hdlp->ih_private))->avgi_num_cpu =
1368 1371 apic_nproc;
1369 1372 hdlp->ih_ver = apic_get_apic_version();
1370 1373 break;
1371 1374 case PSM_INTR_OP_SET_CAP:
1372 1375 default:
1373 1376 return (PSM_FAILURE);
1374 1377 }
1375 1378
1376 1379 return (PSM_SUCCESS);
1377 1380 }
1378 1381
1379 1382 static void
1380 1383 apix_cleanup_busy(void)
1381 1384 {
1382 1385 int i, j;
1383 1386 apix_vector_t *vecp;
1384 1387
1385 1388 for (i = 0; i < apic_nproc; i++) {
1386 1389 if (!apic_cpu_in_range(i))
1387 1390 continue;
1388 1391 apic_cpus[i].aci_busy = 0;
1389 1392 for (j = APIX_AVINTR_MIN; j < APIX_AVINTR_MAX; j++) {
1390 1393 if ((vecp = xv_vector(i, j)) != NULL)
1391 1394 vecp->v_busy = 0;
1392 1395 }
1393 1396 }
1394 1397 }
1395 1398
1396 1399 static void
1397 1400 apix_redistribute_compute(void)
1398 1401 {
1399 1402 int i, j, max_busy;
1400 1403
1401 1404 if (!apic_enable_dynamic_migration)
1402 1405 return;
1403 1406
1404 1407 if (++apic_nticks == apic_sample_factor_redistribution) {
1405 1408 /*
1406 1409 * Time to call apic_intr_redistribute().
1407 1410 * reset apic_nticks. This will cause max_busy
1408 1411 * to be calculated below and if it is more than
1409 1412 * apic_int_busy, we will do the whole thing
1410 1413 */
1411 1414 apic_nticks = 0;
1412 1415 }
1413 1416 max_busy = 0;
1414 1417 for (i = 0; i < apic_nproc; i++) {
1415 1418 if (!apic_cpu_in_range(i))
1416 1419 continue;
1417 1420 /*
1418 1421 * Check if curipl is non zero & if ISR is in
1419 1422 * progress
1420 1423 */
1421 1424 if (((j = apic_cpus[i].aci_curipl) != 0) &&
1422 1425 (apic_cpus[i].aci_ISR_in_progress & (1 << j))) {
1423 1426
1424 1427 int vect;
1425 1428 apic_cpus[i].aci_busy++;
1426 1429 vect = apic_cpus[i].aci_current[j];
1427 1430 apixs[i]->x_vectbl[vect]->v_busy++;
1428 1431 }
1429 1432
1430 1433 if (!apic_nticks &&
1431 1434 (apic_cpus[i].aci_busy > max_busy))
1432 1435 max_busy = apic_cpus[i].aci_busy;
1433 1436 }
1434 1437 if (!apic_nticks) {
1435 1438 if (max_busy > apic_int_busy_mark) {
1436 1439 /*
1437 1440 * We could make the following check be
1438 1441 * skipped > 1 in which case, we get a
1439 1442 * redistribution at half the busy mark (due to
1440 1443 * double interval). Need to be able to collect
1441 1444 * more empirical data to decide if that is a
1442 1445 * good strategy. Punt for now.
1443 1446 */
1444 1447 apix_cleanup_busy();
1445 1448 apic_skipped_redistribute = 0;
1446 1449 } else
1447 1450 apic_skipped_redistribute++;
1448 1451 }
1449 1452 }
1450 1453
1451 1454 /*
1452 1455 * intr_ops() service routines
1453 1456 */
1454 1457
1455 1458 static int
1456 1459 apix_get_pending(apix_vector_t *vecp)
1457 1460 {
1458 1461 int bit, index, irr, pending;
1459 1462
1460 1463 /* need to get on the bound cpu */
1461 1464 mutex_enter(&cpu_lock);
1462 1465 affinity_set(vecp->v_cpuid);
1463 1466
1464 1467 index = vecp->v_vector / 32;
1465 1468 bit = vecp->v_vector % 32;
1466 1469 irr = apic_reg_ops->apic_read(APIC_IRR_REG + index);
1467 1470
1468 1471 affinity_clear();
1469 1472 mutex_exit(&cpu_lock);
1470 1473
1471 1474 pending = (irr & (1 << bit)) ? 1 : 0;
1472 1475 if (!pending && vecp->v_type == APIX_TYPE_FIXED)
1473 1476 pending = apix_intx_get_pending(vecp->v_inum);
1474 1477
1475 1478 return (pending);
1476 1479 }
1477 1480
1478 1481 static apix_vector_t *
1479 1482 apix_get_req_vector(ddi_intr_handle_impl_t *hdlp, ushort_t flags)
1480 1483 {
1481 1484 apix_vector_t *vecp;
1482 1485 processorid_t cpuid;
1483 1486 int32_t virt_vec = 0;
1484 1487
1485 1488 switch (flags & PSMGI_INTRBY_FLAGS) {
1486 1489 case PSMGI_INTRBY_IRQ:
1487 1490 return (apix_intx_get_vector(hdlp->ih_vector));
1488 1491 case PSMGI_INTRBY_VEC:
1489 1492 virt_vec = (virt_vec == 0) ? hdlp->ih_vector : virt_vec;
1490 1493
1491 1494 cpuid = APIX_VIRTVEC_CPU(virt_vec);
1492 1495 if (!apic_cpu_in_range(cpuid))
1493 1496 return (NULL);
1494 1497
1495 1498 vecp = xv_vector(cpuid, APIX_VIRTVEC_VECTOR(virt_vec));
1496 1499 break;
1497 1500 case PSMGI_INTRBY_DEFAULT:
1498 1501 vecp = apix_get_dev_map(hdlp->ih_dip, hdlp->ih_inum,
1499 1502 hdlp->ih_type);
1500 1503 break;
1501 1504 default:
1502 1505 return (NULL);
1503 1506 }
1504 1507
1505 1508 return (vecp);
1506 1509 }
1507 1510
1508 1511 static int
1509 1512 apix_get_intr_info(ddi_intr_handle_impl_t *hdlp,
1510 1513 apic_get_intr_t *intr_params_p)
1511 1514 {
1512 1515 apix_vector_t *vecp;
1513 1516 struct autovec *av_dev;
1514 1517 int i;
1515 1518
1516 1519 vecp = apix_get_req_vector(hdlp, intr_params_p->avgi_req_flags);
1517 1520 if (IS_VECT_FREE(vecp)) {
1518 1521 intr_params_p->avgi_num_devs = 0;
1519 1522 intr_params_p->avgi_cpu_id = 0;
1520 1523 intr_params_p->avgi_req_flags = 0;
1521 1524 return (PSM_SUCCESS);
1522 1525 }
1523 1526
1524 1527 if (intr_params_p->avgi_req_flags & PSMGI_REQ_CPUID) {
1525 1528 intr_params_p->avgi_cpu_id = vecp->v_cpuid;
1526 1529
1527 1530 /* Return user bound info for intrd. */
1528 1531 if (intr_params_p->avgi_cpu_id & IRQ_USER_BOUND) {
1529 1532 intr_params_p->avgi_cpu_id &= ~IRQ_USER_BOUND;
1530 1533 intr_params_p->avgi_cpu_id |= PSMGI_CPU_USER_BOUND;
1531 1534 }
1532 1535 }
1533 1536
1534 1537 if (intr_params_p->avgi_req_flags & PSMGI_REQ_VECTOR)
1535 1538 intr_params_p->avgi_vector = vecp->v_vector;
1536 1539
1537 1540 if (intr_params_p->avgi_req_flags &
1538 1541 (PSMGI_REQ_NUM_DEVS | PSMGI_REQ_GET_DEVS))
1539 1542 /* Get number of devices from apic_irq table shared field. */
1540 1543 intr_params_p->avgi_num_devs = vecp->v_share;
1541 1544
1542 1545 if (intr_params_p->avgi_req_flags & PSMGI_REQ_GET_DEVS) {
1543 1546
1544 1547 intr_params_p->avgi_req_flags |= PSMGI_REQ_NUM_DEVS;
1545 1548
1546 1549 /* Some devices have NULL dip. Don't count these. */
1547 1550 if (intr_params_p->avgi_num_devs > 0) {
1548 1551 for (i = 0, av_dev = vecp->v_autovect; av_dev;
1549 1552 av_dev = av_dev->av_link) {
1550 1553 if (av_dev->av_vector && av_dev->av_dip)
1551 1554 i++;
1552 1555 }
1553 1556 intr_params_p->avgi_num_devs =
1554 1557 (uint8_t)MIN(intr_params_p->avgi_num_devs, i);
1555 1558 }
1556 1559
1557 1560 /* There are no viable dips to return. */
1558 1561 if (intr_params_p->avgi_num_devs == 0) {
1559 1562 intr_params_p->avgi_dip_list = NULL;
1560 1563
1561 1564 } else { /* Return list of dips */
1562 1565
1563 1566 /* Allocate space in array for that number of devs. */
1564 1567 intr_params_p->avgi_dip_list = kmem_zalloc(
1565 1568 intr_params_p->avgi_num_devs *
1566 1569 sizeof (dev_info_t *),
1567 1570 KM_NOSLEEP);
1568 1571 if (intr_params_p->avgi_dip_list == NULL) {
1569 1572 DDI_INTR_IMPLDBG((CE_WARN,
1570 1573 "apix_get_vector_intr_info: no memory"));
1571 1574 return (PSM_FAILURE);
1572 1575 }
1573 1576
1574 1577 /*
1575 1578 * Loop through the device list of the autovec table
1576 1579 * filling in the dip array.
1577 1580 *
1578 1581 * Note that the autovect table may have some special
1579 1582 * entries which contain NULL dips. These will be
1580 1583 * ignored.
1581 1584 */
1582 1585 for (i = 0, av_dev = vecp->v_autovect; av_dev;
1583 1586 av_dev = av_dev->av_link) {
1584 1587 if (av_dev->av_vector && av_dev->av_dip)
1585 1588 intr_params_p->avgi_dip_list[i++] =
1586 1589 av_dev->av_dip;
1587 1590 }
1588 1591 }
1589 1592 }
1590 1593
1591 1594 return (PSM_SUCCESS);
1592 1595 }
1593 1596
1594 1597 static char *
1595 1598 apix_get_apic_type(void)
1596 1599 {
1597 1600 return (apix_psm_info.p_mach_idstring);
1598 1601 }
1599 1602
1600 1603 apix_vector_t *
1601 1604 apix_set_cpu(apix_vector_t *vecp, int new_cpu, int *result)
1602 1605 {
1603 1606 apix_vector_t *newp = NULL;
1604 1607 dev_info_t *dip;
1605 1608 int inum, cap_ptr;
1606 1609 ddi_acc_handle_t handle;
1607 1610 ddi_intr_msix_t *msix_p = NULL;
1608 1611 ushort_t msix_ctrl;
1609 1612 uintptr_t off;
1610 1613 uint32_t mask;
1611 1614
1612 1615 ASSERT(LOCK_HELD(&apix_lock));
1613 1616 *result = ENXIO;
1614 1617
1615 1618 /* Fail if this is an MSI intr and is part of a group. */
1616 1619 if (vecp->v_type == APIX_TYPE_MSI) {
1617 1620 if (i_ddi_intr_get_current_nintrs(APIX_GET_DIP(vecp)) > 1)
1618 1621 return (NULL);
1619 1622 else
1620 1623 return (apix_grp_set_cpu(vecp, new_cpu, result));
1621 1624 }
1622 1625
1623 1626 /*
1624 1627 * Mask MSI-X. It's unmasked when MSI-X gets enabled.
1625 1628 */
1626 1629 if (vecp->v_type == APIX_TYPE_MSIX && IS_VECT_ENABLED(vecp)) {
1627 1630 if ((dip = APIX_GET_DIP(vecp)) == NULL)
1628 1631 return (NULL);
1629 1632 inum = vecp->v_devp->dv_inum;
1630 1633
1631 1634 handle = i_ddi_get_pci_config_handle(dip);
1632 1635 cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
1633 1636 msix_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
1634 1637 if ((msix_ctrl & PCI_MSIX_FUNCTION_MASK) == 0) {
1635 1638 /*
1636 1639 * Function is not masked, then mask "inum"th
1637 1640 * entry in the MSI-X table
1638 1641 */
1639 1642 msix_p = i_ddi_get_msix(dip);
1640 1643 off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
1641 1644 PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
1642 1645 mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
1643 1646 ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off,
1644 1647 mask | 1);
1645 1648 }
1646 1649 }
1647 1650
1648 1651 *result = 0;
1649 1652 if ((newp = apix_rebind(vecp, new_cpu, 1)) == NULL)
1650 1653 *result = EIO;
1651 1654
1652 1655 /* Restore mask bit */
1653 1656 if (msix_p != NULL)
1654 1657 ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, mask);
1655 1658
1656 1659 return (newp);
1657 1660 }
1658 1661
1659 1662 /*
1660 1663 * Set cpu for MSIs
1661 1664 */
1662 1665 apix_vector_t *
1663 1666 apix_grp_set_cpu(apix_vector_t *vecp, int new_cpu, int *result)
1664 1667 {
1665 1668 apix_vector_t *newp, *vp;
1666 1669 uint32_t orig_cpu = vecp->v_cpuid;
1667 1670 int orig_vect = vecp->v_vector;
1668 1671 int i, num_vectors, cap_ptr, msi_mask_off;
1669 1672 uint32_t msi_pvm;
1670 1673 ushort_t msi_ctrl;
1671 1674 ddi_acc_handle_t handle;
1672 1675 dev_info_t *dip;
1673 1676
1674 1677 APIC_VERBOSE(INTR, (CE_CONT, "apix_grp_set_cpu: oldcpu: %x, vector: %x,"
1675 1678 " newcpu:%x\n", vecp->v_cpuid, vecp->v_vector, new_cpu));
1676 1679
1677 1680 ASSERT(LOCK_HELD(&apix_lock));
1678 1681
1679 1682 *result = ENXIO;
1680 1683
1681 1684 if (vecp->v_type != APIX_TYPE_MSI) {
1682 1685 DDI_INTR_IMPLDBG((CE_WARN, "set_grp: intr not MSI\n"));
1683 1686 return (NULL);
1684 1687 }
1685 1688
1686 1689 if ((dip = APIX_GET_DIP(vecp)) == NULL)
1687 1690 return (NULL);
1688 1691
1689 1692 num_vectors = i_ddi_intr_get_current_nintrs(dip);
1690 1693 if ((num_vectors < 1) || ((num_vectors - 1) & orig_vect)) {
1691 1694 APIC_VERBOSE(INTR, (CE_WARN,
1692 1695 "set_grp: base vec not part of a grp or not aligned: "
1693 1696 "vec:0x%x, num_vec:0x%x\n", orig_vect, num_vectors));
1694 1697 return (NULL);
1695 1698 }
1696 1699
1697 1700 if (vecp->v_inum != apix_get_min_dev_inum(dip, vecp->v_type))
1698 1701 return (NULL);
1699 1702
1700 1703 *result = EIO;
1701 1704 for (i = 1; i < num_vectors; i++) {
1702 1705 if ((vp = xv_vector(orig_cpu, orig_vect + i)) == NULL)
1703 1706 return (NULL);
1704 1707 #ifdef DEBUG
1705 1708 /*
1706 1709 * Sanity check: CPU and dip is the same for all entries.
1707 1710 * May be called when first msi to be enabled, at this time
1708 1711 * add_avintr() is not called for other msi
1709 1712 */
1710 1713 if ((vp->v_share != 0) &&
1711 1714 ((APIX_GET_DIP(vp) != dip) ||
1712 1715 (vp->v_cpuid != vecp->v_cpuid))) {
1713 1716 APIC_VERBOSE(INTR, (CE_WARN,
1714 1717 "set_grp: cpu or dip for vec 0x%x difft than for "
1715 1718 "vec 0x%x\n", orig_vect, orig_vect + i));
1716 1719 APIC_VERBOSE(INTR, (CE_WARN,
1717 1720 " cpu: %d vs %d, dip: 0x%p vs 0x%p\n", orig_cpu,
1718 1721 vp->v_cpuid, (void *)dip,
1719 1722 (void *)APIX_GET_DIP(vp)));
1720 1723 return (NULL);
1721 1724 }
1722 1725 #endif /* DEBUG */
1723 1726 }
1724 1727
1725 1728 cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
1726 1729 handle = i_ddi_get_pci_config_handle(dip);
1727 1730 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1728 1731
1729 1732 /* MSI Per vector masking is supported. */
1730 1733 if (msi_ctrl & PCI_MSI_PVM_MASK) {
1731 1734 if (msi_ctrl & PCI_MSI_64BIT_MASK)
1732 1735 msi_mask_off = cap_ptr + PCI_MSI_64BIT_MASKBITS;
1733 1736 else
1734 1737 msi_mask_off = cap_ptr + PCI_MSI_32BIT_MASK;
1735 1738 msi_pvm = pci_config_get32(handle, msi_mask_off);
1736 1739 pci_config_put32(handle, msi_mask_off, (uint32_t)-1);
1737 1740 APIC_VERBOSE(INTR, (CE_CONT,
1738 1741 "set_grp: pvm supported. Mask set to 0x%x\n",
1739 1742 pci_config_get32(handle, msi_mask_off)));
1740 1743 }
1741 1744
1742 1745 if ((newp = apix_rebind(vecp, new_cpu, num_vectors)) != NULL)
1743 1746 *result = 0;
1744 1747
1745 1748 /* Reenable vectors if per vector masking is supported. */
1746 1749 if (msi_ctrl & PCI_MSI_PVM_MASK) {
1747 1750 pci_config_put32(handle, msi_mask_off, msi_pvm);
1748 1751 APIC_VERBOSE(INTR, (CE_CONT,
1749 1752 "set_grp: pvm supported. Mask restored to 0x%x\n",
1750 1753 pci_config_get32(handle, msi_mask_off)));
1751 1754 }
1752 1755
1753 1756 return (newp);
1754 1757 }
1755 1758
1756 1759 void
1757 1760 apix_intx_set_vector(int irqno, uint32_t cpuid, uchar_t vector)
1758 1761 {
1759 1762 apic_irq_t *irqp;
1760 1763
1761 1764 mutex_enter(&airq_mutex);
1762 1765 irqp = apic_irq_table[irqno];
1763 1766 irqp->airq_cpu = cpuid;
1764 1767 irqp->airq_vector = vector;
1765 1768 apic_record_rdt_entry(irqp, irqno);
1766 1769 mutex_exit(&airq_mutex);
1767 1770 }
1768 1771
1769 1772 apix_vector_t *
1770 1773 apix_intx_get_vector(int irqno)
1771 1774 {
1772 1775 apic_irq_t *irqp;
1773 1776 uint32_t cpuid;
1774 1777 uchar_t vector;
1775 1778
1776 1779 mutex_enter(&airq_mutex);
1777 1780 irqp = apic_irq_table[irqno & 0xff];
1778 1781 if (IS_IRQ_FREE(irqp) || (irqp->airq_cpu == IRQ_UNINIT)) {
1779 1782 mutex_exit(&airq_mutex);
1780 1783 return (NULL);
1781 1784 }
1782 1785 cpuid = irqp->airq_cpu;
1783 1786 vector = irqp->airq_vector;
1784 1787 mutex_exit(&airq_mutex);
1785 1788
1786 1789 return (xv_vector(cpuid, vector));
1787 1790 }
1788 1791
1789 1792 /*
1790 1793 * Must called with interrupts disabled and apic_ioapic_lock held
1791 1794 */
1792 1795 void
1793 1796 apix_intx_enable(int irqno)
1794 1797 {
1795 1798 uchar_t ioapicindex, intin;
1796 1799 apic_irq_t *irqp = apic_irq_table[irqno];
1797 1800 ioapic_rdt_t irdt;
1798 1801 apic_cpus_info_t *cpu_infop;
1799 1802 apix_vector_t *vecp = xv_vector(irqp->airq_cpu, irqp->airq_vector);
1800 1803
1801 1804 ASSERT(LOCK_HELD(&apic_ioapic_lock) && !IS_IRQ_FREE(irqp));
1802 1805
1803 1806 ioapicindex = irqp->airq_ioapicindex;
1804 1807 intin = irqp->airq_intin_no;
1805 1808 cpu_infop = &apic_cpus[irqp->airq_cpu];
1806 1809
1807 1810 irdt.ir_lo = AV_PDEST | AV_FIXED | irqp->airq_rdt_entry;
1808 1811 irdt.ir_hi = cpu_infop->aci_local_id;
1809 1812
1810 1813 apic_vt_ops->apic_intrmap_alloc_entry(&vecp->v_intrmap_private, NULL,
1811 1814 vecp->v_type, 1, ioapicindex);
1812 1815 apic_vt_ops->apic_intrmap_map_entry(vecp->v_intrmap_private,
1813 1816 (void *)&irdt, vecp->v_type, 1);
1814 1817 apic_vt_ops->apic_intrmap_record_rdt(vecp->v_intrmap_private, &irdt);
1815 1818
1816 1819 /* write RDT entry high dword - destination */
1817 1820 WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapicindex, intin,
1818 1821 irdt.ir_hi);
1819 1822
1820 1823 /* Write the vector, trigger, and polarity portion of the RDT */
1821 1824 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapicindex, intin, irdt.ir_lo);
1822 1825
1823 1826 vecp->v_state = APIX_STATE_ENABLED;
1824 1827
1825 1828 APIC_VERBOSE_IOAPIC((CE_CONT, "apix_intx_enable: ioapic 0x%x"
1826 1829 " intin 0x%x rdt_low 0x%x rdt_high 0x%x\n",
1827 1830 ioapicindex, intin, irdt.ir_lo, irdt.ir_hi));
1828 1831 }
1829 1832
1830 1833 /*
1831 1834 * Must called with interrupts disabled and apic_ioapic_lock held
1832 1835 */
1833 1836 void
1834 1837 apix_intx_disable(int irqno)
1835 1838 {
1836 1839 apic_irq_t *irqp = apic_irq_table[irqno];
1837 1840 int ioapicindex, intin;
1838 1841
1839 1842 ASSERT(LOCK_HELD(&apic_ioapic_lock) && !IS_IRQ_FREE(irqp));
1840 1843 /*
1841 1844 * The assumption here is that this is safe, even for
1842 1845 * systems with IOAPICs that suffer from the hardware
1843 1846 * erratum because all devices have been quiesced before
1844 1847 * they unregister their interrupt handlers. If that
1845 1848 * assumption turns out to be false, this mask operation
1846 1849 * can induce the same erratum result we're trying to
1847 1850 * avoid.
1848 1851 */
1849 1852 ioapicindex = irqp->airq_ioapicindex;
1850 1853 intin = irqp->airq_intin_no;
1851 1854 ioapic_write(ioapicindex, APIC_RDT_CMD + 2 * intin, AV_MASK);
1852 1855
1853 1856 APIC_VERBOSE_IOAPIC((CE_CONT, "apix_intx_disable: ioapic 0x%x"
1854 1857 " intin 0x%x\n", ioapicindex, intin));
1855 1858 }
1856 1859
1857 1860 void
1858 1861 apix_intx_free(int irqno)
1859 1862 {
1860 1863 apic_irq_t *irqp;
1861 1864
1862 1865 mutex_enter(&airq_mutex);
1863 1866 irqp = apic_irq_table[irqno];
1864 1867
1865 1868 if (IS_IRQ_FREE(irqp)) {
1866 1869 mutex_exit(&airq_mutex);
1867 1870 return;
1868 1871 }
1869 1872
1870 1873 irqp->airq_mps_intr_index = FREE_INDEX;
1871 1874 irqp->airq_cpu = IRQ_UNINIT;
1872 1875 irqp->airq_vector = APIX_INVALID_VECT;
1873 1876 mutex_exit(&airq_mutex);
1874 1877 }
1875 1878
#ifdef DEBUG
/*
 * DEBUG-only event counters.  apix_intr_deliver_timeouts is bumped by
 * apix_intx_rebind() when the delivery-status bit is still set after its
 * wait loop.  The two rirr counters presumably track the remote-IRR
 * wait in the same function -- TODO confirm against its remainder.
 */
int apix_intr_deliver_timeouts = 0;
int apix_intr_rirr_timeouts = 0;
int apix_intr_rirr_reset_failure = 0;
#endif
/* NOTE(review): looks like a retry limit for the remote-IRR wait -- confirm. */
int apix_max_reps_irr_pending = 10;

/* Read the low RDT dword of (ioapic, intin) and keep only 'bits'. */
#define	GET_RDT_BITS(ioapic, intin, bits)	\
	(READ_IOAPIC_RDT_ENTRY_LOW_DWORD((ioapic), (intin)) & (bits))
/* Argument handed to delay() between remote-IRR checks (5000 usec). */
#define	APIX_CHECK_IRR_DELAY	drv_usectohz(5000)
1886 1889
1887 1890 int
1888 1891 apix_intx_rebind(int irqno, processorid_t cpuid, uchar_t vector)
1889 1892 {
1890 1893 apic_irq_t *irqp = apic_irq_table[irqno];
1891 1894 ulong_t iflag;
1892 1895 int waited, ioapic_ix, intin_no, level, repeats, rdt_entry, masked;
1893 1896
1894 1897 ASSERT(irqp != NULL);
1895 1898
1896 1899 iflag = intr_clear();
1897 1900 lock_set(&apic_ioapic_lock);
1898 1901
1899 1902 ioapic_ix = irqp->airq_ioapicindex;
1900 1903 intin_no = irqp->airq_intin_no;
1901 1904 level = apic_level_intr[irqno];
1902 1905
1903 1906 /*
1904 1907 * Wait for the delivery status bit to be cleared. This should
1905 1908 * be a very small amount of time.
1906 1909 */
1907 1910 repeats = 0;
1908 1911 do {
1909 1912 repeats++;
1910 1913
1911 1914 for (waited = 0; waited < apic_max_reps_clear_pending;
1912 1915 waited++) {
1913 1916 if (GET_RDT_BITS(ioapic_ix, intin_no, AV_PENDING) == 0)
1914 1917 break;
1915 1918 }
1916 1919 if (!level)
1917 1920 break;
1918 1921
1919 1922 /*
1920 1923 * Mask the RDT entry for level-triggered interrupts.
1921 1924 */
1922 1925 irqp->airq_rdt_entry |= AV_MASK;
1923 1926 rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1924 1927 intin_no);
1925 1928 if ((masked = (rdt_entry & AV_MASK)) == 0) {
1926 1929 /* Mask it */
1927 1930 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix, intin_no,
1928 1931 AV_MASK | rdt_entry);
1929 1932 }
1930 1933
1931 1934 /*
1932 1935 * If there was a race and an interrupt was injected
1933 1936 * just before we masked, check for that case here.
1934 1937 * Then, unmask the RDT entry and try again. If we're
1935 1938 * on our last try, don't unmask (because we want the
1936 1939 * RDT entry to remain masked for the rest of the
1937 1940 * function).
1938 1941 */
1939 1942 rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1940 1943 intin_no);
1941 1944 if ((masked == 0) && ((rdt_entry & AV_PENDING) != 0) &&
1942 1945 (repeats < apic_max_reps_clear_pending)) {
1943 1946 /* Unmask it */
1944 1947 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1945 1948 intin_no, rdt_entry & ~AV_MASK);
1946 1949 irqp->airq_rdt_entry &= ~AV_MASK;
1947 1950 }
1948 1951 } while ((rdt_entry & AV_PENDING) &&
1949 1952 (repeats < apic_max_reps_clear_pending));
1950 1953
1951 1954 #ifdef DEBUG
1952 1955 if (GET_RDT_BITS(ioapic_ix, intin_no, AV_PENDING) != 0)
1953 1956 apix_intr_deliver_timeouts++;
1954 1957 #endif
1955 1958
1956 1959 if (!level || !APIX_IS_MASK_RDT(apix_mul_ioapic_method))
1957 1960 goto done;
1958 1961
1959 1962 /*
1960 1963 * wait for remote IRR to be cleared for level-triggered
1961 1964 * interrupts
1962 1965 */
1963 1966 repeats = 0;
1964 1967 do {
1965 1968 repeats++;
1966 1969
1967 1970 for (waited = 0; waited < apic_max_reps_clear_pending;
1968 1971 waited++) {
1969 1972 if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR)
1970 1973 == 0)
1971 1974 break;
1972 1975 }
1973 1976
1974 1977 if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR) != 0) {
1975 1978 lock_clear(&apic_ioapic_lock);
1976 1979 intr_restore(iflag);
1977 1980
1978 1981 delay(APIX_CHECK_IRR_DELAY);
1979 1982
1980 1983 iflag = intr_clear();
1981 1984 lock_set(&apic_ioapic_lock);
1982 1985 }
1983 1986 } while (repeats < apix_max_reps_irr_pending);
1984 1987
1985 1988 if (repeats >= apix_max_reps_irr_pending) {
1986 1989 #ifdef DEBUG
1987 1990 apix_intr_rirr_timeouts++;
1988 1991 #endif
1989 1992
1990 1993 /*
1991 1994 * If we waited and the Remote IRR bit is still not cleared,
1992 1995 * AND if we've invoked the timeout APIC_REPROGRAM_MAX_TIMEOUTS
1993 1996 * times for this interrupt, try the last-ditch workaround:
1994 1997 */
1995 1998 if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR) != 0) {
1996 1999 /*
1997 2000 * Trying to clear the bit through normal
1998 2001 * channels has failed. So as a last-ditch
1999 2002 * effort, try to set the trigger mode to
2000 2003 * edge, then to level. This has been
2001 2004 * observed to work on many systems.
2002 2005 */
2003 2006 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
2004 2007 intin_no,
2005 2008 READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
2006 2009 intin_no) & ~AV_LEVEL);
2007 2010 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
2008 2011 intin_no,
2009 2012 READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
2010 2013 intin_no) | AV_LEVEL);
2011 2014 }
2012 2015
2013 2016 if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR) != 0) {
2014 2017 #ifdef DEBUG
2015 2018 apix_intr_rirr_reset_failure++;
2016 2019 #endif
2017 2020 lock_clear(&apic_ioapic_lock);
2018 2021 intr_restore(iflag);
2019 2022 prom_printf("apix: Remote IRR still "
2020 2023 "not clear for IOAPIC %d intin %d.\n"
2021 2024 "\tInterrupts to this pin may cease "
2022 2025 "functioning.\n", ioapic_ix, intin_no);
2023 2026 return (1); /* return failure */
2024 2027 }
2025 2028 }
2026 2029
2027 2030 done:
2028 2031 /* change apic_irq_table */
2029 2032 lock_clear(&apic_ioapic_lock);
2030 2033 intr_restore(iflag);
2031 2034 apix_intx_set_vector(irqno, cpuid, vector);
2032 2035 iflag = intr_clear();
2033 2036 lock_set(&apic_ioapic_lock);
2034 2037
2035 2038 /* reprogramme IO-APIC RDT entry */
2036 2039 apix_intx_enable(irqno);
2037 2040
2038 2041 lock_clear(&apic_ioapic_lock);
2039 2042 intr_restore(iflag);
2040 2043
2041 2044 return (0);
2042 2045 }
2043 2046
2044 2047 static int
2045 2048 apix_intx_get_pending(int irqno)
2046 2049 {
2047 2050 apic_irq_t *irqp;
2048 2051 int intin, ioapicindex, pending;
2049 2052 ulong_t iflag;
2050 2053
2051 2054 mutex_enter(&airq_mutex);
2052 2055 irqp = apic_irq_table[irqno];
2053 2056 if (IS_IRQ_FREE(irqp)) {
2054 2057 mutex_exit(&airq_mutex);
2055 2058 return (0);
2056 2059 }
2057 2060
2058 2061 /* check IO-APIC delivery status */
2059 2062 intin = irqp->airq_intin_no;
2060 2063 ioapicindex = irqp->airq_ioapicindex;
2061 2064 mutex_exit(&airq_mutex);
2062 2065
2063 2066 iflag = intr_clear();
2064 2067 lock_set(&apic_ioapic_lock);
2065 2068
2066 2069 pending = (READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapicindex, intin) &
2067 2070 AV_PENDING) ? 1 : 0;
2068 2071
2069 2072 lock_clear(&apic_ioapic_lock);
2070 2073 intr_restore(iflag);
2071 2074
2072 2075 return (pending);
2073 2076 }
2074 2077
2075 2078 /*
2076 2079 * This function will mask the interrupt on the I/O APIC
2077 2080 */
2078 2081 static void
2079 2082 apix_intx_set_mask(int irqno)
2080 2083 {
2081 2084 int intin, ioapixindex, rdt_entry;
2082 2085 ulong_t iflag;
2083 2086 apic_irq_t *irqp;
2084 2087
2085 2088 mutex_enter(&airq_mutex);
2086 2089 irqp = apic_irq_table[irqno];
2087 2090
2088 2091 ASSERT(irqp->airq_mps_intr_index != FREE_INDEX);
2089 2092
2090 2093 intin = irqp->airq_intin_no;
2091 2094 ioapixindex = irqp->airq_ioapicindex;
2092 2095 mutex_exit(&airq_mutex);
2093 2096
2094 2097 iflag = intr_clear();
2095 2098 lock_set(&apic_ioapic_lock);
2096 2099
2097 2100 rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin);
2098 2101
2099 2102 /* clear mask */
2100 2103 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin,
2101 2104 (AV_MASK | rdt_entry));
2102 2105
2103 2106 lock_clear(&apic_ioapic_lock);
2104 2107 intr_restore(iflag);
2105 2108 }
2106 2109
2107 2110 /*
2108 2111 * This function will clear the mask for the interrupt on the I/O APIC
2109 2112 */
2110 2113 static void
2111 2114 apix_intx_clear_mask(int irqno)
2112 2115 {
2113 2116 int intin, ioapixindex, rdt_entry;
2114 2117 ulong_t iflag;
2115 2118 apic_irq_t *irqp;
2116 2119
2117 2120 mutex_enter(&airq_mutex);
2118 2121 irqp = apic_irq_table[irqno];
2119 2122
2120 2123 ASSERT(irqp->airq_mps_intr_index != FREE_INDEX);
2121 2124
2122 2125 intin = irqp->airq_intin_no;
2123 2126 ioapixindex = irqp->airq_ioapicindex;
2124 2127 mutex_exit(&airq_mutex);
2125 2128
2126 2129 iflag = intr_clear();
2127 2130 lock_set(&apic_ioapic_lock);
2128 2131
2129 2132 rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin);
2130 2133
2131 2134 /* clear mask */
2132 2135 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin,
2133 2136 ((~AV_MASK) & rdt_entry));
2134 2137
2135 2138 lock_clear(&apic_ioapic_lock);
2136 2139 intr_restore(iflag);
2137 2140 }
2138 2141
2139 2142 /*
2140 2143 * For level-triggered interrupt, mask the IRQ line. Mask means
2141 2144 * new interrupts will not be delivered. The interrupt already
2142 2145 * accepted by a local APIC is not affected
2143 2146 */
2144 2147 void
2145 2148 apix_level_intr_pre_eoi(int irq)
2146 2149 {
2147 2150 apic_irq_t *irqp = apic_irq_table[irq];
2148 2151 int apic_ix, intin_ix;
2149 2152
2150 2153 if (irqp == NULL)
2151 2154 return;
2152 2155
2153 2156 ASSERT(apic_level_intr[irq] == TRIGGER_MODE_LEVEL);
2154 2157
2155 2158 lock_set(&apic_ioapic_lock);
2156 2159
2157 2160 intin_ix = irqp->airq_intin_no;
2158 2161 apic_ix = irqp->airq_ioapicindex;
2159 2162
2160 2163 if (irqp->airq_cpu != CPU->cpu_id) {
2161 2164 if (!APIX_IS_MASK_RDT(apix_mul_ioapic_method))
2162 2165 ioapic_write_eoi(apic_ix, irqp->airq_vector);
2163 2166 lock_clear(&apic_ioapic_lock);
2164 2167 return;
2165 2168 }
2166 2169
2167 2170 if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_IOXAPIC) {
2168 2171 /*
2169 2172 * This is a IOxAPIC and there is EOI register:
2170 2173 * Change the vector to reserved unused vector, so that
2171 2174 * the EOI from Local APIC won't clear the Remote IRR for
2172 2175 * this level trigger interrupt. Instead, we'll manually
2173 2176 * clear it in apix_post_hardint() after ISR handling.
2174 2177 */
2175 2178 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_ix,
2176 2179 (irqp->airq_rdt_entry & (~0xff)) | APIX_RESV_VECTOR);
2177 2180 } else {
2178 2181 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_ix,
2179 2182 AV_MASK | irqp->airq_rdt_entry);
2180 2183 }
2181 2184
2182 2185 lock_clear(&apic_ioapic_lock);
2183 2186 }
2184 2187
2185 2188 /*
2186 2189 * For level-triggered interrupt, unmask the IRQ line
2187 2190 * or restore the original vector number.
2188 2191 */
2189 2192 void
2190 2193 apix_level_intr_post_dispatch(int irq)
2191 2194 {
2192 2195 apic_irq_t *irqp = apic_irq_table[irq];
2193 2196 int apic_ix, intin_ix;
2194 2197
2195 2198 if (irqp == NULL)
2196 2199 return;
2197 2200
2198 2201 lock_set(&apic_ioapic_lock);
2199 2202
2200 2203 intin_ix = irqp->airq_intin_no;
2201 2204 apic_ix = irqp->airq_ioapicindex;
2202 2205
2203 2206 if (APIX_IS_DIRECTED_EOI(apix_mul_ioapic_method)) {
2204 2207 /*
2205 2208 * Already sent EOI back to Local APIC.
2206 2209 * Send EOI to IO-APIC
2207 2210 */
2208 2211 ioapic_write_eoi(apic_ix, irqp->airq_vector);
2209 2212 } else {
2210 2213 /* clear the mask or restore the vector */
2211 2214 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_ix,
2212 2215 irqp->airq_rdt_entry);
2213 2216
2214 2217 /* send EOI to IOxAPIC */
2215 2218 if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_IOXAPIC)
2216 2219 ioapic_write_eoi(apic_ix, irqp->airq_vector);
2217 2220 }
2218 2221
2219 2222 lock_clear(&apic_ioapic_lock);
2220 2223 }
2221 2224
2222 2225 static int
2223 2226 apix_intx_get_shared(int irqno)
2224 2227 {
2225 2228 apic_irq_t *irqp;
2226 2229 int share;
2227 2230
2228 2231 mutex_enter(&airq_mutex);
2229 2232 irqp = apic_irq_table[irqno];
2230 2233 if (IS_IRQ_FREE(irqp) || (irqp->airq_cpu == IRQ_UNINIT)) {
2231 2234 mutex_exit(&airq_mutex);
2232 2235 return (0);
2233 2236 }
2234 2237 share = irqp->airq_share;
2235 2238 mutex_exit(&airq_mutex);
2236 2239
2237 2240 return (share);
2238 2241 }
2239 2242
2240 2243 static void
2241 2244 apix_intx_set_shared(int irqno, int delta)
2242 2245 {
2243 2246 apic_irq_t *irqp;
2244 2247
2245 2248 mutex_enter(&airq_mutex);
2246 2249 irqp = apic_irq_table[irqno];
2247 2250 if (IS_IRQ_FREE(irqp)) {
2248 2251 mutex_exit(&airq_mutex);
2249 2252 return;
2250 2253 }
2251 2254 irqp->airq_share += delta;
2252 2255 mutex_exit(&airq_mutex);
2253 2256 }
2254 2257
2255 2258 /*
2256 2259 * Setup IRQ table. Return IRQ no or -1 on failure
2257 2260 */
2258 2261 static int
2259 2262 apix_intx_setup(dev_info_t *dip, int inum, int irqno,
2260 2263 struct apic_io_intr *intrp, struct intrspec *ispec, iflag_t *iflagp)
2261 2264 {
2262 2265 int origirq = ispec->intrspec_vec;
2263 2266 int newirq;
2264 2267 short intr_index;
2265 2268 uchar_t ipin, ioapic, ioapicindex;
2266 2269 apic_irq_t *irqp;
2267 2270
2268 2271 UNREFERENCED_1PARAMETER(inum);
2269 2272
2270 2273 if (intrp != NULL) {
2271 2274 intr_index = (short)(intrp - apic_io_intrp);
2272 2275 ioapic = intrp->intr_destid;
2273 2276 ipin = intrp->intr_destintin;
2274 2277
2275 2278 /* Find ioapicindex. If destid was ALL, we will exit with 0. */
2276 2279 for (ioapicindex = apic_io_max - 1; ioapicindex; ioapicindex--)
2277 2280 if (apic_io_id[ioapicindex] == ioapic)
2278 2281 break;
2279 2282 ASSERT((ioapic == apic_io_id[ioapicindex]) ||
2280 2283 (ioapic == INTR_ALL_APIC));
2281 2284
2282 2285 /* check whether this intin# has been used by another irqno */
2283 2286 if ((newirq = apic_find_intin(ioapicindex, ipin)) != -1)
2284 2287 return (newirq);
2285 2288
2286 2289 } else if (iflagp != NULL) { /* ACPI */
2287 2290 intr_index = ACPI_INDEX;
2288 2291 ioapicindex = acpi_find_ioapic(irqno);
2289 2292 ASSERT(ioapicindex != 0xFF);
2290 2293 ioapic = apic_io_id[ioapicindex];
2291 2294 ipin = irqno - apic_io_vectbase[ioapicindex];
2292 2295
2293 2296 if (apic_irq_table[irqno] &&
2294 2297 apic_irq_table[irqno]->airq_mps_intr_index == ACPI_INDEX) {
2295 2298 ASSERT(apic_irq_table[irqno]->airq_intin_no == ipin &&
2296 2299 apic_irq_table[irqno]->airq_ioapicindex ==
2297 2300 ioapicindex);
2298 2301 return (irqno);
2299 2302 }
2300 2303
2301 2304 } else { /* default configuration */
2302 2305 intr_index = DEFAULT_INDEX;
2303 2306 ioapicindex = 0;
2304 2307 ioapic = apic_io_id[ioapicindex];
2305 2308 ipin = (uchar_t)irqno;
2306 2309 }
2307 2310
2308 2311 /* allocate a new IRQ no */
2309 2312 if ((irqp = apic_irq_table[irqno]) == NULL) {
2310 2313 irqp = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP);
2311 2314 apic_irq_table[irqno] = irqp;
2312 2315 } else {
2313 2316 if (irqp->airq_mps_intr_index != FREE_INDEX) {
2314 2317 newirq = apic_allocate_irq(apic_first_avail_irq);
2315 2318 if (newirq == -1) {
2316 2319 return (-1);
2317 2320 }
2318 2321 irqno = newirq;
2319 2322 irqp = apic_irq_table[irqno];
2320 2323 ASSERT(irqp != NULL);
2321 2324 }
2322 2325 }
2323 2326 apic_max_device_irq = max(irqno, apic_max_device_irq);
2324 2327 apic_min_device_irq = min(irqno, apic_min_device_irq);
2325 2328
2326 2329 irqp->airq_mps_intr_index = intr_index;
2327 2330 irqp->airq_ioapicindex = ioapicindex;
2328 2331 irqp->airq_intin_no = ipin;
2329 2332 irqp->airq_dip = dip;
2330 2333 irqp->airq_origirq = (uchar_t)origirq;
2331 2334 if (iflagp != NULL)
2332 2335 irqp->airq_iflag = *iflagp;
2333 2336 irqp->airq_cpu = IRQ_UNINIT;
2334 2337 irqp->airq_vector = 0;
2335 2338
2336 2339 return (irqno);
2337 2340 }
2338 2341
2339 2342 /*
2340 2343 * Setup IRQ table for non-pci devices. Return IRQ no or -1 on error
2341 2344 */
2342 2345 static int
2343 2346 apix_intx_setup_nonpci(dev_info_t *dip, int inum, int bustype,
2344 2347 struct intrspec *ispec)
2345 2348 {
2346 2349 int irqno = ispec->intrspec_vec;
2347 2350 int newirq, i;
2348 2351 iflag_t intr_flag;
2349 2352 ACPI_SUBTABLE_HEADER *hp;
2350 2353 ACPI_MADT_INTERRUPT_OVERRIDE *isop;
2351 2354 struct apic_io_intr *intrp;
2352 2355
2353 2356 if (!apic_enable_acpi || apic_use_acpi_madt_only) {
2354 2357 int busid;
2355 2358
2356 2359 if (bustype == 0)
2357 2360 bustype = eisa_level_intr_mask ? BUS_EISA : BUS_ISA;
2358 2361
2359 2362 /* loop checking BUS_ISA/BUS_EISA */
2360 2363 for (i = 0; i < 2; i++) {
2361 2364 if (((busid = apic_find_bus_id(bustype)) != -1) &&
2362 2365 ((intrp = apic_find_io_intr_w_busid(irqno, busid))
2363 2366 != NULL)) {
2364 2367 return (apix_intx_setup(dip, inum, irqno,
2365 2368 intrp, ispec, NULL));
2366 2369 }
2367 2370 bustype = (bustype == BUS_EISA) ? BUS_ISA : BUS_EISA;
2368 2371 }
2369 2372
2370 2373 /* fall back to default configuration */
2371 2374 return (-1);
2372 2375 }
2373 2376
2374 2377 /* search iso entries first */
2375 2378 if (acpi_iso_cnt != 0) {
2376 2379 hp = (ACPI_SUBTABLE_HEADER *)acpi_isop;
2377 2380 i = 0;
2378 2381 while (i < acpi_iso_cnt) {
2379 2382 if (hp->Type == ACPI_MADT_TYPE_INTERRUPT_OVERRIDE) {
2380 2383 isop = (ACPI_MADT_INTERRUPT_OVERRIDE *) hp;
2381 2384 if (isop->Bus == 0 &&
2382 2385 isop->SourceIrq == irqno) {
2383 2386 newirq = isop->GlobalIrq;
2384 2387 intr_flag.intr_po = isop->IntiFlags &
2385 2388 ACPI_MADT_POLARITY_MASK;
2386 2389 intr_flag.intr_el = (isop->IntiFlags &
2387 2390 ACPI_MADT_TRIGGER_MASK) >> 2;
2388 2391 intr_flag.bustype = BUS_ISA;
2389 2392
2390 2393 return (apix_intx_setup(dip, inum,
2391 2394 newirq, NULL, ispec, &intr_flag));
2392 2395 }
2393 2396 i++;
2394 2397 }
2395 2398 hp = (ACPI_SUBTABLE_HEADER *)(((char *)hp) +
2396 2399 hp->Length);
2397 2400 }
2398 2401 }
2399 2402 intr_flag.intr_po = INTR_PO_ACTIVE_HIGH;
2400 2403 intr_flag.intr_el = INTR_EL_EDGE;
2401 2404 intr_flag.bustype = BUS_ISA;
2402 2405 return (apix_intx_setup(dip, inum, irqno, NULL, ispec, &intr_flag));
2403 2406 }
2404 2407
2405 2408
2406 2409 /*
2407 2410 * Setup IRQ table for pci devices. Return IRQ no or -1 on error
2408 2411 */
2409 2412 static int
2410 2413 apix_intx_setup_pci(dev_info_t *dip, int inum, int bustype,
2411 2414 struct intrspec *ispec)
2412 2415 {
2413 2416 int busid, devid, pci_irq;
2414 2417 ddi_acc_handle_t cfg_handle;
2415 2418 uchar_t ipin;
2416 2419 iflag_t intr_flag;
2417 2420 struct apic_io_intr *intrp;
2418 2421
2419 2422 if (acpica_get_bdf(dip, &busid, &devid, NULL) != 0)
2420 2423 return (-1);
2421 2424
2422 2425 if (busid == 0 && apic_pci_bus_total == 1)
2423 2426 busid = (int)apic_single_pci_busid;
2424 2427
2425 2428 if (pci_config_setup(dip, &cfg_handle) != DDI_SUCCESS)
2426 2429 return (-1);
2427 2430 ipin = pci_config_get8(cfg_handle, PCI_CONF_IPIN) - PCI_INTA;
2428 2431 pci_config_teardown(&cfg_handle);
2429 2432
2430 2433 if (apic_enable_acpi && !apic_use_acpi_madt_only) { /* ACPI */
2431 2434 if (apic_acpi_translate_pci_irq(dip, busid, devid,
2432 2435 ipin, &pci_irq, &intr_flag) != ACPI_PSM_SUCCESS)
2433 2436 return (-1);
2434 2437
2435 2438 intr_flag.bustype = (uchar_t)bustype;
2436 2439 return (apix_intx_setup(dip, inum, pci_irq, NULL, ispec,
2437 2440 &intr_flag));
2438 2441 }
2439 2442
2440 2443 /* MP configuration table */
2441 2444 pci_irq = ((devid & 0x1f) << 2) | (ipin & 0x3);
2442 2445 if ((intrp = apic_find_io_intr_w_busid(pci_irq, busid)) == NULL) {
2443 2446 pci_irq = apic_handle_pci_pci_bridge(dip, devid, ipin, &intrp);
2444 2447 if (pci_irq == -1)
2445 2448 return (-1);
2446 2449 }
2447 2450
2448 2451 return (apix_intx_setup(dip, inum, pci_irq, intrp, ispec, NULL));
2449 2452 }
2450 2453
2451 2454 /*
2452 2455 * Translate and return IRQ no
2453 2456 */
2454 2457 static int
2455 2458 apix_intx_xlate_irq(dev_info_t *dip, int inum, struct intrspec *ispec)
2456 2459 {
2457 2460 int newirq, irqno = ispec->intrspec_vec;
2458 2461 int parent_is_pci_or_pciex = 0, child_is_pciex = 0;
2459 2462 int bustype = 0, dev_len;
2460 2463 char dev_type[16];
2461 2464
2462 2465 if (apic_defconf) {
2463 2466 mutex_enter(&airq_mutex);
2464 2467 goto defconf;
2465 2468 }
2466 2469
2467 2470 if ((dip == NULL) || (!apic_irq_translate && !apic_enable_acpi)) {
2468 2471 mutex_enter(&airq_mutex);
2469 2472 goto nonpci;
2470 2473 }
2471 2474
2472 2475 /*
2473 2476 * use ddi_getlongprop_buf() instead of ddi_prop_lookup_string()
2474 2477 * to avoid extra buffer allocation.
2475 2478 */
2476 2479 dev_len = sizeof (dev_type);
2477 2480 if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ddi_get_parent(dip),
2478 2481 DDI_PROP_DONTPASS, "device_type", (caddr_t)dev_type,
2479 2482 &dev_len) == DDI_PROP_SUCCESS) {
2480 2483 if ((strcmp(dev_type, "pci") == 0) ||
2481 2484 (strcmp(dev_type, "pciex") == 0))
2482 2485 parent_is_pci_or_pciex = 1;
2483 2486 }
2484 2487
2485 2488 if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
2486 2489 DDI_PROP_DONTPASS, "compatible", (caddr_t)dev_type,
2487 2490 &dev_len) == DDI_PROP_SUCCESS) {
2488 2491 if (strstr(dev_type, "pciex"))
2489 2492 child_is_pciex = 1;
2490 2493 }
2491 2494
2492 2495 mutex_enter(&airq_mutex);
2493 2496
2494 2497 if (parent_is_pci_or_pciex) {
2495 2498 bustype = child_is_pciex ? BUS_PCIE : BUS_PCI;
2496 2499 newirq = apix_intx_setup_pci(dip, inum, bustype, ispec);
2497 2500 if (newirq != -1)
2498 2501 goto done;
2499 2502 bustype = 0;
2500 2503 } else if (strcmp(dev_type, "isa") == 0)
2501 2504 bustype = BUS_ISA;
2502 2505 else if (strcmp(dev_type, "eisa") == 0)
2503 2506 bustype = BUS_EISA;
2504 2507
2505 2508 nonpci:
2506 2509 newirq = apix_intx_setup_nonpci(dip, inum, bustype, ispec);
2507 2510 if (newirq != -1)
2508 2511 goto done;
2509 2512
2510 2513 defconf:
2511 2514 newirq = apix_intx_setup(dip, inum, irqno, NULL, ispec, NULL);
2512 2515 if (newirq == -1) {
2513 2516 mutex_exit(&airq_mutex);
2514 2517 return (-1);
2515 2518 }
2516 2519 done:
2517 2520 ASSERT(apic_irq_table[newirq]);
2518 2521 mutex_exit(&airq_mutex);
2519 2522 return (newirq);
2520 2523 }
2521 2524
2522 2525 static int
2523 2526 apix_intx_alloc_vector(dev_info_t *dip, int inum, struct intrspec *ispec)
2524 2527 {
2525 2528 int irqno;
2526 2529 apix_vector_t *vecp;
2527 2530
2528 2531 if ((irqno = apix_intx_xlate_irq(dip, inum, ispec)) == -1)
2529 2532 return (0);
2530 2533
2531 2534 if ((vecp = apix_alloc_intx(dip, inum, irqno)) == NULL)
2532 2535 return (0);
2533 2536
2534 2537 DDI_INTR_IMPLDBG((CE_CONT, "apix_intx_alloc_vector: dip=0x%p name=%s "
2535 2538 "irqno=0x%x cpuid=%d vector=0x%x\n",
2536 2539 (void *)dip, ddi_driver_name(dip), irqno,
2537 2540 vecp->v_cpuid, vecp->v_vector));
2538 2541
2539 2542 return (1);
2540 2543 }
2541 2544
2542 2545 /*
2543 2546 * Return the vector number if the translated IRQ for this device
2544 2547 * has a vector mapping setup. If no IRQ setup exists or no vector is
2545 2548 * allocated to it then return 0.
2546 2549 */
2547 2550 static apix_vector_t *
2548 2551 apix_intx_xlate_vector(dev_info_t *dip, int inum, struct intrspec *ispec)
2549 2552 {
2550 2553 int irqno;
2551 2554 apix_vector_t *vecp;
2552 2555
↓ open down ↓ |
2319 lines elided |
↑ open up ↑ |
2553 2556 /* get the IRQ number */
2554 2557 if ((irqno = apix_intx_xlate_irq(dip, inum, ispec)) == -1)
2555 2558 return (NULL);
2556 2559
2557 2560 /* get the vector number if a vector is allocated to this irqno */
2558 2561 vecp = apix_intx_get_vector(irqno);
2559 2562
2560 2563 return (vecp);
2561 2564 }
2562 2565
2566 +/*
2567 + * Switch between safe and x2APIC IPI sending method.
2568 + * The CPU may power on in xapic mode or x2apic mode. If the CPU needs to send
2569 + * an IPI to other CPUs before entering x2APIC mode, it still needs to use the
2570 + * xAPIC method. Before sending a StartIPI to the target CPU, psm_send_ipi will
2571 + * be changed to apic_common_send_ipi, which detects current local APIC mode and
2572 + * use the right method to send an IPI. If some CPUs fail to start up,
2573 + * apic_poweron_cnt won't return to zero, so apic_common_send_ipi will always be
2574 + * used. psm_send_ipi can't be simply changed back to x2apic_send_ipi if some
2575 + * CPUs failed to start up because those failed CPUs may recover itself later at
2576 + * unpredictable time.
2577 + */
2578 +void
2579 +apic_switch_ipi_callback(boolean_t enter)
2580 +{
2581 + ulong_t iflag;
2582 + struct psm_ops *pops = psmops;
2583 +
2584 + iflag = intr_clear();
2585 + lock_set(&apic_mode_switch_lock);
2586 + if (enter) {
2587 + ASSERT(apic_poweron_cnt >= 0);
2588 + if (apic_poweron_cnt == 0) {
2589 + pops->psm_send_ipi = apic_common_send_ipi;
2590 + send_dirintf = pops->psm_send_ipi;
2591 + }
2592 + apic_poweron_cnt++;
2593 + } else {
2594 + ASSERT(apic_poweron_cnt > 0);
2595 + apic_poweron_cnt--;
2596 + if (apic_poweron_cnt == 0) {
2597 + pops->psm_send_ipi = x2apic_send_ipi;
2598 + send_dirintf = pops->psm_send_ipi;
2599 + }
2600 + }
2601 + lock_clear(&apic_mode_switch_lock);
2602 + intr_restore(iflag);
2603 +}
2604 +
2563 2605 /* stub function */
2564 2606 int
2565 2607 apix_loaded(void)
2566 2608 {
2567 2609 return (apix_is_enabled);
2568 2610 }
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX