1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25 /*
26 * Copyright (c) 2017, Joyent, Inc. All rights reserved.
27 * Copyright (c) 2016 by Delphix. All rights reserved.
28 */
29
30 /*
31 * PSMI 1.1 extensions are supported only in 2.6 and later versions.
32 * PSMI 1.2 extensions are supported only in 2.7 and later versions.
33 * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
34 * PSMI 1.5 extensions are supported in Solaris Nevada.
35 * PSMI 1.6 extensions are supported in Solaris Nevada.
36 * PSMI 1.7 extensions are supported in Solaris Nevada.
37 */
38 #define PSMI_1_7
39
40 #include <sys/processor.h>
41 #include <sys/time.h>
42 #include <sys/psm.h>
43 #include <sys/smp_impldefs.h>
44 #include <sys/cram.h>
45 #include <sys/acpi/acpi.h>
46 #include <sys/acpica.h>
47 #include <sys/psm_common.h>
48 #include <sys/apic.h>
49 #include <sys/pit.h>
50 #include <sys/ddi.h>
51 #include <sys/sunddi.h>
52 #include <sys/ddi_impldefs.h>
53 #include <sys/pci.h>
54 #include <sys/promif.h>
55 #include <sys/x86_archext.h>
56 #include <sys/cpc_impl.h>
57 #include <sys/uadmin.h>
58 #include <sys/panic.h>
59 #include <sys/debug.h>
60 #include <sys/archsystm.h>
61 #include <sys/trap.h>
62 #include <sys/machsystm.h>
63 #include <sys/sysmacros.h>
64 #include <sys/cpuvar.h>
65 #include <sys/rm_platter.h>
66 #include <sys/privregs.h>
67 #include <sys/note.h>
68 #include <sys/pci_intr_lib.h>
69 #include <sys/spl.h>
70 #include <sys/clock.h>
71 #include <sys/dditypes.h>
72 #include <sys/sunddi.h>
73 #include <sys/x_call.h>
74 #include <sys/reboot.h>
75 #include <sys/hpet.h>
76 #include <sys/apic_common.h>
77 #include <sys/apic_timer.h>
78
/*
 * Forward declarations for the interrupt-remapping "record" callbacks
 * installed in apic_nointrmap_ops below (definitions later in this file).
 */
static void apic_record_ioapic_rdt(void *intrmap_private,
    ioapic_rdt_t *irdt);
static void apic_record_msi(void *intrmap_private, msi_regs_t *mregs);

/*
 * Common routines between pcplusmp & apix (taken from apic.c).
 */

int apic_clkinit(int);
hrtime_t apic_gethrtime(void);
void apic_send_ipi(int, int);
void apic_set_idlecpu(processorid_t);
void apic_unset_idlecpu(processorid_t);
void apic_shutdown(int, int);
void apic_preshutdown(int, int);
processorid_t apic_get_next_processorid(processorid_t);

hrtime_t apic_gettime();

/* IOAPIC EOI method in use; chosen by apic_ioapic_method_probe() below */
enum apic_ioapic_method_type apix_mul_ioapic_method = APIC_MUL_IOAPIC_PCPLUSMP;

/* Now the ones for Dynamic Interrupt distribution */
int apic_enable_dynamic_migration = 0;

/* maximum loop count when sending Start IPIs. */
int apic_sipi_max_loop_count = 0x1000;
105
106 /*
107 * These variables are frequently accessed in apic_intr_enter(),
108 * apic_intr_exit and apic_setspl, so group them together
109 */
110 volatile uint32_t *apicadr = NULL; /* virtual addr of local APIC */
111 int apic_setspl_delay = 1; /* apic_setspl - delay enable */
112 int apic_clkvect;
113
114 /* vector at which error interrupts come in */
115 int apic_errvect;
116 int apic_enable_error_intr = 1;
117 int apic_error_display_delay = 100;
118
119 /* vector at which performance counter overflow interrupts come in */
120 int apic_cpcovf_vect;
121 int apic_enable_cpcovf_intr = 1;
122
123 /* vector at which CMCI interrupts come in */
124 int apic_cmci_vect;
125 extern int cmi_enable_cmci;
126 extern void cmi_cmci_trap(void);
127
128 kmutex_t cmci_cpu_setup_lock; /* protects cmci_cpu_setup_registered */
129 int cmci_cpu_setup_registered;
130
131 lock_t apic_mode_switch_lock;
132
133 /*
134 * Patchable global variables.
135 */
136 int apic_forceload = 0;
137
138 int apic_coarse_hrtime = 1; /* 0 - use accurate slow gethrtime() */
139
140 int apic_flat_model = 0; /* 0 - clustered. 1 - flat */
141 int apic_panic_on_nmi = 0;
142 int apic_panic_on_apic_error = 0;
143
144 int apic_verbose = 0; /* 0x1ff */
145
146 #ifdef DEBUG
147 int apic_debug = 0;
148 int apic_restrict_vector = 0;
149
150 int apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE];
151 int apic_debug_msgbufindex = 0;
152
153 #endif /* DEBUG */
154
155 uint_t apic_nticks = 0;
156 uint_t apic_skipped_redistribute = 0;
157
158 uint_t last_count_read = 0;
159 lock_t apic_gethrtime_lock;
160 volatile int apic_hrtime_stamp = 0;
161 volatile hrtime_t apic_nsec_since_boot = 0;
162
163 static hrtime_t apic_last_hrtime = 0;
164 int apic_hrtime_error = 0;
165 int apic_remote_hrterr = 0;
166 int apic_num_nmis = 0;
167 int apic_apic_error = 0;
168 int apic_num_apic_errors = 0;
169 int apic_num_cksum_errors = 0;
170
171 int apic_error = 0;
172
173 static int apic_cmos_ssb_set = 0;
174
175 /* use to make sure only one cpu handles the nmi */
176 lock_t apic_nmi_lock;
177 /* use to make sure only one cpu handles the error interrupt */
178 lock_t apic_error_lock;
179
/*
 * Canned IPMI SET_WATCHDOG_TIMER / RESET_WATCHDOG_TIMER command sequences
 * that arm the BMC watchdog with a short "power down" action — presumably
 * replayed during shutdown to force power-off on these platforms (the
 * consumer is outside this chunk; confirm against apic_shutdown()).
 * Aspen-style BMCs take (control, data) pairs through a single interface.
 */
static struct {
	uchar_t	cntl;
	uchar_t	data;
} aspen_bmc[] = {
	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
	{ CC_SMS_WR_NEXT,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
	{ CC_SMS_WR_NEXT,	0x84 },		/* DataByte 1: SMS/OS no log */
	{ CC_SMS_WR_NEXT,	0x2 },		/* DataByte 2: Power Down */
	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 3: no pre-timeout */
	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 4: timer expir. */
	{ CC_SMS_WR_NEXT,	0xa },		/* DataByte 5: init countdown */
	{ CC_SMS_WR_END,	0x0 },		/* DataByte 6: init countdown */

	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
	{ CC_SMS_WR_END,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
};

/*
 * Sitka-style BMCs take (port, data) pairs: command and data go to
 * separate registers. Same logical IPMI sequence as aspen_bmc above.
 */
static struct {
	int	port;
	uchar_t	data;
} sitka_bmc[] = {
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
	{ SMS_DATA_REGISTER,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
	{ SMS_DATA_REGISTER,	0x84 },		/* DataByte 1: SMS/OS no log */
	{ SMS_DATA_REGISTER,	0x2 },		/* DataByte 2: Power Down */
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 3: no pre-timeout */
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 4: timer expir. */
	{ SMS_DATA_REGISTER,	0xa },		/* DataByte 5: init countdown */
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 6: init countdown */

	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
	{ SMS_DATA_REGISTER,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
};
217
/* Patchable global variables. */
int apic_kmdb_on_nmi = 0;		/* 0 - no, 1 - yes enter kmdb */
uint32_t apic_divide_reg_init = 0;	/* 0 - divide by 2 */

/*
 * Default apic ops without interrupt remapping: the first five entries
 * are no-op stubs (return_instr cast to the right shape); the last two
 * simply record the programmed IOAPIC RDT / MSI register values.
 */
static apic_intrmap_ops_t apic_nointrmap_ops = {
	(int (*)(int))return_instr,
	(void (*)(int))return_instr,
	(void (*)(void **, dev_info_t *, uint16_t, int, uchar_t))return_instr,
	(void (*)(void *, void *, uint16_t, int))return_instr,
	(void (*)(void **))return_instr,
	apic_record_ioapic_rdt,
	apic_record_msi,
};

apic_intrmap_ops_t *apic_vt_ops = &apic_nointrmap_ops;
apic_cpus_info_t *apic_cpus = NULL;
cpuset_t apic_cpumask;
uint_t apic_picinit_called;

/* Flag to indicate that we need to shut down all processors */
static uint_t apic_shutdown_processors;
240
241 /*
242 * Probe the ioapic method for apix module. Called in apic_probe_common()
243 */
244 int
245 apic_ioapic_method_probe()
246 {
247 if (apix_enable == 0)
248 return (PSM_SUCCESS);
249
250 /*
251 * Set IOAPIC EOI handling method. The priority from low to high is:
252 * 1. IOxAPIC: with EOI register
253 * 2. IOMMU interrupt mapping
254 * 3. Mask-Before-EOI method for systems without boot
255 * interrupt routing, such as systems with only one IOAPIC;
256 * NVIDIA CK8-04/MCP55 systems; systems with bridge solution
257 * which disables the boot interrupt routing already.
258 * 4. Directed EOI
259 */
260 if (apic_io_ver[0] >= 0x20)
261 apix_mul_ioapic_method = APIC_MUL_IOAPIC_IOXAPIC;
262 if ((apic_io_max == 1) || (apic_nvidia_io_max == apic_io_max))
263 apix_mul_ioapic_method = APIC_MUL_IOAPIC_MASK;
264 if (apic_directed_EOI_supported())
265 apix_mul_ioapic_method = APIC_MUL_IOAPIC_DEOI;
266
267 /* fall back to pcplusmp */
268 if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_PCPLUSMP) {
269 /* make sure apix is after pcplusmp in /etc/mach */
270 apix_enable = 0; /* go ahead with pcplusmp install next */
271 return (PSM_FAILURE);
272 }
273
274 return (PSM_SUCCESS);
275 }
276
277 /*
278 * handler for APIC Error interrupt. Just print a warning and continue
279 */
280 int
281 apic_error_intr()
282 {
283 uint_t error0, error1, error;
284 uint_t i;
285
286 /*
287 * We need to write before read as per 7.4.17 of system prog manual.
288 * We do both and or the results to be safe
289 */
290 error0 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
291 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
292 error1 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
293 error = error0 | error1;
294
295 /*
296 * Clear the APIC error status (do this on all cpus that enter here)
297 * (two writes are required due to the semantics of accessing the
298 * error status register.)
299 */
300 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
301 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
302
303 /*
304 * Prevent more than 1 CPU from handling error interrupt causing
305 * double printing (interleave of characters from multiple
306 * CPU's when using prom_printf)
307 */
308 if (lock_try(&apic_error_lock) == 0)
309 return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
310 if (error) {
311 #if DEBUG
312 if (apic_debug)
313 debug_enter("pcplusmp: APIC Error interrupt received");
314 #endif /* DEBUG */
315 if (apic_panic_on_apic_error)
316 cmn_err(CE_PANIC,
317 "APIC Error interrupt on CPU %d. Status = %x",
318 psm_get_cpu_id(), error);
319 else {
320 if ((error & ~APIC_CS_ERRORS) == 0) {
321 /* cksum error only */
322 apic_error |= APIC_ERR_APIC_ERROR;
323 apic_apic_error |= error;
324 apic_num_apic_errors++;
325 apic_num_cksum_errors++;
326 } else {
327 /*
328 * prom_printf is the best shot we have of
329 * something which is problem free from
330 * high level/NMI type of interrupts
331 */
332 prom_printf("APIC Error interrupt on CPU %d. "
333 "Status 0 = %x, Status 1 = %x\n",
334 psm_get_cpu_id(), error0, error1);
335 apic_error |= APIC_ERR_APIC_ERROR;
336 apic_apic_error |= error;
337 apic_num_apic_errors++;
338 for (i = 0; i < apic_error_display_delay; i++) {
339 tenmicrosec();
340 }
341 /*
342 * provide more delay next time limited to
343 * roughly 1 clock tick time
344 */
345 if (apic_error_display_delay < 500)
346 apic_error_display_delay *= 2;
347 }
348 }
349 lock_clear(&apic_error_lock);
350 return (DDI_INTR_CLAIMED);
351 } else {
352 lock_clear(&apic_error_lock);
353 return (DDI_INTR_UNCLAIMED);
354 }
355 }
356
357 /*
358 * Turn off the mask bit in the performance counter Local Vector Table entry.
359 */
360 void
361 apic_cpcovf_mask_clear(void)
362 {
363 apic_reg_ops->apic_write(APIC_PCINT_VECT,
364 (apic_reg_ops->apic_read(APIC_PCINT_VECT) & ~APIC_LVT_MASK));
365 }
366
/*
 * Cross-call target: enable CMCI on the executing CPU by programming the
 * CMCI LVT entry with apic_cmci_vect (mask bit clear).
 */
/*ARGSUSED*/
static int
apic_cmci_enable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
{
	apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
	return (0);
}
374
/*
 * Cross-call target: disable CMCI on the executing CPU by writing the
 * CMCI LVT entry with the mask bit (AV_MASK) set.
 */
/*ARGSUSED*/
static int
apic_cmci_disable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
{
	apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect | AV_MASK);
	return (0);
}
382
383 /*ARGSUSED*/
384 int
385 cmci_cpu_setup(cpu_setup_t what, int cpuid, void *arg)
386 {
387 cpuset_t cpu_set;
388
389 CPUSET_ONLY(cpu_set, cpuid);
390
391 switch (what) {
392 case CPU_ON:
393 xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
394 (xc_func_t)apic_cmci_enable);
395 break;
396
397 case CPU_OFF:
398 xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
399 (xc_func_t)apic_cmci_disable);
400 break;
401
402 default:
403 break;
404 }
405
406 return (0);
407 }
408
/*
 * Mask every local interrupt source on the executing CPU's local APIC
 * and raise the task priority to block all maskable interrupts.
 */
static void
apic_disable_local_apic(void)
{
	/* raise TPR so no interrupt can be delivered */
	apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);
	apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK);

	/* local intr reg 0 */
	apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK);

	/* disable NMI */
	apic_reg_ops->apic_write(APIC_INT_VECT1, AV_MASK);

	/* and error interrupt */
	apic_reg_ops->apic_write(APIC_ERR_VECT, AV_MASK);

	/* and perf counter intr */
	apic_reg_ops->apic_write(APIC_PCINT_VECT, AV_MASK);

	/* leave only the spurious vector programmed */
	apic_reg_ops->apic_write(APIC_SPUR_INT_REG, APIC_SPUR_INTR);
}
429
/*
 * Send the INIT/SIPI sequence to CPU 'cpun'.  When 'start' is B_TRUE the
 * CMOS shutdown byte is set first so a warm-reset vectors through the BIOS
 * shutdown path.  The sequence (INIT assert, INIT deassert, two Startup
 * IPIs) and its delays follow the Intel MP startup protocol; do not
 * reorder.  Runs with interrupts disabled on the calling CPU.
 */
static void
apic_cpu_send_SIPI(processorid_t cpun, boolean_t start)
{
	int loop_count;
	uint32_t vector;
	uint_t apicid;
	ulong_t iflag;

	apicid = apic_cpus[cpun].aci_local_id;

	/*
	 * Interrupts on current CPU will be disabled during the
	 * steps in order to avoid unwanted side effects from
	 * executing interrupt handlers on a problematic BIOS.
	 */
	iflag = intr_clear();

	if (start) {
		outb(CMOS_ADDR, SSB);
		outb(CMOS_DATA, BIOS_SHUTDOWN);
	}

	/*
	 * According to X2APIC specification in section '2.3.5.1' of
	 * Interrupt Command Register Semantics, the semantics of
	 * programming the Interrupt Command Register to dispatch an interrupt
	 * is simplified. A single MSR write to the 64-bit ICR is required
	 * for dispatching an interrupt. Specifically, with the 64-bit MSR
	 * interface to ICR, system software is not required to check the
	 * status of the delivery status bit prior to writing to the ICR
	 * to send an IPI. With the removal of the Delivery Status bit,
	 * system software no longer has a reason to read the ICR. It remains
	 * readable only to aid in debugging.
	 */
#ifdef DEBUG
	APIC_AV_PENDING_SET();
#else
	if (apic_mode == LOCAL_APIC) {
		APIC_AV_PENDING_SET();
	}
#endif /* DEBUG */

	/* for integrated - make sure there is one INIT IPI in buffer */
	/* for external - it will wake up the cpu */
	apic_reg_ops->apic_write_int_cmd(apicid, AV_ASSERT | AV_RESET);

	/* If only 1 CPU is installed, PENDING bit will not go low */
	for (loop_count = apic_sipi_max_loop_count; loop_count; loop_count--) {
		if (apic_mode == LOCAL_APIC &&
		    apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
			apic_ret();
		else
			break;
	}

	/* deassert INIT, then give the target time to come out of reset */
	apic_reg_ops->apic_write_int_cmd(apicid, AV_DEASSERT | AV_RESET);
	drv_usecwait(20000);		/* 20 milli sec */

	if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) {
		/* integrated apic */

		/* startup vector is the page number of the real-mode code */
		vector = (rm_platter_pa >> MMU_PAGESHIFT) &
		    (APIC_VECTOR_MASK | APIC_IPL_MASK);

		/* to offset the INIT IPI queue up in the buffer */
		apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
		drv_usecwait(200);		/* 20 micro sec */

		/*
		 * send the second SIPI (Startup IPI) as recommended by Intel
		 * software development manual.
		 */
		apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
		drv_usecwait(200);		/* 20 micro sec */
	}

	intr_restore(iflag);
}
508
509 /*ARGSUSED1*/
510 int
511 apic_cpu_start(processorid_t cpun, caddr_t arg)
512 {
513 ASSERT(MUTEX_HELD(&cpu_lock));
514
515 if (!apic_cpu_in_range(cpun)) {
516 return (EINVAL);
517 }
518
519 /*
520 * Switch to apic_common_send_ipi for safety during starting other CPUs.
521 */
522 if (apic_mode == LOCAL_X2APIC) {
523 apic_switch_ipi_callback(B_TRUE);
524 }
525
526 apic_cmos_ssb_set = 1;
527 apic_cpu_send_SIPI(cpun, B_TRUE);
528
529 return (0);
530 }
531
532 /*
533 * Put CPU into halted state with interrupts disabled.
534 */
535 /*ARGSUSED1*/
536 int
537 apic_cpu_stop(processorid_t cpun, caddr_t arg)
538 {
539 int rc;
540 cpu_t *cp;
541 extern cpuset_t cpu_ready_set;
542 extern void cpu_idle_intercept_cpu(cpu_t *cp);
543
544 ASSERT(MUTEX_HELD(&cpu_lock));
545
546 if (!apic_cpu_in_range(cpun)) {
547 return (EINVAL);
548 }
549 if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
550 return (ENOTSUP);
551 }
552
553 cp = cpu_get(cpun);
554 ASSERT(cp != NULL);
555 ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0);
556 ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0);
557 ASSERT((cp->cpu_flags & CPU_ENABLE) == 0);
558
559 /* Clear CPU_READY flag to disable cross calls. */
560 cp->cpu_flags &= ~CPU_READY;
561 CPUSET_ATOMIC_DEL(cpu_ready_set, cpun);
562 rc = xc_flush_cpu(cp);
563 if (rc != 0) {
564 CPUSET_ATOMIC_ADD(cpu_ready_set, cpun);
565 cp->cpu_flags |= CPU_READY;
566 return (rc);
567 }
568
569 /* Intercept target CPU at a safe point before powering it off. */
570 cpu_idle_intercept_cpu(cp);
571
572 apic_cpu_send_SIPI(cpun, B_FALSE);
573 cp->cpu_flags &= ~CPU_RUNNING;
574
575 return (0);
576 }
577
578 int
579 apic_cpu_ops(psm_cpu_request_t *reqp)
580 {
581 if (reqp == NULL) {
582 return (EINVAL);
583 }
584
585 switch (reqp->pcr_cmd) {
586 case PSM_CPU_ADD:
587 return (apic_cpu_add(reqp));
588
589 case PSM_CPU_REMOVE:
590 return (apic_cpu_remove(reqp));
591
592 case PSM_CPU_STOP:
593 return (apic_cpu_stop(reqp->req.cpu_stop.cpuid,
594 reqp->req.cpu_stop.ctx));
595
596 default:
597 return (ENOTSUP);
598 }
599 }
600
#ifdef DEBUG
/* Debug-only tuning knobs; patchable via the debugger. */
int	apic_break_on_cpu = 9;
int	apic_stretch_interrupts = 0;
int	apic_stretch_ISR = 1 << 3;	/* IPL of 3 matches nothing now */
#endif /* DEBUG */
606
607 /*
608 * generates an interprocessor interrupt to another CPU. Any changes made to
609 * this routine must be accompanied by similar changes to
610 * apic_common_send_ipi().
611 */
612 void
613 apic_send_ipi(int cpun, int ipl)
614 {
615 int vector;
616 ulong_t flag;
617
618 vector = apic_resv_vector[ipl];
619
620 ASSERT((vector >= APIC_BASE_VECT) && (vector <= APIC_SPUR_INTR));
621
622 flag = intr_clear();
623
624 APIC_AV_PENDING_SET();
625
626 apic_reg_ops->apic_write_int_cmd(apic_cpus[cpun].aci_local_id,
627 vector);
628
629 intr_restore(flag);
630 }
631
632
/* PSM psm_set_idlecpu entry point: intentionally a no-op for APIC. */
/*ARGSUSED*/
void
apic_set_idlecpu(processorid_t cpun)
{
}
638
/* PSM psm_unset_idlecpu entry point: intentionally a no-op for APIC. */
/*ARGSUSED*/
void
apic_unset_idlecpu(processorid_t cpun)
{
}
644
645
/* Empty function used as a cheap "do nothing" body in busy-wait loops. */
void
apic_ret()
{
}
650
651 /*
652 * If apic_coarse_time == 1, then apic_gettime() is used instead of
653 * apic_gethrtime(). This is used for performance instead of accuracy.
654 */
655
656 hrtime_t
657 apic_gettime()
658 {
659 int old_hrtime_stamp;
660 hrtime_t temp;
661
662 /*
663 * In one-shot mode, we do not keep time, so if anyone
664 * calls psm_gettime() directly, we vector over to
665 * gethrtime().
666 * one-shot mode MUST NOT be enabled if this psm is the source of
667 * hrtime.
668 */
669
670 if (apic_oneshot)
671 return (gethrtime());
672
673
674 gettime_again:
675 while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
676 apic_ret();
677
678 temp = apic_nsec_since_boot;
679
680 if (apic_hrtime_stamp != old_hrtime_stamp) { /* got an interrupt */
681 goto gettime_again;
682 }
683 return (temp);
684 }
685
686 /*
687 * Here we return the number of nanoseconds since booting. Note every
688 * clock interrupt increments apic_nsec_since_boot by the appropriate
689 * amount.
690 */
691 hrtime_t
692 apic_gethrtime(void)
693 {
694 int curr_timeval, countval, elapsed_ticks;
695 int old_hrtime_stamp, status;
696 hrtime_t temp;
697 uint32_t cpun;
698 ulong_t oflags;
699
700 /*
701 * In one-shot mode, we do not keep time, so if anyone
702 * calls psm_gethrtime() directly, we vector over to
703 * gethrtime().
704 * one-shot mode MUST NOT be enabled if this psm is the source of
705 * hrtime.
706 */
707
708 if (apic_oneshot)
709 return (gethrtime());
710
711 oflags = intr_clear(); /* prevent migration */
712
713 cpun = apic_reg_ops->apic_read(APIC_LID_REG);
714 if (apic_mode == LOCAL_APIC)
715 cpun >>= APIC_ID_BIT_OFFSET;
716
717 lock_set(&apic_gethrtime_lock);
718
719 gethrtime_again:
720 while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
721 apic_ret();
722
723 /*
724 * Check to see which CPU we are on. Note the time is kept on
725 * the local APIC of CPU 0. If on CPU 0, simply read the current
726 * counter. If on another CPU, issue a remote read command to CPU 0.
727 */
728 if (cpun == apic_cpus[0].aci_local_id) {
729 countval = apic_reg_ops->apic_read(APIC_CURR_COUNT);
730 } else {
731 #ifdef DEBUG
732 APIC_AV_PENDING_SET();
733 #else
734 if (apic_mode == LOCAL_APIC)
735 APIC_AV_PENDING_SET();
736 #endif /* DEBUG */
737
738 apic_reg_ops->apic_write_int_cmd(
739 apic_cpus[0].aci_local_id, APIC_CURR_ADD | AV_REMOTE);
740
741 while ((status = apic_reg_ops->apic_read(APIC_INT_CMD1))
742 & AV_READ_PENDING) {
743 apic_ret();
744 }
745
746 if (status & AV_REMOTE_STATUS) /* 1 = valid */
747 countval = apic_reg_ops->apic_read(APIC_REMOTE_READ);
748 else { /* 0 = invalid */
749 apic_remote_hrterr++;
750 /*
751 * return last hrtime right now, will need more
752 * testing if change to retry
753 */
754 temp = apic_last_hrtime;
755
756 lock_clear(&apic_gethrtime_lock);
757
758 intr_restore(oflags);
759
760 return (temp);
761 }
762 }
763 if (countval > last_count_read)
764 countval = 0;
765 else
766 last_count_read = countval;
767
768 elapsed_ticks = apic_hertz_count - countval;
769
770 curr_timeval = APIC_TICKS_TO_NSECS(elapsed_ticks);
771 temp = apic_nsec_since_boot + curr_timeval;
772
773 if (apic_hrtime_stamp != old_hrtime_stamp) { /* got an interrupt */
774 /* we might have clobbered last_count_read. Restore it */
775 last_count_read = apic_hertz_count;
776 goto gethrtime_again;
777 }
778
779 if (temp < apic_last_hrtime) {
780 /* return last hrtime if error occurs */
781 apic_hrtime_error++;
782 temp = apic_last_hrtime;
783 }
784 else
785 apic_last_hrtime = temp;
786
787 lock_clear(&apic_gethrtime_lock);
788 intr_restore(oflags);
789
790 return (temp);
791 }
792
793 /* apic NMI handler */
794 /*ARGSUSED*/
795 void
796 apic_nmi_intr(caddr_t arg, struct regs *rp)
797 {
798 if (apic_shutdown_processors) {
799 apic_disable_local_apic();
800 return;
801 }
802
803 apic_error |= APIC_ERR_NMI;
804
805 if (!lock_try(&apic_nmi_lock))
806 return;
807 apic_num_nmis++;
808
809 if (apic_kmdb_on_nmi && psm_debugger()) {
810 debug_enter("NMI received: entering kmdb\n");
811 } else if (apic_panic_on_nmi) {
812 /* Keep panic from entering kmdb. */
813 nopanicdebug = 1;
814 panic("NMI received\n");
815 } else {
816 /*
817 * prom_printf is the best shot we have of something which is
818 * problem free from high level/NMI type of interrupts
819 */
820 prom_printf("NMI received\n");
821 }
822
823 lock_clear(&apic_nmi_lock);
824 }
825
826 processorid_t
827 apic_get_next_processorid(processorid_t cpu_id)
828 {
829
830 int i;
831
832 if (cpu_id == -1)
833 return ((processorid_t)0);
834
835 for (i = cpu_id + 1; i < NCPU; i++) {
836 if (apic_cpu_in_range(i))
837 return (i);
838 }
839
840 return ((processorid_t)-1);
841 }
842
843 int
844 apic_cpu_add(psm_cpu_request_t *reqp)
845 {
846 int i, rv = 0;
847 ulong_t iflag;
848 boolean_t first = B_TRUE;
849 uchar_t localver;
850 uint32_t localid, procid;
851 processorid_t cpuid = (processorid_t)-1;
852 mach_cpu_add_arg_t *ap;
853
854 ASSERT(reqp != NULL);
855 reqp->req.cpu_add.cpuid = (processorid_t)-1;
856
857 /* Check whether CPU hotplug is supported. */
858 if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
859 return (ENOTSUP);
860 }
861
862 ap = (mach_cpu_add_arg_t *)reqp->req.cpu_add.argp;
863 switch (ap->type) {
864 case MACH_CPU_ARG_LOCAL_APIC:
865 localid = ap->arg.apic.apic_id;
866 procid = ap->arg.apic.proc_id;
867 if (localid >= 255 || procid > 255) {
868 cmn_err(CE_WARN,
869 "!apic: apicid(%u) or procid(%u) is invalid.",
870 localid, procid);
871 return (EINVAL);
872 }
873 break;
874
875 case MACH_CPU_ARG_LOCAL_X2APIC:
876 localid = ap->arg.apic.apic_id;
877 procid = ap->arg.apic.proc_id;
878 if (localid >= UINT32_MAX) {
879 cmn_err(CE_WARN,
880 "!apic: x2apicid(%u) is invalid.", localid);
881 return (EINVAL);
882 } else if (localid >= 255 && apic_mode == LOCAL_APIC) {
883 cmn_err(CE_WARN, "!apic: system is in APIC mode, "
884 "can't support x2APIC processor.");
885 return (ENOTSUP);
886 }
887 break;
888
889 default:
890 cmn_err(CE_WARN,
891 "!apic: unknown argument type %d to apic_cpu_add().",
892 ap->type);
893 return (EINVAL);
894 }
895
896 /* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */
897 iflag = intr_clear();
898 lock_set(&apic_ioapic_lock);
899
900 /* Check whether local APIC id already exists. */
901 for (i = 0; i < apic_nproc; i++) {
902 if (!CPU_IN_SET(apic_cpumask, i))
903 continue;
904 if (apic_cpus[i].aci_local_id == localid) {
905 lock_clear(&apic_ioapic_lock);
906 intr_restore(iflag);
907 cmn_err(CE_WARN,
908 "!apic: local apic id %u already exists.",
909 localid);
910 return (EEXIST);
911 } else if (apic_cpus[i].aci_processor_id == procid) {
912 lock_clear(&apic_ioapic_lock);
913 intr_restore(iflag);
914 cmn_err(CE_WARN,
915 "!apic: processor id %u already exists.",
916 (int)procid);
917 return (EEXIST);
918 }
919
920 /*
921 * There's no local APIC version number available in MADT table,
922 * so assume that all CPUs are homogeneous and use local APIC
923 * version number of the first existing CPU.
924 */
925 if (first) {
926 first = B_FALSE;
927 localver = apic_cpus[i].aci_local_ver;
928 }
929 }
930 ASSERT(first == B_FALSE);
931
932 /*
933 * Try to assign the same cpuid if APIC id exists in the dirty cache.
934 */
935 for (i = 0; i < apic_max_nproc; i++) {
936 if (CPU_IN_SET(apic_cpumask, i)) {
937 ASSERT((apic_cpus[i].aci_status & APIC_CPU_FREE) == 0);
938 continue;
939 }
940 ASSERT(apic_cpus[i].aci_status & APIC_CPU_FREE);
941 if ((apic_cpus[i].aci_status & APIC_CPU_DIRTY) &&
942 apic_cpus[i].aci_local_id == localid &&
943 apic_cpus[i].aci_processor_id == procid) {
944 cpuid = i;
945 break;
946 }
947 }
948
949 /* Avoid the dirty cache and allocate fresh slot if possible. */
950 if (cpuid == (processorid_t)-1) {
951 for (i = 0; i < apic_max_nproc; i++) {
952 if ((apic_cpus[i].aci_status & APIC_CPU_FREE) &&
953 (apic_cpus[i].aci_status & APIC_CPU_DIRTY) == 0) {
954 cpuid = i;
955 break;
956 }
957 }
958 }
959
960 /* Try to find any free slot as last resort. */
961 if (cpuid == (processorid_t)-1) {
962 for (i = 0; i < apic_max_nproc; i++) {
963 if (apic_cpus[i].aci_status & APIC_CPU_FREE) {
964 cpuid = i;
965 break;
966 }
967 }
968 }
969
970 if (cpuid == (processorid_t)-1) {
971 lock_clear(&apic_ioapic_lock);
972 intr_restore(iflag);
973 cmn_err(CE_NOTE,
974 "!apic: failed to allocate cpu id for processor %u.",
975 procid);
976 rv = EAGAIN;
977 } else if (ACPI_FAILURE(acpica_map_cpu(cpuid, procid))) {
978 lock_clear(&apic_ioapic_lock);
979 intr_restore(iflag);
980 cmn_err(CE_NOTE,
981 "!apic: failed to build mapping for processor %u.",
982 procid);
983 rv = EBUSY;
984 } else {
985 ASSERT(cpuid >= 0 && cpuid < NCPU);
986 ASSERT(cpuid < apic_max_nproc && cpuid < max_ncpus);
987 bzero(&apic_cpus[cpuid], sizeof (apic_cpus[0]));
988 apic_cpus[cpuid].aci_processor_id = procid;
989 apic_cpus[cpuid].aci_local_id = localid;
990 apic_cpus[cpuid].aci_local_ver = localver;
991 CPUSET_ATOMIC_ADD(apic_cpumask, cpuid);
992 if (cpuid >= apic_nproc) {
993 apic_nproc = cpuid + 1;
994 }
995 lock_clear(&apic_ioapic_lock);
996 intr_restore(iflag);
997 reqp->req.cpu_add.cpuid = cpuid;
998 }
999
1000 return (rv);
1001 }
1002
/*
 * Hot-remove a CPU: unmap it from ACPI, drop it from apic_cpumask,
 * shrink apic_nproc if the highest id was removed, and mark its
 * apic_cpus slot free-but-dirty so a re-add can reuse the same slot.
 * Returns 0, or ENOTSUP/ENODEV/ENOENT on failure.
 */
int
apic_cpu_remove(psm_cpu_request_t *reqp)
{
	int i;
	ulong_t iflag;
	processorid_t cpuid;

	/* Check whether CPU hotplug is supported. */
	if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
		return (ENOTSUP);
	}

	cpuid = reqp->req.cpu_remove.cpuid;

	/* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */
	iflag = intr_clear();
	lock_set(&apic_ioapic_lock);

	if (!apic_cpu_in_range(cpuid)) {
		lock_clear(&apic_ioapic_lock);
		intr_restore(iflag);
		cmn_err(CE_WARN,
		    "!apic: cpuid %d doesn't exist in apic_cpus array.",
		    cpuid);
		return (ENODEV);
	}
	ASSERT((apic_cpus[cpuid].aci_status & APIC_CPU_FREE) == 0);

	if (ACPI_FAILURE(acpica_unmap_cpu(cpuid))) {
		lock_clear(&apic_ioapic_lock);
		intr_restore(iflag);
		return (ENOENT);
	}

	if (cpuid == apic_nproc - 1) {
		/*
		 * We are removing the highest numbered cpuid so we need to
		 * find the next highest cpuid as the new value for apic_nproc.
		 */
		for (i = apic_nproc; i > 0; i--) {
			if (CPU_IN_SET(apic_cpumask, i - 1)) {
				apic_nproc = i;
				break;
			}
		}
		/* at least one CPU left */
		ASSERT(i > 0);
	}
	CPUSET_ATOMIC_DEL(apic_cpumask, cpuid);
	/* mark slot as free and keep it in the dirty cache */
	apic_cpus[cpuid].aci_status = APIC_CPU_FREE | APIC_CPU_DIRTY;

	lock_clear(&apic_ioapic_lock);
	intr_restore(iflag);

	return (0);
}
1060
1061 /*
1062 * Return the number of APIC clock ticks elapsed for 8245 to decrement
1063 * (APIC_TIME_COUNT + pit_ticks_adj) ticks.
1064 */
uint_t
apic_calibrate(volatile uint32_t *addr, uint16_t *pit_ticks_adj)
{
	uint8_t		pit_tick_lo;
	uint16_t	pit_tick, target_pit_tick;
	uint32_t	start_apic_tick, end_apic_tick;
	ulong_t		iflag;
	uint32_t	reg;

	/*
	 * Convert the caller's register address to an apic_read() index
	 * for the current-count register: offset of 'addr' from the APIC
	 * base plus APIC_CURR_COUNT, in uint32_t units (pointer
	 * subtraction).
	 */
	reg = addr + APIC_CURR_COUNT - apicadr;

	/* interrupts off so nothing perturbs the two timing samples */
	iflag = intr_clear();

	/*
	 * Sample the PIT (two successive port reads give low then high
	 * byte) until it is in a range safe for the measurements below.
	 */
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick < APIC_TIME_MIN ||
	    pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX);

	/*
	 * Wait for the 8254 to decrement by 5 ticks to ensure
	 * we didn't start in the middle of a tick.
	 * Compare with 0x10 for the wrap around case.
	 */
	target_pit_tick = pit_tick - 5;
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);

	start_apic_tick = apic_reg_ops->apic_read(reg);

	/*
	 * Wait for the 8254 to decrement by
	 * (APIC_TIME_COUNT + pit_ticks_adj) ticks
	 */
	target_pit_tick = pit_tick - APIC_TIME_COUNT;
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);

	end_apic_tick = apic_reg_ops->apic_read(reg);

	/* report how far past the target the PIT actually went */
	*pit_ticks_adj = target_pit_tick - pit_tick;

	intr_restore(iflag);

	/* APIC timer counts down, so start - end = elapsed APIC ticks */
	return (start_apic_tick - end_apic_tick);
}
1115
1116 /*
1117 * Initialise the APIC timer on the local APIC of CPU 0 to the desired
1118 * frequency. Note at this stage in the boot sequence, the boot processor
1119 * is the only active processor.
1120 * hertz value of 0 indicates a one-shot mode request. In this case
1121 * the function returns the resolution (in nanoseconds) for the hardware
1122 * timer interrupt. If one-shot mode capability is not available,
1123 * the return value will be 0. apic_enable_oneshot is a global switch
1124 * for disabling the functionality.
1125 * A non-zero positive value for hertz indicates a periodic mode request.
1126 * In this case the hardware will be programmed to generate clock interrupts
1127 * at hertz frequency and returns the resolution of interrupts in
1128 * nanosecond.
1129 */
1130
1131 int
1132 apic_clkinit(int hertz)
1133 {
1134 int ret;
1135
1136 apic_int_busy_mark = (apic_int_busy_mark *
1137 apic_sample_factor_redistribution) / 100;
1138 apic_int_free_mark = (apic_int_free_mark *
1139 apic_sample_factor_redistribution) / 100;
1140 apic_diff_for_redistribution = (apic_diff_for_redistribution *
1141 apic_sample_factor_redistribution) / 100;
1142
1143 ret = apic_timer_init(hertz);
1144 return (ret);
1145
1146 }
1147
/*
 * apic_preshutdown:
 * Called early in shutdown whilst we can still access filesystems to do
 * things like loading modules which will be required to complete shutdown
 * after filesystems are all unmounted.
 */
void
apic_preshutdown(int cmd, int fcn)
{
	/* Nothing to do at this stage beyond optional debug tracing. */
	APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n",
	    cmd, fcn, apic_poweroff_method, apic_enable_acpi));
}
1160
/*
 * apic_shutdown:
 * Quiesce the other CPUs and the interrupt hardware, then, for the
 * A_SHUTDOWN/AD_POWEROFF case, attempt to power the machine off using
 * the configured platform power-off method.
 */
void
apic_shutdown(int cmd, int fcn)
{
	int restarts, attempts;
	int i;
	uchar_t byte;
	ulong_t iflag;

	/* Tear down HPET/ACPI timer state before quiescing interrupts. */
	hpet_acpi_fini();

	/* Send NMI to all CPUs except self to do per processor shutdown */
	iflag = intr_clear();
#ifdef DEBUG
	APIC_AV_PENDING_SET();
#else
	if (apic_mode == LOCAL_APIC)
		APIC_AV_PENDING_SET();
#endif /* DEBUG */
	apic_shutdown_processors = 1;
	apic_reg_ops->apic_write(APIC_INT_CMD1,
	    AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF);

	/* restore cmos shutdown byte before reboot */
	if (apic_cmos_ssb_set) {
		outb(CMOS_ADDR, SSB);
		outb(CMOS_DATA, 0);
	}

	/* Mask all I/O APIC redirection entries. */
	ioapic_disable_redirection();

	/* disable apic mode if imcr present */
	if (apic_imcrp) {
		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC);
	}

	apic_disable_local_apic();

	intr_restore(iflag);

	/* remainder of function is for shutdown cases only */
	if (cmd != A_SHUTDOWN)
		return;

	/*
	 * Switch system back into Legacy-Mode if using ACPI and
	 * not powering-off.  Some BIOSes need to remain in ACPI-mode
	 * for power-off to succeed (Dell Dimension 4600).
	 * Do not disable ACPI while doing fastreboot.
	 */
	if (apic_enable_acpi && fcn != AD_POWEROFF && fcn != AD_FASTREBOOT)
		(void) AcpiDisable();

	if (fcn == AD_FASTREBOOT) {
		/* Broadcast INIT to all other CPUs for fast reboot. */
		apic_reg_ops->apic_write(APIC_INT_CMD1,
		    AV_ASSERT | AV_RESET | AV_SH_ALL_EXCSELF);
	}

	/* remainder of function is for shutdown+poweroff case only */
	if (fcn != AD_POWEROFF)
		return;

	switch (apic_poweroff_method) {
	case APIC_POWEROFF_VIA_RTC:

		/* select the extended NVRAM bank in the RTC */
		outb(CMOS_ADDR, RTC_REGA);
		byte = inb(CMOS_DATA);
		outb(CMOS_DATA, (byte | EXT_BANK));

		outb(CMOS_ADDR, PFR_REG);

		/* for Predator must toggle the PAB bit */
		byte = inb(CMOS_DATA);

		/*
		 * clear power active bar, wakeup alarm and
		 * kickstart
		 */
		byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG);
		outb(CMOS_DATA, byte);

		/* delay before next write */
		drv_usecwait(1000);

		/* for S40 the following would suffice */
		byte = inb(CMOS_DATA);

		/* power active bar control bit */
		byte |= PAB_CBIT;
		outb(CMOS_DATA, byte);

		break;

	case APIC_POWEROFF_VIA_ASPEN_BMC:
		restarts = 0;
restart_aspen_bmc:
		/* Give up after two full restarts of the BMC handshake. */
		if (++restarts == 3)
			break;
		attempts = 0;
		/* Spin (with bounded retries) until the BMC is not busy. */
		do {
			byte = inb(MISMIC_FLAG_REGISTER);
			byte &= MISMIC_BUSY_MASK;
			if (byte != 0) {
				drv_usecwait(1000);
				if (attempts >= 3)
					goto restart_aspen_bmc;
				++attempts;
			}
		} while (byte != 0);
		outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS);
		byte = inb(MISMIC_FLAG_REGISTER);
		byte |= 0x1;
		outb(MISMIC_FLAG_REGISTER, byte);
		i = 0;
		/* Write the canned power-off command sequence to the BMC. */
		for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0]));
		    i++) {
			attempts = 0;
			do {
				byte = inb(MISMIC_FLAG_REGISTER);
				byte &= MISMIC_BUSY_MASK;
				if (byte != 0) {
					drv_usecwait(1000);
					if (attempts >= 3)
						goto restart_aspen_bmc;
					++attempts;
				}
			} while (byte != 0);
			outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl);
			outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data);
			byte = inb(MISMIC_FLAG_REGISTER);
			byte |= 0x1;
			outb(MISMIC_FLAG_REGISTER, byte);
		}
		break;

	case APIC_POWEROFF_VIA_SITKA_BMC:
		restarts = 0;
restart_sitka_bmc:
		/* Give up after two full restarts of the BMC handshake. */
		if (++restarts == 3)
			break;
		attempts = 0;
		/* Wait for the SMS interface to leave read/write states. */
		do {
			byte = inb(SMS_STATUS_REGISTER);
			byte &= SMS_STATE_MASK;
			if ((byte == SMS_READ_STATE) ||
			    (byte == SMS_WRITE_STATE)) {
				drv_usecwait(1000);
				if (attempts >= 3)
					goto restart_sitka_bmc;
				++attempts;
			}
		} while ((byte == SMS_READ_STATE) ||
		    (byte == SMS_WRITE_STATE));
		outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS);
		i = 0;
		for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0]));
		    i++) {
			attempts = 0;
			/* Wait for the input buffer to drain before writing. */
			do {
				byte = inb(SMS_STATUS_REGISTER);
				byte &= SMS_IBF_MASK;
				if (byte != 0) {
					drv_usecwait(1000);
					if (attempts >= 3)
						goto restart_sitka_bmc;
					++attempts;
				}
			} while (byte != 0);
			outb(sitka_bmc[i].port, sitka_bmc[i].data);
		}
		break;

	case APIC_POWEROFF_NONE:

		/* If no APIC direct method, we will try using ACPI */
		if (apic_enable_acpi) {
			if (acpi_poweroff() == 1)
				return;
		} else
			return;

		break;
	}
	/*
	 * Wait a limited time here for power to go off.
	 * If the power does not go off, then there was a
	 * problem and we should continue to the halt which
	 * prints a message for the user to press a key to
	 * reboot.
	 */
	drv_usecwait(7000000); /* wait seven seconds */

}
1355
1356 cyclic_id_t apic_cyclic_id;
1357
1358 /*
1359 * The following functions are in the platform specific file so that they
1360 * can be different functions depending on whether we are running on
1361 * bare metal or a hypervisor.
1362 */
1363
1364 /*
1365 * map an apic for memory-mapped access
1366 */
1367 uint32_t *
1368 mapin_apic(uint32_t addr, size_t len, int flags)
1369 {
1370 return ((void *)psm_map_phys(addr, len, flags));
1371 }
1372
uint32_t *
mapin_ioapic(uint32_t addr, size_t len, int flags)
{
	/* I/O APICs are mapped through the same path as local APICs. */
	return (mapin_apic(addr, len, flags));
}
1378
1379 /*
1380 * unmap an apic
1381 */
void
mapout_apic(caddr_t addr, size_t len)
{
	/* Release a mapping created by mapin_apic()/mapin_ioapic(). */
	psm_unmap_phys(addr, len);
}
1387
void
mapout_ioapic(caddr_t addr, size_t len)
{
	/* I/O APIC unmapping shares the local APIC path. */
	mapout_apic(addr, len);
}
1393
1394 uint32_t
1395 ioapic_read(int ioapic_ix, uint32_t reg)
1396 {
1397 volatile uint32_t *ioapic;
1398
1399 ioapic = apicioadr[ioapic_ix];
1400 ioapic[APIC_IO_REG] = reg;
1401 return (ioapic[APIC_IO_DATA]);
1402 }
1403
1404 void
1405 ioapic_write(int ioapic_ix, uint32_t reg, uint32_t value)
1406 {
1407 volatile uint32_t *ioapic;
1408
1409 ioapic = apicioadr[ioapic_ix];
1410 ioapic[APIC_IO_REG] = reg;
1411 ioapic[APIC_IO_DATA] = value;
1412 }
1413
1414 void
1415 ioapic_write_eoi(int ioapic_ix, uint32_t value)
1416 {
1417 volatile uint32_t *ioapic;
1418
1419 ioapic = apicioadr[ioapic_ix];
1420 ioapic[APIC_IO_EOI] = value;
1421 }
1422
1423 /*
1424 * Round-robin algorithm to find the next CPU with interrupts enabled.
1425 * It can't share the same static variable apic_next_bind_cpu with
1426 * apic_get_next_bind_cpu(), since that will cause all interrupts to be
1427 * bound to CPU1 at boot time. During boot, only CPU0 is online with
1428 * interrupts enabled when apic_get_next_bind_cpu() and apic_find_cpu()
1429 * are called. However, the pcplusmp driver assumes that there will be
1430 * boot_ncpus CPUs configured eventually so it tries to distribute all
1431 * interrupts among CPU0 - CPU[boot_ncpus - 1]. Thus to prevent all
 * interrupts being targeted at CPU1, we need to use a dedicated static
1433 * variable for find_next_cpu() instead of sharing apic_next_bind_cpu.
1434 */
1435
1436 processorid_t
1437 apic_find_cpu(int flag)
1438 {
1439 int i;
1440 static processorid_t acid = 0;
1441
1442 /* Find the first CPU with the passed-in flag set */
1443 for (i = 0; i < apic_nproc; i++) {
1444 if (++acid >= apic_nproc) {
1445 acid = 0;
1446 }
1447 if (apic_cpu_in_range(acid) &&
1448 (apic_cpus[acid].aci_status & flag)) {
1449 break;
1450 }
1451 }
1452
1453 ASSERT((apic_cpus[acid].aci_status & flag) != 0);
1454 return (acid);
1455 }
1456
/*
 * Enable interrupt remapping (and, when possible, x2APIC mode) through
 * the platform's VT ops vector, if one has been registered.
 */
void
apic_intrmap_init(int apic_mode)
{
	int suppress_brdcst_eoi = 0;

	/*
	 * Intel Software Developer's Manual 3A, 10.12.7:
	 *
	 * Routing of device interrupts to local APIC units operating in
	 * x2APIC mode requires use of the interrupt-remapping architecture
	 * specified in the Intel Virtualization Technology for Directed
	 * I/O, Revision 1.3.  Because of this, BIOS must enumerate support
	 * for and software must enable this interrupt remapping with
	 * Extended Interrupt Mode Enabled before enabling x2APIC mode in
	 * the local APIC units.
	 *
	 *
	 * In other words, to use the APIC in x2APIC mode, we need interrupt
	 * remapping.  Since we don't start up the IOMMU by default, we
	 * won't be able to do any interrupt remapping and therefore have to
	 * use the APIC in traditional 'local APIC' mode with memory mapped
	 * I/O.
	 */

	if (psm_vt_ops != NULL) {
		if (((apic_intrmap_ops_t *)psm_vt_ops)->
		    apic_intrmap_init(apic_mode) == DDI_SUCCESS) {

			apic_vt_ops = psm_vt_ops;

			/*
			 * We leverage the interrupt remapping engine to
			 * suppress broadcast EOI; thus we must send the
			 * directed EOI with the directed-EOI handler.
			 */
			if (apic_directed_EOI_supported() == 0) {
				suppress_brdcst_eoi = 1;
			}

			apic_vt_ops->apic_intrmap_enable(suppress_brdcst_eoi);

			if (apic_detect_x2apic()) {
				apic_enable_x2apic();
			}

			/*
			 * NOTE(review): re-queried rather than reusing the
			 * earlier result — presumably the answer can change
			 * once remapping/x2APIC is enabled; confirm before
			 * consolidating the two calls.
			 */
			if (apic_directed_EOI_supported() == 0) {
				apic_set_directed_EOI_handler();
			}
		}
	}
}
1508
1509 /*ARGSUSED*/
1510 static void
1511 apic_record_ioapic_rdt(void *intrmap_private, ioapic_rdt_t *irdt)
1512 {
1513 irdt->ir_hi <<= APIC_ID_BIT_OFFSET;
1514 }
1515
1516 /*ARGSUSED*/
1517 static void
1518 apic_record_msi(void *intrmap_private, msi_regs_t *mregs)
1519 {
1520 mregs->mr_addr = MSI_ADDR_HDR |
1521 (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
1522 (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT) |
1523 (mregs->mr_addr << MSI_ADDR_DEST_SHIFT);
1524 mregs->mr_data = (MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) |
1525 mregs->mr_data;
1526 }
1527
1528 /*
1529 * Functions from apic_introp.c
1530 *
1531 * Those functions are used by apic_intr_ops().
1532 */
1533
1534 /*
1535 * MSI support flag:
1536 * reflects whether MSI is supported at APIC level
1537 * it can also be patched through /etc/system
1538 *
1539 * 0 = default value - don't know and need to call apic_check_msi_support()
1540 * to find out then set it accordingly
1541 * 1 = supported
1542 * -1 = not supported
1543 */
/* See the block comment above for the meaning of these values. */
int apic_support_msi = 0;

/* Multiple vector support for MSI-X */
int apic_msix_enable = 1;

/* Multiple vector support for MSI */
int apic_multi_msi_enable = 1;
1551
1552 /*
1553 * Check whether the system supports MSI.
1554 *
1555 * MSI is required for PCI-E and for PCI versions later than 2.2, so if we find
1556 * a PCI-E bus or we find a PCI bus whose version we know is >= 2.2, then we
1557 * return PSM_SUCCESS to indicate this system supports MSI.
1558 *
1559 * (Currently the only way we check whether a given PCI bus supports >= 2.2 is
1560 * by detecting if we are running inside the KVM hypervisor, which guarantees
1561 * this version number.)
1562 */
1563 int
1564 apic_check_msi_support()
1565 {
1566 dev_info_t *cdip;
1567 char dev_type[16];
1568 int dev_len;
1569
1570 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support:\n"));
1571
1572 /*
1573 * check whether the first level children of root_node have
1574 * PCI-E or PCI capability.
1575 */
1576 for (cdip = ddi_get_child(ddi_root_node()); cdip != NULL;
1577 cdip = ddi_get_next_sibling(cdip)) {
1578
1579 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: cdip: 0x%p,"
1580 " driver: %s, binding: %s, nodename: %s\n", (void *)cdip,
1581 ddi_driver_name(cdip), ddi_binding_name(cdip),
1582 ddi_node_name(cdip)));
1583 dev_len = sizeof (dev_type);
1584 if (ddi_getlongprop_buf(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
1585 "device_type", (caddr_t)dev_type, &dev_len)
1586 != DDI_PROP_SUCCESS)
1587 continue;
1588 if (strcmp(dev_type, "pciex") == 0)
1589 return (PSM_SUCCESS);
1590 if (strcmp(dev_type, "pci") == 0 && get_hwenv() == HW_KVM)
1591 return (PSM_SUCCESS);
1592 }
1593
1594 /* MSI is not supported on this system */
1595 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: no 'pciex' "
1596 "device_type found\n"));
1597 return (PSM_FAILURE);
1598 }
1599
1600 /*
1601 * apic_pci_msi_unconfigure:
1602 *
1603 * This and next two interfaces are copied from pci_intr_lib.c
1604 * Do ensure that these two files stay in sync.
1605 * These needed to be copied over here to avoid a deadlock situation on
1606 * certain mp systems that use MSI interrupts.
1607 *
1608 * IMPORTANT regards next three interfaces:
1609 * i) are called only for MSI/X interrupts.
1610 * ii) called with interrupts disabled, and must not block
1611 */
/*
 * Disable multi-message mode and zero the address/data registers of the
 * device's MSI capability, or mask and zero the "inum"th MSI-X table
 * entry.  Called with interrupts disabled; must not block (see above).
 */
void
apic_pci_msi_unconfigure(dev_info_t *rdip, int type, int inum)
{
	ushort_t msi_ctrl;
	int cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
	ddi_acc_handle_t handle = i_ddi_get_pci_config_handle(rdip);

	ASSERT((handle != NULL) && (cap_ptr != 0));

	if (type == DDI_INTR_TYPE_MSI) {
		/* Clear the multiple-message-enable field and the address. */
		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
		msi_ctrl &= (~PCI_MSI_MME_MASK);
		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
		pci_config_put32(handle, cap_ptr + PCI_MSI_ADDR_OFFSET, 0);

		if (msi_ctrl & PCI_MSI_64BIT_MASK) {
			/* 64-bit capable: data register sits past addr+4 */
			pci_config_put16(handle,
			    cap_ptr + PCI_MSI_64BIT_DATA, 0);
			pci_config_put32(handle,
			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, 0);
		} else {
			pci_config_put16(handle,
			    cap_ptr + PCI_MSI_32BIT_DATA, 0);
		}

	} else if (type == DDI_INTR_TYPE_MSIX) {
		uintptr_t off;
		uint32_t mask;
		ddi_intr_msix_t *msix_p = i_ddi_get_msix(rdip);

		ASSERT(msix_p != NULL);

		/* Offset into "inum"th entry in the MSI-X table & mask it */
		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;

		mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);

		/* Set the per-vector mask bit before clearing the entry. */
		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask | 1));

		/* Offset into the "inum"th entry in the MSI-X table */
		off = (uintptr_t)msix_p->msix_tbl_addr +
		    (inum * PCI_MSIX_VECTOR_SIZE);

		/* Reset the "data" and "addr" bits */
		ddi_put32(msix_p->msix_tbl_hdl,
		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), 0);
		ddi_put64(msix_p->msix_tbl_hdl, (uint64_t *)off, 0);
	}
}
1662
1663 /*
1664 * apic_pci_msi_disable_mode:
1665 */
1666 void
1667 apic_pci_msi_disable_mode(dev_info_t *rdip, int type)
1668 {
1669 ushort_t msi_ctrl;
1670 int cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
1671 ddi_acc_handle_t handle = i_ddi_get_pci_config_handle(rdip);
1672
1673 ASSERT((handle != NULL) && (cap_ptr != 0));
1674
1675 if (type == DDI_INTR_TYPE_MSI) {
1676 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1677 if (!(msi_ctrl & PCI_MSI_ENABLE_BIT))
1678 return;
1679
1680 msi_ctrl &= ~PCI_MSI_ENABLE_BIT; /* MSI disable */
1681 pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
1682
1683 } else if (type == DDI_INTR_TYPE_MSIX) {
1684 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
1685 if (msi_ctrl & PCI_MSIX_ENABLE_BIT) {
1686 msi_ctrl &= ~PCI_MSIX_ENABLE_BIT;
1687 pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
1688 msi_ctrl);
1689 }
1690 }
1691 }
1692
/* Return the local APIC id recorded for the given CPU id. */
uint32_t
apic_get_localapicid(uint32_t cpuid)
{
	ASSERT(cpuid < apic_nproc && apic_cpus != NULL);

	return (apic_cpus[cpuid].aci_local_id);
}
1700
/* Return the APIC id recorded for the given I/O APIC index. */
uchar_t
apic_get_ioapicid(uchar_t ioapicindex)
{
	ASSERT(ioapicindex < MAX_IO_APIC);

	return (apic_io_id[ioapicindex]);
}