1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25 /*
26 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
27 * Copyright (c) 2016 by Delphix. All rights reserved.
28 */
29
30 /*
31 * PSMI 1.1 extensions are supported only in 2.6 and later versions.
32 * PSMI 1.2 extensions are supported only in 2.7 and later versions.
33 * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
34 * PSMI 1.5 extensions are supported in Solaris Nevada.
35 * PSMI 1.6 extensions are supported in Solaris Nevada.
36 * PSMI 1.7 extensions are supported in Solaris Nevada.
37 */
38 #define PSMI_1_7
39
40 #include <sys/processor.h>
41 #include <sys/time.h>
42 #include <sys/psm.h>
43 #include <sys/smp_impldefs.h>
44 #include <sys/cram.h>
45 #include <sys/acpi/acpi.h>
46 #include <sys/acpica.h>
47 #include <sys/psm_common.h>
48 #include <sys/apic.h>
49 #include <sys/pit.h>
50 #include <sys/ddi.h>
51 #include <sys/sunddi.h>
52 #include <sys/ddi_impldefs.h>
53 #include <sys/pci.h>
54 #include <sys/promif.h>
55 #include <sys/x86_archext.h>
56 #include <sys/cpc_impl.h>
57 #include <sys/uadmin.h>
58 #include <sys/panic.h>
59 #include <sys/debug.h>
60 #include <sys/archsystm.h>
61 #include <sys/trap.h>
62 #include <sys/machsystm.h>
63 #include <sys/sysmacros.h>
64 #include <sys/cpuvar.h>
65 #include <sys/rm_platter.h>
66 #include <sys/privregs.h>
67 #include <sys/note.h>
68 #include <sys/pci_intr_lib.h>
69 #include <sys/spl.h>
70 #include <sys/clock.h>
71 #include <sys/dditypes.h>
72 #include <sys/sunddi.h>
73 #include <sys/x_call.h>
74 #include <sys/reboot.h>
75 #include <sys/hpet.h>
76 #include <sys/apic_common.h>
77 #include <sys/apic_timer.h>
78
/*
 * Forward declarations for the two callbacks that are installed in
 * apic_nointrmap_ops below.
 */
static void apic_record_ioapic_rdt(void *intrmap_private,
    ioapic_rdt_t *irdt);
static void apic_record_msi(void *intrmap_private, msi_regs_t *mregs);

/*
 * Common routines between pcplusmp & apix (taken from apic.c).
 */

int	apic_clkinit(int);
hrtime_t apic_gethrtime(void);
void	apic_send_ipi(int, int);
void	apic_set_idlecpu(processorid_t);
void	apic_unset_idlecpu(processorid_t);
void	apic_shutdown(int, int);
void	apic_preshutdown(int, int);
processorid_t	apic_get_next_processorid(processorid_t);

/* Coarse time source; see the comment above its definition. */
hrtime_t apic_gettime();

/*
 * IOAPIC EOI handling method for the apix module, selected by
 * apic_ioapic_method_probe().  The initial value is the one that probe
 * routine treats as "nothing selected, fall back to pcplusmp".
 */
enum apic_ioapic_method_type apix_mul_ioapic_method = APIC_MUL_IOAPIC_PCPLUSMP;
99
/* Now the ones for Dynamic Interrupt distribution */
int	apic_enable_dynamic_migration = 0;

/* maximum loop count when sending Start IPIs. */
int	apic_sipi_max_loop_count = 0x1000;

/*
 * These variables are frequently accessed in apic_intr_enter(),
 * apic_intr_exit and apic_setspl, so group them together
 */
volatile uint32_t *apicadr = NULL;	/* virtual addr of local APIC */
int	apic_setspl_delay = 1;		/* apic_setspl - delay enable */
int	apic_clkvect;

/* vector at which error interrupts come in */
int	apic_errvect;
int	apic_enable_error_intr = 1;
/*
 * Number of tenmicrosec() calls apic_error_intr() spins for after printing
 * an error; that routine doubles it while it is below 500.
 */
int	apic_error_display_delay = 100;

/* vector at which performance counter overflow interrupts come in */
int	apic_cpcovf_vect;
int	apic_enable_cpcovf_intr = 1;

/* vector at which CMCI interrupts come in */
int	apic_cmci_vect;
extern int	cmi_enable_cmci;
extern void	cmi_cmci_trap(void);

kmutex_t cmci_cpu_setup_lock;	/* protects cmci_cpu_setup_registered */
int	cmci_cpu_setup_registered;

/* number of CPUs in power-on transition state */
static int apic_poweron_cnt = 0;
lock_t	apic_mode_switch_lock;

/*
 * Patchable global variables.
 */
int	apic_forceload = 0;

/*
 * 0 - use accurate slow gethrtime(); 1 - apic_gettime() is used instead of
 * apic_gethrtime() (see the comment above apic_gettime()).
 */
int	apic_coarse_hrtime = 1;		/* 0 - use accurate slow gethrtime() */

int	apic_flat_model = 0;		/* 0 - clustered. 1 - flat */
/* non-zero: apic_nmi_intr() panics when no debugger entry is taken */
int	apic_panic_on_nmi = 0;
/* non-zero: apic_error_intr() panics instead of logging the error */
int	apic_panic_on_apic_error = 0;

int	apic_verbose = 0;	/* 0x1ff */

#ifdef DEBUG
int	apic_debug = 0;
int	apic_restrict_vector = 0;

int	apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE];
int	apic_debug_msgbufindex = 0;

#endif /* DEBUG */

uint_t	apic_nticks = 0;
uint_t	apic_skipped_redistribute = 0;

/*
 * State used by apic_gettime()/apic_gethrtime().  Readers spin while
 * apic_hrtime_stamp is odd and retry if it changes under them;
 * apic_gethrtime_lock serialises the body of apic_gethrtime().
 */
uint_t	last_count_read = 0;
lock_t	apic_gethrtime_lock;
volatile int	apic_hrtime_stamp = 0;
volatile hrtime_t apic_nsec_since_boot = 0;

/* last value handed out by apic_gethrtime() */
static hrtime_t apic_last_hrtime = 0;
/* times apic_gethrtime() computed a value older than apic_last_hrtime */
int	apic_hrtime_error = 0;
/* times the remote read in apic_gethrtime() came back invalid */
int	apic_remote_hrterr = 0;
/* NMIs counted by apic_nmi_intr() */
int	apic_num_nmis = 0;
/* accumulated (OR-ed) error status bits seen by apic_error_intr() */
int	apic_apic_error = 0;
int	apic_num_apic_errors = 0;
int	apic_num_cksum_errors = 0;

/* bitmask of APIC_ERR_* conditions seen so far */
int	apic_error = 0;

/*
 * Set by apic_cpu_start(); apic_shutdown() clears the CMOS shutdown byte
 * when this is non-zero.
 */
static	int	apic_cmos_ssb_set = 0;

/* use to make sure only one cpu handles the nmi */
lock_t	apic_nmi_lock;
/* use to make sure only one cpu handles the error interrupt */
lock_t	apic_error_lock;
/*
 * Table of (control code, data byte) pairs forming two BMC requests, as
 * labelled by the per-entry comments: SET_WATCHDOG_TIMER followed by
 * RESET_WATCHDOG_TIMER.
 * NOTE(review): the consumer of this table is not in this part of the file;
 * presumably the power-off path writes the entries in order -- confirm.
 */
static struct {
	uchar_t	cntl;
	uchar_t	data;
} aspen_bmc[] = {
	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
	{ CC_SMS_WR_NEXT,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
	{ CC_SMS_WR_NEXT,	0x84 },		/* DataByte 1: SMS/OS no log */
	{ CC_SMS_WR_NEXT,	0x2 },		/* DataByte 2: Power Down */
	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 3: no pre-timeout */
	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 4: timer expir. */
	{ CC_SMS_WR_NEXT,	0xa },		/* DataByte 5: init countdown */
	{ CC_SMS_WR_END,	0x0 },		/* DataByte 6: init countdown */

	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
	{ CC_SMS_WR_END,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
};
198
/*
 * Table of (port, data byte) pairs carrying the same two BMC requests as
 * aspen_bmc (SET_WATCHDOG_TIMER, then RESET_WATCHDOG_TIMER), expressed as
 * writes to the SMS command and data registers.
 * NOTE(review): the consumer of this table is not in this part of the file;
 * presumably the power-off path writes the entries in order -- confirm.
 */
static struct {
	int	port;
	uchar_t	data;
} sitka_bmc[] = {
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
	{ SMS_DATA_REGISTER,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
	{ SMS_DATA_REGISTER,	0x84 },		/* DataByte 1: SMS/OS no log */
	{ SMS_DATA_REGISTER,	0x2 },		/* DataByte 2: Power Down */
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 3: no pre-timeout */
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 4: timer expir. */
	{ SMS_DATA_REGISTER,	0xa },		/* DataByte 5: init countdown */
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 6: init countdown */

	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
	{ SMS_DATA_REGISTER,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
};
219
/* Patchable global variables. */
int		apic_kmdb_on_nmi = 0;		/* 0 - no, 1 - yes enter kmdb */
uint32_t	apic_divide_reg_init = 0;	/* 0 - divide by 2 */

/*
 * default apic ops without interrupt remapping
 *
 * The first five hooks are the no-op return_instr cast to each slot's
 * signature; only the two record callbacks are real functions.
 */
static apic_intrmap_ops_t apic_nointrmap_ops = {
	(int (*)(int))return_instr,
	(void (*)(int))return_instr,
	(void (*)(void **, dev_info_t *, uint16_t, int, uchar_t))return_instr,
	(void (*)(void *, void *, uint16_t, int))return_instr,
	(void (*)(void **))return_instr,
	apic_record_ioapic_rdt,
	apic_record_msi,
};

/* Active interrupt-remapping ops; starts out as the no-remapping table. */
apic_intrmap_ops_t *apic_vt_ops = &apic_nointrmap_ops;
/* Per-CPU APIC information, indexed by cpu id. */
apic_cpus_info_t	*apic_cpus = NULL;
/* Set of cpu ids whose apic_cpus[] slot is in use. */
cpuset_t	apic_cpumask;
uint_t		apic_picinit_called;

/* Flag to indicate that we need to shut down all processors */
static uint_t	apic_shutdown_processors;
242
243 /*
244 * Probe the ioapic method for apix module. Called in apic_probe_common()
245 */
246 int
247 apic_ioapic_method_probe()
248 {
249 if (apix_enable == 0)
250 return (PSM_SUCCESS);
251
252 /*
253 * Set IOAPIC EOI handling method. The priority from low to high is:
254 * 1. IOxAPIC: with EOI register
255 * 2. IOMMU interrupt mapping
256 * 3. Mask-Before-EOI method for systems without boot
257 * interrupt routing, such as systems with only one IOAPIC;
258 * NVIDIA CK8-04/MCP55 systems; systems with bridge solution
259 * which disables the boot interrupt routing already.
260 * 4. Directed EOI
261 */
262 if (apic_io_ver[0] >= 0x20)
263 apix_mul_ioapic_method = APIC_MUL_IOAPIC_IOXAPIC;
264 if ((apic_io_max == 1) || (apic_nvidia_io_max == apic_io_max))
265 apix_mul_ioapic_method = APIC_MUL_IOAPIC_MASK;
266 if (apic_directed_EOI_supported())
267 apix_mul_ioapic_method = APIC_MUL_IOAPIC_DEOI;
268
269 /* fall back to pcplusmp */
270 if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_PCPLUSMP) {
271 /* make sure apix is after pcplusmp in /etc/mach */
272 apix_enable = 0; /* go ahead with pcplusmp install next */
273 return (PSM_FAILURE);
274 }
275
276 return (PSM_SUCCESS);
277 }
278
279 /*
280 * handler for APIC Error interrupt. Just print a warning and continue
281 */
282 int
283 apic_error_intr()
284 {
285 uint_t error0, error1, error;
286 uint_t i;
287
288 /*
289 * We need to write before read as per 7.4.17 of system prog manual.
290 * We do both and or the results to be safe
291 */
292 error0 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
293 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
294 error1 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
295 error = error0 | error1;
296
297 /*
298 * Clear the APIC error status (do this on all cpus that enter here)
299 * (two writes are required due to the semantics of accessing the
300 * error status register.)
301 */
302 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
303 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
304
305 /*
306 * Prevent more than 1 CPU from handling error interrupt causing
307 * double printing (interleave of characters from multiple
308 * CPU's when using prom_printf)
309 */
310 if (lock_try(&apic_error_lock) == 0)
311 return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
312 if (error) {
313 #if DEBUG
314 if (apic_debug)
315 debug_enter("pcplusmp: APIC Error interrupt received");
316 #endif /* DEBUG */
317 if (apic_panic_on_apic_error)
318 cmn_err(CE_PANIC,
319 "APIC Error interrupt on CPU %d. Status = %x",
320 psm_get_cpu_id(), error);
321 else {
322 if ((error & ~APIC_CS_ERRORS) == 0) {
323 /* cksum error only */
324 apic_error |= APIC_ERR_APIC_ERROR;
325 apic_apic_error |= error;
326 apic_num_apic_errors++;
327 apic_num_cksum_errors++;
328 } else {
329 /*
330 * prom_printf is the best shot we have of
331 * something which is problem free from
332 * high level/NMI type of interrupts
333 */
334 prom_printf("APIC Error interrupt on CPU %d. "
335 "Status 0 = %x, Status 1 = %x\n",
336 psm_get_cpu_id(), error0, error1);
337 apic_error |= APIC_ERR_APIC_ERROR;
338 apic_apic_error |= error;
339 apic_num_apic_errors++;
340 for (i = 0; i < apic_error_display_delay; i++) {
341 tenmicrosec();
342 }
343 /*
344 * provide more delay next time limited to
345 * roughly 1 clock tick time
346 */
347 if (apic_error_display_delay < 500)
348 apic_error_display_delay *= 2;
349 }
350 }
351 lock_clear(&apic_error_lock);
352 return (DDI_INTR_CLAIMED);
353 } else {
354 lock_clear(&apic_error_lock);
355 return (DDI_INTR_UNCLAIMED);
356 }
357 }
358
359 /*
360 * Turn off the mask bit in the performance counter Local Vector Table entry.
361 */
362 void
363 apic_cpcovf_mask_clear(void)
364 {
365 apic_reg_ops->apic_write(APIC_PCINT_VECT,
366 (apic_reg_ops->apic_read(APIC_PCINT_VECT) & ~APIC_LVT_MASK));
367 }
368
/*
 * Cross-call handler: program the CMCI LVT entry of the CPU it runs on with
 * apic_cmci_vect (mask bit not set).  The three arguments are unused.
 * Always returns 0.
 */
/*ARGSUSED*/
static int
apic_cmci_enable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
{
	apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
	return (0);
}
376
/*
 * Cross-call handler: program the CMCI LVT entry of the CPU it runs on with
 * apic_cmci_vect and AV_MASK set, i.e. masked.  The three arguments are
 * unused.  Always returns 0.
 */
/*ARGSUSED*/
static int
apic_cmci_disable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
{
	apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect | AV_MASK);
	return (0);
}
384
385 /*ARGSUSED*/
386 int
387 cmci_cpu_setup(cpu_setup_t what, int cpuid, void *arg)
388 {
389 cpuset_t cpu_set;
390
391 CPUSET_ONLY(cpu_set, cpuid);
392
393 switch (what) {
394 case CPU_ON:
395 xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
396 (xc_func_t)apic_cmci_enable);
397 break;
398
399 case CPU_OFF:
400 xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
401 (xc_func_t)apic_cmci_disable);
402 break;
403
404 default:
405 break;
406 }
407
408 return (0);
409 }
410
411 static void
412 apic_disable_local_apic(void)
413 {
414 apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);
415 apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK);
416
417 /* local intr reg 0 */
418 apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK);
419
420 /* disable NMI */
421 apic_reg_ops->apic_write(APIC_INT_VECT1, AV_MASK);
422
423 /* and error interrupt */
424 apic_reg_ops->apic_write(APIC_ERR_VECT, AV_MASK);
425
426 /* and perf counter intr */
427 apic_reg_ops->apic_write(APIC_PCINT_VECT, AV_MASK);
428
429 apic_reg_ops->apic_write(APIC_SPUR_INT_REG, APIC_SPUR_INTR);
430 }
431
/*
 * Send the INIT IPI sequence, and for integrated APICs two Startup IPIs,
 * to the local APIC of the CPU 'cpun'.
 *
 *	cpun	index into apic_cpus[] of the target CPU
 *	start	when true, the CMOS shutdown status byte (SSB) is set to
 *		BIOS_SHUTDOWN before any IPI is sent
 *
 * Interrupts are disabled on the calling CPU for the whole sequence and
 * restored before returning.
 */
static void
apic_cpu_send_SIPI(processorid_t cpun, boolean_t start)
{
	int		loop_count;
	uint32_t	vector;
	uint_t		apicid;
	ulong_t		iflag;

	apicid = apic_cpus[cpun].aci_local_id;

	/*
	 * Interrupts on current CPU will be disabled during the
	 * steps in order to avoid unwanted side effects from
	 * executing interrupt handlers on a problematic BIOS.
	 */
	iflag = intr_clear();

	if (start) {
		outb(CMOS_ADDR, SSB);
		outb(CMOS_DATA, BIOS_SHUTDOWN);
	}

	/*
	 * According to X2APIC specification in section '2.3.5.1' of
	 * Interrupt Command Register Semantics, the semantics of
	 * programming the Interrupt Command Register to dispatch an interrupt
	 * is simplified. A single MSR write to the 64-bit ICR is required
	 * for dispatching an interrupt. Specifically, with the 64-bit MSR
	 * interface to ICR, system software is not required to check the
	 * status of the delivery status bit prior to writing to the ICR
	 * to send an IPI. With the removal of the Delivery Status bit,
	 * system software no longer has a reason to read the ICR. It remains
	 * readable only to aid in debugging.
	 */
#ifdef	DEBUG
	APIC_AV_PENDING_SET();
#else
	if (apic_mode == LOCAL_APIC) {
		APIC_AV_PENDING_SET();
	}
#endif /* DEBUG */

	/* for integrated - make sure there is one INIT IPI in buffer */
	/* for external - it will wake up the cpu */
	apic_reg_ops->apic_write_int_cmd(apicid, AV_ASSERT | AV_RESET);

	/*
	 * If only 1 CPU is installed, PENDING bit will not go low
	 *
	 * The wait is therefore bounded by apic_sipi_max_loop_count; when
	 * not in LOCAL_APIC mode the loop exits on its first iteration.
	 */
	for (loop_count = apic_sipi_max_loop_count; loop_count; loop_count--) {
		if (apic_mode == LOCAL_APIC &&
		    apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
			apic_ret();
		else
			break;
	}

	apic_reg_ops->apic_write_int_cmd(apicid, AV_DEASSERT | AV_RESET);
	drv_usecwait(20000);		/* 20 milli sec */

	if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) {
		/* integrated apic */

		/*
		 * The startup vector is the page number of the real mode
		 * platter (rm_platter_pa), limited to the vector/IPL bits.
		 */
		vector = (rm_platter_pa >> MMU_PAGESHIFT) &
		    (APIC_VECTOR_MASK | APIC_IPL_MASK);

		/* to offset the INIT IPI queue up in the buffer */
		apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
		drv_usecwait(200);		/* 200 micro sec */

		/*
		 * send the second SIPI (Startup IPI) as recommended by Intel
		 * software development manual.
		 */
		apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
		drv_usecwait(200);	/* 200 micro sec */
	}

	intr_restore(iflag);
}
510
511 /*ARGSUSED1*/
512 int
513 apic_cpu_start(processorid_t cpun, caddr_t arg)
514 {
515 ASSERT(MUTEX_HELD(&cpu_lock));
516
517 if (!apic_cpu_in_range(cpun)) {
518 return (EINVAL);
519 }
520
521 /*
522 * Switch to apic_common_send_ipi for safety during starting other CPUs.
523 */
524 if (apic_mode == LOCAL_X2APIC) {
525 apic_switch_ipi_callback(B_TRUE);
526 }
527
528 apic_cmos_ssb_set = 1;
529 apic_cpu_send_SIPI(cpun, B_TRUE);
530
531 return (0);
532 }
533
534 /*
535 * Put CPU into halted state with interrupts disabled.
536 */
537 /*ARGSUSED1*/
538 int
539 apic_cpu_stop(processorid_t cpun, caddr_t arg)
540 {
541 int rc;
542 cpu_t *cp;
543 extern cpuset_t cpu_ready_set;
544 extern void cpu_idle_intercept_cpu(cpu_t *cp);
545
546 ASSERT(MUTEX_HELD(&cpu_lock));
547
548 if (!apic_cpu_in_range(cpun)) {
549 return (EINVAL);
550 }
551 if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
552 return (ENOTSUP);
553 }
554
555 cp = cpu_get(cpun);
556 ASSERT(cp != NULL);
557 ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0);
558 ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0);
559 ASSERT((cp->cpu_flags & CPU_ENABLE) == 0);
560
561 /* Clear CPU_READY flag to disable cross calls. */
562 cp->cpu_flags &= ~CPU_READY;
563 CPUSET_ATOMIC_DEL(cpu_ready_set, cpun);
564 rc = xc_flush_cpu(cp);
565 if (rc != 0) {
566 CPUSET_ATOMIC_ADD(cpu_ready_set, cpun);
567 cp->cpu_flags |= CPU_READY;
568 return (rc);
569 }
570
571 /* Intercept target CPU at a safe point before powering it off. */
572 cpu_idle_intercept_cpu(cp);
573
574 apic_cpu_send_SIPI(cpun, B_FALSE);
575 cp->cpu_flags &= ~CPU_RUNNING;
576
577 return (0);
578 }
579
580 int
581 apic_cpu_ops(psm_cpu_request_t *reqp)
582 {
583 if (reqp == NULL) {
584 return (EINVAL);
585 }
586
587 switch (reqp->pcr_cmd) {
588 case PSM_CPU_ADD:
589 return (apic_cpu_add(reqp));
590
591 case PSM_CPU_REMOVE:
592 return (apic_cpu_remove(reqp));
593
594 case PSM_CPU_STOP:
595 return (apic_cpu_stop(reqp->req.cpu_stop.cpuid,
596 reqp->req.cpu_stop.ctx));
597
598 default:
599 return (ENOTSUP);
600 }
601 }
602
/*
 * DEBUG-only tunables.  None of them is referenced in this part of the
 * file.
 */
#ifdef	DEBUG
int	apic_break_on_cpu = 9;
int	apic_stretch_interrupts = 0;
int	apic_stretch_ISR = 1 << 3;	/* IPL of 3 matches nothing now */
#endif /* DEBUG */
608
/*
 * generates an interprocessor interrupt to another CPU. Any changes made to
 * this routine must be accompanied by similar changes to
 * apic_common_send_ipi().
 *
 *	cpun	index into apic_cpus[] of the destination CPU
 *	ipl	index into apic_resv_vector[] selecting the vector to send
 *
 * Neither argument is range-checked here; the vector is only validated by
 * an ASSERT.  Interrupts are disabled on the calling CPU while the IPI is
 * issued.
 */
void
apic_send_ipi(int cpun, int ipl)
{
	int vector;
	ulong_t flag;

	vector = apic_resv_vector[ipl];

	ASSERT((vector >= APIC_BASE_VECT) && (vector <= APIC_SPUR_INTR));

	flag = intr_clear();

	/*
	 * NOTE(review): presumably waits for any previously issued IPI to
	 * leave the pending state -- confirm against the macro definition.
	 */
	APIC_AV_PENDING_SET();

	apic_reg_ops->apic_write_int_cmd(apic_cpus[cpun].aci_local_id,
	    vector);

	intr_restore(flag);
}
633
634
/*
 * Intentionally empty; the argument is ignored.
 */
/*ARGSUSED*/
void
apic_set_idlecpu(processorid_t cpun)
{
}
640
/*
 * Intentionally empty; the argument is ignored.
 */
/*ARGSUSED*/
void
apic_unset_idlecpu(processorid_t cpun)
{
}
646
647
/*
 * Empty function used as the body of the busy-wait loops in this file
 * (see apic_cpu_send_SIPI(), apic_gettime() and apic_gethrtime()).
 */
void
apic_ret()
{
}
652
653 /*
654 * If apic_coarse_time == 1, then apic_gettime() is used instead of
655 * apic_gethrtime(). This is used for performance instead of accuracy.
656 */
657
658 hrtime_t
659 apic_gettime()
660 {
661 int old_hrtime_stamp;
662 hrtime_t temp;
663
664 /*
665 * In one-shot mode, we do not keep time, so if anyone
666 * calls psm_gettime() directly, we vector over to
667 * gethrtime().
668 * one-shot mode MUST NOT be enabled if this psm is the source of
669 * hrtime.
670 */
671
672 if (apic_oneshot)
673 return (gethrtime());
674
675
676 gettime_again:
677 while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
678 apic_ret();
679
680 temp = apic_nsec_since_boot;
681
682 if (apic_hrtime_stamp != old_hrtime_stamp) { /* got an interrupt */
683 goto gettime_again;
684 }
685 return (temp);
686 }
687
/*
 * Here we return the number of nanoseconds since booting.  Note every
 * clock interrupt increments apic_nsec_since_boot by the appropriate
 * amount.
 *
 * The result is apic_nsec_since_boot plus the time represented by the
 * ticks elapsed in the current timer period, read from the local APIC of
 * CPU 0 (directly, or by a remote read when running on another CPU).
 * The returned value never goes below the previously returned one; if the
 * remote read fails, the previously returned value is returned again.
 * Runs with interrupts disabled and apic_gethrtime_lock held.
 */
hrtime_t
apic_gethrtime(void)
{
	int curr_timeval, countval, elapsed_ticks;
	int old_hrtime_stamp, status;
	hrtime_t temp;
	uint32_t cpun;
	ulong_t oflags;

	/*
	 * In one-shot mode, we do not keep time, so if anyone
	 * calls psm_gethrtime() directly, we vector over to
	 * gethrtime().
	 * one-shot mode MUST NOT be enabled if this psm is the source of
	 * hrtime.
	 */

	if (apic_oneshot)
		return (gethrtime());

	oflags = intr_clear();	/* prevent migration */

	/*
	 * Despite its name, cpun holds the local APIC id of the current CPU;
	 * in LOCAL_APIC mode the id must be shifted out of the register.
	 */
	cpun = apic_reg_ops->apic_read(APIC_LID_REG);
	if (apic_mode == LOCAL_APIC)
		cpun >>= APIC_ID_BIT_OFFSET;

	lock_set(&apic_gethrtime_lock);

gethrtime_again:
	/* Wait for the stamp to become even before sampling. */
	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
		apic_ret();

	/*
	 * Check to see which CPU we are on.  Note the time is kept on
	 * the local APIC of CPU 0.  If on CPU 0, simply read the current
	 * counter.  If on another CPU, issue a remote read command to CPU 0.
	 */
	if (cpun == apic_cpus[0].aci_local_id) {
		countval = apic_reg_ops->apic_read(APIC_CURR_COUNT);
	} else {
#ifdef	DEBUG
		APIC_AV_PENDING_SET();
#else
		if (apic_mode == LOCAL_APIC)
			APIC_AV_PENDING_SET();
#endif /* DEBUG */

		apic_reg_ops->apic_write_int_cmd(
		    apic_cpus[0].aci_local_id, APIC_CURR_ADD | AV_REMOTE);

		/* Spin until the remote read is no longer pending. */
		while ((status = apic_reg_ops->apic_read(APIC_INT_CMD1))
		    & AV_READ_PENDING) {
			apic_ret();
		}

		if (status & AV_REMOTE_STATUS)	/* 1 = valid */
			countval = apic_reg_ops->apic_read(APIC_REMOTE_READ);
		else {	/* 0 = invalid */
			apic_remote_hrterr++;
			/*
			 * return last hrtime right now, will need more
			 * testing if change to retry
			 */
			temp = apic_last_hrtime;

			lock_clear(&apic_gethrtime_lock);

			intr_restore(oflags);

			return (temp);
		}
	}
	/*
	 * A count larger than the last one read is replaced by 0;
	 * otherwise it becomes the new last_count_read.
	 */
	if (countval > last_count_read)
		countval = 0;
	else
		last_count_read = countval;

	elapsed_ticks = apic_hertz_count - countval;

	curr_timeval = APIC_TICKS_TO_NSECS(elapsed_ticks);
	temp = apic_nsec_since_boot + curr_timeval;

	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
		/* we might have clobbered last_count_read. Restore it */
		last_count_read = apic_hertz_count;
		goto gethrtime_again;
	}

	if (temp < apic_last_hrtime) {
		/* return last hrtime if error occurs */
		apic_hrtime_error++;
		temp = apic_last_hrtime;
	}
	else
		apic_last_hrtime = temp;

	lock_clear(&apic_gethrtime_lock);
	intr_restore(oflags);

	return (temp);
}
794
795 /* apic NMI handler */
796 /*ARGSUSED*/
797 void
798 apic_nmi_intr(caddr_t arg, struct regs *rp)
799 {
800 if (apic_shutdown_processors) {
801 apic_disable_local_apic();
802 return;
803 }
804
805 apic_error |= APIC_ERR_NMI;
806
807 if (!lock_try(&apic_nmi_lock))
808 return;
809 apic_num_nmis++;
810
811 if (apic_kmdb_on_nmi && psm_debugger()) {
812 debug_enter("NMI received: entering kmdb\n");
813 } else if (apic_panic_on_nmi) {
814 /* Keep panic from entering kmdb. */
815 nopanicdebug = 1;
816 panic("NMI received\n");
817 } else {
818 /*
819 * prom_printf is the best shot we have of something which is
820 * problem free from high level/NMI type of interrupts
821 */
822 prom_printf("NMI received\n");
823 }
824
825 lock_clear(&apic_nmi_lock);
826 }
827
828 processorid_t
829 apic_get_next_processorid(processorid_t cpu_id)
830 {
831
832 int i;
833
834 if (cpu_id == -1)
835 return ((processorid_t)0);
836
837 for (i = cpu_id + 1; i < NCPU; i++) {
838 if (apic_cpu_in_range(i))
839 return (i);
840 }
841
842 return ((processorid_t)-1);
843 }
844
845 int
846 apic_cpu_add(psm_cpu_request_t *reqp)
847 {
848 int i, rv = 0;
849 ulong_t iflag;
850 boolean_t first = B_TRUE;
851 uchar_t localver;
852 uint32_t localid, procid;
853 processorid_t cpuid = (processorid_t)-1;
854 mach_cpu_add_arg_t *ap;
855
856 ASSERT(reqp != NULL);
857 reqp->req.cpu_add.cpuid = (processorid_t)-1;
858
859 /* Check whether CPU hotplug is supported. */
860 if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
861 return (ENOTSUP);
862 }
863
864 ap = (mach_cpu_add_arg_t *)reqp->req.cpu_add.argp;
865 switch (ap->type) {
866 case MACH_CPU_ARG_LOCAL_APIC:
867 localid = ap->arg.apic.apic_id;
868 procid = ap->arg.apic.proc_id;
869 if (localid >= 255 || procid > 255) {
870 cmn_err(CE_WARN,
871 "!apic: apicid(%u) or procid(%u) is invalid.",
872 localid, procid);
873 return (EINVAL);
874 }
875 break;
876
877 case MACH_CPU_ARG_LOCAL_X2APIC:
878 localid = ap->arg.apic.apic_id;
879 procid = ap->arg.apic.proc_id;
880 if (localid >= UINT32_MAX) {
881 cmn_err(CE_WARN,
882 "!apic: x2apicid(%u) is invalid.", localid);
883 return (EINVAL);
884 } else if (localid >= 255 && apic_mode == LOCAL_APIC) {
885 cmn_err(CE_WARN, "!apic: system is in APIC mode, "
886 "can't support x2APIC processor.");
887 return (ENOTSUP);
888 }
889 break;
890
891 default:
892 cmn_err(CE_WARN,
893 "!apic: unknown argument type %d to apic_cpu_add().",
894 ap->type);
895 return (EINVAL);
896 }
897
898 /* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */
899 iflag = intr_clear();
900 lock_set(&apic_ioapic_lock);
901
902 /* Check whether local APIC id already exists. */
903 for (i = 0; i < apic_nproc; i++) {
904 if (!CPU_IN_SET(apic_cpumask, i))
905 continue;
906 if (apic_cpus[i].aci_local_id == localid) {
907 lock_clear(&apic_ioapic_lock);
908 intr_restore(iflag);
909 cmn_err(CE_WARN,
910 "!apic: local apic id %u already exists.",
911 localid);
912 return (EEXIST);
913 } else if (apic_cpus[i].aci_processor_id == procid) {
914 lock_clear(&apic_ioapic_lock);
915 intr_restore(iflag);
916 cmn_err(CE_WARN,
917 "!apic: processor id %u already exists.",
918 (int)procid);
919 return (EEXIST);
920 }
921
922 /*
923 * There's no local APIC version number available in MADT table,
924 * so assume that all CPUs are homogeneous and use local APIC
925 * version number of the first existing CPU.
926 */
927 if (first) {
928 first = B_FALSE;
929 localver = apic_cpus[i].aci_local_ver;
930 }
931 }
932 ASSERT(first == B_FALSE);
933
934 /*
935 * Try to assign the same cpuid if APIC id exists in the dirty cache.
936 */
937 for (i = 0; i < apic_max_nproc; i++) {
938 if (CPU_IN_SET(apic_cpumask, i)) {
939 ASSERT((apic_cpus[i].aci_status & APIC_CPU_FREE) == 0);
940 continue;
941 }
942 ASSERT(apic_cpus[i].aci_status & APIC_CPU_FREE);
943 if ((apic_cpus[i].aci_status & APIC_CPU_DIRTY) &&
944 apic_cpus[i].aci_local_id == localid &&
945 apic_cpus[i].aci_processor_id == procid) {
946 cpuid = i;
947 break;
948 }
949 }
950
951 /* Avoid the dirty cache and allocate fresh slot if possible. */
952 if (cpuid == (processorid_t)-1) {
953 for (i = 0; i < apic_max_nproc; i++) {
954 if ((apic_cpus[i].aci_status & APIC_CPU_FREE) &&
955 (apic_cpus[i].aci_status & APIC_CPU_DIRTY) == 0) {
956 cpuid = i;
957 break;
958 }
959 }
960 }
961
962 /* Try to find any free slot as last resort. */
963 if (cpuid == (processorid_t)-1) {
964 for (i = 0; i < apic_max_nproc; i++) {
965 if (apic_cpus[i].aci_status & APIC_CPU_FREE) {
966 cpuid = i;
967 break;
968 }
969 }
970 }
971
972 if (cpuid == (processorid_t)-1) {
973 lock_clear(&apic_ioapic_lock);
974 intr_restore(iflag);
975 cmn_err(CE_NOTE,
976 "!apic: failed to allocate cpu id for processor %u.",
977 procid);
978 rv = EAGAIN;
979 } else if (ACPI_FAILURE(acpica_map_cpu(cpuid, procid))) {
980 lock_clear(&apic_ioapic_lock);
981 intr_restore(iflag);
982 cmn_err(CE_NOTE,
983 "!apic: failed to build mapping for processor %u.",
984 procid);
985 rv = EBUSY;
986 } else {
987 ASSERT(cpuid >= 0 && cpuid < NCPU);
988 ASSERT(cpuid < apic_max_nproc && cpuid < max_ncpus);
989 bzero(&apic_cpus[cpuid], sizeof (apic_cpus[0]));
990 apic_cpus[cpuid].aci_processor_id = procid;
991 apic_cpus[cpuid].aci_local_id = localid;
992 apic_cpus[cpuid].aci_local_ver = localver;
993 CPUSET_ATOMIC_ADD(apic_cpumask, cpuid);
994 if (cpuid >= apic_nproc) {
995 apic_nproc = cpuid + 1;
996 }
997 lock_clear(&apic_ioapic_lock);
998 intr_restore(iflag);
999 reqp->req.cpu_add.cpuid = cpuid;
1000 }
1001
1002 return (rv);
1003 }
1004
1005 int
1006 apic_cpu_remove(psm_cpu_request_t *reqp)
1007 {
1008 int i;
1009 ulong_t iflag;
1010 processorid_t cpuid;
1011
1012 /* Check whether CPU hotplug is supported. */
1013 if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
1014 return (ENOTSUP);
1015 }
1016
1017 cpuid = reqp->req.cpu_remove.cpuid;
1018
1019 /* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */
1020 iflag = intr_clear();
1021 lock_set(&apic_ioapic_lock);
1022
1023 if (!apic_cpu_in_range(cpuid)) {
1024 lock_clear(&apic_ioapic_lock);
1025 intr_restore(iflag);
1026 cmn_err(CE_WARN,
1027 "!apic: cpuid %d doesn't exist in apic_cpus array.",
1028 cpuid);
1029 return (ENODEV);
1030 }
1031 ASSERT((apic_cpus[cpuid].aci_status & APIC_CPU_FREE) == 0);
1032
1033 if (ACPI_FAILURE(acpica_unmap_cpu(cpuid))) {
1034 lock_clear(&apic_ioapic_lock);
1035 intr_restore(iflag);
1036 return (ENOENT);
1037 }
1038
1039 if (cpuid == apic_nproc - 1) {
1040 /*
1041 * We are removing the highest numbered cpuid so we need to
1042 * find the next highest cpuid as the new value for apic_nproc.
1043 */
1044 for (i = apic_nproc; i > 0; i--) {
1045 if (CPU_IN_SET(apic_cpumask, i - 1)) {
1046 apic_nproc = i;
1047 break;
1048 }
1049 }
1050 /* at least one CPU left */
1051 ASSERT(i > 0);
1052 }
1053 CPUSET_ATOMIC_DEL(apic_cpumask, cpuid);
1054 /* mark slot as free and keep it in the dirty cache */
1055 apic_cpus[cpuid].aci_status = APIC_CPU_FREE | APIC_CPU_DIRTY;
1056
1057 lock_clear(&apic_ioapic_lock);
1058 intr_restore(iflag);
1059
1060 return (0);
1061 }
1062
/*
 * Return the number of APIC clock ticks elapsed for 8254 to decrement
 * (APIC_TIME_COUNT + pit_ticks_adj) ticks.
 *
 *	addr		base of the local APIC register array; only its
 *			offset from apicadr is used
 *	pit_ticks_adj	out: how many PIT ticks beyond APIC_TIME_COUNT had
 *			elapsed when the measurement ended
 *
 * The return value is the start count minus the end count of the APIC
 * current-count register.  Runs with interrupts disabled.
 */
uint_t
apic_calibrate(volatile uint32_t *addr, uint16_t *pit_ticks_adj)
{
	uint8_t		pit_tick_lo;
	uint16_t	pit_tick, target_pit_tick;
	uint32_t	start_apic_tick, end_apic_tick;
	ulong_t		iflag;
	uint32_t	reg;

	/*
	 * Element offset (in uint32_t units) of addr[APIC_CURR_COUNT] from
	 * apicadr; this is the register argument passed to apic_read().
	 */
	reg = addr + APIC_CURR_COUNT - apicadr;

	iflag = intr_clear();

	/*
	 * Read the PIT counter 0 (low byte first, then high byte) until the
	 * count is at least APIC_TIME_MIN and its low byte lies strictly
	 * between APIC_LB_MIN and APIC_LB_MAX.
	 */
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick < APIC_TIME_MIN ||
	    pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX);

	/*
	 * Wait for the 8254 to decrement by 5 ticks to ensure
	 * we didn't start in the middle of a tick.
	 * Compare with 0x10 for the wrap around case.
	 */
	target_pit_tick = pit_tick - 5;
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);

	start_apic_tick = apic_reg_ops->apic_read(reg);

	/*
	 * Wait for the 8254 to decrement by
	 * (APIC_TIME_COUNT + pit_ticks_adj) ticks
	 */
	target_pit_tick = pit_tick - APIC_TIME_COUNT;
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);

	end_apic_tick = apic_reg_ops->apic_read(reg);

	/* Overshoot past the target, reported back to the caller. */
	*pit_ticks_adj = target_pit_tick - pit_tick;

	intr_restore(iflag);

	return (start_apic_tick - end_apic_tick);
}
1117
1118 /*
1119 * Initialise the APIC timer on the local APIC of CPU 0 to the desired
1120 * frequency. Note at this stage in the boot sequence, the boot processor
1121 * is the only active processor.
1122 * hertz value of 0 indicates a one-shot mode request. In this case
1123 * the function returns the resolution (in nanoseconds) for the hardware
1124 * timer interrupt. If one-shot mode capability is not available,
1125 * the return value will be 0. apic_enable_oneshot is a global switch
1126 * for disabling the functionality.
1127 * A non-zero positive value for hertz indicates a periodic mode request.
1128 * In this case the hardware will be programmed to generate clock interrupts
1129 * at hertz frequency and returns the resolution of interrupts in
1130 * nanosecond.
1131 */
1132
1133 int
1134 apic_clkinit(int hertz)
1135 {
1136 int ret;
1137
1138 apic_int_busy_mark = (apic_int_busy_mark *
1139 apic_sample_factor_redistribution) / 100;
1140 apic_int_free_mark = (apic_int_free_mark *
1141 apic_sample_factor_redistribution) / 100;
1142 apic_diff_for_redistribution = (apic_diff_for_redistribution *
1143 apic_sample_factor_redistribution) / 100;
1144
1145 ret = apic_timer_init(hertz);
1146 return (ret);
1147
1148 }
1149
1150 /*
1151 * apic_preshutdown:
1152 * Called early in shutdown whilst we can still access filesystems to do
1153 * things like loading modules which will be required to complete shutdown
1154 * after filesystems are all unmounted.
1155 */
1156 void
1157 apic_preshutdown(int cmd, int fcn)
1158 {
1159 APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n",
1160 cmd, fcn, apic_poweroff_method, apic_enable_acpi));
1161 }
1162
/*
 * apic_shutdown:
 *	Tear down the interrupt hardware for shutdown/reboot and, when a
 *	power-off is requested, drive the configured power-off method.
 *
 *	cmd	compared against A_SHUTDOWN; any other value returns as soon
 *		as the interrupt hardware has been quiesced.
 *	fcn	compared against AD_POWEROFF and AD_FASTREBOOT to select the
 *		extra work done for those cases.
 *
 *	The function returns to the caller whenever power-off is not
 *	requested, is not attempted, or has not taken effect by the end of
 *	the final wait.
 */
void
apic_shutdown(int cmd, int fcn)
{
	int restarts, attempts;
	int i;
	uchar_t byte;
	ulong_t iflag;

	hpet_acpi_fini();

	/* Send NMI to all CPUs except self to do per processor shutdown */
	iflag = intr_clear();
#ifdef DEBUG
	APIC_AV_PENDING_SET();
#else
	if (apic_mode == LOCAL_APIC)
		APIC_AV_PENDING_SET();
#endif /* DEBUG */
	/* Set before the NMI is sent so the flag is already up when it lands */
	apic_shutdown_processors = 1;
	apic_reg_ops->apic_write(APIC_INT_CMD1,
	    AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF);

	/* restore cmos shutdown byte before reboot */
	if (apic_cmos_ssb_set) {
		outb(CMOS_ADDR, SSB);
		outb(CMOS_DATA, 0);
	}

	ioapic_disable_redirection();

	/* disable apic mode if imcr present */
	if (apic_imcrp) {
		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC);
	}

	apic_disable_local_apic();

	intr_restore(iflag);

	/* remainder of function is for shutdown cases only */
	if (cmd != A_SHUTDOWN)
		return;

	/*
	 * Switch system back into Legacy-Mode if using ACPI and
	 * not powering-off. Some BIOSes need to remain in ACPI-mode
	 * for power-off to succeed (Dell Dimension 4600)
	 * Do not disable ACPI while doing fastreboot
	 */
	if (apic_enable_acpi && fcn != AD_POWEROFF && fcn != AD_FASTREBOOT)
		(void) AcpiDisable();

	/* For fast reboot, send a reset IPI to every CPU except this one */
	if (fcn == AD_FASTREBOOT) {
		apic_reg_ops->apic_write(APIC_INT_CMD1,
		    AV_ASSERT | AV_RESET | AV_SH_ALL_EXCSELF);
	}

	/* remainder of function is for shutdown+poweroff case only */
	if (fcn != AD_POWEROFF)
		return;

	switch (apic_poweroff_method) {
	case APIC_POWEROFF_VIA_RTC:

		/* select the extended NVRAM bank in the RTC */
		outb(CMOS_ADDR, RTC_REGA);
		byte = inb(CMOS_DATA);
		outb(CMOS_DATA, (byte | EXT_BANK));

		outb(CMOS_ADDR, PFR_REG);

		/* for Predator must toggle the PAB bit */
		byte = inb(CMOS_DATA);

		/*
		 * clear power active bar, wakeup alarm and
		 * kickstart
		 */
		byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG);
		outb(CMOS_DATA, byte);

		/* delay before next write */
		drv_usecwait(1000);

		/* for S40 the following would suffice */
		byte = inb(CMOS_DATA);

		/* power active bar control bit */
		byte |= PAB_CBIT;
		outb(CMOS_DATA, byte);

		break;

	case APIC_POWEROFF_VIA_ASPEN_BMC:
		restarts = 0;
restart_aspen_bmc:
		/* The whole sequence is tried at most twice before giving up */
		if (++restarts == 3)
			break;
		attempts = 0;
		/*
		 * Poll until the busy bits clear.  Every busy reading costs a
		 * 1ms wait; the fourth busy reading in a row restarts the
		 * sequence from the label above.
		 */
		do {
			byte = inb(MISMIC_FLAG_REGISTER);
			byte &= MISMIC_BUSY_MASK;
			if (byte != 0) {
				drv_usecwait(1000);
				if (attempts >= 3)
					goto restart_aspen_bmc;
				++attempts;
			}
		} while (byte != 0);
		outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS);
		byte = inb(MISMIC_FLAG_REGISTER);
		byte |= 0x1;
		outb(MISMIC_FLAG_REGISTER, byte);
		i = 0;
		/* Send each control/data pair of the aspen_bmc[] table in turn */
		for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0]));
		    i++) {
			attempts = 0;
			/* Same bounded busy-wait as above before each pair */
			do {
				byte = inb(MISMIC_FLAG_REGISTER);
				byte &= MISMIC_BUSY_MASK;
				if (byte != 0) {
					drv_usecwait(1000);
					if (attempts >= 3)
						goto restart_aspen_bmc;
					++attempts;
				}
			} while (byte != 0);
			outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl);
			outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data);
			byte = inb(MISMIC_FLAG_REGISTER);
			byte |= 0x1;
			outb(MISMIC_FLAG_REGISTER, byte);
		}
		break;

	case APIC_POWEROFF_VIA_SITKA_BMC:
		restarts = 0;
restart_sitka_bmc:
		/* The whole sequence is tried at most twice before giving up */
		if (++restarts == 3)
			break;
		attempts = 0;
		/*
		 * Poll until the status register reports neither the read nor
		 * the write state.  Every such reading costs a 1ms wait; the
		 * fourth one in a row restarts the sequence.
		 */
		do {
			byte = inb(SMS_STATUS_REGISTER);
			byte &= SMS_STATE_MASK;
			if ((byte == SMS_READ_STATE) ||
			    (byte == SMS_WRITE_STATE)) {
				drv_usecwait(1000);
				if (attempts >= 3)
					goto restart_sitka_bmc;
				++attempts;
			}
		} while ((byte == SMS_READ_STATE) ||
		    (byte == SMS_WRITE_STATE));
		outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS);
		i = 0;
		/* Write each port/data pair of the sitka_bmc[] table in turn */
		for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0]));
		    i++) {
			attempts = 0;
			/* Wait, with the same bound, for the IBF bits to clear */
			do {
				byte = inb(SMS_STATUS_REGISTER);
				byte &= SMS_IBF_MASK;
				if (byte != 0) {
					drv_usecwait(1000);
					if (attempts >= 3)
						goto restart_sitka_bmc;
					++attempts;
				}
			} while (byte != 0);
			outb(sitka_bmc[i].port, sitka_bmc[i].data);
		}
		break;

	case APIC_POWEROFF_NONE:

		/* If no APIC direct method, we will try using ACPI */
		if (apic_enable_acpi) {
			/*
			 * A return of 1 skips the wait below; any other value
			 * falls through to it.
			 * NOTE(review): presumably 1 means the ACPI power-off
			 * attempt failed -- confirm against acpi_poweroff().
			 */
			if (acpi_poweroff() == 1)
				return;
		} else
			return;

		break;
	}
	/*
	 * Wait a limited time here for power to go off.
	 * If the power does not go off, then there was a
	 * problem and we should continue to the halt which
	 * prints a message for the user to press a key to
	 * reboot.
	 */
	drv_usecwait(7000000); /* wait seven seconds */

}
1357
/*
 * Cyclic identifier defined by this file; its users are not in this part of
 * the file.
 * NOTE(review): presumably the handle of a cyclic registered elsewhere in
 * the PSM code -- confirm against the code that assigns it.
 */
cyclic_id_t apic_cyclic_id;
1359
1360 /*
1361 * The following functions are in the platform specific file so that they
1362 * can be different functions depending on whether we are running on
1363 * bare metal or a hypervisor.
1364 */
1365
/*
 * mapin_apic:
 *	Map an APIC for memory-mapped access.  addr, len and flags are handed
 *	unchanged to psm_map_phys(); the resulting mapping is returned as a
 *	pointer to 32-bit words.
 */
uint32_t *
mapin_apic(uint32_t addr, size_t len, int flags)
{
	uint32_t *regs;

	regs = (void *)psm_map_phys(addr, len, flags);
	return (regs);
}
1374
/*
 * mapin_ioapic:
 *	Map an I/O APIC for memory-mapped access.  This simply forwards its
 *	arguments to mapin_apic() and returns what that returns.
 */
uint32_t *
mapin_ioapic(uint32_t addr, size_t len, int flags)
{
	uint32_t *regs = mapin_apic(addr, len, flags);

	return (regs);
}
1380
1381 /*
1382 * unmap an apic
1383 */
1384 void
1385 mapout_apic(caddr_t addr, size_t len)
1386 {
1387 psm_unmap_phys(addr, len);
1388 }
1389
1390 void
1391 mapout_ioapic(caddr_t addr, size_t len)
1392 {
1393 mapout_apic(addr, len);
1394 }
1395
1396 uint32_t
1397 ioapic_read(int ioapic_ix, uint32_t reg)
1398 {
1399 volatile uint32_t *ioapic;
1400
1401 ioapic = apicioadr[ioapic_ix];
1402 ioapic[APIC_IO_REG] = reg;
1403 return (ioapic[APIC_IO_DATA]);
1404 }
1405
1406 void
1407 ioapic_write(int ioapic_ix, uint32_t reg, uint32_t value)
1408 {
1409 volatile uint32_t *ioapic;
1410
1411 ioapic = apicioadr[ioapic_ix];
1412 ioapic[APIC_IO_REG] = reg;
1413 ioapic[APIC_IO_DATA] = value;
1414 }
1415
1416 void
1417 ioapic_write_eoi(int ioapic_ix, uint32_t value)
1418 {
1419 volatile uint32_t *ioapic;
1420
1421 ioapic = apicioadr[ioapic_ix];
1422 ioapic[APIC_IO_EOI] = value;
1423 }
1424
1425 /*
1426 * Round-robin algorithm to find the next CPU with interrupts enabled.
1427 * It can't share the same static variable apic_next_bind_cpu with
1428 * apic_get_next_bind_cpu(), since that will cause all interrupts to be
1429 * bound to CPU1 at boot time. During boot, only CPU0 is online with
1430 * interrupts enabled when apic_get_next_bind_cpu() and apic_find_cpu()
1431 * are called. However, the pcplusmp driver assumes that there will be
1432 * boot_ncpus CPUs configured eventually so it tries to distribute all
1433 * interrupts among CPU0 - CPU[boot_ncpus - 1]. Thus to prevent all
1434 * interrupts being targetted at CPU1, we need to use a dedicated static
1435 * variable for find_next_cpu() instead of sharing apic_next_bind_cpu.
1436 */
1437
1438 processorid_t
1439 apic_find_cpu(int flag)
1440 {
1441 int i;
1442 static processorid_t acid = 0;
1443
1444 /* Find the first CPU with the passed-in flag set */
1445 for (i = 0; i < apic_nproc; i++) {
1446 if (++acid >= apic_nproc) {
1447 acid = 0;
1448 }
1449 if (apic_cpu_in_range(acid) &&
1450 (apic_cpus[acid].aci_status & flag)) {
1451 break;
1452 }
1453 }
1454
1455 ASSERT((apic_cpus[acid].aci_status & flag) != 0);
1456 return (acid);
1457 }
1458
1459 /*
1460 * Switch between safe and x2APIC IPI sending method.
1461 * CPU may power on in xapic mode or x2apic mode. If CPU needs to send IPI to
1462 * other CPUs before entering x2APIC mode, it still needs to xAPIC method.
1463 * Before sending StartIPI to target CPU, psm_send_ipi will be changed to
1464 * apic_common_send_ipi, which detects current local APIC mode and use right
1465 * method to send IPI. If some CPUs fail to start up, apic_poweron_cnt
1466 * won't return to zero, so apic_common_send_ipi will always be used.
1467 * psm_send_ipi can't be simply changed back to x2apic_send_ipi if some CPUs
1468 * failed to start up because those failed CPUs may recover itself later at
1469 * unpredictable time.
1470 */
1471 void
1472 apic_switch_ipi_callback(boolean_t enter)
1473 {
1474 ulong_t iflag;
1475 struct psm_ops *pops = psmops;
1476
1477 iflag = intr_clear();
1478 lock_set(&apic_mode_switch_lock);
1479 if (enter) {
1480 ASSERT(apic_poweron_cnt >= 0);
1481 if (apic_poweron_cnt == 0) {
1482 pops->psm_send_ipi = apic_common_send_ipi;
1483 send_dirintf = pops->psm_send_ipi;
1484 }
1485 apic_poweron_cnt++;
1486 } else {
1487 ASSERT(apic_poweron_cnt > 0);
1488 apic_poweron_cnt--;
1489 if (apic_poweron_cnt == 0) {
1490 pops->psm_send_ipi = x2apic_send_ipi;
1491 send_dirintf = pops->psm_send_ipi;
1492 }
1493 }
1494 lock_clear(&apic_mode_switch_lock);
1495 intr_restore(iflag);
1496 }
1497
1498 void
1499 apic_intrmap_init(int apic_mode)
1500 {
1501 int suppress_brdcst_eoi = 0;
1502
1503 /*
1504 * Intel Software Developer's Manual 3A, 10.12.7:
1505 *
1506 * Routing of device interrupts to local APIC units operating in
1507 * x2APIC mode requires use of the interrupt-remapping architecture
1508 * specified in the Intel Virtualization Technology for Directed
1509 * I/O, Revision 1.3. Because of this, BIOS must enumerate support
1510 * for and software must enable this interrupt remapping with
1511 * Extended Interrupt Mode Enabled before it enabling x2APIC mode in
1512 * the local APIC units.
1513 *
1514 *
1515 * In other words, to use the APIC in x2APIC mode, we need interrupt
1516 * remapping. Since we don't start up the IOMMU by default, we
1517 * won't be able to do any interrupt remapping and therefore have to
1518 * use the APIC in traditional 'local APIC' mode with memory mapped
1519 * I/O.
1520 */
1521
1522 if (psm_vt_ops != NULL) {
1523 if (((apic_intrmap_ops_t *)psm_vt_ops)->
1524 apic_intrmap_init(apic_mode) == DDI_SUCCESS) {
1525
1526 apic_vt_ops = psm_vt_ops;
1527
1528 /*
1529 * We leverage the interrupt remapping engine to
1530 * suppress broadcast EOI; thus we must send the
1531 * directed EOI with the directed-EOI handler.
1532 */
1533 if (apic_directed_EOI_supported() == 0) {
1534 suppress_brdcst_eoi = 1;
1535 }
1536
1537 apic_vt_ops->apic_intrmap_enable(suppress_brdcst_eoi);
1538
1539 if (apic_detect_x2apic()) {
1540 apic_enable_x2apic();
1541 }
1542
1543 if (apic_directed_EOI_supported() == 0) {
1544 apic_set_directed_EOI_handler();
1545 }
1546 }
1547 }
1548 }
1549
1550 /*ARGSUSED*/
1551 static void
1552 apic_record_ioapic_rdt(void *intrmap_private, ioapic_rdt_t *irdt)
1553 {
1554 irdt->ir_hi <<= APIC_ID_BIT_OFFSET;
1555 }
1556
1557 /*ARGSUSED*/
1558 static void
1559 apic_record_msi(void *intrmap_private, msi_regs_t *mregs)
1560 {
1561 mregs->mr_addr = MSI_ADDR_HDR |
1562 (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
1563 (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT) |
1564 (mregs->mr_addr << MSI_ADDR_DEST_SHIFT);
1565 mregs->mr_data = (MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) |
1566 mregs->mr_data;
1567 }
1568
/*
 * Functions from apic_introp.c
 *
 * These functions are used by apic_intr_ops().
 */
1574
/*
 * MSI support flag:
 * reflects whether MSI is supported at APIC level
 * it can also be patched through /etc/system
 *
 * 0 = default value - don't know and need to call apic_check_msi_support()
 * to find out then set it accordingly
 * 1 = supported
 * -1 = not supported
 */
int apic_support_msi = 0;

/*
 * Multiple vector support for MSI-X; initialised to 1.
 * NOTE(review): presumably non-zero means enabled -- confirm at the users.
 */
int apic_msix_enable = 1;

/*
 * Multiple vector support for MSI; initialised to 1.
 * NOTE(review): presumably non-zero means enabled -- confirm at the users.
 */
int apic_multi_msi_enable = 1;
1592
1593 /*
1594 * Check whether the system supports MSI.
1595 *
1596 * MSI is required for PCI-E and for PCI versions later than 2.2, so if we find
1597 * a PCI-E bus or we find a PCI bus whose version we know is >= 2.2, then we
1598 * return PSM_SUCCESS to indicate this system supports MSI.
1599 *
1600 * (Currently the only way we check whether a given PCI bus supports >= 2.2 is
1601 * by detecting if we are running inside the KVM hypervisor, which guarantees
1602 * this version number.)
1603 */
1604 int
1605 apic_check_msi_support()
1606 {
1607 dev_info_t *cdip;
1608 char dev_type[16];
1609 int dev_len;
1610
1611 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support:\n"));
1612
1613 /*
1614 * check whether the first level children of root_node have
1615 * PCI-E or PCI capability.
1616 */
1617 for (cdip = ddi_get_child(ddi_root_node()); cdip != NULL;
1618 cdip = ddi_get_next_sibling(cdip)) {
1619
1620 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: cdip: 0x%p,"
1621 " driver: %s, binding: %s, nodename: %s\n", (void *)cdip,
1622 ddi_driver_name(cdip), ddi_binding_name(cdip),
1623 ddi_node_name(cdip)));
1624 dev_len = sizeof (dev_type);
1625 if (ddi_getlongprop_buf(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
1626 "device_type", (caddr_t)dev_type, &dev_len)
1627 != DDI_PROP_SUCCESS)
1628 continue;
1629 if (strcmp(dev_type, "pciex") == 0)
1630 return (PSM_SUCCESS);
1631 if (strcmp(dev_type, "pci") == 0 && get_hwenv() == HW_KVM)
1632 return (PSM_SUCCESS);
1633 }
1634
1635 /* MSI is not supported on this system */
1636 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: no 'pciex' "
1637 "device_type found\n"));
1638 return (PSM_FAILURE);
1639 }
1640
1641 /*
1642 * apic_pci_msi_unconfigure:
1643 *
1644 * This and next two interfaces are copied from pci_intr_lib.c
1645 * Do ensure that these two files stay in sync.
1646 * These needed to be copied over here to avoid a deadlock situation on
1647 * certain mp systems that use MSI interrupts.
1648 *
1649 * IMPORTANT regards next three interfaces:
1650 * i) are called only for MSI/X interrupts.
1651 * ii) called with interrupts disabled, and must not block
1652 */
1653 void
1654 apic_pci_msi_unconfigure(dev_info_t *rdip, int type, int inum)
1655 {
1656 ushort_t msi_ctrl;
1657 int cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
1658 ddi_acc_handle_t handle = i_ddi_get_pci_config_handle(rdip);
1659
1660 ASSERT((handle != NULL) && (cap_ptr != 0));
1661
1662 if (type == DDI_INTR_TYPE_MSI) {
1663 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1664 msi_ctrl &= (~PCI_MSI_MME_MASK);
1665 pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
1666 pci_config_put32(handle, cap_ptr + PCI_MSI_ADDR_OFFSET, 0);
1667
1668 if (msi_ctrl & PCI_MSI_64BIT_MASK) {
1669 pci_config_put16(handle,
1670 cap_ptr + PCI_MSI_64BIT_DATA, 0);
1671 pci_config_put32(handle,
1672 cap_ptr + PCI_MSI_ADDR_OFFSET + 4, 0);
1673 } else {
1674 pci_config_put16(handle,
1675 cap_ptr + PCI_MSI_32BIT_DATA, 0);
1676 }
1677
1678 } else if (type == DDI_INTR_TYPE_MSIX) {
1679 uintptr_t off;
1680 uint32_t mask;
1681 ddi_intr_msix_t *msix_p = i_ddi_get_msix(rdip);
1682
1683 ASSERT(msix_p != NULL);
1684
1685 /* Offset into "inum"th entry in the MSI-X table & mask it */
1686 off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
1687 PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
1688
1689 mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
1690
1691 ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask | 1));
1692
1693 /* Offset into the "inum"th entry in the MSI-X table */
1694 off = (uintptr_t)msix_p->msix_tbl_addr +
1695 (inum * PCI_MSIX_VECTOR_SIZE);
1696
1697 /* Reset the "data" and "addr" bits */
1698 ddi_put32(msix_p->msix_tbl_hdl,
1699 (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), 0);
1700 ddi_put64(msix_p->msix_tbl_hdl, (uint64_t *)off, 0);
1701 }
1702 }
1703
1704 /*
1705 * apic_pci_msi_disable_mode:
1706 */
1707 void
1708 apic_pci_msi_disable_mode(dev_info_t *rdip, int type)
1709 {
1710 ushort_t msi_ctrl;
1711 int cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
1712 ddi_acc_handle_t handle = i_ddi_get_pci_config_handle(rdip);
1713
1714 ASSERT((handle != NULL) && (cap_ptr != 0));
1715
1716 if (type == DDI_INTR_TYPE_MSI) {
1717 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1718 if (!(msi_ctrl & PCI_MSI_ENABLE_BIT))
1719 return;
1720
1721 msi_ctrl &= ~PCI_MSI_ENABLE_BIT; /* MSI disable */
1722 pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
1723
1724 } else if (type == DDI_INTR_TYPE_MSIX) {
1725 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
1726 if (msi_ctrl & PCI_MSIX_ENABLE_BIT) {
1727 msi_ctrl &= ~PCI_MSIX_ENABLE_BIT;
1728 pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
1729 msi_ctrl);
1730 }
1731 }
1732 }
1733
1734 uint32_t
1735 apic_get_localapicid(uint32_t cpuid)
1736 {
1737 ASSERT(cpuid < apic_nproc && apic_cpus != NULL);
1738
1739 return (apic_cpus[cpuid].aci_local_id);
1740 }
1741
1742 uchar_t
1743 apic_get_ioapicid(uchar_t ioapicindex)
1744 {
1745 ASSERT(ioapicindex < MAX_IO_APIC);
1746
1747 return (apic_io_id[ioapicindex]);
1748 }