1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 /*
  26  * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
  27  * Copyright (c) 2016 by Delphix. All rights reserved.
  28  */
  29 
  30 /*
  31  * PSMI 1.1 extensions are supported only in 2.6 and later versions.
  32  * PSMI 1.2 extensions are supported only in 2.7 and later versions.
  33  * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
  34  * PSMI 1.5 extensions are supported in Solaris Nevada.
  35  * PSMI 1.6 extensions are supported in Solaris Nevada.
  36  * PSMI 1.7 extensions are supported in Solaris Nevada.
  37  */
  38 #define PSMI_1_7
  39 
  40 #include <sys/processor.h>
  41 #include <sys/time.h>
  42 #include <sys/psm.h>
  43 #include <sys/smp_impldefs.h>
  44 #include <sys/cram.h>
  45 #include <sys/acpi/acpi.h>
  46 #include <sys/acpica.h>
  47 #include <sys/psm_common.h>
  48 #include <sys/apic.h>
  49 #include <sys/pit.h>
  50 #include <sys/ddi.h>
  51 #include <sys/sunddi.h>
  52 #include <sys/ddi_impldefs.h>
  53 #include <sys/pci.h>
  54 #include <sys/promif.h>
  55 #include <sys/x86_archext.h>
  56 #include <sys/cpc_impl.h>
  57 #include <sys/uadmin.h>
  58 #include <sys/panic.h>
  59 #include <sys/debug.h>
  60 #include <sys/archsystm.h>
  61 #include <sys/trap.h>
  62 #include <sys/machsystm.h>
  63 #include <sys/sysmacros.h>
  64 #include <sys/cpuvar.h>
  65 #include <sys/rm_platter.h>
  66 #include <sys/privregs.h>
  67 #include <sys/note.h>
  68 #include <sys/pci_intr_lib.h>
  69 #include <sys/spl.h>
  70 #include <sys/clock.h>
  71 #include <sys/dditypes.h>
  72 #include <sys/sunddi.h>
  73 #include <sys/x_call.h>
  74 #include <sys/reboot.h>
  75 #include <sys/hpet.h>
  76 #include <sys/apic_common.h>
  77 #include <sys/apic_timer.h>
  78 
/*
 * Forward declarations of the two callbacks that are installed as the last
 * two entries of apic_nointrmap_ops, the default ops vector used when
 * interrupt remapping is not in use.
 */
static void     apic_record_ioapic_rdt(void *intrmap_private,
                    ioapic_rdt_t *irdt);
static void     apic_record_msi(void *intrmap_private, msi_regs_t *mregs);

/*
 * Common routines between pcplusmp & apix (taken from apic.c).
 */

int     apic_clkinit(int);
hrtime_t apic_gethrtime(void);
void    apic_send_ipi(int, int);
void    apic_set_idlecpu(processorid_t);
void    apic_unset_idlecpu(processorid_t);
void    apic_shutdown(int, int);
void    apic_preshutdown(int, int);
processorid_t   apic_get_next_processorid(processorid_t);

hrtime_t apic_gettime();

/*
 * IOAPIC EOI handling method for the apix module.  It starts out as the
 * pcplusmp method and may be changed by apic_ioapic_method_probe().
 */
enum apic_ioapic_method_type apix_mul_ioapic_method = APIC_MUL_IOAPIC_PCPLUSMP;
  99 
/* Now the ones for Dynamic Interrupt distribution */
int     apic_enable_dynamic_migration = 0;

/*
 * maximum loop count when sending Start IPIs.  apic_cpu_send_SIPI() uses it
 * to bound the wait for the pending bit to clear after the INIT IPI.
 */
int apic_sipi_max_loop_count = 0x1000;

/*
 * These variables are frequently accessed in apic_intr_enter(),
 * apic_intr_exit and apic_setspl, so group them together
 */
volatile uint32_t *apicadr =  NULL;     /* virtual addr of local APIC   */
int apic_setspl_delay = 1;              /* apic_setspl - delay enable   */
int apic_clkvect;

/* vector at which error interrupts come in */
int apic_errvect;
int apic_enable_error_intr = 1;
/*
 * Number of tenmicrosec() delays apic_error_intr() performs after printing
 * an error; the handler doubles it each time while it is below 500.
 */
int apic_error_display_delay = 100;

/* vector at which performance counter overflow interrupts come in */
int apic_cpcovf_vect;
int apic_enable_cpcovf_intr = 1;

/* vector at which CMCI interrupts come in */
int apic_cmci_vect;
extern int cmi_enable_cmci;
extern void cmi_cmci_trap(void);

kmutex_t cmci_cpu_setup_lock;   /* protects cmci_cpu_setup_registered */
int cmci_cpu_setup_registered;

/* number of CPUs in power-on transition state */
static int apic_poweron_cnt = 0;
lock_t apic_mode_switch_lock;
 134 
/*
 * Patchable global variables.
 */
int     apic_forceload = 0;

int     apic_coarse_hrtime = 1;         /* 0 - use accurate slow gethrtime() */

int     apic_flat_model = 0;            /* 0 - clustered. 1 - flat */
int     apic_panic_on_nmi = 0;          /* panic in apic_nmi_intr() */
int     apic_panic_on_apic_error = 0;   /* panic in apic_error_intr() */

int     apic_verbose = 0;       /* 0x1ff */

#ifdef DEBUG
int     apic_debug = 0;
int     apic_restrict_vector = 0;

int     apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE];
int     apic_debug_msgbufindex = 0;

#endif /* DEBUG */

uint_t apic_nticks = 0;
uint_t apic_skipped_redistribute = 0;

/*
 * State shared by apic_gettime() and apic_gethrtime().
 *
 * Readers spin while the low bit of apic_hrtime_stamp is set and retry if
 * the stamp changes while they sample apic_nsec_since_boot.
 * apic_gethrtime() additionally serializes on apic_gethrtime_lock and
 * remembers the last timer count it accepted in last_count_read.
 */
uint_t last_count_read = 0;
lock_t  apic_gethrtime_lock;
volatile int    apic_hrtime_stamp = 0;
volatile hrtime_t apic_nsec_since_boot = 0;

/* last value returned by apic_gethrtime(); keeps it from going backwards */
static  hrtime_t        apic_last_hrtime = 0;
/* times apic_gethrtime() computed a value below apic_last_hrtime */
int             apic_hrtime_error = 0;
/* times the remote read of the timer count came back invalid */
int             apic_remote_hrterr = 0;
/* NMIs processed by apic_nmi_intr() while holding apic_nmi_lock */
int             apic_num_nmis = 0;
/* accumulated (OR-ed) error status bits seen by apic_error_intr() */
int             apic_apic_error = 0;
/* errors counted by apic_error_intr(), and the checksum-only subset */
int             apic_num_apic_errors = 0;
int             apic_num_cksum_errors = 0;

/* bitmask of APIC_ERR_* flags set by the error and NMI handlers */
int     apic_error = 0;

/* set to 1 by apic_cpu_start() before it sends the start-up IPIs */
static  int     apic_cmos_ssb_set = 0;

/* use to make sure only one cpu handles the nmi */
lock_t  apic_nmi_lock;
/* use to make sure only one cpu handles the error interrupt */
lock_t  apic_error_lock;
 181 
/*
 * Command bytes for the Aspen BMC.  Each entry pairs a control code (cntl)
 * with a data byte (data).  Per the per-entry comments, the first group
 * encodes a SET_WATCHDOG_TIMER request and the second group a
 * RESET_WATCHDOG_TIMER request.
 *
 * NOTE(review): the consumer of this table is not in this part of the file
 * (presumably the shutdown path) - confirm before changing the sequence.
 */
static  struct {
        uchar_t cntl;
        uchar_t data;
} aspen_bmc[] = {
        { CC_SMS_WR_START,      0x18 },         /* NetFn/LUN */
        { CC_SMS_WR_NEXT,       0x24 },         /* Cmd SET_WATCHDOG_TIMER */
        { CC_SMS_WR_NEXT,       0x84 },         /* DataByte 1: SMS/OS no log */
        { CC_SMS_WR_NEXT,       0x2 },          /* DataByte 2: Power Down */
        { CC_SMS_WR_NEXT,       0x0 },          /* DataByte 3: no pre-timeout */
        { CC_SMS_WR_NEXT,       0x0 },          /* DataByte 4: timer expir. */
        { CC_SMS_WR_NEXT,       0xa },          /* DataByte 5: init countdown */
        { CC_SMS_WR_END,        0x0 },          /* DataByte 6: init countdown */

        { CC_SMS_WR_START,      0x18 },         /* NetFn/LUN */
        { CC_SMS_WR_END,        0x22 }          /* Cmd RESET_WATCHDOG_TIMER */
};
 198 
/*
 * Command bytes for the Sitka BMC.  Each entry pairs a register (port,
 * either the SMS command or the SMS data register) with a byte (data).
 * Per the per-entry comments, the first group encodes a SET_WATCHDOG_TIMER
 * request and the second group a RESET_WATCHDOG_TIMER request; the last
 * data byte of each group is preceded by an SMS_WRITE_END command.
 *
 * NOTE(review): the consumer of this table is not in this part of the file
 * (presumably the shutdown path) - confirm before changing the sequence.
 */
static  struct {
        int     port;
        uchar_t data;
} sitka_bmc[] = {
        { SMS_COMMAND_REGISTER, SMS_WRITE_START },
        { SMS_DATA_REGISTER,    0x18 },         /* NetFn/LUN */
        { SMS_DATA_REGISTER,    0x24 },         /* Cmd SET_WATCHDOG_TIMER */
        { SMS_DATA_REGISTER,    0x84 },         /* DataByte 1: SMS/OS no log */
        { SMS_DATA_REGISTER,    0x2 },          /* DataByte 2: Power Down */
        { SMS_DATA_REGISTER,    0x0 },          /* DataByte 3: no pre-timeout */
        { SMS_DATA_REGISTER,    0x0 },          /* DataByte 4: timer expir. */
        { SMS_DATA_REGISTER,    0xa },          /* DataByte 5: init countdown */
        { SMS_COMMAND_REGISTER, SMS_WRITE_END },
        { SMS_DATA_REGISTER,    0x0 },          /* DataByte 6: init countdown */

        { SMS_COMMAND_REGISTER, SMS_WRITE_START },
        { SMS_DATA_REGISTER,    0x18 },         /* NetFn/LUN */
        { SMS_COMMAND_REGISTER, SMS_WRITE_END },
        { SMS_DATA_REGISTER,    0x22 }          /* Cmd RESET_WATCHDOG_TIMER */
};
 219 
/* Patchable global variables. */
int             apic_kmdb_on_nmi = 0;           /* 0 - no, 1 - yes enter kmdb */
uint32_t        apic_divide_reg_init = 0;       /* 0 - divide by 2 */

/*
 * default apic ops without interrupt remapping
 *
 * The first five hooks are return_instr cast to the type of the
 * corresponding member; only the two record callbacks are real functions
 * from this file.
 */
static apic_intrmap_ops_t apic_nointrmap_ops = {
        (int (*)(int))return_instr,
        (void (*)(int))return_instr,
        (void (*)(void **, dev_info_t *, uint16_t, int, uchar_t))return_instr,
        (void (*)(void *, void *, uint16_t, int))return_instr,
        (void (*)(void **))return_instr,
        apic_record_ioapic_rdt,
        apic_record_msi,
};

/* ops vector currently in use; defaults to the no-remapping ops above */
apic_intrmap_ops_t *apic_vt_ops = &apic_nointrmap_ops;
/* per-CPU local APIC information, indexed by CPU number */
apic_cpus_info_t        *apic_cpus = NULL;
cpuset_t        apic_cpumask;
uint_t          apic_picinit_called;

/*
 * Flag to indicate that we need to shut down all processors.  When it is
 * set, apic_nmi_intr() disables the local APIC and returns.
 */
static uint_t   apic_shutdown_processors;
 242 
 243 /*
 244  * Probe the ioapic method for apix module. Called in apic_probe_common()
 245  */
 246 int
 247 apic_ioapic_method_probe()
 248 {
 249         if (apix_enable == 0)
 250                 return (PSM_SUCCESS);
 251 
 252         /*
 253          * Set IOAPIC EOI handling method. The priority from low to high is:
 254          *      1. IOxAPIC: with EOI register
 255          *      2. IOMMU interrupt mapping
 256          *      3. Mask-Before-EOI method for systems without boot
 257          *      interrupt routing, such as systems with only one IOAPIC;
 258          *      NVIDIA CK8-04/MCP55 systems; systems with bridge solution
 259          *      which disables the boot interrupt routing already.
 260          *      4. Directed EOI
 261          */
 262         if (apic_io_ver[0] >= 0x20)
 263                 apix_mul_ioapic_method = APIC_MUL_IOAPIC_IOXAPIC;
 264         if ((apic_io_max == 1) || (apic_nvidia_io_max == apic_io_max))
 265                 apix_mul_ioapic_method = APIC_MUL_IOAPIC_MASK;
 266         if (apic_directed_EOI_supported())
 267                 apix_mul_ioapic_method = APIC_MUL_IOAPIC_DEOI;
 268 
 269         /* fall back to pcplusmp */
 270         if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_PCPLUSMP) {
 271                 /* make sure apix is after pcplusmp in /etc/mach */
 272                 apix_enable = 0; /* go ahead with pcplusmp install next */
 273                 return (PSM_FAILURE);
 274         }
 275 
 276         return (PSM_SUCCESS);
 277 }
 278 
 279 /*
 280  * handler for APIC Error interrupt. Just print a warning and continue
 281  */
 282 int
 283 apic_error_intr()
 284 {
 285         uint_t  error0, error1, error;
 286         uint_t  i;
 287 
 288         /*
 289          * We need to write before read as per 7.4.17 of system prog manual.
 290          * We do both and or the results to be safe
 291          */
 292         error0 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
 293         apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
 294         error1 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
 295         error = error0 | error1;
 296 
 297         /*
 298          * Clear the APIC error status (do this on all cpus that enter here)
 299          * (two writes are required due to the semantics of accessing the
 300          * error status register.)
 301          */
 302         apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
 303         apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
 304 
 305         /*
 306          * Prevent more than 1 CPU from handling error interrupt causing
 307          * double printing (interleave of characters from multiple
 308          * CPU's when using prom_printf)
 309          */
 310         if (lock_try(&apic_error_lock) == 0)
 311                 return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
 312         if (error) {
 313 #if     DEBUG
 314                 if (apic_debug)
 315                         debug_enter("pcplusmp: APIC Error interrupt received");
 316 #endif /* DEBUG */
 317                 if (apic_panic_on_apic_error)
 318                         cmn_err(CE_PANIC,
 319                             "APIC Error interrupt on CPU %d. Status = %x",
 320                             psm_get_cpu_id(), error);
 321                 else {
 322                         if ((error & ~APIC_CS_ERRORS) == 0) {
 323                                 /* cksum error only */
 324                                 apic_error |= APIC_ERR_APIC_ERROR;
 325                                 apic_apic_error |= error;
 326                                 apic_num_apic_errors++;
 327                                 apic_num_cksum_errors++;
 328                         } else {
 329                                 /*
 330                                  * prom_printf is the best shot we have of
 331                                  * something which is problem free from
 332                                  * high level/NMI type of interrupts
 333                                  */
 334                                 prom_printf("APIC Error interrupt on CPU %d. "
 335                                     "Status 0 = %x, Status 1 = %x\n",
 336                                     psm_get_cpu_id(), error0, error1);
 337                                 apic_error |= APIC_ERR_APIC_ERROR;
 338                                 apic_apic_error |= error;
 339                                 apic_num_apic_errors++;
 340                                 for (i = 0; i < apic_error_display_delay; i++) {
 341                                         tenmicrosec();
 342                                 }
 343                                 /*
 344                                  * provide more delay next time limited to
 345                                  * roughly 1 clock tick time
 346                                  */
 347                                 if (apic_error_display_delay < 500)
 348                                         apic_error_display_delay *= 2;
 349                         }
 350                 }
 351                 lock_clear(&apic_error_lock);
 352                 return (DDI_INTR_CLAIMED);
 353         } else {
 354                 lock_clear(&apic_error_lock);
 355                 return (DDI_INTR_UNCLAIMED);
 356         }
 357 }
 358 
/*
 * Turn off the mask bit in the performance counter Local Vector Table entry.
 *
 * This is a read-modify-write of APIC_PCINT_VECT: the current value is read,
 * only APIC_LVT_MASK is cleared, and the result is written back.
 */
void
apic_cpcovf_mask_clear(void)
{
        apic_reg_ops->apic_write(APIC_PCINT_VECT,
            (apic_reg_ops->apic_read(APIC_PCINT_VECT) & ~APIC_LVT_MASK));
}
 368 
/*
 * Cross-call handler, issued by cmci_cpu_setup() for CPU_ON: writes
 * apic_cmci_vect, without AV_MASK, to APIC_CMCI_VECT.  The three
 * cross-call arguments are unused.  Always returns 0.
 */
/*ARGSUSED*/
static int
apic_cmci_enable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
{
        apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
        return (0);
}
 376 
/*
 * Cross-call handler, issued by cmci_cpu_setup() for CPU_OFF: writes
 * apic_cmci_vect with AV_MASK set to APIC_CMCI_VECT.  The three
 * cross-call arguments are unused.  Always returns 0.
 */
/*ARGSUSED*/
static int
apic_cmci_disable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
{
        apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect | AV_MASK);
        return (0);
}
 384 
 385 /*ARGSUSED*/
 386 int
 387 cmci_cpu_setup(cpu_setup_t what, int cpuid, void *arg)
 388 {
 389         cpuset_t        cpu_set;
 390 
 391         CPUSET_ONLY(cpu_set, cpuid);
 392 
 393         switch (what) {
 394                 case CPU_ON:
 395                         xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
 396                             (xc_func_t)apic_cmci_enable);
 397                         break;
 398 
 399                 case CPU_OFF:
 400                         xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
 401                             (xc_func_t)apic_cmci_disable);
 402                         break;
 403 
 404                 default:
 405                         break;
 406         }
 407 
 408         return (0);
 409 }
 410 
 411 static void
 412 apic_disable_local_apic(void)
 413 {
 414         apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);
 415         apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK);
 416 
 417         /* local intr reg 0 */
 418         apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK);
 419 
 420         /* disable NMI */
 421         apic_reg_ops->apic_write(APIC_INT_VECT1, AV_MASK);
 422 
 423         /* and error interrupt */
 424         apic_reg_ops->apic_write(APIC_ERR_VECT, AV_MASK);
 425 
 426         /* and perf counter intr */
 427         apic_reg_ops->apic_write(APIC_PCINT_VECT, AV_MASK);
 428 
 429         apic_reg_ops->apic_write(APIC_SPUR_INT_REG, APIC_SPUR_INTR);
 430 }
 431 
/*
 * Send an INIT IPI (assert, then de-assert) to the CPU identified by cpun
 * and, if its local APIC is an integrated one, follow up with two Startup
 * IPIs.  Interrupts are disabled on the calling CPU for the whole sequence.
 *
 * cpun  - index into apic_cpus[] of the target CPU.
 * start - when true, the SSB location in CMOS is set to BIOS_SHUTDOWN
 *         before any IPI is sent.  apic_cpu_start() passes B_TRUE and
 *         apic_cpu_stop() passes B_FALSE.
 */
static void
apic_cpu_send_SIPI(processorid_t cpun, boolean_t start)
{
        int             loop_count;
        uint32_t        vector;
        uint_t          apicid;
        ulong_t         iflag;

        apicid =  apic_cpus[cpun].aci_local_id;

        /*
         * Interrupts on current CPU will be disabled during the
         * steps in order to avoid unwanted side effects from
         * executing interrupt handlers on a problematic BIOS.
         */
        iflag = intr_clear();

        if (start) {
                outb(CMOS_ADDR, SSB);
                outb(CMOS_DATA, BIOS_SHUTDOWN);
        }

        /*
         * According to X2APIC specification in section '2.3.5.1' of
         * Interrupt Command Register Semantics, the semantics of
         * programming the Interrupt Command Register to dispatch an interrupt
         * is simplified. A single MSR write to the 64-bit ICR is required
         * for dispatching an interrupt. Specifically, with the 64-bit MSR
         * interface to ICR, system software is not required to check the
         * status of the delivery status bit prior to writing to the ICR
         * to send an IPI. With the removal of the Delivery Status bit,
         * system software no longer has a reason to read the ICR. It remains
         * readable only to aid in debugging.
         */
#ifdef  DEBUG
        APIC_AV_PENDING_SET();
#else
        if (apic_mode == LOCAL_APIC) {
                APIC_AV_PENDING_SET();
        }
#endif /* DEBUG */

        /* for integrated - make sure there is one INIT IPI in buffer */
        /* for external - it will wake up the cpu */
        apic_reg_ops->apic_write_int_cmd(apicid, AV_ASSERT | AV_RESET);

        /*
         * If only 1 CPU is installed, PENDING bit will not go low, so the
         * wait is bounded by apic_sipi_max_loop_count.  The pending bit is
         * only polled in LOCAL_APIC mode.
         */
        for (loop_count = apic_sipi_max_loop_count; loop_count; loop_count--) {
                if (apic_mode == LOCAL_APIC &&
                    apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
                        apic_ret();
                else
                        break;
        }

        apic_reg_ops->apic_write_int_cmd(apicid, AV_DEASSERT | AV_RESET);
        drv_usecwait(20000);            /* 20 milli sec */

        if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) {
                /* integrated apic */

                /* the Startup IPI vector is the page number of rm_platter_pa */
                vector = (rm_platter_pa >> MMU_PAGESHIFT) &
                    (APIC_VECTOR_MASK | APIC_IPL_MASK);

                /* to offset the INIT IPI queue up in the buffer */
                apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
                drv_usecwait(200);              /* 200 micro sec */

                /*
                 * send the second SIPI (Startup IPI) as recommended by Intel
                 * software development manual.
                 */
                apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
                drv_usecwait(200);      /* 200 micro sec */
        }

        intr_restore(iflag);
}
 510 
 511 /*ARGSUSED1*/
 512 int
 513 apic_cpu_start(processorid_t cpun, caddr_t arg)
 514 {
 515         ASSERT(MUTEX_HELD(&cpu_lock));
 516 
 517         if (!apic_cpu_in_range(cpun)) {
 518                 return (EINVAL);
 519         }
 520 
 521         /*
 522          * Switch to apic_common_send_ipi for safety during starting other CPUs.
 523          */
 524         if (apic_mode == LOCAL_X2APIC) {
 525                 apic_switch_ipi_callback(B_TRUE);
 526         }
 527 
 528         apic_cmos_ssb_set = 1;
 529         apic_cpu_send_SIPI(cpun, B_TRUE);
 530 
 531         return (0);
 532 }
 533 
 534 /*
 535  * Put CPU into halted state with interrupts disabled.
 536  */
 537 /*ARGSUSED1*/
 538 int
 539 apic_cpu_stop(processorid_t cpun, caddr_t arg)
 540 {
 541         int             rc;
 542         cpu_t           *cp;
 543         extern cpuset_t cpu_ready_set;
 544         extern void cpu_idle_intercept_cpu(cpu_t *cp);
 545 
 546         ASSERT(MUTEX_HELD(&cpu_lock));
 547 
 548         if (!apic_cpu_in_range(cpun)) {
 549                 return (EINVAL);
 550         }
 551         if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
 552                 return (ENOTSUP);
 553         }
 554 
 555         cp = cpu_get(cpun);
 556         ASSERT(cp != NULL);
 557         ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0);
 558         ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0);
 559         ASSERT((cp->cpu_flags & CPU_ENABLE) == 0);
 560 
 561         /* Clear CPU_READY flag to disable cross calls. */
 562         cp->cpu_flags &= ~CPU_READY;
 563         CPUSET_ATOMIC_DEL(cpu_ready_set, cpun);
 564         rc = xc_flush_cpu(cp);
 565         if (rc != 0) {
 566                 CPUSET_ATOMIC_ADD(cpu_ready_set, cpun);
 567                 cp->cpu_flags |= CPU_READY;
 568                 return (rc);
 569         }
 570 
 571         /* Intercept target CPU at a safe point before powering it off. */
 572         cpu_idle_intercept_cpu(cp);
 573 
 574         apic_cpu_send_SIPI(cpun, B_FALSE);
 575         cp->cpu_flags &= ~CPU_RUNNING;
 576 
 577         return (0);
 578 }
 579 
 580 int
 581 apic_cpu_ops(psm_cpu_request_t *reqp)
 582 {
 583         if (reqp == NULL) {
 584                 return (EINVAL);
 585         }
 586 
 587         switch (reqp->pcr_cmd) {
 588         case PSM_CPU_ADD:
 589                 return (apic_cpu_add(reqp));
 590 
 591         case PSM_CPU_REMOVE:
 592                 return (apic_cpu_remove(reqp));
 593 
 594         case PSM_CPU_STOP:
 595                 return (apic_cpu_stop(reqp->req.cpu_stop.cpuid,
 596                     reqp->req.cpu_stop.ctx));
 597 
 598         default:
 599                 return (ENOTSUP);
 600         }
 601 }
 602 
/*
 * DEBUG-only tunables.
 * NOTE(review): none of them is referenced in this part of the file; their
 * consumers are presumably elsewhere - confirm before changing defaults.
 */
#ifdef  DEBUG
int     apic_break_on_cpu = 9;
int     apic_stretch_interrupts = 0;
int     apic_stretch_ISR = 1 << 3;        /* IPL of 3 matches nothing now */
#endif /* DEBUG */
 608 
 609 /*
 610  * generates an interprocessor interrupt to another CPU. Any changes made to
 611  * this routine must be accompanied by similar changes to
 612  * apic_common_send_ipi().
 613  */
 614 void
 615 apic_send_ipi(int cpun, int ipl)
 616 {
 617         int vector;
 618         ulong_t flag;
 619 
 620         vector = apic_resv_vector[ipl];
 621 
 622         ASSERT((vector >= APIC_BASE_VECT) && (vector <= APIC_SPUR_INTR));
 623 
 624         flag = intr_clear();
 625 
 626         APIC_AV_PENDING_SET();
 627 
 628         apic_reg_ops->apic_write_int_cmd(apic_cpus[cpun].aci_local_id,
 629             vector);
 630 
 631         intr_restore(flag);
 632 }
 633 
 634 
/*
 * Intentionally empty: this implementation does nothing, and cpun is
 * unused.
 */
/*ARGSUSED*/
void
apic_set_idlecpu(processorid_t cpun)
{
}
 640 
/*
 * Intentionally empty: this implementation does nothing, and cpun is
 * unused.
 */
/*ARGSUSED*/
void
apic_unset_idlecpu(processorid_t cpun)
{
}
 646 
 647 
/*
 * Intentionally empty.  It is called as the body of the busy-wait loops in
 * this file, e.g. in apic_cpu_send_SIPI(), apic_gettime() and
 * apic_gethrtime().
 */
void
apic_ret()
{
}
 652 
 653 /*
 654  * If apic_coarse_time == 1, then apic_gettime() is used instead of
 655  * apic_gethrtime().  This is used for performance instead of accuracy.
 656  */
 657 
 658 hrtime_t
 659 apic_gettime()
 660 {
 661         int old_hrtime_stamp;
 662         hrtime_t temp;
 663 
 664         /*
 665          * In one-shot mode, we do not keep time, so if anyone
 666          * calls psm_gettime() directly, we vector over to
 667          * gethrtime().
 668          * one-shot mode MUST NOT be enabled if this psm is the source of
 669          * hrtime.
 670          */
 671 
 672         if (apic_oneshot)
 673                 return (gethrtime());
 674 
 675 
 676 gettime_again:
 677         while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
 678                 apic_ret();
 679 
 680         temp = apic_nsec_since_boot;
 681 
 682         if (apic_hrtime_stamp != old_hrtime_stamp) {    /* got an interrupt */
 683                 goto gettime_again;
 684         }
 685         return (temp);
 686 }
 687 
/*
 * Here we return the number of nanoseconds since booting.  Note every
 * clock interrupt increments apic_nsec_since_boot by the appropriate
 * amount.
 *
 * The result is apic_nsec_since_boot plus the time elapsed within the
 * current tick, derived from the current count of the timer on CPU 0's
 * local APIC.  The whole computation runs with interrupts disabled and
 * apic_gethrtime_lock held.  The value returned never goes below the value
 * previously returned (apic_last_hrtime).
 */
hrtime_t
apic_gethrtime(void)
{
        int curr_timeval, countval, elapsed_ticks;
        int old_hrtime_stamp, status;
        hrtime_t temp;
        uint32_t cpun;
        ulong_t oflags;

        /*
         * In one-shot mode, we do not keep time, so if anyone
         * calls psm_gethrtime() directly, we vector over to
         * gethrtime().
         * one-shot mode MUST NOT be enabled if this psm is the source of
         * hrtime.
         */

        if (apic_oneshot)
                return (gethrtime());

        oflags = intr_clear();  /* prevent migration */

        /*
         * cpun holds the local APIC id of the current CPU; in LOCAL_APIC
         * mode the id has to be shifted down by APIC_ID_BIT_OFFSET.
         */
        cpun = apic_reg_ops->apic_read(APIC_LID_REG);
        if (apic_mode == LOCAL_APIC)
                cpun >>= APIC_ID_BIT_OFFSET;

        lock_set(&apic_gethrtime_lock);

gethrtime_again:
        /* wait until the low bit of the stamp is clear */
        while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
                apic_ret();

        /*
         * Check to see which CPU we are on.  Note the time is kept on
         * the local APIC of CPU 0.  If on CPU 0, simply read the current
         * counter.  If on another CPU, issue a remote read command to CPU 0.
         */
        if (cpun == apic_cpus[0].aci_local_id) {
                countval = apic_reg_ops->apic_read(APIC_CURR_COUNT);
        } else {
#ifdef  DEBUG
                APIC_AV_PENDING_SET();
#else
                if (apic_mode == LOCAL_APIC)
                        APIC_AV_PENDING_SET();
#endif /* DEBUG */

                apic_reg_ops->apic_write_int_cmd(
                    apic_cpus[0].aci_local_id, APIC_CURR_ADD | AV_REMOTE);

                /* spin until the remote read is no longer pending */
                while ((status = apic_reg_ops->apic_read(APIC_INT_CMD1))
                    & AV_READ_PENDING) {
                        apic_ret();
                }

                if (status & AV_REMOTE_STATUS)      /* 1 = valid */
                        countval = apic_reg_ops->apic_read(APIC_REMOTE_READ);
                else {  /* 0 = invalid */
                        apic_remote_hrterr++;
                        /*
                         * return last hrtime right now, will need more
                         * testing if change to retry
                         */
                        temp = apic_last_hrtime;

                        lock_clear(&apic_gethrtime_lock);

                        intr_restore(oflags);

                        return (temp);
                }
        }
        /*
         * A count above the previously accepted one is replaced by 0;
         * otherwise it becomes the new last_count_read.
         * NOTE(review): presumably a larger count means the timer reloaded
         * since the last read - confirm.
         */
        if (countval > last_count_read)
                countval = 0;
        else
                last_count_read = countval;

        elapsed_ticks = apic_hertz_count - countval;

        curr_timeval = APIC_TICKS_TO_NSECS(elapsed_ticks);
        temp = apic_nsec_since_boot + curr_timeval;

        if (apic_hrtime_stamp != old_hrtime_stamp) {    /* got an interrupt */
                /* we might have clobbered last_count_read. Restore it */
                last_count_read = apic_hertz_count;
                goto gethrtime_again;
        }

        if (temp < apic_last_hrtime) {
                /* return last hrtime if error occurs */
                apic_hrtime_error++;
                temp = apic_last_hrtime;
        }
        else
                apic_last_hrtime = temp;

        lock_clear(&apic_gethrtime_lock);
        intr_restore(oflags);

        return (temp);
}
 794 
 795 /* apic NMI handler */
 796 /*ARGSUSED*/
 797 void
 798 apic_nmi_intr(caddr_t arg, struct regs *rp)
 799 {
 800         if (apic_shutdown_processors) {
 801                 apic_disable_local_apic();
 802                 return;
 803         }
 804 
 805         apic_error |= APIC_ERR_NMI;
 806 
 807         if (!lock_try(&apic_nmi_lock))
 808                 return;
 809         apic_num_nmis++;
 810 
 811         if (apic_kmdb_on_nmi && psm_debugger()) {
 812                 debug_enter("NMI received: entering kmdb\n");
 813         } else if (apic_panic_on_nmi) {
 814                 /* Keep panic from entering kmdb. */
 815                 nopanicdebug = 1;
 816                 panic("NMI received\n");
 817         } else {
 818                 /*
 819                  * prom_printf is the best shot we have of something which is
 820                  * problem free from high level/NMI type of interrupts
 821                  */
 822                 prom_printf("NMI received\n");
 823         }
 824 
 825         lock_clear(&apic_nmi_lock);
 826 }
 827 
 828 processorid_t
 829 apic_get_next_processorid(processorid_t cpu_id)
 830 {
 831 
 832         int i;
 833 
 834         if (cpu_id == -1)
 835                 return ((processorid_t)0);
 836 
 837         for (i = cpu_id + 1; i < NCPU; i++) {
 838                 if (apic_cpu_in_range(i))
 839                         return (i);
 840         }
 841 
 842         return ((processorid_t)-1);
 843 }
 844 
 845 int
 846 apic_cpu_add(psm_cpu_request_t *reqp)
 847 {
 848         int i, rv = 0;
 849         ulong_t iflag;
 850         boolean_t first = B_TRUE;
 851         uchar_t localver;
 852         uint32_t localid, procid;
 853         processorid_t cpuid = (processorid_t)-1;
 854         mach_cpu_add_arg_t *ap;
 855 
 856         ASSERT(reqp != NULL);
 857         reqp->req.cpu_add.cpuid = (processorid_t)-1;
 858 
 859         /* Check whether CPU hotplug is supported. */
 860         if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
 861                 return (ENOTSUP);
 862         }
 863 
 864         ap = (mach_cpu_add_arg_t *)reqp->req.cpu_add.argp;
 865         switch (ap->type) {
 866         case MACH_CPU_ARG_LOCAL_APIC:
 867                 localid = ap->arg.apic.apic_id;
 868                 procid = ap->arg.apic.proc_id;
 869                 if (localid >= 255 || procid > 255) {
 870                         cmn_err(CE_WARN,
 871                             "!apic: apicid(%u) or procid(%u) is invalid.",
 872                             localid, procid);
 873                         return (EINVAL);
 874                 }
 875                 break;
 876 
 877         case MACH_CPU_ARG_LOCAL_X2APIC:
 878                 localid = ap->arg.apic.apic_id;
 879                 procid = ap->arg.apic.proc_id;
 880                 if (localid >= UINT32_MAX) {
 881                         cmn_err(CE_WARN,
 882                             "!apic: x2apicid(%u) is invalid.", localid);
 883                         return (EINVAL);
 884                 } else if (localid >= 255 && apic_mode == LOCAL_APIC) {
 885                         cmn_err(CE_WARN, "!apic: system is in APIC mode, "
 886                             "can't support x2APIC processor.");
 887                         return (ENOTSUP);
 888                 }
 889                 break;
 890 
 891         default:
 892                 cmn_err(CE_WARN,
 893                     "!apic: unknown argument type %d to apic_cpu_add().",
 894                     ap->type);
 895                 return (EINVAL);
 896         }
 897 
 898         /* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */
 899         iflag = intr_clear();
 900         lock_set(&apic_ioapic_lock);
 901 
 902         /* Check whether local APIC id already exists. */
 903         for (i = 0; i < apic_nproc; i++) {
 904                 if (!CPU_IN_SET(apic_cpumask, i))
 905                         continue;
 906                 if (apic_cpus[i].aci_local_id == localid) {
 907                         lock_clear(&apic_ioapic_lock);
 908                         intr_restore(iflag);
 909                         cmn_err(CE_WARN,
 910                             "!apic: local apic id %u already exists.",
 911                             localid);
 912                         return (EEXIST);
 913                 } else if (apic_cpus[i].aci_processor_id == procid) {
 914                         lock_clear(&apic_ioapic_lock);
 915                         intr_restore(iflag);
 916                         cmn_err(CE_WARN,
 917                             "!apic: processor id %u already exists.",
 918                             (int)procid);
 919                         return (EEXIST);
 920                 }
 921 
 922                 /*
 923                  * There's no local APIC version number available in MADT table,
 924                  * so assume that all CPUs are homogeneous and use local APIC
 925                  * version number of the first existing CPU.
 926                  */
 927                 if (first) {
 928                         first = B_FALSE;
 929                         localver = apic_cpus[i].aci_local_ver;
 930                 }
 931         }
 932         ASSERT(first == B_FALSE);
 933 
 934         /*
 935          * Try to assign the same cpuid if APIC id exists in the dirty cache.
 936          */
 937         for (i = 0; i < apic_max_nproc; i++) {
 938                 if (CPU_IN_SET(apic_cpumask, i)) {
 939                         ASSERT((apic_cpus[i].aci_status & APIC_CPU_FREE) == 0);
 940                         continue;
 941                 }
 942                 ASSERT(apic_cpus[i].aci_status & APIC_CPU_FREE);
 943                 if ((apic_cpus[i].aci_status & APIC_CPU_DIRTY) &&
 944                     apic_cpus[i].aci_local_id == localid &&
 945                     apic_cpus[i].aci_processor_id == procid) {
 946                         cpuid = i;
 947                         break;
 948                 }
 949         }
 950 
 951         /* Avoid the dirty cache and allocate fresh slot if possible. */
 952         if (cpuid == (processorid_t)-1) {
 953                 for (i = 0; i < apic_max_nproc; i++) {
 954                         if ((apic_cpus[i].aci_status & APIC_CPU_FREE) &&
 955                             (apic_cpus[i].aci_status & APIC_CPU_DIRTY) == 0) {
 956                                 cpuid = i;
 957                                 break;
 958                         }
 959                 }
 960         }
 961 
 962         /* Try to find any free slot as last resort. */
 963         if (cpuid == (processorid_t)-1) {
 964                 for (i = 0; i < apic_max_nproc; i++) {
 965                         if (apic_cpus[i].aci_status & APIC_CPU_FREE) {
 966                                 cpuid = i;
 967                                 break;
 968                         }
 969                 }
 970         }
 971 
 972         if (cpuid == (processorid_t)-1) {
 973                 lock_clear(&apic_ioapic_lock);
 974                 intr_restore(iflag);
 975                 cmn_err(CE_NOTE,
 976                     "!apic: failed to allocate cpu id for processor %u.",
 977                     procid);
 978                 rv = EAGAIN;
 979         } else if (ACPI_FAILURE(acpica_map_cpu(cpuid, procid))) {
 980                 lock_clear(&apic_ioapic_lock);
 981                 intr_restore(iflag);
 982                 cmn_err(CE_NOTE,
 983                     "!apic: failed to build mapping for processor %u.",
 984                     procid);
 985                 rv = EBUSY;
 986         } else {
 987                 ASSERT(cpuid >= 0 && cpuid < NCPU);
 988                 ASSERT(cpuid < apic_max_nproc && cpuid < max_ncpus);
 989                 bzero(&apic_cpus[cpuid], sizeof (apic_cpus[0]));
 990                 apic_cpus[cpuid].aci_processor_id = procid;
 991                 apic_cpus[cpuid].aci_local_id = localid;
 992                 apic_cpus[cpuid].aci_local_ver = localver;
 993                 CPUSET_ATOMIC_ADD(apic_cpumask, cpuid);
 994                 if (cpuid >= apic_nproc) {
 995                         apic_nproc = cpuid + 1;
 996                 }
 997                 lock_clear(&apic_ioapic_lock);
 998                 intr_restore(iflag);
 999                 reqp->req.cpu_add.cpuid = cpuid;
1000         }
1001 
1002         return (rv);
1003 }
1004 
1005 int
1006 apic_cpu_remove(psm_cpu_request_t *reqp)
1007 {
1008         int i;
1009         ulong_t iflag;
1010         processorid_t cpuid;
1011 
1012         /* Check whether CPU hotplug is supported. */
1013         if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
1014                 return (ENOTSUP);
1015         }
1016 
1017         cpuid = reqp->req.cpu_remove.cpuid;
1018 
1019         /* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */
1020         iflag = intr_clear();
1021         lock_set(&apic_ioapic_lock);
1022 
1023         if (!apic_cpu_in_range(cpuid)) {
1024                 lock_clear(&apic_ioapic_lock);
1025                 intr_restore(iflag);
1026                 cmn_err(CE_WARN,
1027                     "!apic: cpuid %d doesn't exist in apic_cpus array.",
1028                     cpuid);
1029                 return (ENODEV);
1030         }
1031         ASSERT((apic_cpus[cpuid].aci_status & APIC_CPU_FREE) == 0);
1032 
1033         if (ACPI_FAILURE(acpica_unmap_cpu(cpuid))) {
1034                 lock_clear(&apic_ioapic_lock);
1035                 intr_restore(iflag);
1036                 return (ENOENT);
1037         }
1038 
1039         if (cpuid == apic_nproc - 1) {
1040                 /*
1041                  * We are removing the highest numbered cpuid so we need to
1042                  * find the next highest cpuid as the new value for apic_nproc.
1043                  */
1044                 for (i = apic_nproc; i > 0; i--) {
1045                         if (CPU_IN_SET(apic_cpumask, i - 1)) {
1046                                 apic_nproc = i;
1047                                 break;
1048                         }
1049                 }
1050                 /* at least one CPU left */
1051                 ASSERT(i > 0);
1052         }
1053         CPUSET_ATOMIC_DEL(apic_cpumask, cpuid);
1054         /* mark slot as free and keep it in the dirty cache */
1055         apic_cpus[cpuid].aci_status = APIC_CPU_FREE | APIC_CPU_DIRTY;
1056 
1057         lock_clear(&apic_ioapic_lock);
1058         intr_restore(iflag);
1059 
1060         return (0);
1061 }
1062 
/*
 * Return the number of APIC clock ticks elapsed for 8254 to decrement
 * (APIC_TIME_COUNT + pit_ticks_adj) ticks.
 *
 * addr is the address of the local APIC register block being calibrated
 * and *pit_ticks_adj receives the number of PIT ticks by which the final
 * sample overshot the target.  The whole measurement runs with interrupts
 * disabled.
 */
uint_t
apic_calibrate(volatile uint32_t *addr, uint16_t *pit_ticks_adj)
{
        uint8_t         pit_tick_lo;
        uint16_t        pit_tick, target_pit_tick;
        uint32_t        start_apic_tick, end_apic_tick;
        ulong_t         iflag;
        uint32_t        reg;

        /*
         * Express the current-count register as an offset from apicadr,
         * which is the form handed to apic_reg_ops->apic_read() below.
         */
        reg = addr + APIC_CURR_COUNT - apicadr;

        iflag = intr_clear();

        /*
         * The PIT counter is read as two byte-wide port reads, low byte
         * first.  Keep sampling until the count is at least APIC_TIME_MIN
         * and its low byte lies strictly between APIC_LB_MIN and
         * APIC_LB_MAX.
         */
        do {
                pit_tick_lo = inb(PITCTR0_PORT);
                pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
        } while (pit_tick < APIC_TIME_MIN ||
            pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX);

        /*
         * Wait for the 8254 to decrement by 5 ticks to ensure
         * we didn't start in the middle of a tick.
         * Compare with 0x10 for the wrap around case.
         */
        target_pit_tick = pit_tick - 5;
        do {
                pit_tick_lo = inb(PITCTR0_PORT);
                pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
        } while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);

        /* First APIC sample, taken right after the PIT reached its mark. */
        start_apic_tick = apic_reg_ops->apic_read(reg);

        /*
         * Wait for the 8254 to decrement by
         * (APIC_TIME_COUNT + pit_ticks_adj) ticks
         */
        target_pit_tick = pit_tick - APIC_TIME_COUNT;
        do {
                pit_tick_lo = inb(PITCTR0_PORT);
                pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
        } while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);

        end_apic_tick = apic_reg_ops->apic_read(reg);

        /* Number of PIT ticks the last sample went past the target. */
        *pit_ticks_adj = target_pit_tick - pit_tick;

        intr_restore(iflag);

        /* Difference between the first and the second APIC sample. */
        return (start_apic_tick - end_apic_tick);
}
1117 
1118 /*
1119  * Initialise the APIC timer on the local APIC of CPU 0 to the desired
1120  * frequency.  Note at this stage in the boot sequence, the boot processor
1121  * is the only active processor.
1122  * hertz value of 0 indicates a one-shot mode request.  In this case
1123  * the function returns the resolution (in nanoseconds) for the hardware
1124  * timer interrupt.  If one-shot mode capability is not available,
1125  * the return value will be 0. apic_enable_oneshot is a global switch
1126  * for disabling the functionality.
1127  * A non-zero positive value for hertz indicates a periodic mode request.
1128  * In this case the hardware will be programmed to generate clock interrupts
1129  * at hertz frequency and returns the resolution of interrupts in
1130  * nanosecond.
1131  */
1132 
1133 int
1134 apic_clkinit(int hertz)
1135 {
1136         int             ret;
1137 
1138         apic_int_busy_mark = (apic_int_busy_mark *
1139             apic_sample_factor_redistribution) / 100;
1140         apic_int_free_mark = (apic_int_free_mark *
1141             apic_sample_factor_redistribution) / 100;
1142         apic_diff_for_redistribution = (apic_diff_for_redistribution *
1143             apic_sample_factor_redistribution) / 100;
1144 
1145         ret = apic_timer_init(hertz);
1146         return (ret);
1147 
1148 }
1149 
/*
 * apic_preshutdown:
 * Called early in shutdown whilst we can still access filesystems to do
 * things like loading modules which will be required to complete shutdown
 * after filesystems are all unmounted.
 *
 * In this implementation cmd and fcn are only passed to the
 * APIC_VERBOSE_POWEROFF() trace, together with apic_poweroff_method and
 * apic_enable_acpi; nothing else is done here.
 */
void
apic_preshutdown(int cmd, int fcn)
{
        APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n",
            cmd, fcn, apic_poweroff_method, apic_enable_acpi));
}
1162 
/*
 * apic_shutdown:
 * Quiesce the interrupt hardware and, for an A_SHUTDOWN request with
 * fcn == AD_POWEROFF, try to power the machine off using the method
 * selected by apic_poweroff_method.
 *
 * For every cmd/fcn the function, with interrupts disabled, sends an NMI
 * to all other CPUs, restores the CMOS shutdown byte if apic_cmos_ssb_set,
 * disables I/O APIC redirection, switches the IMCR back to PIC mode if
 * apic_imcrp is set and disables the local APIC.  The remaining steps
 * depend on cmd and fcn as noted in the body.
 *
 * The function returns in all cases; when a power-off was attempted it
 * first waits seven seconds for the power to actually go off.
 */
void
apic_shutdown(int cmd, int fcn)
{
        int restarts, attempts;
        int i;
        uchar_t byte;
        ulong_t iflag;

        hpet_acpi_fini();

        /* Send NMI to all CPUs except self to do per processor shutdown */
        iflag = intr_clear();
#ifdef  DEBUG
        APIC_AV_PENDING_SET();
#else
        if (apic_mode == LOCAL_APIC)
                APIC_AV_PENDING_SET();
#endif /* DEBUG */
        /* Set before the NMI is sent to the other CPUs. */
        apic_shutdown_processors = 1;
        apic_reg_ops->apic_write(APIC_INT_CMD1,
            AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF);

        /* restore cmos shutdown byte before reboot */
        if (apic_cmos_ssb_set) {
                outb(CMOS_ADDR, SSB);
                outb(CMOS_DATA, 0);
        }

        ioapic_disable_redirection();

        /*      disable apic mode if imcr present       */
        if (apic_imcrp) {
                outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
                outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC);
        }

        apic_disable_local_apic();

        intr_restore(iflag);

        /* remainder of function is for shutdown cases only */
        if (cmd != A_SHUTDOWN)
                return;

        /*
         * Switch system back into Legacy-Mode if using ACPI and
         * not powering-off.  Some BIOSes need to remain in ACPI-mode
         * for power-off to succeed (Dell Dimension 4600)
         * Do not disable ACPI while doing fastreboot
         */
        if (apic_enable_acpi && fcn != AD_POWEROFF && fcn != AD_FASTREBOOT)
                (void) AcpiDisable();

        /* For fast reboot, send a reset IPI to all CPUs except self. */
        if (fcn == AD_FASTREBOOT) {
                apic_reg_ops->apic_write(APIC_INT_CMD1,
                    AV_ASSERT | AV_RESET | AV_SH_ALL_EXCSELF);
        }

        /* remainder of function is for shutdown+poweroff case only */
        if (fcn != AD_POWEROFF)
                return;

        switch (apic_poweroff_method) {
                case APIC_POWEROFF_VIA_RTC:

                        /* select the extended NVRAM bank in the RTC */
                        outb(CMOS_ADDR, RTC_REGA);
                        byte = inb(CMOS_DATA);
                        outb(CMOS_DATA, (byte | EXT_BANK));

                        outb(CMOS_ADDR, PFR_REG);

                        /* for Predator must toggle the PAB bit */
                        byte = inb(CMOS_DATA);

                        /*
                         * clear power active bar, wakeup alarm and
                         * kickstart
                         */
                        byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG);
                        outb(CMOS_DATA, byte);

                        /* delay before next write */
                        drv_usecwait(1000);

                        /* for S40 the following would suffice */
                        byte = inb(CMOS_DATA);

                        /* power active bar control bit */
                        byte |= PAB_CBIT;
                        outb(CMOS_DATA, byte);

                        break;

                case APIC_POWEROFF_VIA_ASPEN_BMC:
                        /*
                         * Replay the aspen_bmc[] control/data pairs.  Each
                         * write first polls the busy bits of the flag
                         * register; once a poll has found them set more
                         * than three times in a row the whole sequence is
                         * started over.  The sequence is run at most twice
                         * (the third entry at the label gives up).
                         */
                        restarts = 0;
restart_aspen_bmc:
                        if (++restarts == 3)
                                break;
                        attempts = 0;
                        do {
                                byte = inb(MISMIC_FLAG_REGISTER);
                                byte &= MISMIC_BUSY_MASK;
                                if (byte != 0) {
                                        drv_usecwait(1000);
                                        if (attempts >= 3)
                                                goto restart_aspen_bmc;
                                        ++attempts;
                                }
                        } while (byte != 0);
                        outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS);
                        /* Set bit 0 of the flag register after the write. */
                        byte = inb(MISMIC_FLAG_REGISTER);
                        byte |= 0x1;
                        outb(MISMIC_FLAG_REGISTER, byte);
                        i = 0;
                        for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0]));
                            i++) {
                                attempts = 0;
                                do {
                                        byte = inb(MISMIC_FLAG_REGISTER);
                                        byte &= MISMIC_BUSY_MASK;
                                        if (byte != 0) {
                                                drv_usecwait(1000);
                                                if (attempts >= 3)
                                                        goto restart_aspen_bmc;
                                                ++attempts;
                                        }
                                } while (byte != 0);
                                outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl);
                                outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data);
                                byte = inb(MISMIC_FLAG_REGISTER);
                                byte |= 0x1;
                                outb(MISMIC_FLAG_REGISTER, byte);
                        }
                        break;

                case APIC_POWEROFF_VIA_SITKA_BMC:
                        /*
                         * Same retry scheme as the Aspen case: wait until
                         * the status register reports neither the read nor
                         * the write state, then replay the sitka_bmc[]
                         * port/data pairs, polling SMS_IBF_MASK before
                         * each write.
                         */
                        restarts = 0;
restart_sitka_bmc:
                        if (++restarts == 3)
                                break;
                        attempts = 0;
                        do {
                                byte = inb(SMS_STATUS_REGISTER);
                                byte &= SMS_STATE_MASK;
                                if ((byte == SMS_READ_STATE) ||
                                    (byte == SMS_WRITE_STATE)) {
                                        drv_usecwait(1000);
                                        if (attempts >= 3)
                                                goto restart_sitka_bmc;
                                        ++attempts;
                                }
                        } while ((byte == SMS_READ_STATE) ||
                            (byte == SMS_WRITE_STATE));
                        outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS);
                        i = 0;
                        for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0]));
                            i++) {
                                attempts = 0;
                                do {
                                        byte = inb(SMS_STATUS_REGISTER);
                                        byte &= SMS_IBF_MASK;
                                        if (byte != 0) {
                                                drv_usecwait(1000);
                                                if (attempts >= 3)
                                                        goto restart_sitka_bmc;
                                                ++attempts;
                                        }
                                } while (byte != 0);
                                outb(sitka_bmc[i].port, sitka_bmc[i].data);
                        }
                        break;

                case APIC_POWEROFF_NONE:

                        /* If no APIC direct method, we will try using ACPI */
                        if (apic_enable_acpi) {
                                if (acpi_poweroff() == 1)
                                        return;
                        } else
                                return;

                        break;
        }
        /*
         * Wait a limited time here for power to go off.
         * If the power does not go off, then there was a
         * problem and we should continue to the halt which
         * prints a message for the user to press a key to
         * reboot.
         */
        drv_usecwait(7000000); /* wait seven seconds */

}
1357 
1358 cyclic_id_t apic_cyclic_id;
1359 
1360 /*
1361  * The following functions are in the platform specific file so that they
1362  * can be different functions depending on whether we are running on
1363  * bare metal or a hypervisor.
1364  */
1365 
1366 /*
1367  * map an apic for memory-mapped access
1368  */
1369 uint32_t *
1370 mapin_apic(uint32_t addr, size_t len, int flags)
1371 {
1372         return ((void *)psm_map_phys(addr, len, flags));
1373 }
1374 
1375 uint32_t *
1376 mapin_ioapic(uint32_t addr, size_t len, int flags)
1377 {
1378         return (mapin_apic(addr, len, flags));
1379 }
1380 
/*
 * unmap an apic: release the len-byte mapping at addr by passing both
 * straight to psm_unmap_phys().
 */
void
mapout_apic(caddr_t addr, size_t len)
{
        psm_unmap_phys(addr, len);
}
1389 
/*
 * unmap an I/O apic; in this file this is the same operation as
 * mapout_apic().
 */
void
mapout_ioapic(caddr_t addr, size_t len)
{
        mapout_apic(addr, len);
}
1395 
1396 uint32_t
1397 ioapic_read(int ioapic_ix, uint32_t reg)
1398 {
1399         volatile uint32_t *ioapic;
1400 
1401         ioapic = apicioadr[ioapic_ix];
1402         ioapic[APIC_IO_REG] = reg;
1403         return (ioapic[APIC_IO_DATA]);
1404 }
1405 
1406 void
1407 ioapic_write(int ioapic_ix, uint32_t reg, uint32_t value)
1408 {
1409         volatile uint32_t *ioapic;
1410 
1411         ioapic = apicioadr[ioapic_ix];
1412         ioapic[APIC_IO_REG] = reg;
1413         ioapic[APIC_IO_DATA] = value;
1414 }
1415 
1416 void
1417 ioapic_write_eoi(int ioapic_ix, uint32_t value)
1418 {
1419         volatile uint32_t *ioapic;
1420 
1421         ioapic = apicioadr[ioapic_ix];
1422         ioapic[APIC_IO_EOI] = value;
1423 }
1424 
/*
 * Round-robin algorithm to find the next CPU with interrupts enabled.
 * It can't share the same static variable apic_next_bind_cpu with
 * apic_get_next_bind_cpu(), since that will cause all interrupts to be
 * bound to CPU1 at boot time.  During boot, only CPU0 is online with
 * interrupts enabled when apic_get_next_bind_cpu() and apic_find_cpu()
 * are called.  However, the pcplusmp driver assumes that there will be
 * boot_ncpus CPUs configured eventually so it tries to distribute all
 * interrupts among CPU0 - CPU[boot_ncpus - 1].  Thus to prevent all
 * interrupts being targeted at CPU1, we need to use a dedicated static
 * variable for apic_find_cpu() instead of sharing apic_next_bind_cpu.
 */
1437 
1438 processorid_t
1439 apic_find_cpu(int flag)
1440 {
1441         int i;
1442         static processorid_t acid = 0;
1443 
1444         /* Find the first CPU with the passed-in flag set */
1445         for (i = 0; i < apic_nproc; i++) {
1446                 if (++acid >= apic_nproc) {
1447                         acid = 0;
1448                 }
1449                 if (apic_cpu_in_range(acid) &&
1450                     (apic_cpus[acid].aci_status & flag)) {
1451                         break;
1452                 }
1453         }
1454 
1455         ASSERT((apic_cpus[acid].aci_status & flag) != 0);
1456         return (acid);
1457 }
1458 
/*
 * Switch between safe and x2APIC IPI sending method.
 * CPU may power on in xAPIC mode or x2APIC mode. If a CPU needs to send an
 * IPI to other CPUs before entering x2APIC mode, it still needs to use the
 * xAPIC method.
 * Before sending StartIPI to the target CPU, psm_send_ipi will be changed to
 * apic_common_send_ipi, which detects the current local APIC mode and uses
 * the right method to send the IPI. If some CPUs fail to start up,
 * apic_poweron_cnt won't return to zero, so apic_common_send_ipi will always
 * be used.
 * psm_send_ipi can't be simply changed back to x2apic_send_ipi if some CPUs
 * failed to start up because those failed CPUs may recover by themselves
 * later at an unpredictable time.
 */
1471 void
1472 apic_switch_ipi_callback(boolean_t enter)
1473 {
1474         ulong_t iflag;
1475         struct psm_ops *pops = psmops;
1476 
1477         iflag = intr_clear();
1478         lock_set(&apic_mode_switch_lock);
1479         if (enter) {
1480                 ASSERT(apic_poweron_cnt >= 0);
1481                 if (apic_poweron_cnt == 0) {
1482                         pops->psm_send_ipi = apic_common_send_ipi;
1483                         send_dirintf = pops->psm_send_ipi;
1484                 }
1485                 apic_poweron_cnt++;
1486         } else {
1487                 ASSERT(apic_poweron_cnt > 0);
1488                 apic_poweron_cnt--;
1489                 if (apic_poweron_cnt == 0) {
1490                         pops->psm_send_ipi = x2apic_send_ipi;
1491                         send_dirintf = pops->psm_send_ipi;
1492                 }
1493         }
1494         lock_clear(&apic_mode_switch_lock);
1495         intr_restore(iflag);
1496 }
1497 
1498 void
1499 apic_intrmap_init(int apic_mode)
1500 {
1501         int suppress_brdcst_eoi = 0;
1502 
1503         /*
1504          * Intel Software Developer's Manual 3A, 10.12.7:
1505          *
1506          * Routing of device interrupts to local APIC units operating in
1507          * x2APIC mode requires use of the interrupt-remapping architecture
1508          * specified in the Intel Virtualization Technology for Directed
1509          * I/O, Revision 1.3.  Because of this, BIOS must enumerate support
1510          * for and software must enable this interrupt remapping with
1511          * Extended Interrupt Mode Enabled before it enabling x2APIC mode in
1512          * the local APIC units.
1513          *
1514          *
1515          * In other words, to use the APIC in x2APIC mode, we need interrupt
1516          * remapping.  Since we don't start up the IOMMU by default, we
1517          * won't be able to do any interrupt remapping and therefore have to
1518          * use the APIC in traditional 'local APIC' mode with memory mapped
1519          * I/O.
1520          */
1521 
1522         if (psm_vt_ops != NULL) {
1523                 if (((apic_intrmap_ops_t *)psm_vt_ops)->
1524                     apic_intrmap_init(apic_mode) == DDI_SUCCESS) {
1525 
1526                         apic_vt_ops = psm_vt_ops;
1527 
1528                         /*
1529                          * We leverage the interrupt remapping engine to
1530                          * suppress broadcast EOI; thus we must send the
1531                          * directed EOI with the directed-EOI handler.
1532                          */
1533                         if (apic_directed_EOI_supported() == 0) {
1534                                 suppress_brdcst_eoi = 1;
1535                         }
1536 
1537                         apic_vt_ops->apic_intrmap_enable(suppress_brdcst_eoi);
1538 
1539                         if (apic_detect_x2apic()) {
1540                                 apic_enable_x2apic();
1541                         }
1542 
1543                         if (apic_directed_EOI_supported() == 0) {
1544                                 apic_set_directed_EOI_handler();
1545                         }
1546                 }
1547         }
1548 }
1549 
1550 /*ARGSUSED*/
1551 static void
1552 apic_record_ioapic_rdt(void *intrmap_private, ioapic_rdt_t *irdt)
1553 {
1554         irdt->ir_hi <<= APIC_ID_BIT_OFFSET;
1555 }
1556 
1557 /*ARGSUSED*/
1558 static void
1559 apic_record_msi(void *intrmap_private, msi_regs_t *mregs)
1560 {
1561         mregs->mr_addr = MSI_ADDR_HDR |
1562             (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
1563             (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT) |
1564             (mregs->mr_addr << MSI_ADDR_DEST_SHIFT);
1565         mregs->mr_data = (MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) |
1566             mregs->mr_data;
1567 }
1568 
1569 /*
1570  * Functions from apic_introp.c
1571  *
1572  * Those functions are used by apic_intr_ops().
1573  */
1574 
1575 /*
1576  * MSI support flag:
1577  * reflects whether MSI is supported at APIC level
1578  * it can also be patched through /etc/system
1579  *
1580  *  0 = default value - don't know and need to call apic_check_msi_support()
1581  *      to find out then set it accordingly
1582  *  1 = supported
1583  * -1 = not supported
1584  */
1585 int     apic_support_msi = 0;
1586 
1587 /* Multiple vector support for MSI-X */
1588 int     apic_msix_enable = 1;
1589 
1590 /* Multiple vector support for MSI */
1591 int     apic_multi_msi_enable = 1;
1592 
1593 /*
1594  * Check whether the system supports MSI.
1595  *
1596  * MSI is required for PCI-E and for PCI versions later than 2.2, so if we find
1597  * a PCI-E bus or we find a PCI bus whose version we know is >= 2.2, then we
1598  * return PSM_SUCCESS to indicate this system supports MSI.
1599  *
1600  * (Currently the only way we check whether a given PCI bus supports >= 2.2 is
1601  * by detecting if we are running inside the KVM hypervisor, which guarantees
1602  * this version number.)
1603  */
1604 int
1605 apic_check_msi_support()
1606 {
1607         dev_info_t *cdip;
1608         char dev_type[16];
1609         int dev_len;
1610 
1611         DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support:\n"));
1612 
1613         /*
1614          * check whether the first level children of root_node have
1615          * PCI-E or PCI capability.
1616          */
1617         for (cdip = ddi_get_child(ddi_root_node()); cdip != NULL;
1618             cdip = ddi_get_next_sibling(cdip)) {
1619 
1620                 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: cdip: 0x%p,"
1621                     " driver: %s, binding: %s, nodename: %s\n", (void *)cdip,
1622                     ddi_driver_name(cdip), ddi_binding_name(cdip),
1623                     ddi_node_name(cdip)));
1624                 dev_len = sizeof (dev_type);
1625                 if (ddi_getlongprop_buf(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
1626                     "device_type", (caddr_t)dev_type, &dev_len)
1627                     != DDI_PROP_SUCCESS)
1628                         continue;
1629                 if (strcmp(dev_type, "pciex") == 0)
1630                         return (PSM_SUCCESS);
1631                 if (strcmp(dev_type, "pci") == 0 && get_hwenv() == HW_KVM)
1632                         return (PSM_SUCCESS);
1633         }
1634 
1635         /* MSI is not supported on this system */
1636         DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: no 'pciex' "
1637             "device_type found\n"));
1638         return (PSM_FAILURE);
1639 }
1640 
/*
 * apic_pci_msi_unconfigure:
 *
 * This and next two interfaces are copied from pci_intr_lib.c
 * Do ensure that these two files stay in sync.
 * These needed to be copied over here to avoid a deadlock situation on
 * certain mp systems that use MSI interrupts.
 *
 * IMPORTANT regards next three interfaces:
 * i) are called only for MSI/X interrupts.
 * ii) called with interrupts disabled, and must not block
 *
 * Clears the MSI or MSI-X programming of device rdip.  type selects
 * between DDI_INTR_TYPE_MSI and DDI_INTR_TYPE_MSIX (any other value is
 * a no-op); inum is the MSI-X table entry to reset and is not used for
 * MSI.
 */
void
apic_pci_msi_unconfigure(dev_info_t *rdip, int type, int inum)
{
        ushort_t                msi_ctrl;
        int                     cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
        ddi_acc_handle_t        handle = i_ddi_get_pci_config_handle(rdip);

        ASSERT((handle != NULL) && (cap_ptr != 0));

        if (type == DDI_INTR_TYPE_MSI) {
                /* Clear the MME field of the control word and write it back */
                msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
                msi_ctrl &= (~PCI_MSI_MME_MASK);
                pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
                /* Zero the (lower) 32 bits of the message address */
                pci_config_put32(handle, cap_ptr + PCI_MSI_ADDR_OFFSET, 0);

                /*
                 * The data register sits at a different offset depending
                 * on whether the 64-bit bit is set in the control word;
                 * in the 64-bit case the upper address dword is zeroed too.
                 */
                if (msi_ctrl &  PCI_MSI_64BIT_MASK) {
                        pci_config_put16(handle,
                            cap_ptr + PCI_MSI_64BIT_DATA, 0);
                        pci_config_put32(handle,
                            cap_ptr + PCI_MSI_ADDR_OFFSET + 4, 0);
                } else {
                        pci_config_put16(handle,
                            cap_ptr + PCI_MSI_32BIT_DATA, 0);
                }

        } else if (type == DDI_INTR_TYPE_MSIX) {
                uintptr_t       off;
                uint32_t        mask;
                ddi_intr_msix_t *msix_p = i_ddi_get_msix(rdip);

                ASSERT(msix_p != NULL);

                /* Offset into "inum"th entry in the MSI-X table & mask it */
                off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
                    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;

                mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);

                /* Set bit 0 of the vector control word, keeping the rest */
                ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask | 1));

                /* Offset into the "inum"th entry in the MSI-X table */
                off = (uintptr_t)msix_p->msix_tbl_addr +
                    (inum * PCI_MSIX_VECTOR_SIZE);

                /* Reset the "data" and "addr" bits */
                ddi_put32(msix_p->msix_tbl_hdl,
                    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), 0);
                ddi_put64(msix_p->msix_tbl_hdl, (uint64_t *)off, 0);
        }
}
1703 
1704 /*
1705  * apic_pci_msi_disable_mode:
1706  */
1707 void
1708 apic_pci_msi_disable_mode(dev_info_t *rdip, int type)
1709 {
1710         ushort_t                msi_ctrl;
1711         int                     cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
1712         ddi_acc_handle_t        handle = i_ddi_get_pci_config_handle(rdip);
1713 
1714         ASSERT((handle != NULL) && (cap_ptr != 0));
1715 
1716         if (type == DDI_INTR_TYPE_MSI) {
1717                 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1718                 if (!(msi_ctrl & PCI_MSI_ENABLE_BIT))
1719                         return;
1720 
1721                 msi_ctrl &= ~PCI_MSI_ENABLE_BIT;    /* MSI disable */
1722                 pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
1723 
1724         } else if (type == DDI_INTR_TYPE_MSIX) {
1725                 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
1726                 if (msi_ctrl & PCI_MSIX_ENABLE_BIT) {
1727                         msi_ctrl &= ~PCI_MSIX_ENABLE_BIT;
1728                         pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
1729                             msi_ctrl);
1730                 }
1731         }
1732 }
1733 
1734 uint32_t
1735 apic_get_localapicid(uint32_t cpuid)
1736 {
1737         ASSERT(cpuid < apic_nproc && apic_cpus != NULL);
1738 
1739         return (apic_cpus[cpuid].aci_local_id);
1740 }
1741 
1742 uchar_t
1743 apic_get_ioapicid(uchar_t ioapicindex)
1744 {
1745         ASSERT(ioapicindex < MAX_IO_APIC);
1746 
1747         return (apic_io_id[ioapicindex]);
1748 }