1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 /*
  26  * Copyright (c) 2017, Joyent, Inc.  All rights reserved.
  27  * Copyright (c) 2016 by Delphix. All rights reserved.
  28  */
  29 
  30 /*
  31  * PSMI 1.1 extensions are supported only in 2.6 and later versions.
  32  * PSMI 1.2 extensions are supported only in 2.7 and later versions.
  33  * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
  34  * PSMI 1.5 extensions are supported in Solaris Nevada.
  35  * PSMI 1.6 extensions are supported in Solaris Nevada.
  36  * PSMI 1.7 extensions are supported in Solaris Nevada.
  37  */
  38 #define PSMI_1_7
  39 
  40 #include <sys/processor.h>
  41 #include <sys/time.h>
  42 #include <sys/psm.h>
  43 #include <sys/smp_impldefs.h>
  44 #include <sys/cram.h>
  45 #include <sys/acpi/acpi.h>
  46 #include <sys/acpica.h>
  47 #include <sys/psm_common.h>
  48 #include <sys/apic.h>
  49 #include <sys/pit.h>
  50 #include <sys/ddi.h>
  51 #include <sys/sunddi.h>
  52 #include <sys/ddi_impldefs.h>
  53 #include <sys/pci.h>
  54 #include <sys/promif.h>
  55 #include <sys/x86_archext.h>
  56 #include <sys/cpc_impl.h>
  57 #include <sys/uadmin.h>
  58 #include <sys/panic.h>
  59 #include <sys/debug.h>
  60 #include <sys/archsystm.h>
  61 #include <sys/trap.h>
  62 #include <sys/machsystm.h>
  63 #include <sys/sysmacros.h>
  64 #include <sys/cpuvar.h>
  65 #include <sys/rm_platter.h>
  66 #include <sys/privregs.h>
  67 #include <sys/note.h>
  68 #include <sys/pci_intr_lib.h>
  69 #include <sys/spl.h>
  70 #include <sys/clock.h>
  71 #include <sys/dditypes.h>
  72 #include <sys/sunddi.h>
  73 #include <sys/x_call.h>
  74 #include <sys/reboot.h>
  75 #include <sys/hpet.h>
  76 #include <sys/apic_common.h>
  77 #include <sys/apic_timer.h>
  78 
  79 static void     apic_record_ioapic_rdt(void *intrmap_private,
  80                     ioapic_rdt_t *irdt);
  81 static void     apic_record_msi(void *intrmap_private, msi_regs_t *mregs);
  82 
  83 /*
  84  * Common routines between pcplusmp & apix (taken from apic.c).
  85  */
  86 
  87 int     apic_clkinit(int);
  88 hrtime_t apic_gethrtime(void);
  89 void    apic_send_ipi(int, int);
  90 void    apic_set_idlecpu(processorid_t);
  91 void    apic_unset_idlecpu(processorid_t);
  92 void    apic_shutdown(int, int);
  93 void    apic_preshutdown(int, int);
  94 processorid_t   apic_get_next_processorid(processorid_t);
  95 
  96 hrtime_t apic_gettime();
  97 
  98 enum apic_ioapic_method_type apix_mul_ioapic_method = APIC_MUL_IOAPIC_PCPLUSMP;
  99 
 100 /* Now the ones for Dynamic Interrupt distribution */
 101 int     apic_enable_dynamic_migration = 0;
 102 
 103 /* maximum loop count when sending Start IPIs. */
 104 int apic_sipi_max_loop_count = 0x1000;
 105 
 106 /*
 107  * These variables are frequently accessed in apic_intr_enter(),
 108  * apic_intr_exit and apic_setspl, so group them together
 109  */
 110 volatile uint32_t *apicadr =  NULL;     /* virtual addr of local APIC   */
 111 int apic_setspl_delay = 1;              /* apic_setspl - delay enable   */
 112 int apic_clkvect;
 113 
 114 /* vector at which error interrupts come in */
 115 int apic_errvect;
 116 int apic_enable_error_intr = 1;
 117 int apic_error_display_delay = 100;
 118 
 119 /* vector at which performance counter overflow interrupts come in */
 120 int apic_cpcovf_vect;
 121 int apic_enable_cpcovf_intr = 1;
 122 
 123 /* vector at which CMCI interrupts come in */
 124 int apic_cmci_vect;
 125 extern int cmi_enable_cmci;
 126 extern void cmi_cmci_trap(void);
 127 
 128 kmutex_t cmci_cpu_setup_lock;   /* protects cmci_cpu_setup_registered */
 129 int cmci_cpu_setup_registered;
 130 
 131 lock_t apic_mode_switch_lock;
 132 
 133 /*
 134  * Patchable global variables.
 135  */
 136 int     apic_forceload = 0;
 137 
 138 int     apic_coarse_hrtime = 1;         /* 0 - use accurate slow gethrtime() */
 139 
 140 int     apic_flat_model = 0;            /* 0 - clustered. 1 - flat */
 141 int     apic_panic_on_nmi = 0;
 142 int     apic_panic_on_apic_error = 0;
 143 
 144 int     apic_verbose = 0;       /* 0x1ff */
 145 
 146 #ifdef DEBUG
 147 int     apic_debug = 0;
 148 int     apic_restrict_vector = 0;
 149 
 150 int     apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE];
 151 int     apic_debug_msgbufindex = 0;
 152 
 153 #endif /* DEBUG */
 154 
 155 uint_t apic_nticks = 0;
 156 uint_t apic_skipped_redistribute = 0;
 157 
 158 uint_t last_count_read = 0;
 159 lock_t  apic_gethrtime_lock;
 160 volatile int    apic_hrtime_stamp = 0;
 161 volatile hrtime_t apic_nsec_since_boot = 0;
 162 
 163 static  hrtime_t        apic_last_hrtime = 0;
 164 int             apic_hrtime_error = 0;
 165 int             apic_remote_hrterr = 0;
 166 int             apic_num_nmis = 0;
 167 int             apic_apic_error = 0;
 168 int             apic_num_apic_errors = 0;
 169 int             apic_num_cksum_errors = 0;
 170 
 171 int     apic_error = 0;
 172 
 173 static  int     apic_cmos_ssb_set = 0;
 174 
 175 /* use to make sure only one cpu handles the nmi */
 176 lock_t  apic_nmi_lock;
 177 /* use to make sure only one cpu handles the error interrupt */
 178 lock_t  apic_error_lock;
 179 
/*
 * IPMI system-interface command sequence for the Aspen BMC: each entry is
 * a KCS control operation plus the data byte to transfer.  The first group
 * issues Set Watchdog Timer (cmd 0x24) configured for a power-down action
 * with a short countdown; the second group issues Reset Watchdog Timer
 * (cmd 0x22) to start it counting.
 */
static	struct {
	uchar_t	cntl;
	uchar_t	data;
} aspen_bmc[] = {
	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
	{ CC_SMS_WR_NEXT,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
	{ CC_SMS_WR_NEXT,	0x84 },		/* DataByte 1: SMS/OS no log */
	{ CC_SMS_WR_NEXT,	0x2 },		/* DataByte 2: Power Down */
	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 3: no pre-timeout */
	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 4: timer expir. */
	{ CC_SMS_WR_NEXT,	0xa },		/* DataByte 5: init countdown */
	{ CC_SMS_WR_END,	0x0 },		/* DataByte 6: init countdown */

	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
	{ CC_SMS_WR_END,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
};
 196 
/*
 * Same watchdog set/reset command sequence as aspen_bmc above, but for the
 * Sitka BMC, whose SMS interface is driven through explicit command/data
 * I/O ports; each entry names the port and the byte written to it.
 */
static	struct {
	int	port;
	uchar_t	data;
} sitka_bmc[] = {
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
	{ SMS_DATA_REGISTER,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
	{ SMS_DATA_REGISTER,	0x84 },		/* DataByte 1: SMS/OS no log */
	{ SMS_DATA_REGISTER,	0x2 },		/* DataByte 2: Power Down */
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 3: no pre-timeout */
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 4: timer expir. */
	{ SMS_DATA_REGISTER,	0xa },		/* DataByte 5: init countdown */
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 6: init countdown */

	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
	{ SMS_DATA_REGISTER,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
};
 217 
 218 /* Patchable global variables. */
 219 int             apic_kmdb_on_nmi = 0;           /* 0 - no, 1 - yes enter kmdb */
 220 uint32_t        apic_divide_reg_init = 0;       /* 0 - divide by 2 */
 221 
 222 /* default apic ops without interrupt remapping */
 223 static apic_intrmap_ops_t apic_nointrmap_ops = {
 224         (int (*)(int))return_instr,
 225         (void (*)(int))return_instr,
 226         (void (*)(void **, dev_info_t *, uint16_t, int, uchar_t))return_instr,
 227         (void (*)(void *, void *, uint16_t, int))return_instr,
 228         (void (*)(void **))return_instr,
 229         apic_record_ioapic_rdt,
 230         apic_record_msi,
 231 };
 232 
 233 apic_intrmap_ops_t *apic_vt_ops = &apic_nointrmap_ops;
 234 apic_cpus_info_t        *apic_cpus = NULL;
 235 cpuset_t        apic_cpumask;
 236 uint_t          apic_picinit_called;
 237 
 238 /* Flag to indicate that we need to shut down all processors */
 239 static uint_t   apic_shutdown_processors;
 240 
 241 /*
 242  * Probe the ioapic method for apix module. Called in apic_probe_common()
 243  */
 244 int
 245 apic_ioapic_method_probe()
 246 {
 247         if (apix_enable == 0)
 248                 return (PSM_SUCCESS);
 249 
 250         /*
 251          * Set IOAPIC EOI handling method. The priority from low to high is:
 252          *      1. IOxAPIC: with EOI register
 253          *      2. IOMMU interrupt mapping
 254          *      3. Mask-Before-EOI method for systems without boot
 255          *      interrupt routing, such as systems with only one IOAPIC;
 256          *      NVIDIA CK8-04/MCP55 systems; systems with bridge solution
 257          *      which disables the boot interrupt routing already.
 258          *      4. Directed EOI
 259          */
 260         if (apic_io_ver[0] >= 0x20)
 261                 apix_mul_ioapic_method = APIC_MUL_IOAPIC_IOXAPIC;
 262         if ((apic_io_max == 1) || (apic_nvidia_io_max == apic_io_max))
 263                 apix_mul_ioapic_method = APIC_MUL_IOAPIC_MASK;
 264         if (apic_directed_EOI_supported())
 265                 apix_mul_ioapic_method = APIC_MUL_IOAPIC_DEOI;
 266 
 267         /* fall back to pcplusmp */
 268         if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_PCPLUSMP) {
 269                 /* make sure apix is after pcplusmp in /etc/mach */
 270                 apix_enable = 0; /* go ahead with pcplusmp install next */
 271                 return (PSM_FAILURE);
 272         }
 273 
 274         return (PSM_SUCCESS);
 275 }
 276 
 277 /*
 278  * handler for APIC Error interrupt. Just print a warning and continue
 279  */
 280 int
 281 apic_error_intr()
 282 {
 283         uint_t  error0, error1, error;
 284         uint_t  i;
 285 
 286         /*
 287          * We need to write before read as per 7.4.17 of system prog manual.
 288          * We do both and or the results to be safe
 289          */
 290         error0 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
 291         apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
 292         error1 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
 293         error = error0 | error1;
 294 
 295         /*
 296          * Clear the APIC error status (do this on all cpus that enter here)
 297          * (two writes are required due to the semantics of accessing the
 298          * error status register.)
 299          */
 300         apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
 301         apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
 302 
 303         /*
 304          * Prevent more than 1 CPU from handling error interrupt causing
 305          * double printing (interleave of characters from multiple
 306          * CPU's when using prom_printf)
 307          */
 308         if (lock_try(&apic_error_lock) == 0)
 309                 return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
 310         if (error) {
 311 #if     DEBUG
 312                 if (apic_debug)
 313                         debug_enter("pcplusmp: APIC Error interrupt received");
 314 #endif /* DEBUG */
 315                 if (apic_panic_on_apic_error)
 316                         cmn_err(CE_PANIC,
 317                             "APIC Error interrupt on CPU %d. Status = %x",
 318                             psm_get_cpu_id(), error);
 319                 else {
 320                         if ((error & ~APIC_CS_ERRORS) == 0) {
 321                                 /* cksum error only */
 322                                 apic_error |= APIC_ERR_APIC_ERROR;
 323                                 apic_apic_error |= error;
 324                                 apic_num_apic_errors++;
 325                                 apic_num_cksum_errors++;
 326                         } else {
 327                                 /*
 328                                  * prom_printf is the best shot we have of
 329                                  * something which is problem free from
 330                                  * high level/NMI type of interrupts
 331                                  */
 332                                 prom_printf("APIC Error interrupt on CPU %d. "
 333                                     "Status 0 = %x, Status 1 = %x\n",
 334                                     psm_get_cpu_id(), error0, error1);
 335                                 apic_error |= APIC_ERR_APIC_ERROR;
 336                                 apic_apic_error |= error;
 337                                 apic_num_apic_errors++;
 338                                 for (i = 0; i < apic_error_display_delay; i++) {
 339                                         tenmicrosec();
 340                                 }
 341                                 /*
 342                                  * provide more delay next time limited to
 343                                  * roughly 1 clock tick time
 344                                  */
 345                                 if (apic_error_display_delay < 500)
 346                                         apic_error_display_delay *= 2;
 347                         }
 348                 }
 349                 lock_clear(&apic_error_lock);
 350                 return (DDI_INTR_CLAIMED);
 351         } else {
 352                 lock_clear(&apic_error_lock);
 353                 return (DDI_INTR_UNCLAIMED);
 354         }
 355 }
 356 
 357 /*
 358  * Turn off the mask bit in the performance counter Local Vector Table entry.
 359  */
 360 void
 361 apic_cpcovf_mask_clear(void)
 362 {
 363         apic_reg_ops->apic_write(APIC_PCINT_VECT,
 364             (apic_reg_ops->apic_read(APIC_PCINT_VECT) & ~APIC_LVT_MASK));
 365 }
 366 
/*
 * Cross-call target: enable CMCI delivery on the executing CPU by
 * programming its CMCI LVT entry with apic_cmci_vect (mask bit clear).
 * Arguments are unused; always returns 0.
 */
/*ARGSUSED*/
static int
apic_cmci_enable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
{
	apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
	return (0);
}
 374 
/*
 * Cross-call target: disable CMCI delivery on the executing CPU by
 * setting the mask bit (AV_MASK) in its CMCI LVT entry.
 * Arguments are unused; always returns 0.
 */
/*ARGSUSED*/
static int
apic_cmci_disable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
{
	apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect | AV_MASK);
	return (0);
}
 382 
 383 /*ARGSUSED*/
 384 int
 385 cmci_cpu_setup(cpu_setup_t what, int cpuid, void *arg)
 386 {
 387         cpuset_t        cpu_set;
 388 
 389         CPUSET_ONLY(cpu_set, cpuid);
 390 
 391         switch (what) {
 392                 case CPU_ON:
 393                         xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
 394                             (xc_func_t)apic_cmci_enable);
 395                         break;
 396 
 397                 case CPU_OFF:
 398                         xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
 399                             (xc_func_t)apic_cmci_disable);
 400                         break;
 401 
 402                 default:
 403                         break;
 404         }
 405 
 406         return (0);
 407 }
 408 
/*
 * Quiesce the local APIC: raise the task priority to mask all interrupt
 * classes, then mask every LVT entry (timer, LINT0/LINT1, error and
 * performance-counter), and finally program the spurious-interrupt
 * register with APIC_SPUR_INTR.
 */
static void
apic_disable_local_apic(void)
{
	apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);
	apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK);

	/* local intr reg 0 */
	apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK);

	/* disable NMI */
	apic_reg_ops->apic_write(APIC_INT_VECT1, AV_MASK);

	/* and error interrupt */
	apic_reg_ops->apic_write(APIC_ERR_VECT, AV_MASK);

	/* and perf counter intr */
	apic_reg_ops->apic_write(APIC_PCINT_VECT, AV_MASK);

	apic_reg_ops->apic_write(APIC_SPUR_INT_REG, APIC_SPUR_INTR);
}
 429 
/*
 * Send the INIT / Startup-IPI sequence to CPU 'cpun'.  An INIT IPI
 * (assert then deassert) is always sent; for integrated local APICs two
 * Startup IPIs follow, whose vector is the page number of the real-mode
 * platter.  When 'start' is B_TRUE the CMOS shutdown byte is first set to
 * BIOS_SHUTDOWN (warm-reset startup path); apic_cpu_stop() calls this
 * with B_FALSE — presumably the platter code then parks the CPU, but
 * that code is not visible here.
 */
static void
apic_cpu_send_SIPI(processorid_t cpun, boolean_t start)
{
	int		loop_count;
	uint32_t	vector;
	uint_t		apicid;
	ulong_t		iflag;

	apicid =  apic_cpus[cpun].aci_local_id;

	/*
	 * Interrupts on current CPU will be disabled during the
	 * steps in order to avoid unwanted side effects from
	 * executing interrupt handlers on a problematic BIOS.
	 */
	iflag = intr_clear();

	if (start) {
		outb(CMOS_ADDR, SSB);
		outb(CMOS_DATA, BIOS_SHUTDOWN);
	}

	/*
	 * According to X2APIC specification in section '2.3.5.1' of
	 * Interrupt Command Register Semantics, the semantics of
	 * programming the Interrupt Command Register to dispatch an interrupt
	 * is simplified. A single MSR write to the 64-bit ICR is required
	 * for dispatching an interrupt. Specifically, with the 64-bit MSR
	 * interface to ICR, system software is not required to check the
	 * status of the delivery status bit prior to writing to the ICR
	 * to send an IPI. With the removal of the Delivery Status bit,
	 * system software no longer has a reason to read the ICR. It remains
	 * readable only to aid in debugging.
	 */
#ifdef	DEBUG
	APIC_AV_PENDING_SET();
#else
	if (apic_mode == LOCAL_APIC) {
		APIC_AV_PENDING_SET();
	}
#endif /* DEBUG */

	/* for integrated - make sure there is one INIT IPI in buffer */
	/* for external - it will wake up the cpu */
	apic_reg_ops->apic_write_int_cmd(apicid, AV_ASSERT | AV_RESET);

	/* If only 1 CPU is installed, PENDING bit will not go low */
	for (loop_count = apic_sipi_max_loop_count; loop_count; loop_count--) {
		if (apic_mode == LOCAL_APIC &&
		    apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
			apic_ret();
		else
			break;
	}

	apic_reg_ops->apic_write_int_cmd(apicid, AV_DEASSERT | AV_RESET);
	drv_usecwait(20000);		/* 20 milli sec */

	if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) {
		/* integrated apic */

		/* SIPI vector = physical page number of the rm platter */
		vector = (rm_platter_pa >> MMU_PAGESHIFT) &
		    (APIC_VECTOR_MASK | APIC_IPL_MASK);

		/* to offset the INIT IPI queue up in the buffer */
		apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
		drv_usecwait(200);		/* 20 micro sec */

		/*
		 * send the second SIPI (Startup IPI) as recommended by Intel
		 * software development manual.
		 */
		apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
		drv_usecwait(200);	/* 20 micro sec */
	}

	intr_restore(iflag);
}
 508 
 509 /*ARGSUSED1*/
 510 int
 511 apic_cpu_start(processorid_t cpun, caddr_t arg)
 512 {
 513         ASSERT(MUTEX_HELD(&cpu_lock));
 514 
 515         if (!apic_cpu_in_range(cpun)) {
 516                 return (EINVAL);
 517         }
 518 
 519         /*
 520          * Switch to apic_common_send_ipi for safety during starting other CPUs.
 521          */
 522         if (apic_mode == LOCAL_X2APIC) {
 523                 apic_switch_ipi_callback(B_TRUE);
 524         }
 525 
 526         apic_cmos_ssb_set = 1;
 527         apic_cpu_send_SIPI(cpun, B_TRUE);
 528 
 529         return (0);
 530 }
 531 
 532 /*
 533  * Put CPU into halted state with interrupts disabled.
 534  */
 535 /*ARGSUSED1*/
 536 int
 537 apic_cpu_stop(processorid_t cpun, caddr_t arg)
 538 {
 539         int             rc;
 540         cpu_t           *cp;
 541         extern cpuset_t cpu_ready_set;
 542         extern void cpu_idle_intercept_cpu(cpu_t *cp);
 543 
 544         ASSERT(MUTEX_HELD(&cpu_lock));
 545 
 546         if (!apic_cpu_in_range(cpun)) {
 547                 return (EINVAL);
 548         }
 549         if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
 550                 return (ENOTSUP);
 551         }
 552 
 553         cp = cpu_get(cpun);
 554         ASSERT(cp != NULL);
 555         ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0);
 556         ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0);
 557         ASSERT((cp->cpu_flags & CPU_ENABLE) == 0);
 558 
 559         /* Clear CPU_READY flag to disable cross calls. */
 560         cp->cpu_flags &= ~CPU_READY;
 561         CPUSET_ATOMIC_DEL(cpu_ready_set, cpun);
 562         rc = xc_flush_cpu(cp);
 563         if (rc != 0) {
 564                 CPUSET_ATOMIC_ADD(cpu_ready_set, cpun);
 565                 cp->cpu_flags |= CPU_READY;
 566                 return (rc);
 567         }
 568 
 569         /* Intercept target CPU at a safe point before powering it off. */
 570         cpu_idle_intercept_cpu(cp);
 571 
 572         apic_cpu_send_SIPI(cpun, B_FALSE);
 573         cp->cpu_flags &= ~CPU_RUNNING;
 574 
 575         return (0);
 576 }
 577 
 578 int
 579 apic_cpu_ops(psm_cpu_request_t *reqp)
 580 {
 581         if (reqp == NULL) {
 582                 return (EINVAL);
 583         }
 584 
 585         switch (reqp->pcr_cmd) {
 586         case PSM_CPU_ADD:
 587                 return (apic_cpu_add(reqp));
 588 
 589         case PSM_CPU_REMOVE:
 590                 return (apic_cpu_remove(reqp));
 591 
 592         case PSM_CPU_STOP:
 593                 return (apic_cpu_stop(reqp->req.cpu_stop.cpuid,
 594                     reqp->req.cpu_stop.ctx));
 595 
 596         default:
 597                 return (ENOTSUP);
 598         }
 599 }
 600 
 601 #ifdef  DEBUG
 602 int     apic_break_on_cpu = 9;
 603 int     apic_stretch_interrupts = 0;
 604 int     apic_stretch_ISR = 1 << 3;        /* IPL of 3 matches nothing now */
 605 #endif /* DEBUG */
 606 
 607 /*
 608  * generates an interprocessor interrupt to another CPU. Any changes made to
 609  * this routine must be accompanied by similar changes to
 610  * apic_common_send_ipi().
 611  */
 612 void
 613 apic_send_ipi(int cpun, int ipl)
 614 {
 615         int vector;
 616         ulong_t flag;
 617 
 618         vector = apic_resv_vector[ipl];
 619 
 620         ASSERT((vector >= APIC_BASE_VECT) && (vector <= APIC_SPUR_INTR));
 621 
 622         flag = intr_clear();
 623 
 624         APIC_AV_PENDING_SET();
 625 
 626         apic_reg_ops->apic_write_int_cmd(apic_cpus[cpun].aci_local_id,
 627             vector);
 628 
 629         intr_restore(flag);
 630 }
 631 
 632 
/*
 * PSM idle-CPU notification hook; deliberately a no-op for this PSM.
 */
/*ARGSUSED*/
void
apic_set_idlecpu(processorid_t cpun)
{
}
 638 
/*
 * PSM un-idle notification hook; deliberately a no-op for this PSM.
 */
/*ARGSUSED*/
void
apic_unset_idlecpu(processorid_t cpun)
{
}
 644 
 645 
/*
 * Deliberately empty function, used as the body of the busy-wait loops
 * in this file (e.g. while polling APIC pending/status bits).
 */
void
apic_ret()
{
}
 650 
 651 /*
 652  * If apic_coarse_time == 1, then apic_gettime() is used instead of
 653  * apic_gethrtime().  This is used for performance instead of accuracy.
 654  */
 655 
/*
 * Coarse time-since-boot: returns apic_nsec_since_boot, which is advanced
 * by the clock interrupt, without touching timer hardware.
 *
 * apic_hrtime_stamp is used as a sequence counter around updates of
 * apic_nsec_since_boot: an odd value appears to mean an update is in
 * progress (updater code not visible here — confirm against the clock
 * interrupt path), and any change across the read means we raced an
 * interrupt, so retry.
 */
hrtime_t
apic_gettime()
{
	int old_hrtime_stamp;
	hrtime_t temp;

	/*
	 * In one-shot mode, we do not keep time, so if anyone
	 * calls psm_gettime() directly, we vector over to
	 * gethrtime().
	 * one-shot mode MUST NOT be enabled if this psm is the source of
	 * hrtime.
	 */

	if (apic_oneshot)
		return (gethrtime());


gettime_again:
	/* Spin while an update is in flight (stamp is odd). */
	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
		apic_ret();

	temp = apic_nsec_since_boot;

	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
		goto gettime_again;
	}
	return (temp);
}
 685 
 686 /*
 687  * Here we return the number of nanoseconds since booting.  Note every
 688  * clock interrupt increments apic_nsec_since_boot by the appropriate
 689  * amount.
 690  */
 691 hrtime_t
 692 apic_gethrtime(void)
 693 {
 694         int curr_timeval, countval, elapsed_ticks;
 695         int old_hrtime_stamp, status;
 696         hrtime_t temp;
 697         uint32_t cpun;
 698         ulong_t oflags;
 699 
 700         /*
 701          * In one-shot mode, we do not keep time, so if anyone
 702          * calls psm_gethrtime() directly, we vector over to
 703          * gethrtime().
 704          * one-shot mode MUST NOT be enabled if this psm is the source of
 705          * hrtime.
 706          */
 707 
 708         if (apic_oneshot)
 709                 return (gethrtime());
 710 
 711         oflags = intr_clear();  /* prevent migration */
 712 
 713         cpun = apic_reg_ops->apic_read(APIC_LID_REG);
 714         if (apic_mode == LOCAL_APIC)
 715                 cpun >>= APIC_ID_BIT_OFFSET;
 716 
 717         lock_set(&apic_gethrtime_lock);
 718 
 719 gethrtime_again:
 720         while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
 721                 apic_ret();
 722 
 723         /*
 724          * Check to see which CPU we are on.  Note the time is kept on
 725          * the local APIC of CPU 0.  If on CPU 0, simply read the current
 726          * counter.  If on another CPU, issue a remote read command to CPU 0.
 727          */
 728         if (cpun == apic_cpus[0].aci_local_id) {
 729                 countval = apic_reg_ops->apic_read(APIC_CURR_COUNT);
 730         } else {
 731 #ifdef  DEBUG
 732                 APIC_AV_PENDING_SET();
 733 #else
 734                 if (apic_mode == LOCAL_APIC)
 735                         APIC_AV_PENDING_SET();
 736 #endif /* DEBUG */
 737 
 738                 apic_reg_ops->apic_write_int_cmd(
 739                     apic_cpus[0].aci_local_id, APIC_CURR_ADD | AV_REMOTE);
 740 
 741                 while ((status = apic_reg_ops->apic_read(APIC_INT_CMD1))
 742                     & AV_READ_PENDING) {
 743                         apic_ret();
 744                 }
 745 
 746                 if (status & AV_REMOTE_STATUS)      /* 1 = valid */
 747                         countval = apic_reg_ops->apic_read(APIC_REMOTE_READ);
 748                 else {  /* 0 = invalid */
 749                         apic_remote_hrterr++;
 750                         /*
 751                          * return last hrtime right now, will need more
 752                          * testing if change to retry
 753                          */
 754                         temp = apic_last_hrtime;
 755 
 756                         lock_clear(&apic_gethrtime_lock);
 757 
 758                         intr_restore(oflags);
 759 
 760                         return (temp);
 761                 }
 762         }
 763         if (countval > last_count_read)
 764                 countval = 0;
 765         else
 766                 last_count_read = countval;
 767 
 768         elapsed_ticks = apic_hertz_count - countval;
 769 
 770         curr_timeval = APIC_TICKS_TO_NSECS(elapsed_ticks);
 771         temp = apic_nsec_since_boot + curr_timeval;
 772 
 773         if (apic_hrtime_stamp != old_hrtime_stamp) {    /* got an interrupt */
 774                 /* we might have clobbered last_count_read. Restore it */
 775                 last_count_read = apic_hertz_count;
 776                 goto gethrtime_again;
 777         }
 778 
 779         if (temp < apic_last_hrtime) {
 780                 /* return last hrtime if error occurs */
 781                 apic_hrtime_error++;
 782                 temp = apic_last_hrtime;
 783         }
 784         else
 785                 apic_last_hrtime = temp;
 786 
 787         lock_clear(&apic_gethrtime_lock);
 788         intr_restore(oflags);
 789 
 790         return (temp);
 791 }
 792 
/* apic NMI handler */
/*
 * During an orderly shutdown the NMI is used to take CPUs down, so just
 * mask the local APIC and return.  Otherwise record the NMI and — on the
 * single CPU that wins apic_nmi_lock — either enter kmdb, panic, or print
 * a console message, depending on the apic_kmdb_on_nmi/apic_panic_on_nmi
 * patchables.  The register state argument is unused.
 */
/*ARGSUSED*/
void
apic_nmi_intr(caddr_t arg, struct regs *rp)
{
	if (apic_shutdown_processors) {
		apic_disable_local_apic();
		return;
	}

	apic_error |= APIC_ERR_NMI;

	/* Only one CPU reports; the rest leave quietly. */
	if (!lock_try(&apic_nmi_lock))
		return;
	apic_num_nmis++;

	if (apic_kmdb_on_nmi && psm_debugger()) {
		debug_enter("NMI received: entering kmdb\n");
	} else if (apic_panic_on_nmi) {
		/* Keep panic from entering kmdb. */
		nopanicdebug = 1;
		panic("NMI received\n");
	} else {
		/*
		 * prom_printf is the best shot we have of something which is
		 * problem free from high level/NMI type of interrupts
		 */
		prom_printf("NMI received\n");
	}

	lock_clear(&apic_nmi_lock);
}
 825 
 826 processorid_t
 827 apic_get_next_processorid(processorid_t cpu_id)
 828 {
 829 
 830         int i;
 831 
 832         if (cpu_id == -1)
 833                 return ((processorid_t)0);
 834 
 835         for (i = cpu_id + 1; i < NCPU; i++) {
 836                 if (apic_cpu_in_range(i))
 837                         return (i);
 838         }
 839 
 840         return ((processorid_t)-1);
 841 }
 842 
/*
 * Hot-add a CPU described by reqp->req.cpu_add.argp to the apic_cpus
 * array.  On success the allocated logical cpuid is stored in
 * reqp->req.cpu_add.cpuid and 0 is returned; otherwise an errno value
 * is returned (ENOTSUP, EINVAL, EEXIST, EAGAIN or EBUSY) and the cpuid
 * field is left as -1.
 */
int
apic_cpu_add(psm_cpu_request_t *reqp)
{
	int i, rv = 0;
	ulong_t iflag;
	boolean_t first = B_TRUE;
	uchar_t localver;
	uint32_t localid, procid;
	processorid_t cpuid = (processorid_t)-1;
	mach_cpu_add_arg_t *ap;

	ASSERT(reqp != NULL);
	reqp->req.cpu_add.cpuid = (processorid_t)-1;

	/* Check whether CPU hotplug is supported. */
	if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
		return (ENOTSUP);
	}

	/*
	 * Validate the caller-supplied APIC/processor ids for the given
	 * argument type before touching any global state.
	 */
	ap = (mach_cpu_add_arg_t *)reqp->req.cpu_add.argp;
	switch (ap->type) {
	case MACH_CPU_ARG_LOCAL_APIC:
		localid = ap->arg.apic.apic_id;
		procid = ap->arg.apic.proc_id;
		/* Legacy local APIC ids are 8 bits; 255 is broadcast. */
		if (localid >= 255 || procid > 255) {
			cmn_err(CE_WARN,
			    "!apic: apicid(%u) or procid(%u) is invalid.",
			    localid, procid);
			return (EINVAL);
		}
		break;

	case MACH_CPU_ARG_LOCAL_X2APIC:
		localid = ap->arg.apic.apic_id;
		procid = ap->arg.apic.proc_id;
		/* UINT32_MAX is the x2APIC broadcast id. */
		if (localid >= UINT32_MAX) {
			cmn_err(CE_WARN,
			    "!apic: x2apicid(%u) is invalid.", localid);
			return (EINVAL);
		} else if (localid >= 255 && apic_mode == LOCAL_APIC) {
			/* Ids >= 255 are only addressable in x2APIC mode. */
			cmn_err(CE_WARN, "!apic: system is in APIC mode, "
			    "can't support x2APIC processor.");
			return (ENOTSUP);
		}
		break;

	default:
		cmn_err(CE_WARN,
		    "!apic: unknown argument type %d to apic_cpu_add().",
		    ap->type);
		return (EINVAL);
	}

	/* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */
	iflag = intr_clear();
	lock_set(&apic_ioapic_lock);

	/* Check whether local APIC id already exists. */
	for (i = 0; i < apic_nproc; i++) {
		if (!CPU_IN_SET(apic_cpumask, i))
			continue;
		if (apic_cpus[i].aci_local_id == localid) {
			/* Drop the lock before cmn_err/return. */
			lock_clear(&apic_ioapic_lock);
			intr_restore(iflag);
			cmn_err(CE_WARN,
			    "!apic: local apic id %u already exists.",
			    localid);
			return (EEXIST);
		} else if (apic_cpus[i].aci_processor_id == procid) {
			lock_clear(&apic_ioapic_lock);
			intr_restore(iflag);
			cmn_err(CE_WARN,
			    "!apic: processor id %u already exists.",
			    (int)procid);
			return (EEXIST);
		}

		/*
		 * There's no local APIC version number available in MADT table,
		 * so assume that all CPUs are homogeneous and use local APIC
		 * version number of the first existing CPU.
		 */
		if (first) {
			first = B_FALSE;
			localver = apic_cpus[i].aci_local_ver;
		}
	}
	/*
	 * NOTE(review): if no existing CPU were in apic_cpumask, localver
	 * would remain uninitialized below; this ASSERT only guards DEBUG
	 * kernels.  In practice the boot CPU is always present.
	 */
	ASSERT(first == B_FALSE);

	/*
	 * Try to assign the same cpuid if APIC id exists in the dirty cache.
	 */
	for (i = 0; i < apic_max_nproc; i++) {
		if (CPU_IN_SET(apic_cpumask, i)) {
			ASSERT((apic_cpus[i].aci_status & APIC_CPU_FREE) == 0);
			continue;
		}
		ASSERT(apic_cpus[i].aci_status & APIC_CPU_FREE);
		if ((apic_cpus[i].aci_status & APIC_CPU_DIRTY) &&
		    apic_cpus[i].aci_local_id == localid &&
		    apic_cpus[i].aci_processor_id == procid) {
			cpuid = i;
			break;
		}
	}

	/* Avoid the dirty cache and allocate fresh slot if possible. */
	if (cpuid == (processorid_t)-1) {
		for (i = 0; i < apic_max_nproc; i++) {
			if ((apic_cpus[i].aci_status & APIC_CPU_FREE) &&
			    (apic_cpus[i].aci_status & APIC_CPU_DIRTY) == 0) {
				cpuid = i;
				break;
			}
		}
	}

	/* Try to find any free slot as last resort. */
	if (cpuid == (processorid_t)-1) {
		for (i = 0; i < apic_max_nproc; i++) {
			if (apic_cpus[i].aci_status & APIC_CPU_FREE) {
				cpuid = i;
				break;
			}
		}
	}

	if (cpuid == (processorid_t)-1) {
		/* No free slot at all: caller may retry later. */
		lock_clear(&apic_ioapic_lock);
		intr_restore(iflag);
		cmn_err(CE_NOTE,
		    "!apic: failed to allocate cpu id for processor %u.",
		    procid);
		rv = EAGAIN;
	} else if (ACPI_FAILURE(acpica_map_cpu(cpuid, procid))) {
		/* ACPI could not bind the processor object to this cpuid. */
		lock_clear(&apic_ioapic_lock);
		intr_restore(iflag);
		cmn_err(CE_NOTE,
		    "!apic: failed to build mapping for processor %u.",
		    procid);
		rv = EBUSY;
	} else {
		ASSERT(cpuid >= 0 && cpuid < NCPU);
		ASSERT(cpuid < apic_max_nproc && cpuid < max_ncpus);
		/* Publish the new CPU's slot and grow apic_nproc if needed. */
		bzero(&apic_cpus[cpuid], sizeof (apic_cpus[0]));
		apic_cpus[cpuid].aci_processor_id = procid;
		apic_cpus[cpuid].aci_local_id = localid;
		apic_cpus[cpuid].aci_local_ver = localver;
		CPUSET_ATOMIC_ADD(apic_cpumask, cpuid);
		if (cpuid >= apic_nproc) {
			apic_nproc = cpuid + 1;
		}
		lock_clear(&apic_ioapic_lock);
		intr_restore(iflag);
		reqp->req.cpu_add.cpuid = cpuid;
	}

	return (rv);
}
1002 
/*
 * Hot-remove the CPU identified by reqp->req.cpu_remove.cpuid from the
 * apic_cpus array.  Returns 0 on success or an errno value (ENOTSUP,
 * ENODEV, ENOENT).  The slot is kept in the "dirty cache" so a later
 * re-add of the same APIC id can reclaim the same cpuid.
 */
int
apic_cpu_remove(psm_cpu_request_t *reqp)
{
	int i;
	ulong_t iflag;
	processorid_t cpuid;

	/* Check whether CPU hotplug is supported. */
	if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
		return (ENOTSUP);
	}

	cpuid = reqp->req.cpu_remove.cpuid;

	/* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */
	iflag = intr_clear();
	lock_set(&apic_ioapic_lock);

	if (!apic_cpu_in_range(cpuid)) {
		lock_clear(&apic_ioapic_lock);
		intr_restore(iflag);
		cmn_err(CE_WARN,
		    "!apic: cpuid %d doesn't exist in apic_cpus array.",
		    cpuid);
		return (ENODEV);
	}
	ASSERT((apic_cpus[cpuid].aci_status & APIC_CPU_FREE) == 0);

	/* Undo the ACPI processor-object binding first. */
	if (ACPI_FAILURE(acpica_unmap_cpu(cpuid))) {
		lock_clear(&apic_ioapic_lock);
		intr_restore(iflag);
		return (ENOENT);
	}

	if (cpuid == apic_nproc - 1) {
		/*
		 * We are removing the highest numbered cpuid so we need to
		 * find the next highest cpuid as the new value for apic_nproc.
		 */
		for (i = apic_nproc; i > 0; i--) {
			if (CPU_IN_SET(apic_cpumask, i - 1)) {
				apic_nproc = i;
				break;
			}
		}
		/* at least one CPU left */
		ASSERT(i > 0);
	}
	CPUSET_ATOMIC_DEL(apic_cpumask, cpuid);
	/* mark slot as free and keep it in the dirty cache */
	apic_cpus[cpuid].aci_status = APIC_CPU_FREE | APIC_CPU_DIRTY;

	lock_clear(&apic_ioapic_lock);
	intr_restore(iflag);

	return (0);
}
1060 
1061 /*
1062  * Return the number of APIC clock ticks elapsed for 8245 to decrement
1063  * (APIC_TIME_COUNT + pit_ticks_adj) ticks.
1064  */
/*
 * Calibrate the local APIC timer against the 8254 PIT.  addr points at
 * the (possibly remapped) local APIC register block; the APIC current
 * count register is sampled at the start and end of a measured PIT
 * interval.  The residual PIT ticks beyond APIC_TIME_COUNT are returned
 * through *pit_ticks_adj, and the elapsed APIC ticks are the return value.
 * Runs with interrupts disabled so the sampling loops are not perturbed.
 */
uint_t
apic_calibrate(volatile uint32_t *addr, uint16_t *pit_ticks_adj)
{
	uint8_t		pit_tick_lo;
	uint16_t	pit_tick, target_pit_tick;
	uint32_t	start_apic_tick, end_apic_tick;
	ulong_t		iflag;
	uint32_t	reg;

	/*
	 * Convert addr into a register offset relative to apicadr so it can
	 * be read through apic_reg_ops (works for both xAPIC and x2APIC).
	 */
	reg = addr + APIC_CURR_COUNT - apicadr;

	iflag = intr_clear();

	/*
	 * Sample PIT counter 0 (low byte then high byte) until it is safely
	 * away from wrap-around and low-byte carry boundaries.
	 */
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick < APIC_TIME_MIN ||
	    pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX);

	/*
	 * Wait for the 8254 to decrement by 5 ticks to ensure
	 * we didn't start in the middle of a tick.
	 * Compare with 0x10 for the wrap around case.
	 */
	target_pit_tick = pit_tick - 5;
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);

	/* First APIC sample, aligned to a PIT tick boundary. */
	start_apic_tick = apic_reg_ops->apic_read(reg);

	/*
	 * Wait for the 8254 to decrement by
	 * (APIC_TIME_COUNT + pit_ticks_adj) ticks
	 */
	target_pit_tick = pit_tick - APIC_TIME_COUNT;
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);

	end_apic_tick = apic_reg_ops->apic_read(reg);

	/* PIT ticks we overshot past APIC_TIME_COUNT. */
	*pit_ticks_adj = target_pit_tick - pit_tick;

	intr_restore(iflag);

	/* APIC count register counts down, so start - end is elapsed. */
	return (start_apic_tick - end_apic_tick);
}
1115 
1116 /*
1117  * Initialise the APIC timer on the local APIC of CPU 0 to the desired
1118  * frequency.  Note at this stage in the boot sequence, the boot processor
1119  * is the only active processor.
1120  * hertz value of 0 indicates a one-shot mode request.  In this case
1121  * the function returns the resolution (in nanoseconds) for the hardware
1122  * timer interrupt.  If one-shot mode capability is not available,
1123  * the return value will be 0. apic_enable_oneshot is a global switch
1124  * for disabling the functionality.
1125  * A non-zero positive value for hertz indicates a periodic mode request.
1126  * In this case the hardware will be programmed to generate clock interrupts
1127  * at hertz frequency and returns the resolution of interrupts in
1128  * nanosecond.
1129  */
1130 
1131 int
1132 apic_clkinit(int hertz)
1133 {
1134         int             ret;
1135 
1136         apic_int_busy_mark = (apic_int_busy_mark *
1137             apic_sample_factor_redistribution) / 100;
1138         apic_int_free_mark = (apic_int_free_mark *
1139             apic_sample_factor_redistribution) / 100;
1140         apic_diff_for_redistribution = (apic_diff_for_redistribution *
1141             apic_sample_factor_redistribution) / 100;
1142 
1143         ret = apic_timer_init(hertz);
1144         return (ret);
1145 
1146 }
1147 
1148 /*
1149  * apic_preshutdown:
1150  * Called early in shutdown whilst we can still access filesystems to do
1151  * things like loading modules which will be required to complete shutdown
1152  * after filesystems are all unmounted.
1153  */
1154 void
1155 apic_preshutdown(int cmd, int fcn)
1156 {
1157         APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n",
1158             cmd, fcn, apic_poweroff_method, apic_enable_acpi));
1159 }
1160 
/*
 * Shut down the APIC subsystem as part of system shutdown/reboot/poweroff.
 * Sends an NMI to the other CPUs, disables I/O APIC redirection and the
 * local APIC, and — for A_SHUTDOWN with AD_POWEROFF — attempts to power
 * the machine off via the configured apic_poweroff_method or ACPI.
 */
void
apic_shutdown(int cmd, int fcn)
{
	int restarts, attempts;
	int i;
	uchar_t byte;
	ulong_t iflag;

	/* Tear down HPET/ACPI timer state first. */
	hpet_acpi_fini();

	/* Send NMI to all CPUs except self to do per processor shutdown */
	iflag = intr_clear();
#ifdef	DEBUG
	APIC_AV_PENDING_SET();
#else
	if (apic_mode == LOCAL_APIC)
		APIC_AV_PENDING_SET();
#endif /* DEBUG */
	apic_shutdown_processors = 1;
	apic_reg_ops->apic_write(APIC_INT_CMD1,
	    AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF);

	/* restore cmos shutdown byte before reboot */
	if (apic_cmos_ssb_set) {
		outb(CMOS_ADDR, SSB);
		outb(CMOS_DATA, 0);
	}

	ioapic_disable_redirection();

	/*	disable apic mode if imcr present	*/
	if (apic_imcrp) {
		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC);
	}

	apic_disable_local_apic();

	intr_restore(iflag);

	/* remainder of function is for shutdown cases only */
	if (cmd != A_SHUTDOWN)
		return;

	/*
	 * Switch system back into Legacy-Mode if using ACPI and
	 * not powering-off.  Some BIOSes need to remain in ACPI-mode
	 * for power-off to succeed (Dell Dimension 4600)
	 * Do not disable ACPI while doing fastreboot
	 */
	if (apic_enable_acpi && fcn != AD_POWEROFF && fcn != AD_FASTREBOOT)
		(void) AcpiDisable();

	if (fcn == AD_FASTREBOOT) {
		/* Pulse INIT on all other CPUs to park them for fastreboot. */
		apic_reg_ops->apic_write(APIC_INT_CMD1,
		    AV_ASSERT | AV_RESET | AV_SH_ALL_EXCSELF);
	}

	/* remainder of function is for shutdown+poweroff case only */
	if (fcn != AD_POWEROFF)
		return;

	switch (apic_poweroff_method) {
		case APIC_POWEROFF_VIA_RTC:

			/* select the extended NVRAM bank in the RTC */
			outb(CMOS_ADDR, RTC_REGA);
			byte = inb(CMOS_DATA);
			outb(CMOS_DATA, (byte | EXT_BANK));

			outb(CMOS_ADDR, PFR_REG);

			/* for Predator must toggle the PAB bit */
			byte = inb(CMOS_DATA);

			/*
			 * clear power active bar, wakeup alarm and
			 * kickstart
			 */
			byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG);
			outb(CMOS_DATA, byte);

			/* delay before next write */
			drv_usecwait(1000);

			/* for S40 the following would suffice */
			byte = inb(CMOS_DATA);

			/* power active bar control bit */
			byte |= PAB_CBIT;
			outb(CMOS_DATA, byte);

			break;

		case APIC_POWEROFF_VIA_ASPEN_BMC:
			/*
			 * Feed the poweroff command sequence to the Aspen
			 * BMC, retrying the whole sequence up to twice if
			 * the controller stays busy.
			 */
			restarts = 0;
restart_aspen_bmc:
			if (++restarts == 3)
				break;
			attempts = 0;
			/* Wait for the BMC interface to go non-busy. */
			do {
				byte = inb(MISMIC_FLAG_REGISTER);
				byte &= MISMIC_BUSY_MASK;
				if (byte != 0) {
					drv_usecwait(1000);
					if (attempts >= 3)
						goto restart_aspen_bmc;
					++attempts;
				}
			} while (byte != 0);
			outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS);
			byte = inb(MISMIC_FLAG_REGISTER);
			byte |= 0x1;
			outb(MISMIC_FLAG_REGISTER, byte);
			i = 0;
			/* Write each control/data pair of the command table. */
			for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0]));
			    i++) {
				attempts = 0;
				do {
					byte = inb(MISMIC_FLAG_REGISTER);
					byte &= MISMIC_BUSY_MASK;
					if (byte != 0) {
						drv_usecwait(1000);
						if (attempts >= 3)
							goto restart_aspen_bmc;
						++attempts;
					}
				} while (byte != 0);
				outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl);
				outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data);
				byte = inb(MISMIC_FLAG_REGISTER);
				byte |= 0x1;
				outb(MISMIC_FLAG_REGISTER, byte);
			}
			break;

		case APIC_POWEROFF_VIA_SITKA_BMC:
			/* Same retry structure as above, for the Sitka BMC. */
			restarts = 0;
restart_sitka_bmc:
			if (++restarts == 3)
				break;
			attempts = 0;
			/* Wait until the SMS interface is idle. */
			do {
				byte = inb(SMS_STATUS_REGISTER);
				byte &= SMS_STATE_MASK;
				if ((byte == SMS_READ_STATE) ||
				    (byte == SMS_WRITE_STATE)) {
					drv_usecwait(1000);
					if (attempts >= 3)
						goto restart_sitka_bmc;
					++attempts;
				}
			} while ((byte == SMS_READ_STATE) ||
			    (byte == SMS_WRITE_STATE));
			outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS);
			i = 0;
			/* Write the command table, waiting out the input buffer. */
			for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0]));
			    i++) {
				attempts = 0;
				do {
					byte = inb(SMS_STATUS_REGISTER);
					byte &= SMS_IBF_MASK;
					if (byte != 0) {
						drv_usecwait(1000);
						if (attempts >= 3)
							goto restart_sitka_bmc;
						++attempts;
					}
				} while (byte != 0);
				outb(sitka_bmc[i].port, sitka_bmc[i].data);
			}
			break;

		case APIC_POWEROFF_NONE:

			/* If no APIC direct method, we will try using ACPI */
			if (apic_enable_acpi) {
				if (acpi_poweroff() == 1)
					return;
			} else
				return;

			break;
	}
	/*
	 * Wait a limited time here for power to go off.
	 * If the power does not go off, then there was a
	 * problem and we should continue to the halt which
	 * prints a message for the user to press a key to
	 * reboot.
	 */
	drv_usecwait(7000000); /* wait seven seconds */

}
1355 
1356 cyclic_id_t apic_cyclic_id;
1357 
1358 /*
1359  * The following functions are in the platform specific file so that they
1360  * can be different functions depending on whether we are running on
1361  * bare metal or a hypervisor.
1362  */
1363 
1364 /*
1365  * map an apic for memory-mapped access
1366  */
1367 uint32_t *
1368 mapin_apic(uint32_t addr, size_t len, int flags)
1369 {
1370         return ((void *)psm_map_phys(addr, len, flags));
1371 }
1372 
/*
 * Map an I/O APIC for memory-mapped access; identical to the local APIC
 * mapping on bare metal (a hypervisor port may differ, per the comment
 * above these functions).
 */
uint32_t *
mapin_ioapic(uint32_t addr, size_t len, int flags)
{
	return (mapin_apic(addr, len, flags));
}
1378 
1379 /*
1380  * unmap an apic
1381  */
/*
 * Undo a mapin_apic() mapping of len bytes at addr.
 */
void
mapout_apic(caddr_t addr, size_t len)
{
	psm_unmap_phys(addr, len);
}
1387 
/*
 * Undo a mapin_ioapic() mapping of len bytes at addr.
 */
void
mapout_ioapic(caddr_t addr, size_t len)
{
	mapout_apic(addr, len);
}
1393 
/*
 * Read I/O APIC register reg via the indirect index/data register window.
 * NOTE(review): the select-then-read pair is not serialized here; callers
 * are presumably expected to hold the appropriate lock — confirm.
 */
uint32_t
ioapic_read(int ioapic_ix, uint32_t reg)
{
	volatile uint32_t *ioapic;

	ioapic = apicioadr[ioapic_ix];
	ioapic[APIC_IO_REG] = reg;	/* select register */
	return (ioapic[APIC_IO_DATA]);	/* read selected register */
}
1403 
/*
 * Write value to I/O APIC register reg via the indirect index/data
 * register window.  Same serialization caveat as ioapic_read().
 */
void
ioapic_write(int ioapic_ix, uint32_t reg, uint32_t value)
{
	volatile uint32_t *ioapic;

	ioapic = apicioadr[ioapic_ix];
	ioapic[APIC_IO_REG] = reg;	/* select register */
	ioapic[APIC_IO_DATA] = value;	/* write selected register */
}
1413 
/*
 * Write value (a vector) to the I/O APIC's EOI register, which is a
 * direct register rather than part of the index/data window.
 */
void
ioapic_write_eoi(int ioapic_ix, uint32_t value)
{
	volatile uint32_t *ioapic;

	ioapic = apicioadr[ioapic_ix];
	ioapic[APIC_IO_EOI] = value;
}
1422 
1423 /*
1424  * Round-robin algorithm to find the next CPU with interrupts enabled.
1425  * It can't share the same static variable apic_next_bind_cpu with
1426  * apic_get_next_bind_cpu(), since that will cause all interrupts to be
1427  * bound to CPU1 at boot time.  During boot, only CPU0 is online with
1428  * interrupts enabled when apic_get_next_bind_cpu() and apic_find_cpu()
1429  * are called.  However, the pcplusmp driver assumes that there will be
1430  * boot_ncpus CPUs configured eventually so it tries to distribute all
1431  * interrupts among CPU0 - CPU[boot_ncpus - 1].  Thus to prevent all
 * interrupts being targeted at CPU1, we need to use a dedicated static
1433  * variable for find_next_cpu() instead of sharing apic_next_bind_cpu.
1434  */
1435 
/*
 * Round-robin over apic_cpus and return the next cpuid whose aci_status
 * has flag set; see the block comment above for why this keeps its own
 * static cursor.  NOTE(review): if no CPU has the flag, the loop exhausts
 * and the current cursor value is returned anyway; only the DEBUG-build
 * ASSERT catches that case.
 */
processorid_t
apic_find_cpu(int flag)
{
	int i;
	/* Persistent round-robin cursor (see comment above). */
	static processorid_t acid = 0;

	/* Find the first CPU with the passed-in flag set */
	for (i = 0; i < apic_nproc; i++) {
		/* Advance first, wrapping at apic_nproc. */
		if (++acid >= apic_nproc) {
			acid = 0;
		}
		if (apic_cpu_in_range(acid) &&
		    (apic_cpus[acid].aci_status & flag)) {
			break;
		}
	}

	ASSERT((apic_cpus[acid].aci_status & flag) != 0);
	return (acid);
}
1456 
/*
 * Initialize interrupt-remapping support (if an IOMMU driver registered
 * psm_vt_ops) and, when remapping is available, optionally switch the
 * local APICs into x2APIC mode.  apic_mode here is the requested mode,
 * passed through to the remapping driver's init routine.
 */
void
apic_intrmap_init(int apic_mode)
{
	int suppress_brdcst_eoi = 0;

	/*
	 * Intel Software Developer's Manual 3A, 10.12.7:
	 *
	 * Routing of device interrupts to local APIC units operating in
	 * x2APIC mode requires use of the interrupt-remapping architecture
	 * specified in the Intel Virtualization Technology for Directed
	 * I/O, Revision 1.3.  Because of this, BIOS must enumerate support
	 * for and software must enable this interrupt remapping with
	 * Extended Interrupt Mode Enabled before enabling x2APIC mode in
	 * the local APIC units.
	 *
	 *
	 * In other words, to use the APIC in x2APIC mode, we need interrupt
	 * remapping.  Since we don't start up the IOMMU by default, we
	 * won't be able to do any interrupt remapping and therefore have to
	 * use the APIC in traditional 'local APIC' mode with memory mapped
	 * I/O.
	 */

	if (psm_vt_ops != NULL) {
		if (((apic_intrmap_ops_t *)psm_vt_ops)->
		    apic_intrmap_init(apic_mode) == DDI_SUCCESS) {

			apic_vt_ops = psm_vt_ops;

			/*
			 * We leverage the interrupt remapping engine to
			 * suppress broadcast EOI; thus we must send the
			 * directed EOI with the directed-EOI handler.
			 */
			if (apic_directed_EOI_supported() == 0) {
				suppress_brdcst_eoi = 1;
			}

			apic_vt_ops->apic_intrmap_enable(suppress_brdcst_eoi);

			if (apic_detect_x2apic()) {
				apic_enable_x2apic();
			}

			/*
			 * Checked again (rather than reusing
			 * suppress_brdcst_eoi) — NOTE(review): presumably the
			 * answer can change after enabling x2APIC; confirm.
			 */
			if (apic_directed_EOI_supported() == 0) {
				apic_set_directed_EOI_handler();
			}
		}
	}
}
1508 
/*ARGSUSED*/
/*
 * Default (no interrupt remapping) RDT fixup: shift the destination APIC
 * id in ir_hi into its field position in the redirection table entry.
 */
static void
apic_record_ioapic_rdt(void *intrmap_private, ioapic_rdt_t *irdt)
{
	irdt->ir_hi <<= APIC_ID_BIT_OFFSET;
}
1515 
/*ARGSUSED*/
/*
 * Default (no interrupt remapping) MSI fixup: build the MSI address from
 * the destination id already in mr_addr (fixed redirection, physical
 * destination mode) and mark the data as edge-triggered.
 */
static void
apic_record_msi(void *intrmap_private, msi_regs_t *mregs)
{
	mregs->mr_addr = MSI_ADDR_HDR |
	    (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
	    (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT) |
	    (mregs->mr_addr << MSI_ADDR_DEST_SHIFT);
	mregs->mr_data = (MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) |
	    mregs->mr_data;
}
1527 
1528 /*
1529  * Functions from apic_introp.c
1530  *
 * These functions are used by apic_intr_ops().
1532  */
1533 
1534 /*
1535  * MSI support flag:
1536  * reflects whether MSI is supported at APIC level
1537  * it can also be patched through /etc/system
1538  *
1539  *  0 = default value - don't know and need to call apic_check_msi_support()
1540  *      to find out then set it accordingly
1541  *  1 = supported
1542  * -1 = not supported
1543  */
/* See the flag-value legend in the comment block above. */
int	apic_support_msi = 0;

/* Multiple vector support for MSI-X (1 = enabled, patchable) */
int	apic_msix_enable = 1;

/* Multiple vector support for MSI (1 = enabled, patchable) */
int	apic_multi_msi_enable = 1;
1551 
1552 /*
1553  * Check whether the system supports MSI.
1554  *
1555  * MSI is required for PCI-E and for PCI versions later than 2.2, so if we find
1556  * a PCI-E bus or we find a PCI bus whose version we know is >= 2.2, then we
1557  * return PSM_SUCCESS to indicate this system supports MSI.
1558  *
1559  * (Currently the only way we check whether a given PCI bus supports >= 2.2 is
1560  * by detecting if we are running inside the KVM hypervisor, which guarantees
1561  * this version number.)
1562  */
1563 int
1564 apic_check_msi_support()
1565 {
1566         dev_info_t *cdip;
1567         char dev_type[16];
1568         int dev_len;
1569 
1570         DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support:\n"));
1571 
1572         /*
1573          * check whether the first level children of root_node have
1574          * PCI-E or PCI capability.
1575          */
1576         for (cdip = ddi_get_child(ddi_root_node()); cdip != NULL;
1577             cdip = ddi_get_next_sibling(cdip)) {
1578 
1579                 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: cdip: 0x%p,"
1580                     " driver: %s, binding: %s, nodename: %s\n", (void *)cdip,
1581                     ddi_driver_name(cdip), ddi_binding_name(cdip),
1582                     ddi_node_name(cdip)));
1583                 dev_len = sizeof (dev_type);
1584                 if (ddi_getlongprop_buf(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
1585                     "device_type", (caddr_t)dev_type, &dev_len)
1586                     != DDI_PROP_SUCCESS)
1587                         continue;
1588                 if (strcmp(dev_type, "pciex") == 0)
1589                         return (PSM_SUCCESS);
1590                 if (strcmp(dev_type, "pci") == 0 && get_hwenv() == HW_KVM)
1591                         return (PSM_SUCCESS);
1592         }
1593 
1594         /* MSI is not supported on this system */
1595         DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: no 'pciex' "
1596             "device_type found\n"));
1597         return (PSM_FAILURE);
1598 }
1599 
1600 /*
1601  * apic_pci_msi_unconfigure:
1602  *
1603  * This and next two interfaces are copied from pci_intr_lib.c
1604  * Do ensure that these two files stay in sync.
1605  * These needed to be copied over here to avoid a deadlock situation on
1606  * certain mp systems that use MSI interrupts.
1607  *
1608  * IMPORTANT regards next three interfaces:
1609  * i) are called only for MSI/X interrupts.
1610  * ii) called with interrupts disabled, and must not block
1611  */
/*
 * Clear the MSI or MSI-X programming (address/data) for interrupt number
 * inum on rdip.  Called with interrupts disabled; must not block (see the
 * IMPORTANT comment above).
 */
void
apic_pci_msi_unconfigure(dev_info_t *rdip, int type, int inum)
{
	ushort_t		msi_ctrl;
	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);

	ASSERT((handle != NULL) && (cap_ptr != 0));

	if (type == DDI_INTR_TYPE_MSI) {
		/* Clear multiple-message enable and the address register. */
		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
		msi_ctrl &= (~PCI_MSI_MME_MASK);
		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
		pci_config_put32(handle, cap_ptr + PCI_MSI_ADDR_OFFSET, 0);

		/* Data/upper-address registers differ for 64-bit MSI. */
		if (msi_ctrl &  PCI_MSI_64BIT_MASK) {
			pci_config_put16(handle,
			    cap_ptr + PCI_MSI_64BIT_DATA, 0);
			pci_config_put32(handle,
			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, 0);
		} else {
			pci_config_put16(handle,
			    cap_ptr + PCI_MSI_32BIT_DATA, 0);
		}

	} else if (type == DDI_INTR_TYPE_MSIX) {
		uintptr_t	off;
		uint32_t	mask;
		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(rdip);

		ASSERT(msix_p != NULL);

		/* Offset into "inum"th entry in the MSI-X table & mask it */
		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;

		/* Set the per-vector mask bit before clearing the entry. */
		mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);

		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask | 1));

		/* Offset into the "inum"th entry in the MSI-X table */
		off = (uintptr_t)msix_p->msix_tbl_addr +
		    (inum * PCI_MSIX_VECTOR_SIZE);

		/* Reset the "data" and "addr" bits */
		ddi_put32(msix_p->msix_tbl_hdl,
		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), 0);
		ddi_put64(msix_p->msix_tbl_hdl, (uint64_t *)off, 0);
	}
}
1662 
1663 /*
1664  * apic_pci_msi_disable_mode:
1665  */
1666 void
1667 apic_pci_msi_disable_mode(dev_info_t *rdip, int type)
1668 {
1669         ushort_t                msi_ctrl;
1670         int                     cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
1671         ddi_acc_handle_t        handle = i_ddi_get_pci_config_handle(rdip);
1672 
1673         ASSERT((handle != NULL) && (cap_ptr != 0));
1674 
1675         if (type == DDI_INTR_TYPE_MSI) {
1676                 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1677                 if (!(msi_ctrl & PCI_MSI_ENABLE_BIT))
1678                         return;
1679 
1680                 msi_ctrl &= ~PCI_MSI_ENABLE_BIT;    /* MSI disable */
1681                 pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
1682 
1683         } else if (type == DDI_INTR_TYPE_MSIX) {
1684                 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
1685                 if (msi_ctrl & PCI_MSIX_ENABLE_BIT) {
1686                         msi_ctrl &= ~PCI_MSIX_ENABLE_BIT;
1687                         pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
1688                             msi_ctrl);
1689                 }
1690         }
1691 }
1692 
/*
 * Return the local APIC id recorded for logical cpuid.  cpuid must be a
 * configured CPU (DEBUG-asserted only).
 */
uint32_t
apic_get_localapicid(uint32_t cpuid)
{
	ASSERT(cpuid < apic_nproc && apic_cpus != NULL);

	return (apic_cpus[cpuid].aci_local_id);
}
1700 
/*
 * Return the APIC id of the I/O APIC at ioapicindex (must be below
 * MAX_IO_APIC; DEBUG-asserted only).
 */
uchar_t
apic_get_ioapicid(uchar_t ioapicindex)
{
	ASSERT(ioapicindex < MAX_IO_APIC);

	return (apic_io_id[ioapicindex]);
}