1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31
32 #include <mach_kdp.h>
33 #include <kdp/kdp_internal.h>
34 #include <mach_ldebug.h>
35
36 #include <mach/mach_types.h>
37 #include <mach/kern_return.h>
38
39 #include <kern/kern_types.h>
40 #include <kern/startup.h>
41 #include <kern/timer_queue.h>
42 #include <kern/processor.h>
43 #include <kern/cpu_number.h>
44 #include <kern/cpu_data.h>
45 #include <kern/assert.h>
46 #include <kern/lock_group.h>
47 #include <kern/machine.h>
48 #include <kern/pms.h>
49 #include <kern/misc_protos.h>
50 #include <kern/timer_call.h>
51 #include <kern/zalloc.h>
52 #include <kern/queue.h>
53 #include <kern/monotonic.h>
54 #include <prng/random.h>
55
56 #include <vm/vm_map.h>
57 #include <vm/vm_kern.h>
58
59 #include <i386/bit_routines.h>
60 #include <i386/proc_reg.h>
61 #include <i386/cpu_threads.h>
62 #include <i386/mp_desc.h>
63 #include <i386/misc_protos.h>
64 #include <i386/trap_internal.h>
65 #include <i386/postcode.h>
66 #include <i386/machine_routines.h>
67 #include <i386/mp.h>
68 #include <i386/mp_events.h>
69 #include <i386/lapic.h>
70 #include <i386/cpuid.h>
71 #include <i386/fpu.h>
72 #include <i386/machine_cpu.h>
73 #include <i386/pmCPU.h>
74 #if CONFIG_MCA
75 #include <i386/machine_check.h>
76 #endif
77 #include <i386/acpi.h>
78
79 #include <sys/kdebug.h>
80
81 #include <console/serial_protos.h>
82
83 #if KPERF
84 #include <kperf/kptimer.h>
85 #endif /* KPERF */
86
87 #if MP_DEBUG
88 #define PAUSE delay(1000000)
89 #define DBG(x...) kprintf(x)
90 #else
91 #define DBG(x...)
92 #define PAUSE
93 #endif /* MP_DEBUG */
94
95 /* Debugging/test trace events: */
96 #define TRACE_MP_TLB_FLUSH MACHDBG_CODE(DBG_MACH_MP, 0)
97 #define TRACE_MP_CPUS_CALL MACHDBG_CODE(DBG_MACH_MP, 1)
98 #define TRACE_MP_CPUS_CALL_LOCAL MACHDBG_CODE(DBG_MACH_MP, 2)
99 #define TRACE_MP_CPUS_CALL_ACTION MACHDBG_CODE(DBG_MACH_MP, 3)
100 #define TRACE_MP_CPUS_CALL_NOBUF MACHDBG_CODE(DBG_MACH_MP, 4)
101 #define TRACE_MP_CPU_FAST_START MACHDBG_CODE(DBG_MACH_MP, 5)
102 #define TRACE_MP_CPU_START MACHDBG_CODE(DBG_MACH_MP, 6)
103 #define TRACE_MP_CPU_DEACTIVATE MACHDBG_CODE(DBG_MACH_MP, 7)
104
105 #define ABS(v) (((v) > 0)?(v):-(v))
106
107 void slave_boot_init(void);
108 void i386_cpu_IPI(int cpu);
109
110 #if MACH_KDP
111 static void mp_kdp_wait(boolean_t flush, boolean_t isNMI);
112 #endif /* MACH_KDP */
113
114 #if MACH_KDP
115 static boolean_t cpu_signal_pending(int cpu, mp_event_t event);
116 #endif /* MACH_KDP */
117 static int NMIInterruptHandler(x86_saved_state_t *regs);
118
119 boolean_t smp_initialized = FALSE;
120 uint32_t TSC_sync_margin = 0xFFF;
121 volatile boolean_t force_immediate_debugger_NMI = FALSE;
122 volatile boolean_t pmap_tlb_flush_timeout = FALSE;
123 #if DEBUG || DEVELOPMENT
124 boolean_t mp_interrupt_watchdog_enabled = TRUE;
125 uint32_t mp_interrupt_watchdog_events = 0;
126 #endif
127
128 SIMPLE_LOCK_DECLARE(debugger_callback_lock, 0);
129 struct debugger_callback *debugger_callback = NULL;
130
131 static LCK_GRP_DECLARE(smp_lck_grp, "i386_smp");
132 static LCK_MTX_DECLARE(mp_cpu_boot_lock, &smp_lck_grp);
133
134 /* Variables needed for MP rendezvous. */
135 SIMPLE_LOCK_DECLARE(mp_rv_lock, 0);
136 static void (*mp_rv_setup_func)(void *arg);
137 static void (*mp_rv_action_func)(void *arg);
138 static void (*mp_rv_teardown_func)(void *arg);
139 static void *mp_rv_func_arg;
140 static volatile int mp_rv_ncpus;
141 /* Cache-aligned barriers: */
142 static volatile long mp_rv_entry __attribute__((aligned(64)));
143 static volatile long mp_rv_exit __attribute__((aligned(64)));
144 static volatile long mp_rv_complete __attribute__((aligned(64)));
145
146 volatile uint64_t debugger_entry_time;
147 volatile uint64_t debugger_exit_time;
148 #if MACH_KDP
149 #include <kdp/kdp.h>
150 extern int kdp_snapshot;
151 static struct _kdp_xcpu_call_func {
152 kdp_x86_xcpu_func_t func;
153 void *arg0, *arg1;
154 volatile long ret;
155 volatile uint16_t cpu;
156 } kdp_xcpu_call_func = {
157 .cpu = KDP_XCPU_NONE
158 };
159
160 #endif
161
162 /* Variables needed for MP broadcast. */
163 static void (*mp_bc_action_func)(void *arg);
164 static void *mp_bc_func_arg;
165 static int mp_bc_ncpus;
166 static volatile long mp_bc_count;
167 static LCK_MTX_DECLARE(mp_bc_lock, &smp_lck_grp);
168 static volatile int debugger_cpu = -1;
169 volatile long NMIPI_acks = 0;
170 volatile long NMI_count = 0;
171 static int vector_timed_out;
172
173 NMI_reason_t NMI_panic_reason = NONE;
174 extern void NMI_cpus(void);
175
176 static void mp_cpus_call_init(void);
177 static void mp_cpus_call_action(void);
178 static void mp_call_PM(void);
179
180 char mp_slave_stack[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); // Temp stack for slave init
181
182 /* PAL-related routines */
183 boolean_t i386_smp_init(int nmi_vector, i386_intr_func_t nmi_handler,
184 int ipi_vector, i386_intr_func_t ipi_handler);
185 void i386_start_cpu(int lapic_id, int cpu_num);
186 void i386_send_NMI(int cpu);
187 void NMIPI_enable(boolean_t);
188
189 #define NUM_CPU_WARM_CALLS 20
190 struct timer_call cpu_warm_call_arr[NUM_CPU_WARM_CALLS];
191 queue_head_t cpu_warm_call_list;
192 decl_simple_lock_data(static, cpu_warm_lock);
193
194 typedef struct cpu_warm_data {
195 timer_call_t cwd_call;
196 uint64_t cwd_deadline;
197 int cwd_result;
198 } *cpu_warm_data_t;
199
200 static void cpu_prewarm_init(void);
201 static void cpu_warm_timer_call_func(timer_call_param_t p0, timer_call_param_t p1);
202 static void _cpu_warm_setup(void *arg);
203 static timer_call_t grab_warm_timer_call(void);
204 static void free_warm_timer_call(timer_call_t call);
205
void
smp_init(void)
{
	console_init();

	/*
	 * Install the platform NMI and inter-processor interrupt handlers.
	 * If the PAL layer fails to set these up, SMP is unavailable:
	 * leave smp_initialized FALSE and bail.
	 */
	if (!i386_smp_init(LAPIC_NMI_INTERRUPT, NMIInterruptHandler,
	    LAPIC_VECTOR(INTERPROCESSOR), cpu_signal_handler)) {
		return;
	}

	cpu_thread_init();

	DBGLOG_CPU_INIT(master_cpu);

	/* Set up the cross-cpu call queues, including the boot cpu's own. */
	mp_cpus_call_init();
	mp_cpus_call_cpu_init(master_cpu);

#if DEBUG || DEVELOPMENT
	/* "interrupt_watchdog" boot-arg overrides the default-on watchdog. */
	if (PE_parse_boot_argn("interrupt_watchdog",
	    &mp_interrupt_watchdog_enabled,
	    sizeof(mp_interrupt_watchdog_enabled))) {
		kprintf("Interrupt watchdog %sabled\n",
		    mp_interrupt_watchdog_enabled ? "en" : "dis");
	}
#endif

	/*
	 * "TSC_sync_margin" boot-arg overrides the margin used when checking
	 * TSC synchronization of a newly-started cpu (0 disables checking).
	 * When running under a VMM the check is disabled by default,
	 * presumably because the hypervisor may skew guest TSC values.
	 */
	if (PE_parse_boot_argn("TSC_sync_margin",
	    &TSC_sync_margin, sizeof(TSC_sync_margin))) {
		kprintf("TSC sync Margin 0x%x\n", TSC_sync_margin);
	} else if (cpuid_vmm_present()) {
		kprintf("TSC sync margin disabled\n");
		TSC_sync_margin = 0;
	}
	smp_initialized = TRUE;

	cpu_prewarm_init();

	return;
}
245
246 typedef struct {
247 int target_cpu;
248 int target_lapic;
249 int starter_cpu;
250 } processor_start_info_t;
251 static processor_start_info_t start_info __attribute__((aligned(64)));
252
253 /*
254 * Cache-alignment is to avoid cross-cpu false-sharing interference.
255 */
256 static volatile long tsc_entry_barrier __attribute__((aligned(64)));
257 static volatile long tsc_exit_barrier __attribute__((aligned(64)));
258 static volatile uint64_t tsc_target __attribute__((aligned(64)));
259
260 /*
261 * Poll a CPU to see when it has marked itself as running.
262 */
263 static void
mp_wait_for_cpu_up(int slot_num,unsigned int iters,unsigned int usecdelay)264 mp_wait_for_cpu_up(int slot_num, unsigned int iters, unsigned int usecdelay)
265 {
266 while (iters-- > 0) {
267 if (cpu_datap(slot_num)->cpu_running) {
268 break;
269 }
270 delay(usecdelay);
271 }
272 }
273
/*
 * Quickly bring a CPU back online which has been halted.
 * Returns KERN_SUCCESS if the cpu was eligible for a fast (power-management)
 * restart and is observed running again; otherwise the error from
 * pmCPUExitHalt(), or KERN_FAILURE if the cpu never marked itself running
 * (in which case the caller should fall back to the slow start path).
 */
kern_return_t
intel_startCPU_fast(int slot_num)
{
	kern_return_t rc;

	/*
	 * Try to perform a fast restart
	 */
	rc = pmCPUExitHalt(slot_num);
	if (rc != KERN_SUCCESS) {
		/*
		 * The CPU was not eligible for a fast restart.
		 */
		return rc;
	}

	KERNEL_DEBUG_CONSTANT(
		TRACE_MP_CPU_FAST_START | DBG_FUNC_START,
		slot_num, 0, 0, 0, 0);

	/*
	 * Wait until the CPU is back online.
	 * Preemption is disabled so the polling loop isn't migrated off-cpu.
	 */
	mp_disable_preemption();

	/*
	 * We use short pauses (1us) for low latency.  30,000 iterations is
	 * longer than a full restart would require so it should be more
	 * than long enough.
	 */

	mp_wait_for_cpu_up(slot_num, 30000, 1);
	mp_enable_preemption();

	KERNEL_DEBUG_CONSTANT(
		TRACE_MP_CPU_FAST_START | DBG_FUNC_END,
		slot_num, cpu_datap(slot_num)->cpu_running, 0, 0, 0);

	/*
	 * Check to make sure that the CPU is really running.  If not,
	 * go through the slow path.
	 */
	if (cpu_datap(slot_num)->cpu_running) {
		return KERN_SUCCESS;
	} else {
		return KERN_FAILURE;
	}
}
325
/*
 * Target-cpu side of the TSC synchronization check: rendezvous with the
 * starter cpu (which runs the matching protocol in start_cpu()) and
 * publish this cpu's TSC in tsc_target for the starter to compare.
 */
static void
started_cpu(void)
{
	/* Here on the started cpu with cpu_running set TRUE */

	if (TSC_sync_margin &&
	    start_info.target_cpu == cpu_number()) {
		/*
		 * I've just started-up, synchronize again with the starter cpu
		 * and then snap my TSC.
		 */
		tsc_target = 0;
		/* Join the starter at the entry barrier (pre-set to 2). */
		atomic_decl(&tsc_entry_barrier, 1);
		while (tsc_entry_barrier != 0) {
			; /* spin for starter and target at barrier */
		}
		/* Both cpus leave together: snap TSC, then release the exit barrier. */
		tsc_target = rdtsc64();
		atomic_decl(&tsc_exit_barrier, 1);
	}
}
346
/*
 * Rendezvous action used by intel_startCPU(): the designated starter cpu
 * kicks the target cpu via i386_start_cpu(), waits for it to come up, and
 * (optionally) cross-checks the target's TSC against its own.  All other
 * cpus return immediately.  Runs with interrupts disabled (rendezvous).
 */
static void
start_cpu(void *arg)
{
	int             i = 1000;
	processor_start_info_t *psip = (processor_start_info_t *) arg;

	/* Ignore this if the current processor is not the starter */
	if (cpu_number() != psip->starter_cpu) {
		return;
	}

	DBG("start_cpu(%p) about to start cpu %d, lapic %d\n",
	    arg, psip->target_cpu, psip->target_lapic);

	KERNEL_DEBUG_CONSTANT(
		TRACE_MP_CPU_START | DBG_FUNC_START,
		psip->target_cpu,
		psip->target_lapic, 0, 0, 0);

	i386_start_cpu(psip->target_lapic, psip->target_cpu);

#ifdef  POSTCODE_DELAY
	/* Wait much longer if postcodes are displayed for a delay period. */
	i *= 10000;
#endif
	DBG("start_cpu(%p) about to wait for cpu %d\n",
	    arg, psip->target_cpu);

	/* Poll up to i*100 times with 100us pauses for the target to come up. */
	mp_wait_for_cpu_up(psip->target_cpu, i * 100, 100);

	KERNEL_DEBUG_CONSTANT(
		TRACE_MP_CPU_START | DBG_FUNC_END,
		psip->target_cpu,
		cpu_datap(psip->target_cpu)->cpu_running, 0, 0, 0);

	if (TSC_sync_margin &&
	    cpu_datap(psip->target_cpu)->cpu_running) {
		/*
		 * Compare the TSC from the started processor with ours.
		 * Report and log/panic if it diverges by more than
		 * TSC_sync_margin (TSC_SYNC_MARGIN) ticks. This margin
		 * can be overridden by boot-arg (with 0 meaning no checking).
		 */
		uint64_t        tsc_starter;
		int64_t         tsc_delta;
		/* Mirror of the protocol in started_cpu() on the target. */
		atomic_decl(&tsc_entry_barrier, 1);
		while (tsc_entry_barrier != 0) {
			; /* spin for both processors at barrier */
		}
		tsc_starter = rdtsc64();
		atomic_decl(&tsc_exit_barrier, 1);
		while (tsc_exit_barrier != 0) {
			; /* spin for target to store its TSC */
		}
		tsc_delta = tsc_target - tsc_starter;
		kprintf("TSC sync for cpu %d: 0x%016llx delta 0x%llx (%lld)\n",
		    psip->target_cpu, tsc_target, tsc_delta, tsc_delta);
#if DEBUG || DEVELOPMENT
		/*
		 * Stash the delta for inspection later, since we can no
		 * longer print/log it with interrupts disabled.
		 */
		cpu_datap(psip->target_cpu)->tsc_sync_delta = tsc_delta;
#endif
		if (ABS(tsc_delta) > (int64_t) TSC_sync_margin) {
#if DEBUG
			panic(
#else
			kprintf(
#endif
				"Unsynchronized  TSC for cpu %d: "
				"0x%016llx, delta 0x%llx\n",
				psip->target_cpu, tsc_target, tsc_delta);
		}
	}
}
423
/*
 * Full (slow-path) startup of the cpu in the given slot: initialize its
 * descriptor tables, then perform the start sequence under an all-cpu
 * rendezvous with interrupts disabled.  Serialized on mp_cpu_boot_lock
 * since all slaves share one boot stack.
 */
kern_return_t
intel_startCPU(
	int     slot_num)
{
	int             lapic = cpu_to_lapic[slot_num];
	boolean_t       istate;

	assert(lapic != -1);

	DBGLOG_CPU_INIT(slot_num);

	DBG("intel_startCPU(%d) lapic_id=%d\n", slot_num, lapic);
	DBG("IdlePTD(%p): 0x%x\n", &IdlePTD, (int) (uintptr_t)IdlePTD);

	/*
	 * Initialize (or re-initialize) the descriptor tables for this cpu.
	 * Propagate processor mode to slave.
	 */
	cpu_desc_init(cpu_datap(slot_num));

	/* Serialize use of the slave boot stack, etc. */
	lck_mtx_lock(&mp_cpu_boot_lock);

	istate = ml_set_interrupts_enabled(FALSE);
	if (slot_num == get_cpu_number()) {
		/* Asked to start ourselves: already running, nothing to do. */
		ml_set_interrupts_enabled(istate);
		lck_mtx_unlock(&mp_cpu_boot_lock);
		return KERN_SUCCESS;
	}

	start_info.starter_cpu  = cpu_number();
	start_info.target_cpu   = slot_num;
	start_info.target_lapic = lapic;
	/* Pre-arm the two-party TSC sync barriers used by start_cpu()/started_cpu(). */
	tsc_entry_barrier = 2;
	tsc_exit_barrier = 2;

	/*
	 * Perform the processor startup sequence with all running
	 * processors rendezvous'ed. This is required during periods when
	 * the cache-disable bit is set for MTRR/PAT initialization.
	 */
	mp_rendezvous_no_intrs(start_cpu, (void *) &start_info);

	start_info.target_cpu = 0;

	ml_set_interrupts_enabled(istate);
	lck_mtx_unlock(&mp_cpu_boot_lock);

	if (!cpu_datap(slot_num)->cpu_running) {
		kprintf("Failed to start CPU %02d\n", slot_num);
		printf("Failed to start CPU %02d, rebooting...\n", slot_num);
		delay(1000000);
		halt_cpu();
		/* NOTE(review): halt_cpu() presumably does not return; this
		 * KERN_SUCCESS appears unreachable -- confirm. */
		return KERN_SUCCESS;
	} else {
		kprintf("Started cpu %d (lapic id %08x)\n", slot_num, lapic);
		return KERN_SUCCESS;
	}
}
483
484 #if MP_DEBUG
485 cpu_signal_event_log_t *cpu_signal[MAX_CPUS];
486 cpu_signal_event_log_t *cpu_handle[MAX_CPUS];
487
488 MP_EVENT_NAME_DECL();
489
490 #endif /* MP_DEBUG */
491
/*
 * Handler for inter-processor interrupts: drains this cpu's signal word,
 * dispatching each pending event (KDP entry, TLB flush, cross-call,
 * PM call, AST check) until no bits remain set.  Always returns 0.
 *
 * Note: called with NULL state when polling for TLB flush and cross-calls.
 * In that polling mode only KDP/TLB/CALL/CALL_PM events are serviced;
 * the loop breaks before the AST check.
 */
int
cpu_signal_handler(x86_saved_state_t *regs)
{
#if !MACH_KDP
#pragma unused (regs)
#endif /* !MACH_KDP */
	int             my_cpu;
	volatile int    *my_word;

	SCHED_STATS_INC(ipi_count);

	my_cpu = cpu_number();
	my_word = &cpu_data_ptr[my_cpu]->cpu_signals;
	/* Store the initial set of signals for diagnostics. New
	 * signals could arrive while these are being processed
	 * so it's no more than a hint.
	 */

	cpu_data_ptr[my_cpu]->cpu_prior_signals = *my_word;

	do {
#if     MACH_KDP
		if (i_bit(MP_KDP, my_word)) {
			DBGLOG(cpu_handle, my_cpu, MP_KDP);
			i_bit_clear(MP_KDP, my_word);
/* Ensure that the i386_kernel_state at the base of the
 * current thread's stack (if any) is synchronized with the
 * context at the moment of the interrupt, to facilitate
 * access through the debugger.
 */
			sync_iss_to_iks(regs);
			if (pmsafe_debug && !kdp_snapshot) {
				pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
			}
			mp_kdp_wait(TRUE, FALSE);
			if (pmsafe_debug && !kdp_snapshot) {
				pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
			}
		} else
#endif  /* MACH_KDP */
		if (i_bit(MP_TLB_FLUSH, my_word)) {
			DBGLOG(cpu_handle, my_cpu, MP_TLB_FLUSH);
			i_bit_clear(MP_TLB_FLUSH, my_word);
			pmap_update_interrupt();
		} else if (i_bit(MP_CALL, my_word)) {
			DBGLOG(cpu_handle, my_cpu, MP_CALL);
			i_bit_clear(MP_CALL, my_word);
			mp_cpus_call_action();
		} else if (i_bit(MP_CALL_PM, my_word)) {
			DBGLOG(cpu_handle, my_cpu, MP_CALL_PM);
			i_bit_clear(MP_CALL_PM, my_word);
			mp_call_PM();
		}
		if (regs == NULL) {
			/* Called to poll only for cross-calls and TLB flush */
			break;
		} else if (i_bit(MP_AST, my_word)) {
			DBGLOG(cpu_handle, my_cpu, MP_AST);
			i_bit_clear(MP_AST, my_word);
			ast_check(cpu_to_processor(my_cpu));
		}
	} while (*my_word);

	return 0;
}
560
/*
 * Cross-cpu (kdp xcpu) callback invoked when PTE corruption is detected:
 * formats a panic string referencing the corrupted PTE (via the global
 * PTE_corrupted_ptr) and backtraces from the current frame pointer.
 * Always returns 0.
 */
long
NMI_pte_corruption_callback(__unused void *arg0, __unused void *arg1, uint16_t lcpu)
{
	static char pstr[256]; /* global since this callback is serialized */
	void *stackptr;
	/* Capture this frame's base pointer as the backtrace starting point. */
	__asm__ volatile ("movq %%rbp, %0" : "=m" (stackptr));

	snprintf(&pstr[0], sizeof(pstr),
	    "Panic(CPU %d): PTE corruption detected on PTEP 0x%llx VAL 0x%llx\n",
	    lcpu, (unsigned long long)(uintptr_t)PTE_corrupted_ptr, *(uint64_t *)PTE_corrupted_ptr);
	panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, current_cpu_datap()->cpu_int_state);
	return 0;
}
574
575 extern void kprintf_break_lock(void);
576 int
NMIInterruptHandler(x86_saved_state_t * regs)577 NMIInterruptHandler(x86_saved_state_t *regs)
578 {
579 void *stackptr;
580 char pstr[256];
581 uint64_t now = mach_absolute_time();
582
583 if (panic_active() && !panicDebugging) {
584 if (pmsafe_debug) {
585 pmSafeMode(¤t_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
586 }
587 for (;;) {
588 cpu_pause();
589 }
590 }
591
592 atomic_incl(&NMIPI_acks, 1);
593 atomic_incl(&NMI_count, 1);
594 sync_iss_to_iks_unconditionally(regs);
595 __asm__ volatile ("movq %%rbp, %0" : "=m" (stackptr));
596
597 if (cpu_number() == debugger_cpu) {
598 goto NMExit;
599 }
600
601 if (NMI_panic_reason == SPINLOCK_TIMEOUT) {
602 lck_spinlock_to_info_t lsti;
603
604 lsti = os_atomic_load(&lck_spinlock_timeout_in_progress, acquire);
605 snprintf(&pstr[0], sizeof(pstr),
606 "Panic(CPU %d, time %llu): NMIPI for spinlock acquisition timeout, spinlock: %p, "
607 "spinlock owner: %p, current_thread: %p, spinlock_owner_cpu: 0x%x\n",
608 cpu_number(), now, lsti->lock, (void *)lsti->owner_thread_cur,
609 current_thread(), lsti->owner_cpu);
610 panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
611 } else if (NMI_panic_reason == TLB_FLUSH_TIMEOUT) {
612 snprintf(&pstr[0], sizeof(pstr),
613 "Panic(CPU %d, time %llu): NMIPI for unresponsive processor: TLB flush timeout, TLB state:0x%x\n",
614 cpu_number(), now, current_cpu_datap()->cpu_tlb_invalid);
615 panic_i386_backtrace(stackptr, 48, &pstr[0], TRUE, regs);
616 } else if (NMI_panic_reason == CROSSCALL_TIMEOUT) {
617 snprintf(&pstr[0], sizeof(pstr),
618 "Panic(CPU %d, time %llu): NMIPI for unresponsive processor: cross-call timeout\n",
619 cpu_number(), now);
620 panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
621 } else if (NMI_panic_reason == INTERRUPT_WATCHDOG) {
622 snprintf(&pstr[0], sizeof(pstr),
623 "Panic(CPU %d, time %llu): NMIPI for unresponsive processor: interrupt watchdog for vector 0x%x\n",
624 cpu_number(), now, vector_timed_out);
625 panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
626 }
627
628 #if MACH_KDP
629 if (pmsafe_debug && !kdp_snapshot) {
630 pmSafeMode(¤t_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
631 }
632 current_cpu_datap()->cpu_NMI_acknowledged = TRUE;
633 i_bit_clear(MP_KDP, ¤t_cpu_datap()->cpu_signals);
634 if (panic_active() || NMI_panic_reason != NONE) {
635 mp_kdp_wait(FALSE, TRUE);
636 } else if (!mp_kdp_trap &&
637 !mp_kdp_is_NMI &&
638 virtualized && (debug_boot_arg & DB_NMI)) {
639 /*
640 * Under a VMM with the debug boot-arg set, drop into kdp.
641 * Since an NMI is involved, there's a risk of contending with
642 * a panic. And side-effects of NMIs may result in entry into,
643 * and continuing from, the debugger being unreliable.
644 */
645 if (__sync_bool_compare_and_swap(&mp_kdp_is_NMI, FALSE, TRUE)) {
646 kprintf_break_lock();
647
648 DebuggerWithContext(EXC_BREAKPOINT, saved_state64(regs),
649 "requested by NMI", DEBUGGER_OPTION_NONE,
650 (unsigned long)(char *)__builtin_return_address(0));
651
652 mp_kdp_is_NMI = FALSE;
653 } else {
654 mp_kdp_wait(FALSE, FALSE);
655 }
656 } else {
657 mp_kdp_wait(FALSE, FALSE);
658 }
659 if (pmsafe_debug && !kdp_snapshot) {
660 pmSafeMode(¤t_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
661 }
662 #endif
663 NMExit:
664 return 1;
665 }
666
667 /*
668 * cpu_interrupt is really just to be used by the scheduler to
669 * get a CPU's attention it may not always issue an IPI. If an
670 * IPI is always needed then use i386_cpu_IPI.
671 */
672 void
cpu_interrupt(int cpu)673 cpu_interrupt(int cpu)
674 {
675 boolean_t did_IPI = FALSE;
676
677 if (smp_initialized
678 && pmCPUExitIdle(cpu_datap(cpu))) {
679 i386_cpu_IPI(cpu);
680 did_IPI = TRUE;
681 }
682
683 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, did_IPI, 0, 0, 0);
684 }
685
686 /*
687 * Send a true NMI via the local APIC to the specified CPU.
688 */
689 void
cpu_NMI_interrupt(int cpu)690 cpu_NMI_interrupt(int cpu)
691 {
692 if (smp_initialized) {
693 i386_send_NMI(cpu);
694 }
695 }
696
/*
 * Send an NMI to every running cpu in turn and wait for each to
 * acknowledge (cpu_NMI_acknowledged set by NMIInterruptHandler).
 * Panics if a cpu fails to respond within ~10^10 TSC ticks, unless
 * machine timeouts are suspended.  Runs with interrupts disabled.
 */
void
NMI_cpus(void)
{
	unsigned int    cpu;
	boolean_t       intrs_enabled;
	uint64_t        tsc_timeout;

	intrs_enabled = ml_set_interrupts_enabled(FALSE);
	NMIPI_enable(TRUE);
	for (cpu = 0; cpu < real_ncpus; cpu++) {
		if (!cpu_is_running(cpu)) {
			continue;
		}
		cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
		cpu_NMI_interrupt(cpu);
		/* Timeout is disabled (~0ULL) while machine timeouts are suspended. */
		tsc_timeout = !machine_timeout_suspended() ?
		    rdtsc64() + (1000 * 1000 * 1000 * 10ULL) :
		    ~0ULL;
		while (!cpu_datap(cpu)->cpu_NMI_acknowledged) {
			/* Service pending TLB flushes so the target can't block on us. */
			handle_pending_TLB_flushes();
			cpu_pause();
			if (rdtsc64() > tsc_timeout) {
				panic("NMI_cpus() timeout cpu %d", cpu);
			}
		}
		cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
	}
	NMIPI_enable(FALSE);

	ml_set_interrupts_enabled(intrs_enabled);
}
728
729 static void(*volatile mp_PM_func)(void) = NULL;
730
731 static void
mp_call_PM(void)732 mp_call_PM(void)
733 {
734 assert(!ml_get_interrupts_enabled());
735
736 if (mp_PM_func != NULL) {
737 mp_PM_func();
738 }
739 }
740
741 void
cpu_PM_interrupt(int cpu)742 cpu_PM_interrupt(int cpu)
743 {
744 assert(!ml_get_interrupts_enabled());
745
746 if (mp_PM_func != NULL) {
747 if (cpu == cpu_number()) {
748 mp_PM_func();
749 } else {
750 i386_signal_cpu(cpu, MP_CALL_PM, ASYNC);
751 }
752 }
753 }
754
/*
 * Register the power-management callback invoked by MP_CALL_PM signals
 * and cpu_PM_interrupt().
 */
void
PM_interrupt_register(void (*fn)(void))
{
	mp_PM_func = fn;
}
760
/*
 * Post the given event bit in the target cpu's signal word and send it
 * an IPI.  In SYNC mode, spin until the target consumes the event,
 * logging and retrying indefinitely after each ~10^9-TSC-tick timeout.
 * No-op if the target cpu isn't running.
 */
void
i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode)
{
	volatile int    *signals = &cpu_datap(cpu)->cpu_signals;
	uint64_t        tsc_timeout;


	if (!cpu_datap(cpu)->cpu_running) {
		return;
	}

	if (event == MP_TLB_FLUSH) {
		KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_START, cpu, 0, 0, 0, 0);
	}

	DBGLOG(cpu_signal, cpu, event);

	i_bit_set(event, signals);
	i386_cpu_IPI(cpu);
	if (mode == SYNC) {
again:
		/* Timeout is disabled (~0ULL) while machine timeouts are suspended. */
		tsc_timeout = !machine_timeout_suspended() ?
		    rdtsc64() + (1000 * 1000 * 1000) :
		    ~0ULL;
		while (i_bit(event, signals) && rdtsc64() < tsc_timeout) {
			cpu_pause();
		}
		if (i_bit(event, signals)) {
			DBG("i386_signal_cpu(%d, 0x%x, SYNC) timed out\n",
			    cpu, event);
			goto again;
		}
	}
	if (event == MP_TLB_FLUSH) {
		KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_END, cpu, 0, 0, 0, 0);
	}
}
798
799 /*
800 * Helper function called when busy-waiting: panic if too long
801 * a TSC-based time has elapsed since the start of the spin.
802 */
803 static boolean_t
mp_spin_timeout(uint64_t tsc_start)804 mp_spin_timeout(uint64_t tsc_start)
805 {
806 uint64_t tsc_timeout;
807
808 cpu_pause();
809 if (machine_timeout_suspended()) {
810 return FALSE;
811 }
812
813 /*
814 * The timeout is 4 * the spinlock timeout period
815 * unless we have serial console printing (kprintf) enabled
816 * in which case we allow an even greater margin.
817 */
818 tsc_timeout = disable_serial_output ? LockTimeOutTSC << 2
819 : LockTimeOutTSC << 4;
820 return rdtsc64() > tsc_start + tsc_timeout;
821 }
822
/*
 * Helper function to take a spinlock while ensuring that incoming IPIs
 * are still serviced if interrupts are masked while we spin.
 * Returns the current interrupt state (TRUE if interrupts were enabled).
 * On timeout, NMIs the apparent lock owner's cpu and panics.
 */
boolean_t
mp_safe_spin_lock(usimple_lock_t lock)
{
	if (ml_get_interrupts_enabled()) {
		/* Interrupts on: a plain spin cannot deadlock against IPIs. */
		simple_lock(lock, LCK_GRP_NULL);
		return TRUE;
	}

	lck_spinlock_to_info_t lsti;
	uint64_t tsc_spin_start = rdtsc64();

	/*
	 * Interrupts masked: poll-service pending cross-calls and TLB
	 * flushes between lock attempts so the holder can't be blocked
	 * waiting on this cpu.
	 */
	while (!simple_lock_try(lock, LCK_GRP_NULL)) {
		cpu_signal_handler(NULL);
		if (mp_spin_timeout(tsc_spin_start)) {
			uintptr_t lowner = (uintptr_t)lock->interlock.lock_data;

			lsti = lck_spinlock_timeout_hit(lock, lowner);
			NMIPI_panic(cpu_to_cpumask(lsti->owner_cpu), SPINLOCK_TIMEOUT);
			panic("mp_safe_spin_lock() timed out, lock: %p, "
			    "owner thread: 0x%lx, current_thread: %p, "
			    "owner on CPU 0x%x, time: %llu",
			    lock, lowner, current_thread(),
			    lsti->owner_cpu, mach_absolute_time());
		}
	}

	return FALSE;
}
856
857 /*
858 * All-CPU rendezvous:
859 * - CPUs are signalled,
860 * - all execute the setup function (if specified),
861 * - rendezvous (i.e. all cpus reach a barrier),
862 * - all execute the action function (if specified),
863 * - rendezvous again,
864 * - execute the teardown function (if specified), and then
865 * - resume.
866 *
867 * Note that the supplied external functions _must_ be reentrant and aware
868 * that they are running in parallel and in an unknown lock context.
869 */
870
/*
 * Per-cpu body of the rendezvous protocol (see the comment block above):
 * run setup, spin at the entry barrier until all mp_rv_ncpus cpus arrive,
 * run the action, spin at the exit barrier, run teardown, then bump the
 * completion count that mp_rendezvous() waits on.  Panics if either
 * barrier spin exceeds mp_spin_timeout().
 */
static void
mp_rendezvous_action(__unused void *null)
{
	boolean_t       intrs_enabled;
	uint64_t        tsc_spin_start;

	/*
	 * Note that mp_rv_lock was acquired by the thread that initiated the
	 * rendezvous and must have been acquired before we enter
	 * mp_rendezvous_action().
	 */
	current_cpu_datap()->cpu_rendezvous_in_progress = TRUE;

	/* setup function */
	if (mp_rv_setup_func != NULL) {
		mp_rv_setup_func(mp_rv_func_arg);
	}

	intrs_enabled = ml_get_interrupts_enabled();

	/* spin on entry rendezvous */
	atomic_incl(&mp_rv_entry, 1);
	tsc_spin_start = rdtsc64();

	while (mp_rv_entry < mp_rv_ncpus) {
		/* poll for pesky tlb flushes if interrupts disabled */
		if (!intrs_enabled) {
			handle_pending_TLB_flushes();
		}
		if (mp_spin_timeout(tsc_spin_start)) {
			panic("mp_rv_action() entry: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_entry, mp_rv_ncpus, tsc_spin_start, rdtsc64());
		}
	}

	/* action function */
	if (mp_rv_action_func != NULL) {
		mp_rv_action_func(mp_rv_func_arg);
	}

	/* spin on exit rendezvous */
	atomic_incl(&mp_rv_exit, 1);
	tsc_spin_start = rdtsc64();
	while (mp_rv_exit < mp_rv_ncpus) {
		if (!intrs_enabled) {
			handle_pending_TLB_flushes();
		}
		if (mp_spin_timeout(tsc_spin_start)) {
			panic("mp_rv_action() exit: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_exit, mp_rv_ncpus, tsc_spin_start, rdtsc64());
		}
	}

	/* teardown function */
	if (mp_rv_teardown_func != NULL) {
		mp_rv_teardown_func(mp_rv_func_arg);
	}

	current_cpu_datap()->cpu_rendezvous_in_progress = FALSE;

	/* Bump completion count */
	atomic_incl(&mp_rv_complete, 1);
}
932
/*
 * Initiate an all-cpu rendezvous (protocol described above): publish the
 * three callbacks and their argument in the mp_rv_* statics (protected by
 * mp_rv_lock), signal the other cpus to run mp_rendezvous_action(), run it
 * locally, then wait for everyone to pass the exit barrier before tearing
 * down.  Before SMP is up, simply runs the callbacks locally in order.
 */
void
mp_rendezvous(void (*setup_func)(void *),
    void (*action_func)(void *),
    void (*teardown_func)(void *),
    void *arg)
{
	uint64_t        tsc_spin_start;

	if (!smp_initialized) {
		/* Uniprocessor (so far): no cross-cpu coordination needed. */
		if (setup_func != NULL) {
			setup_func(arg);
		}
		if (action_func != NULL) {
			action_func(arg);
		}
		if (teardown_func != NULL) {
			teardown_func(arg);
		}
		return;
	}

	/* obtain rendezvous lock */
	mp_rendezvous_lock();

	/* set static function pointers */
	mp_rv_setup_func = setup_func;
	mp_rv_action_func = action_func;
	mp_rv_teardown_func = teardown_func;
	mp_rv_func_arg = arg;

	mp_rv_entry    = 0;
	mp_rv_exit     = 0;
	mp_rv_complete = 0;

	/*
	 * signal other processors, which will call mp_rendezvous_action()
	 * with interrupts disabled
	 */
	mp_rv_ncpus = mp_cpus_call(CPUMASK_OTHERS, NOSYNC, &mp_rendezvous_action, NULL) + 1;

	/* call executor function on this cpu */
	mp_rendezvous_action(NULL);

	/*
	 * Spin for everyone to complete.
	 * This is necessary to ensure that all processors have proceeded
	 * from the exit barrier before we release the rendezvous structure.
	 */
	tsc_spin_start = rdtsc64();
	while (mp_rv_complete < mp_rv_ncpus) {
		if (mp_spin_timeout(tsc_spin_start)) {
			panic("mp_rendezvous() timeout: %ld of %d responses, start: 0x%llx, cur: 0x%llx", mp_rv_complete, mp_rv_ncpus, tsc_spin_start, rdtsc64());
		}
	}

	/* Tidy up */
	mp_rv_setup_func = NULL;
	mp_rv_action_func = NULL;
	mp_rv_teardown_func = NULL;
	mp_rv_func_arg = NULL;

	/* release lock */
	mp_rendezvous_unlock();
}
997
/*
 * Acquire the rendezvous lock, servicing pending IPIs while spinning
 * (interrupt state returned by mp_safe_spin_lock is not needed here).
 */
void
mp_rendezvous_lock(void)
{
	(void) mp_safe_spin_lock(&mp_rv_lock);
}
1003
/* Release the rendezvous lock taken by mp_rendezvous_lock(). */
void
mp_rendezvous_unlock(void)
{
	simple_unlock(&mp_rv_lock);
}
1009
/*
 * Forcibly re-initialize the rendezvous lock, abandoning any holder.
 * NOTE(review): presumably only safe from debugger/panic recovery paths
 * where the holder cannot resume -- confirm with callers.
 */
void
mp_rendezvous_break_lock(void)
{
	simple_lock_init(&mp_rv_lock, 0);
}
1015
1016 static void
setup_disable_intrs(__unused void * param_not_used)1017 setup_disable_intrs(__unused void * param_not_used)
1018 {
1019 /* disable interrupts before the first barrier */
1020 boolean_t intr = ml_set_interrupts_enabled(FALSE);
1021
1022 current_cpu_datap()->cpu_iflag = intr;
1023 DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
1024 }
1025
1026 static void
teardown_restore_intrs(__unused void * param_not_used)1027 teardown_restore_intrs(__unused void * param_not_used)
1028 {
1029 /* restore interrupt flag following MTRR changes */
1030 ml_set_interrupts_enabled(current_cpu_datap()->cpu_iflag);
1031 DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
1032 }
1033
/*
 * A wrapper to mp_rendezvous() to call action_func() with interrupts disabled.
 * This is exported for use by kexts.
 * Uses setup_disable_intrs/teardown_restore_intrs to save and restore the
 * interrupt flag on every participating cpu.
 */
void
mp_rendezvous_no_intrs(
	void (*action_func)(void *),
	void *arg)
{
	mp_rendezvous(setup_disable_intrs,
	    action_func,
	    teardown_restore_intrs,
	    arg);
}
1048
1049
1050 typedef struct {
1051 queue_chain_t link; /* queue linkage */
1052 void (*func)(void *, void *); /* routine to call */
1053 void *arg0; /* routine's 1st arg */
1054 void *arg1; /* routine's 2nd arg */
1055 cpumask_t *maskp; /* completion response mask */
1056 } mp_call_t;
1057
1058
1059 typedef struct {
1060 queue_head_t queue;
1061 decl_simple_lock_data(, lock);
1062 } mp_call_queue_t;
1063 #define MP_CPUS_CALL_BUFS_PER_CPU MAX_CPUS
1064 static mp_call_queue_t mp_cpus_call_freelist;
1065 static mp_call_queue_t mp_cpus_call_head[MAX_CPUS];
1066
1067 static inline boolean_t
mp_call_head_lock(mp_call_queue_t * cqp)1068 mp_call_head_lock(mp_call_queue_t *cqp)
1069 {
1070 boolean_t intrs_enabled;
1071
1072 intrs_enabled = ml_set_interrupts_enabled(FALSE);
1073 simple_lock(&cqp->lock, LCK_GRP_NULL);
1074
1075 return intrs_enabled;
1076 }
1077
1078 /*
1079 * Deliver an NMIPI to a set of processors to cause them to panic .
1080 */
1081 void
NMIPI_panic(cpumask_t cpu_mask,NMI_reason_t why)1082 NMIPI_panic(cpumask_t cpu_mask, NMI_reason_t why)
1083 {
1084 unsigned int cpu;
1085 cpumask_t cpu_bit;
1086 uint64_t deadline;
1087
1088 NMIPI_enable(TRUE);
1089 NMI_panic_reason = why;
1090
1091 for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
1092 if ((cpu_mask & cpu_bit) == 0) {
1093 continue;
1094 }
1095 cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
1096 cpu_NMI_interrupt(cpu);
1097 }
1098
1099 /* Wait (only so long) for NMi'ed cpus to respond */
1100 deadline = mach_absolute_time() + LockTimeOut;
1101 for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
1102 if ((cpu_mask & cpu_bit) == 0) {
1103 continue;
1104 }
1105 while (!cpu_datap(cpu)->cpu_NMI_acknowledged &&
1106 mach_absolute_time() < deadline) {
1107 cpu_pause();
1108 }
1109 }
1110 }
1111
#if MACH_ASSERT
/*
 * Debug-only check: TRUE iff interrupts are masked and the queue lock
 * is held (hw_lock_held() cannot attribute ownership to this cpu).
 */
static inline boolean_t
mp_call_head_is_locked(mp_call_queue_t *cqp)
{
	return !ml_get_interrupts_enabled() &&
	       hw_lock_held((hw_lock_t)&cqp->lock);
}
#endif
1120
/*
 * Release a call queue's lock and restore the interrupt state returned
 * by the matching mp_call_head_lock(). The lock is dropped before
 * interrupts are re-enabled.
 */
static inline void
mp_call_head_unlock(mp_call_queue_t *cqp, boolean_t intrs_enabled)
{
	simple_unlock(&cqp->lock);
	ml_set_interrupts_enabled(intrs_enabled);
}
1127
1128 static inline mp_call_t *
mp_call_alloc(void)1129 mp_call_alloc(void)
1130 {
1131 mp_call_t *callp = NULL;
1132 boolean_t intrs_enabled;
1133 mp_call_queue_t *cqp = &mp_cpus_call_freelist;
1134
1135 intrs_enabled = mp_call_head_lock(cqp);
1136 if (!queue_empty(&cqp->queue)) {
1137 queue_remove_first(&cqp->queue, callp, typeof(callp), link);
1138 }
1139 mp_call_head_unlock(cqp, intrs_enabled);
1140
1141 return callp;
1142 }
1143
1144 static inline void
mp_call_free(mp_call_t * callp)1145 mp_call_free(mp_call_t *callp)
1146 {
1147 boolean_t intrs_enabled;
1148 mp_call_queue_t *cqp = &mp_cpus_call_freelist;
1149
1150 intrs_enabled = mp_call_head_lock(cqp);
1151 queue_enter_first(&cqp->queue, callp, typeof(callp), link);
1152 mp_call_head_unlock(cqp, intrs_enabled);
1153 }
1154
1155 static inline mp_call_t *
mp_call_dequeue_locked(mp_call_queue_t * cqp)1156 mp_call_dequeue_locked(mp_call_queue_t *cqp)
1157 {
1158 mp_call_t *callp = NULL;
1159
1160 assert(mp_call_head_is_locked(cqp));
1161 if (!queue_empty(&cqp->queue)) {
1162 queue_remove_first(&cqp->queue, callp, typeof(callp), link);
1163 }
1164 return callp;
1165 }
1166
/*
 * Append a call request to the tail of a cpu's call queue.
 * Caller must hold the queue lock.
 */
static inline void
mp_call_enqueue_locked(
	mp_call_queue_t *cqp,
	mp_call_t *callp)
{
	queue_enter(&cqp->queue, callp, typeof(callp), link);
}
1174
1175 /* Called on the boot processor to initialize global structures */
1176 static void
mp_cpus_call_init(void)1177 mp_cpus_call_init(void)
1178 {
1179 mp_call_queue_t *cqp = &mp_cpus_call_freelist;
1180
1181 DBG("mp_cpus_call_init()\n");
1182 simple_lock_init(&cqp->lock, 0);
1183 queue_init(&cqp->queue);
1184 }
1185
1186 /*
1187 * Called at processor registration to add call buffers to the free list
1188 * and to initialize the per-cpu call queue.
1189 */
1190 void
mp_cpus_call_cpu_init(int cpu)1191 mp_cpus_call_cpu_init(int cpu)
1192 {
1193 int i;
1194 mp_call_queue_t *cqp = &mp_cpus_call_head[cpu];
1195 mp_call_t *callp;
1196
1197 simple_lock_init(&cqp->lock, 0);
1198 queue_init(&cqp->queue);
1199 for (i = 0; i < MP_CPUS_CALL_BUFS_PER_CPU; i++) {
1200 callp = zalloc_permanent_type(mp_call_t);
1201 mp_call_free(callp);
1202 }
1203
1204 DBG("mp_cpus_call_init(%d) done\n", cpu);
1205 }
1206
/*
 * This is called from cpu_signal_handler() to process an MP_CALL signal.
 * And also from i386_deactivate_cpu() when a cpu is being taken offline.
 *
 * Drains this cpu's call queue, running each queued function and
 * acknowledging completion via the requester's response mask.
 */
static void
mp_cpus_call_action(void)
{
	mp_call_queue_t *cqp;
	boolean_t intrs_enabled;
	mp_call_t *callp;
	mp_call_t call;

	assert(!ml_get_interrupts_enabled());
	cqp = &mp_cpus_call_head[cpu_number()];
	intrs_enabled = mp_call_head_lock(cqp);
	while ((callp = mp_call_dequeue_locked(cqp)) != NULL) {
		/* Copy call request to the stack to free buffer */
		call = *callp;
		mp_call_free(callp);
		if (call.func != NULL) {
			/* Drop the queue lock while running the function so it
			 * may itself enqueue/dequeue; reacquire afterwards. */
			mp_call_head_unlock(cqp, intrs_enabled);
			KERNEL_DEBUG_CONSTANT(
				TRACE_MP_CPUS_CALL_ACTION,
				VM_KERNEL_UNSLIDE(call.func), VM_KERNEL_UNSLIDE_OR_PERM(call.arg0),
				VM_KERNEL_UNSLIDE_OR_PERM(call.arg1), VM_KERNEL_ADDRPERM(call.maskp), 0);
			call.func(call.arg0, call.arg1);
			(void) mp_call_head_lock(cqp);
		}
		/* A NULL maskp means the requester isn't waiting (NOSYNC). */
		if (call.maskp != NULL) {
			i_bit_set(cpu_number(), call.maskp);
		}
	}
	mp_call_head_unlock(cqp, intrs_enabled);
}
1241
1242 #pragma clang diagnostic push
1243 #pragma clang diagnostic ignored "-Wcast-function-type"
1244
1245 /*
1246 * mp_cpus_call() runs a given function on cpus specified in a given cpu mask.
1247 * Possible modes are:
1248 * SYNC: function is called serially on target cpus in logical cpu order
1249 * waiting for each call to be acknowledged before proceeding
1250 * ASYNC: function call is queued to the specified cpus
1251 * waiting for all calls to complete in parallel before returning
1252 * NOSYNC: function calls are queued
1253 * but we return before confirmation of calls completing.
1254 * The action function may be NULL.
1255 * The cpu mask may include the local cpu. Offline cpus are ignored.
1256 * The return value is the number of cpus on which the call was made or queued.
1257 */
1258 cpu_t
mp_cpus_call(cpumask_t cpus,mp_sync_t mode,void (* action_func)(void *),void * arg)1259 mp_cpus_call(
1260 cpumask_t cpus,
1261 mp_sync_t mode,
1262 void (*action_func)(void *),
1263 void *arg)
1264 {
1265 return mp_cpus_call1(
1266 cpus,
1267 mode,
1268 (void (*)(void *, void *))action_func,
1269 arg,
1270 NULL,
1271 NULL);
1272 }
1273
1274 #pragma clang diagnostic pop
1275
/*
 * Spin until every cpu in cpus_called has acknowledged in *cpus_responded.
 * While spinning with interrupts masked, service our own inbound calls
 * and pending signals to avoid cross-cpu deadlock. Panics (after NMIing
 * the stragglers) if the spin timeout expires.
 */
static void
mp_cpus_call_wait(boolean_t intrs_enabled,
    cpumask_t cpus_called,
    cpumask_t *cpus_responded)
{
	mp_call_queue_t *cqp;
	uint64_t tsc_spin_start;

	/* Must not be preemptible while spinning on per-cpu state. */
	assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
	cqp = &mp_cpus_call_head[cpu_number()];

	tsc_spin_start = rdtsc64();
	while (*cpus_responded != cpus_called) {
		if (!intrs_enabled) {
			/* Sniffing w/o locking */
			if (!queue_empty(&cqp->queue)) {
				mp_cpus_call_action();
			}
			cpu_signal_handler(NULL);
		}
		if (mp_spin_timeout(tsc_spin_start)) {
			cpumask_t cpus_unresponsive;

			cpus_unresponsive = cpus_called & ~(*cpus_responded);
			NMIPI_panic(cpus_unresponsive, CROSSCALL_TIMEOUT);
			panic("mp_cpus_call_wait() timeout, cpus: 0x%llx",
			    cpus_unresponsive);
		}
	}
}
1306
/*
 * Core cross-call primitive: run action_func(arg0, arg1) on the cpus in
 * 'cpus' according to 'mode' (see mp_cpus_call() for mode semantics).
 * Optionally reports the mask of cpus actually called via cpus_calledp.
 * Returns the number of cpus on which the call was made or queued.
 */
cpu_t
mp_cpus_call1(
	cpumask_t cpus,
	mp_sync_t mode,
	void (*action_func)(void *, void *),
	void *arg0,
	void *arg1,
	cpumask_t *cpus_calledp)
{
	cpu_t cpu = 0;
	boolean_t intrs_enabled = FALSE;
	boolean_t call_self = FALSE;
	cpumask_t cpus_called = 0;
	cpumask_t cpus_responded = 0;
	long cpus_call_count = 0;
	uint64_t tsc_spin_start;
	boolean_t topo_lock;

	KERNEL_DEBUG_CONSTANT(
		TRACE_MP_CPUS_CALL | DBG_FUNC_START,
		cpus, mode, VM_KERNEL_UNSLIDE(action_func), VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1));

	/* Before SMP is up, only a local call is possible. */
	if (!smp_initialized) {
		if ((cpus & CPUMASK_SELF) == 0) {
			goto out;
		}
		if (action_func != NULL) {
			intrs_enabled = ml_set_interrupts_enabled(FALSE);
			action_func(arg0, arg1);
			ml_set_interrupts_enabled(intrs_enabled);
		}
		call_self = TRUE;
		goto out;
	}

	/*
	 * Queue the call for each non-local requested cpu.
	 * This is performed under the topo lock to prevent changes to
	 * cpus online state and to prevent concurrent rendezvouses --
	 * although an exception is made if we're calling only the master
	 * processor since that always remains active. Note: this exception
	 * is expected for longterm timer nosync cross-calls to the master cpu.
	 */
	mp_disable_preemption();
	intrs_enabled = ml_get_interrupts_enabled();
	topo_lock = (cpus != cpu_to_cpumask(master_cpu));
	if (topo_lock) {
		ml_set_interrupts_enabled(FALSE);
		(void) mp_safe_spin_lock(&x86_topo_lock);
	}
	for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
		if (((cpu_to_cpumask(cpu) & cpus) == 0) ||
		    !cpu_is_running(cpu)) {
			continue;
		}
		tsc_spin_start = rdtsc64();
		if (cpu == (cpu_t) cpu_number()) {
			/*
			 * We don't IPI ourself and if calling asynchronously,
			 * we defer our call until we have signalled all others.
			 */
			call_self = TRUE;
			if (mode == SYNC && action_func != NULL) {
				KERNEL_DEBUG_CONSTANT(
					TRACE_MP_CPUS_CALL_LOCAL,
					VM_KERNEL_UNSLIDE(action_func),
					VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1), 0, 0);
				action_func(arg0, arg1);
			}
		} else {
			/*
			 * Here to queue a call to cpu and IPI.
			 */
			mp_call_t *callp = NULL;
			mp_call_queue_t *cqp = &mp_cpus_call_head[cpu];
			boolean_t intrs_inner;

queue_call:
			if (callp == NULL) {
				callp = mp_call_alloc();
			}
			intrs_inner = mp_call_head_lock(cqp);
			if (callp == NULL) {
				/* No free buffer: drop the lock, service our own
				 * pending work to avoid deadlock, then retry. */
				mp_call_head_unlock(cqp, intrs_inner);
				KERNEL_DEBUG_CONSTANT(
					TRACE_MP_CPUS_CALL_NOBUF,
					cpu, 0, 0, 0, 0);
				if (!intrs_inner) {
					/* Sniffing w/o locking */
					if (!queue_empty(&cqp->queue)) {
						mp_cpus_call_action();
					}
					handle_pending_TLB_flushes();
				}
				if (mp_spin_timeout(tsc_spin_start)) {
					panic("mp_cpus_call1() timeout start: 0x%llx, cur: 0x%llx",
					    tsc_spin_start, rdtsc64());
				}
				goto queue_call;
			}
			/* NOSYNC callers don't wait, so no response mask. */
			callp->maskp = (mode == NOSYNC) ? NULL : &cpus_responded;
			callp->func = action_func;
			callp->arg0 = arg0;
			callp->arg1 = arg1;
			mp_call_enqueue_locked(cqp, callp);
			cpus_call_count++;
			cpus_called |= cpu_to_cpumask(cpu);
			i386_signal_cpu(cpu, MP_CALL, ASYNC);
			mp_call_head_unlock(cqp, intrs_inner);
			if (mode == SYNC) {
				mp_cpus_call_wait(intrs_inner, cpus_called, &cpus_responded);
			}
		}
	}
	if (topo_lock) {
		simple_unlock(&x86_topo_lock);
		ml_set_interrupts_enabled(intrs_enabled);
	}

	/* Call locally if mode not SYNC */
	if (mode != SYNC && call_self) {
		KERNEL_DEBUG_CONSTANT(
			TRACE_MP_CPUS_CALL_LOCAL,
			VM_KERNEL_UNSLIDE(action_func), VM_KERNEL_UNSLIDE_OR_PERM(arg0), VM_KERNEL_UNSLIDE_OR_PERM(arg1), 0, 0);
		if (action_func != NULL) {
			ml_set_interrupts_enabled(FALSE);
			action_func(arg0, arg1);
			ml_set_interrupts_enabled(intrs_enabled);
		}
	}

	/* For ASYNC, now wait for all signaled cpus to complete their calls */
	if (mode == ASYNC) {
		mp_cpus_call_wait(intrs_enabled, cpus_called, &cpus_responded);
	}

	/* Safe to allow pre-emption now */
	mp_enable_preemption();

out:
	if (call_self) {
		/*
		 * NOTE(review): on the normal (smp_initialized) path, 'cpu'
		 * equals real_ncpus here after the loop, so this sets a bit
		 * beyond the valid cpu range rather than the local cpu's bit
		 * (the early-out path reaches here with cpu == 0) -- confirm
		 * whether callers depend on cpus_called for the local cpu.
		 */
		cpus_called |= cpu_to_cpumask(cpu);
		cpus_call_count++;
	}

	if (cpus_calledp) {
		*cpus_calledp = cpus_called;
	}

	KERNEL_DEBUG_CONSTANT(
		TRACE_MP_CPUS_CALL | DBG_FUNC_END,
		cpus_call_count, cpus_called, 0, 0, 0);

	return (cpu_t) cpus_call_count;
}
1462
1463
1464 static void
mp_broadcast_action(__unused void * null)1465 mp_broadcast_action(__unused void *null)
1466 {
1467 /* call action function */
1468 if (mp_bc_action_func != NULL) {
1469 mp_bc_action_func(mp_bc_func_arg);
1470 }
1471
1472 /* if we're the last one through, wake up the instigator */
1473 if (atomic_decl_and_test(&mp_bc_count, 1)) {
1474 thread_wakeup(((event_t)(uintptr_t) &mp_bc_count));
1475 }
1476 }
1477
/*
 * mp_broadcast() runs a given function on all active cpus.
 * The caller blocks until the function has run on all cpus.
 * The caller will also block if there is another pending broadcast.
 */
void
mp_broadcast(
	void (*action_func)(void *),
	void *arg)
{
	/* Pre-SMP: just run it locally. */
	if (!smp_initialized) {
		if (action_func != NULL) {
			action_func(arg);
		}
		return;
	}

	/* obtain broadcast lock (serializes concurrent broadcasts) */
	lck_mtx_lock(&mp_bc_lock);

	/* set static function pointers */
	mp_bc_action_func = action_func;
	mp_bc_func_arg = arg;

	/* Prepare to sleep before signalling: mp_broadcast_action() on the
	 * last cpu through wakes us on &mp_bc_count. */
	assert_wait((event_t)(uintptr_t)&mp_bc_count, THREAD_UNINT);

	/*
	 * signal other processors, which will call mp_broadcast_action()
	 */
	mp_bc_count = real_ncpus; /* assume max possible active */
	mp_bc_ncpus = mp_cpus_call(CPUMASK_ALL, NOSYNC, *mp_broadcast_action, NULL);
	atomic_decl(&mp_bc_count, real_ncpus - mp_bc_ncpus); /* subtract inactive */

	/* block for other cpus to have run action_func */
	if (mp_bc_ncpus > 1) {
		thread_block(THREAD_CONTINUE_NULL);
	} else {
		/* Only this cpu ran; cancel the wait instead of blocking. */
		clear_wait(current_thread(), THREAD_AWAKENED);
	}

	/* release lock */
	lck_mtx_unlock(&mp_bc_lock);
}
1521
1522 void
mp_cpus_kick(cpumask_t cpus)1523 mp_cpus_kick(cpumask_t cpus)
1524 {
1525 cpu_t cpu;
1526 boolean_t intrs_enabled = FALSE;
1527
1528 intrs_enabled = ml_set_interrupts_enabled(FALSE);
1529 mp_safe_spin_lock(&x86_topo_lock);
1530
1531 for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
1532 if (((cpu_to_cpumask(cpu) & cpus) == 0)
1533 || !cpu_is_running(cpu)) {
1534 continue;
1535 }
1536
1537 lapic_send_ipi(cpu, LAPIC_VECTOR(KICK));
1538 }
1539
1540 simple_unlock(&x86_topo_lock);
1541 ml_set_interrupts_enabled(intrs_enabled);
1542 }
1543
/*
 * Mark the current cpu as running (under the topo lock once SMP is up).
 * Called with interrupts disabled on the cpu coming online.
 */
void
i386_activate_cpu(void)
{
	cpu_data_t *cdp = current_cpu_datap();

	assert(!ml_get_interrupts_enabled());

	if (!smp_initialized) {
		cdp->cpu_running = TRUE;
		return;
	}

	mp_safe_spin_lock(&x86_topo_lock);
	cdp->cpu_running = TRUE;
	started_cpu();
	/* Full-range TLB invalidate -- presumably because shootdowns were
	 * not delivered while this cpu was offline; confirm. */
	pmap_tlbi_range(0, ~0ULL, true, 0);
	simple_unlock(&x86_topo_lock);
}
1562
/*
 * Take the current cpu offline: clear its running state, hand its
 * timer queue to the master cpu, stop counters, and drain any pending
 * IPIs/timer work before returning. Interrupts must be disabled.
 */
void
i386_deactivate_cpu(void)
{
	cpu_data_t *cdp = current_cpu_datap();

	assert(!ml_get_interrupts_enabled());

	KERNEL_DEBUG_CONSTANT(
		TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_START,
		0, 0, 0, 0, 0);

	mp_safe_spin_lock(&x86_topo_lock);
	cdp->cpu_running = FALSE;
	simple_unlock(&x86_topo_lock);

	/*
	 * Move all of this cpu's timers to the master/boot cpu,
	 * and poke it in case there's a sooner deadline for it to schedule.
	 */
	timer_queue_shutdown(&cdp->rtclock_timer.queue);
	mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC, timer_queue_expire_local, NULL);

#if CONFIG_CPU_COUNTERS
	mt_cpu_down(cdp);
#endif /* CONFIG_CPU_COUNTERS */
#if KPERF
	kptimer_stop_curcpu();
#endif /* KPERF */

	/*
	 * Open an interrupt window
	 * and ensure any pending IPI or timer is serviced
	 */
	mp_disable_preemption();
	ml_set_interrupts_enabled(TRUE);

	/* Spin until all posted signals are handled (or the deadline
	 * register indicates no timer remains armed). */
	while (cdp->cpu_signals && x86_lcpu()->rtcDeadline != EndOfAllTime) {
		cpu_pause();
	}
	/*
	 * Ensure there's no remaining timer deadline set
	 * - AICPM may have left one active.
	 */
	setPop(0);

	ml_set_interrupts_enabled(FALSE);
	mp_enable_preemption();

	KERNEL_DEBUG_CONSTANT(
		TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_END,
		0, 0, 0, 0, 0);
}
1615
/* When set, cpus are bounced into PM "safe" mode around debugger entry
 * (see pmSafeMode() calls in mp_kdp_enter()/mp_kdp_exit()). */
int pmsafe_debug = 1;

#if MACH_KDP
volatile boolean_t mp_kdp_trap = FALSE;  /* TRUE while a debugger rendezvous is in effect */
volatile boolean_t mp_kdp_is_NMI = FALSE;
volatile unsigned long mp_kdp_ncpus;     /* count of cpus checked into mp_kdp_wait() */
boolean_t mp_kdp_state;                  /* debugger cpu's saved interrupt state */
1623
1624
1625 void
mp_kdp_enter(boolean_t proceed_on_failure)1626 mp_kdp_enter(boolean_t proceed_on_failure)
1627 {
1628 unsigned int cpu;
1629 unsigned int ncpus = 0;
1630 unsigned int my_cpu;
1631 uint64_t tsc_timeout;
1632
1633 DBG("mp_kdp_enter()\n");
1634
1635 /*
1636 * Here to enter the debugger.
1637 * In case of races, only one cpu is allowed to enter kdp after
1638 * stopping others.
1639 */
1640 mp_kdp_state = ml_set_interrupts_enabled(FALSE);
1641 my_cpu = cpu_number();
1642
1643 if (my_cpu == (unsigned) debugger_cpu) {
1644 kprintf("\n\nRECURSIVE DEBUGGER ENTRY DETECTED\n\n");
1645 kdp_reset();
1646 return;
1647 }
1648
1649 uint64_t start_time = cpu_datap(my_cpu)->debugger_entry_time = mach_absolute_time();
1650 int locked = 0;
1651 while (!locked || mp_kdp_trap) {
1652 if (locked) {
1653 simple_unlock(&x86_topo_lock);
1654 }
1655 if (proceed_on_failure) {
1656 if (mach_absolute_time() - start_time > 500000000ll) {
1657 paniclog_append_noflush("mp_kdp_enter() can't get x86_topo_lock! Debugging anyway! #YOLO\n");
1658 break;
1659 }
1660 locked = simple_lock_try(&x86_topo_lock, LCK_GRP_NULL);
1661 if (!locked) {
1662 cpu_pause();
1663 }
1664 } else {
1665 mp_safe_spin_lock(&x86_topo_lock);
1666 locked = TRUE;
1667 }
1668
1669 if (locked && mp_kdp_trap) {
1670 simple_unlock(&x86_topo_lock);
1671 DBG("mp_kdp_enter() race lost\n");
1672 #if MACH_KDP
1673 mp_kdp_wait(TRUE, FALSE);
1674 #endif
1675 locked = FALSE;
1676 }
1677 }
1678
1679 if (pmsafe_debug && !kdp_snapshot) {
1680 pmSafeMode(¤t_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
1681 }
1682
1683 debugger_cpu = my_cpu;
1684 ncpus = 1;
1685 atomic_incl((volatile long *)&mp_kdp_ncpus, 1);
1686 mp_kdp_trap = TRUE;
1687 debugger_entry_time = cpu_datap(my_cpu)->debugger_entry_time;
1688
1689 /*
1690 * Deliver a nudge to other cpus, counting how many
1691 */
1692 DBG("mp_kdp_enter() signaling other processors\n");
1693 if (force_immediate_debugger_NMI == FALSE) {
1694 for (cpu = 0; cpu < real_ncpus; cpu++) {
1695 if (cpu == my_cpu || !cpu_is_running(cpu)) {
1696 continue;
1697 }
1698 ncpus++;
1699 i386_signal_cpu(cpu, MP_KDP, ASYNC);
1700 }
1701 /*
1702 * Wait other processors to synchronize
1703 */
1704 DBG("mp_kdp_enter() waiting for (%d) processors to suspend\n", ncpus);
1705
1706 /*
1707 * This timeout is rather arbitrary; we don't want to NMI
1708 * processors that are executing at potentially
1709 * "unsafe-to-interrupt" points such as the trampolines,
1710 * but neither do we want to lose state by waiting too long.
1711 */
1712 tsc_timeout = rdtsc64() + (LockTimeOutTSC);
1713
1714 while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
1715 /*
1716 * A TLB shootdown request may be pending--this would
1717 * result in the requesting processor waiting in
1718 * PMAP_UPDATE_TLBS() until this processor deals with it.
1719 * Process it, so it can now enter mp_kdp_wait()
1720 */
1721 handle_pending_TLB_flushes();
1722 cpu_pause();
1723 }
1724 /* If we've timed out, and some processor(s) are still unresponsive,
1725 * interrupt them with an NMI via the local APIC, iff a panic is
1726 * in progress.
1727 */
1728 if (panic_active()) {
1729 NMIPI_enable(TRUE);
1730 }
1731 if (mp_kdp_ncpus != ncpus) {
1732 unsigned int wait_cycles = 0;
1733 if (proceed_on_failure) {
1734 paniclog_append_noflush("mp_kdp_enter() timed-out on cpu %d, NMI-ing\n", my_cpu);
1735 } else {
1736 DBG("mp_kdp_enter() timed-out on cpu %d, NMI-ing\n", my_cpu);
1737 }
1738 for (cpu = 0; cpu < real_ncpus; cpu++) {
1739 if (cpu == my_cpu || !cpu_is_running(cpu)) {
1740 continue;
1741 }
1742 if (cpu_signal_pending(cpu, MP_KDP)) {
1743 cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
1744 cpu_NMI_interrupt(cpu);
1745 }
1746 }
1747 /* Wait again for the same timeout */
1748 tsc_timeout = rdtsc64() + (LockTimeOutTSC);
1749 while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
1750 handle_pending_TLB_flushes();
1751 cpu_pause();
1752 ++wait_cycles;
1753 }
1754 if (mp_kdp_ncpus != ncpus) {
1755 paniclog_append_noflush("mp_kdp_enter() NMI pending on cpus:");
1756 for (cpu = 0; cpu < real_ncpus; cpu++) {
1757 if (cpu_is_running(cpu) && !cpu_datap(cpu)->cpu_NMI_acknowledged) {
1758 paniclog_append_noflush(" %d", cpu);
1759 }
1760 }
1761 paniclog_append_noflush("\n");
1762 if (proceed_on_failure) {
1763 paniclog_append_noflush("mp_kdp_enter() timed-out during %s wait after NMI;"
1764 "expected %u acks but received %lu after %u loops in %llu ticks\n",
1765 (locked ? "locked" : "unlocked"), ncpus, mp_kdp_ncpus, wait_cycles, LockTimeOutTSC);
1766 } else {
1767 panic("mp_kdp_enter() timed-out during %s wait after NMI;"
1768 "expected %u acks but received %lu after %u loops in %llu ticks",
1769 (locked ? "locked" : "unlocked"), ncpus, mp_kdp_ncpus, wait_cycles, LockTimeOutTSC);
1770 }
1771 }
1772 }
1773 } else if (NMI_panic_reason != PTE_CORRUPTION) { /* In the pte corruption case, the detecting CPU has already NMIed other CPUs */
1774 for (cpu = 0; cpu < real_ncpus; cpu++) {
1775 if (cpu == my_cpu || !cpu_is_running(cpu)) {
1776 continue;
1777 }
1778 cpu_NMI_interrupt(cpu);
1779 }
1780 }
1781
1782 if (locked) {
1783 simple_unlock(&x86_topo_lock);
1784 }
1785
1786 DBG("mp_kdp_enter() %d processors done %s\n",
1787 (int)mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out");
1788
1789 postcode(MP_KDP_ENTER);
1790 }
1791
1792 boolean_t
mp_kdp_all_cpus_halted()1793 mp_kdp_all_cpus_halted()
1794 {
1795 unsigned int ncpus = 0, cpu = 0, my_cpu = 0;
1796
1797 my_cpu = cpu_number();
1798 ncpus = 1; /* current CPU */
1799 for (cpu = 0; cpu < real_ncpus; cpu++) {
1800 if (cpu == my_cpu || !cpu_is_running(cpu)) {
1801 continue;
1802 }
1803 ncpus++;
1804 }
1805
1806 return mp_kdp_ncpus == ncpus;
1807 }
1808
1809 static boolean_t
cpu_signal_pending(int cpu,mp_event_t event)1810 cpu_signal_pending(int cpu, mp_event_t event)
1811 {
1812 volatile int *signals = &cpu_datap(cpu)->cpu_signals;
1813 boolean_t retval = FALSE;
1814
1815 if (i_bit(event, signals)) {
1816 retval = TRUE;
1817 }
1818 return retval;
1819 }
1820
1821 long
kdp_x86_xcpu_invoke(const uint16_t lcpu,kdp_x86_xcpu_func_t func,void * arg0,void * arg1,uint64_t timeout)1822 kdp_x86_xcpu_invoke(const uint16_t lcpu, kdp_x86_xcpu_func_t func,
1823 void *arg0, void *arg1, uint64_t timeout)
1824 {
1825 uint64_t now;
1826
1827 if (lcpu > (real_ncpus - 1)) {
1828 return -1;
1829 }
1830
1831 if (func == NULL) {
1832 return -1;
1833 }
1834
1835 kdp_xcpu_call_func.func = func;
1836 kdp_xcpu_call_func.ret = -1;
1837 kdp_xcpu_call_func.arg0 = arg0;
1838 kdp_xcpu_call_func.arg1 = arg1;
1839 kdp_xcpu_call_func.cpu = lcpu;
1840 DBG("Invoking function %p on CPU %d\n", func, (int32_t)lcpu);
1841 now = mach_absolute_time();
1842 while (kdp_xcpu_call_func.cpu != KDP_XCPU_NONE &&
1843 (timeout == 0 || (mach_absolute_time() - now) < timeout)) {
1844 cpu_pause();
1845 }
1846 return kdp_xcpu_call_func.ret;
1847 }
1848
/*
 * Polled from mp_kdp_wait(): if this cpu is the target of a staged
 * kdp cross-cpu call, run it and publish the result.
 * Note: .ret is written before .cpu is reset to KDP_XCPU_NONE, since
 * the requester in kdp_x86_xcpu_invoke() spins on .cpu and then
 * reads .ret.
 */
static void
kdp_x86_xcpu_poll(void)
{
	if ((uint16_t)cpu_number() == kdp_xcpu_call_func.cpu) {
		kdp_xcpu_call_func.ret =
		    kdp_xcpu_call_func.func(kdp_xcpu_call_func.arg0,
		        kdp_xcpu_call_func.arg1,
		        cpu_number());
		kdp_xcpu_call_func.cpu = KDP_XCPU_NONE;
	}
}
1860
/*
 * Park this cpu while another cpu owns the debugger. Checks in by
 * incrementing mp_kdp_ncpus, spins until mp_kdp_trap clears (never
 * returns if entered via NMI with isNMI TRUE), then checks out.
 * While parked, services TLB flushes (if 'flush') and debugger
 * cross-cpu calls.
 */
static void
mp_kdp_wait(boolean_t flush, boolean_t isNMI)
{
	DBG("mp_kdp_wait()\n");

	current_cpu_datap()->debugger_ipi_time = mach_absolute_time();
#if CONFIG_MCA
	/* If we've trapped due to a machine-check, save MCA registers */
	mca_check_save();
#endif

	atomic_incl((volatile long *)&mp_kdp_ncpus, 1);
	while (mp_kdp_trap || (isNMI == TRUE)) {
		/*
		 * A TLB shootdown request may be pending--this would result
		 * in the requesting processor waiting in PMAP_UPDATE_TLBS()
		 * until this processor handles it.
		 * Process it, so it can now enter mp_kdp_wait()
		 */
		if (flush) {
			handle_pending_TLB_flushes();
		}

		kdp_x86_xcpu_poll();
		cpu_pause();
	}

	atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
	DBG("mp_kdp_wait() done\n");
}
1891
1892 void
mp_kdp_exit(void)1893 mp_kdp_exit(void)
1894 {
1895 DBG("mp_kdp_exit()\n");
1896 debugger_cpu = -1;
1897 atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
1898
1899 debugger_exit_time = mach_absolute_time();
1900
1901 mp_kdp_trap = FALSE;
1902 mfence();
1903
1904 /* Wait other processors to stop spinning. XXX needs timeout */
1905 DBG("mp_kdp_exit() waiting for processors to resume\n");
1906 while (mp_kdp_ncpus > 0) {
1907 /*
1908 * a TLB shootdown request may be pending... this would result in the requesting
1909 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it.
1910 * Process it, so it can now enter mp_kdp_wait()
1911 */
1912 handle_pending_TLB_flushes();
1913
1914 cpu_pause();
1915 }
1916
1917 if (pmsafe_debug && !kdp_snapshot) {
1918 pmSafeMode(¤t_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
1919 }
1920
1921 debugger_exit_time = mach_absolute_time();
1922
1923 DBG("mp_kdp_exit() done\n");
1924 (void) ml_set_interrupts_enabled(mp_kdp_state);
1925 postcode(MP_KDP_EXIT);
1926 }
1927
1928 #endif /* MACH_KDP */
1929
1930 boolean_t
mp_recent_debugger_activity(void)1931 mp_recent_debugger_activity(void)
1932 {
1933 uint64_t abstime = mach_absolute_time();
1934 return ((abstime - debugger_entry_time) < LastDebuggerEntryAllowance) ||
1935 ((abstime - debugger_exit_time) < LastDebuggerEntryAllowance);
1936 }
1937
1938 /*ARGSUSED*/
1939 void
init_ast_check(__unused processor_t processor)1940 init_ast_check(
1941 __unused processor_t processor)
1942 {
1943 }
1944
1945 void
cause_ast_check(processor_t processor)1946 cause_ast_check(
1947 processor_t processor)
1948 {
1949 int cpu = processor->cpu_id;
1950
1951 if (cpu != cpu_number()) {
1952 i386_signal_cpu(cpu, MP_AST, ASYNC);
1953 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, 1, 0, 0, 0);
1954 }
1955 }
1956
1957 void
slave_machine_init(void * param)1958 slave_machine_init(void *param)
1959 {
1960 /*
1961 * Here in process context, but with interrupts disabled.
1962 */
1963 DBG("slave_machine_init() CPU%d\n", get_cpu_number());
1964
1965 if (param == FULL_SLAVE_INIT) {
1966 /*
1967 * Cold start
1968 */
1969 clock_init();
1970 }
1971 cpu_machine_init(); /* Interrupts enabled hereafter */
1972 }
1973
/* Provide a real function for cpu_number(); the macro form is
 * removed just above so this definition can exist. */
#undef cpu_number
int
cpu_number(void)
{
	return get_cpu_number();
}
1980
1981 vm_offset_t
current_percpu_base(void)1982 current_percpu_base(void)
1983 {
1984 return get_current_percpu_base();
1985 }
1986
1987 vm_offset_t
other_percpu_base(int cpu)1988 other_percpu_base(int cpu)
1989 {
1990 return cpu_datap(cpu)->cpu_pcpu_base;
1991 }
1992
1993 static void
cpu_prewarm_init()1994 cpu_prewarm_init()
1995 {
1996 int i;
1997
1998 simple_lock_init(&cpu_warm_lock, 0);
1999 queue_init(&cpu_warm_call_list);
2000 for (i = 0; i < NUM_CPU_WARM_CALLS; i++) {
2001 enqueue_head(&cpu_warm_call_list, (queue_entry_t)&cpu_warm_call_arr[i]);
2002 }
2003 }
2004
2005 static timer_call_t
grab_warm_timer_call()2006 grab_warm_timer_call()
2007 {
2008 spl_t x;
2009 timer_call_t call = NULL;
2010
2011 x = splsched();
2012 simple_lock(&cpu_warm_lock, LCK_GRP_NULL);
2013 if (!queue_empty(&cpu_warm_call_list)) {
2014 call = (timer_call_t) dequeue_head(&cpu_warm_call_list);
2015 }
2016 simple_unlock(&cpu_warm_lock);
2017 splx(x);
2018
2019 return call;
2020 }
2021
2022 static void
free_warm_timer_call(timer_call_t call)2023 free_warm_timer_call(timer_call_t call)
2024 {
2025 spl_t x;
2026
2027 x = splsched();
2028 simple_lock(&cpu_warm_lock, LCK_GRP_NULL);
2029 enqueue_head(&cpu_warm_call_list, (queue_entry_t)call);
2030 simple_unlock(&cpu_warm_lock);
2031 splx(x);
2032 }
2033
2034 /*
2035 * Runs in timer call context (interrupts disabled).
2036 */
2037 static void
cpu_warm_timer_call_func(timer_call_param_t p0,__unused timer_call_param_t p1)2038 cpu_warm_timer_call_func(
2039 timer_call_param_t p0,
2040 __unused timer_call_param_t p1)
2041 {
2042 free_warm_timer_call((timer_call_t)p0);
2043 return;
2044 }
2045
2046 /*
2047 * Runs with interrupts disabled on the CPU we wish to warm (i.e. CPU 0).
2048 */
2049 static void
_cpu_warm_setup(void * arg)2050 _cpu_warm_setup(
2051 void *arg)
2052 {
2053 cpu_warm_data_t cwdp = (cpu_warm_data_t)arg;
2054
2055 timer_call_enter(cwdp->cwd_call, cwdp->cwd_deadline, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL);
2056 cwdp->cwd_result = 0;
2057
2058 return;
2059 }
2060
2061 /*
2062 * Not safe to call with interrupts disabled.
2063 */
2064 kern_return_t
ml_interrupt_prewarm(uint64_t deadline)2065 ml_interrupt_prewarm(
2066 uint64_t deadline)
2067 {
2068 struct cpu_warm_data cwd;
2069 timer_call_t call;
2070 cpu_t ct;
2071
2072 if (ml_get_interrupts_enabled() == FALSE) {
2073 panic("%s: Interrupts disabled?", __FUNCTION__);
2074 }
2075
2076 /*
2077 * If the platform doesn't need our help, say that we succeeded.
2078 */
2079 if (!ml_get_interrupt_prewake_applicable()) {
2080 return KERN_SUCCESS;
2081 }
2082
2083 /*
2084 * Grab a timer call to use.
2085 */
2086 call = grab_warm_timer_call();
2087 if (call == NULL) {
2088 return KERN_RESOURCE_SHORTAGE;
2089 }
2090
2091 timer_call_setup(call, cpu_warm_timer_call_func, call);
2092 cwd.cwd_call = call;
2093 cwd.cwd_deadline = deadline;
2094 cwd.cwd_result = 0;
2095
2096 /*
2097 * For now, non-local interrupts happen on the master processor.
2098 */
2099 ct = mp_cpus_call(cpu_to_cpumask(master_cpu), SYNC, _cpu_warm_setup, &cwd);
2100 if (ct == 0) {
2101 free_warm_timer_call(call);
2102 return KERN_FAILURE;
2103 } else {
2104 return cwd.cwd_result;
2105 }
2106 }
2107
2108 #if DEBUG || DEVELOPMENT
2109 void
kernel_spin(uint64_t spin_ns)2110 kernel_spin(uint64_t spin_ns)
2111 {
2112 boolean_t istate;
2113 uint64_t spin_abs;
2114 uint64_t deadline;
2115 cpu_data_t *cdp;
2116
2117 kprintf("kernel_spin(%llu) spinning uninterruptibly\n", spin_ns);
2118 istate = ml_set_interrupts_enabled(FALSE);
2119 cdp = current_cpu_datap();
2120 nanoseconds_to_absolutetime(spin_ns, &spin_abs);
2121
2122 /* Fake interrupt handler entry for testing mp_interrupt_watchdog() */
2123 cdp->cpu_int_event_time = mach_absolute_time();
2124 cdp->cpu_int_state = (void *) USER_STATE(current_thread());
2125
2126 deadline = mach_absolute_time() + spin_ns;
2127 while (mach_absolute_time() < deadline) {
2128 cpu_pause();
2129 }
2130
2131 cdp->cpu_int_event_time = 0;
2132 cdp->cpu_int_state = NULL;
2133
2134 ml_set_interrupts_enabled(istate);
2135 kprintf("kernel_spin() continuing\n");
2136 }
2137
/*
 * Called from the scheduler's maintenance thread,
 * scan running processors for long-running ISRs and:
 * - panic if longer than LockTimeOut, or
 * - log if more than a quantum.
 */
void
mp_interrupt_watchdog(void)
{
	cpu_t cpu;
	boolean_t intrs_enabled = FALSE;
	uint16_t cpu_int_num;
	uint64_t cpu_int_event_time;
	uint64_t cpu_rip;
	uint64_t cpu_int_duration;
	uint64_t now;
	x86_saved_state_t *cpu_int_state;

	if (__improbable(!mp_interrupt_watchdog_enabled)) {
		return;
	}

	intrs_enabled = ml_set_interrupts_enabled(FALSE);
	now = mach_absolute_time();
	/*
	 * While timeouts are not suspended,
	 * check all other processors for long outstanding interrupt handling.
	 */
	for (cpu = 0;
	    cpu < (cpu_t) real_ncpus && !machine_timeout_suspended();
	    cpu++) {
		if ((cpu == (cpu_t) cpu_number()) ||
		    (!cpu_is_running(cpu))) {
			continue;
		}
		/* A zero event time means no interrupt is in progress. */
		cpu_int_event_time = cpu_datap(cpu)->cpu_int_event_time;
		if (cpu_int_event_time == 0) {
			continue;
		}
		if (__improbable(now < cpu_int_event_time)) {
			continue;       /* skip due to inter-processor skew */
		}
		cpu_int_state = cpu_datap(cpu)->cpu_int_state;
		if (__improbable(cpu_int_state == NULL)) {
			/* The interrupt may have been dismissed */
			continue;
		}

		/* Here with a cpu handling an interrupt */

		cpu_int_duration = now - cpu_int_event_time;
		if (__improbable(cpu_int_duration > LockTimeOut)) {
			/* Hard limit exceeded: NMI the offender and panic. */
			cpu_int_num = saved_state64(cpu_int_state)->isf.trapno;
			cpu_rip = saved_state64(cpu_int_state)->isf.rip;
			vector_timed_out = cpu_int_num;
			NMIPI_panic(cpu_to_cpumask(cpu), INTERRUPT_WATCHDOG);
			panic("Interrupt watchdog, "
			    "cpu: %d interrupt: 0x%x time: %llu..%llu state: %p RIP: 0x%llx",
			    cpu, cpu_int_num, cpu_int_event_time, now, cpu_int_state, cpu_rip);
			/* NOT REACHED */
		} else if (__improbable(cpu_int_duration > (uint64_t) std_quantum)) {
			/* Soft limit exceeded: log once and bail out early. */
			mp_interrupt_watchdog_events++;
			cpu_int_num = saved_state64(cpu_int_state)->isf.trapno;
			cpu_rip = saved_state64(cpu_int_state)->isf.rip;
			ml_set_interrupts_enabled(intrs_enabled);
			printf("Interrupt watchdog, "
			    "cpu: %d interrupt: 0x%x time: %llu..%llu RIP: 0x%llx\n",
			    cpu, cpu_int_num, cpu_int_event_time, now, cpu_rip);
			return;
		}
	}

	ml_set_interrupts_enabled(intrs_enabled);
}
2212 #endif
2213