/*
 * Copyright (c) 2004-2011 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * CPU-specific power management support.
 *
 * Implements the "wrappers" to the KEXT.
 */
#include <i386/asm.h>
#include <i386/machine_cpu.h>
#include <i386/mp.h>
#include <i386/machine_routines.h>
#include <i386/proc_reg.h>
#include <i386/pmap.h>
#include <i386/misc_protos.h>
#include <kern/machine.h>
#include <kern/pms.h>
#include <kern/processor.h>
#include <kern/timer_queue.h>
#include <i386/cpu_threads.h>
#include <i386/pmCPU.h>
#include <i386/cpuid.h>
#include <i386/rtclock_protos.h>
#include <kern/sched_prim.h>
#include <i386/lapic.h>
#include <i386/pal_routines.h>
#include <sys/kdebug.h>
#include <i386/tsc.h>

#include <kern/sched_urgency.h>

extern int disableConsoleOutput;

#define DELAY_UNSET             0xFFFFFFFFFFFFFFFFULL

uint64_t cpu_itime_bins[CPU_ITIME_BINS] = {
	16 * NSEC_PER_USEC, 32 * NSEC_PER_USEC, 64 * NSEC_PER_USEC,
	128 * NSEC_PER_USEC, 256 * NSEC_PER_USEC, 512 * NSEC_PER_USEC,
	1024 * NSEC_PER_USEC, 2048 * NSEC_PER_USEC, 4096 * NSEC_PER_USEC,
	8192 * NSEC_PER_USEC, 16384 * NSEC_PER_USEC, 32768 * NSEC_PER_USEC
};
uint64_t *cpu_rtime_bins = &cpu_itime_bins[0];

/*
 * The following is set when the KEXT loads and initializes.
 */
pmDispatch_t    *pmDispatch     = NULL;

uint32_t                pmInitDone              = 0;
static boolean_t        earlyTopology           = FALSE;
static uint64_t         earlyMaxBusDelay        = DELAY_UNSET;
static uint64_t         earlyMaxIntDelay        = DELAY_UNSET;

/*
 * Initialize the Cstate change code.
 */
void
power_management_init(void)
{
	if (pmDispatch != NULL && pmDispatch->cstateInit != NULL) {
		(*pmDispatch->cstateInit)();
	}
}

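/*
 * Bump the histogram bin whose upper bound first exceeds "interval".
 * For example, with the cpu_itime_bins boundaries above, an 80us idle
 * period lands in the 128us bin; intervals beyond the last boundary
 * (~32.8ms) fall through the loop and are deliberately left uncounted.
 */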
static inline void
machine_classify_interval(uint64_t interval, uint64_t *bins, uint64_t *binvals, uint32_t nbins)
{
	uint32_t i;
	for (i = 0; i < nbins; i++) {
		if (interval < binvals[i]) {
			bins[i]++;
			break;
		}
	}
}

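/*
 * Hard-deadline proximity threshold for processing pending timers on the
 * idle-entry path.  It is compared directly against mach_absolute_time()
 * deltas; on x86, abstime units are nanoseconds, so this is ~5ms.
 */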
uint64_t        idle_pending_timers_processed;
uint32_t        idle_entry_timer_processing_hdeadline_threshold = 5000000;

/*
 * Called when the CPU is idle.  It calls into the power management kext
 * to determine the best way to idle the CPU.
 */
void
machine_idle(void)
{
	cpu_data_t              *my_cpu         = current_cpu_datap();
	__unused uint32_t       cnum = my_cpu->cpu_number;
	uint64_t                ctime, rtime, itime;
#if CST_DEMOTION_DEBUG
	processor_t             cproc = my_cpu->cpu_processor;
	uint64_t                cwakeups = my_cpu->cpu_wakeups_issued_total;
#endif /* CST_DEMOTION_DEBUG */
	uint64_t esdeadline, ehdeadline;
	boolean_t do_process_pending_timers = FALSE;

	ctime = mach_absolute_time();
	esdeadline = my_cpu->rtclock_timer.queue.earliest_soft_deadline;
	ehdeadline = my_cpu->rtclock_timer.deadline;
	/* Determine if pending timers exist */
	if ((ctime >= esdeadline) && (ctime < ehdeadline) &&
	    ((ehdeadline - ctime) < idle_entry_timer_processing_hdeadline_threshold)) {
		idle_pending_timers_processed++;
		do_process_pending_timers = TRUE;
		goto machine_idle_exit;
	} else {
		TCOAL_DEBUG(0xCCCC0000, ctime, my_cpu->rtclock_timer.queue.earliest_soft_deadline, my_cpu->rtclock_timer.deadline, idle_pending_timers_processed, 0);
	}

	my_cpu->lcpu.state = LCPU_IDLE;
	DBGLOG(cpu_handle, cpu_number(), MP_IDLE);
	MARK_CPU_IDLE(cnum);

	rtime = ctime - my_cpu->cpu_ixtime;

	my_cpu->cpu_rtime_total += rtime;
	machine_classify_interval(rtime, &my_cpu->cpu_rtimes[0], &cpu_rtime_bins[0], CPU_RTIME_BINS);
#if CST_DEMOTION_DEBUG
	uint32_t cl = 0, ch = 0;
	uint64_t c3res, c6res, c7res;
	rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch);
	c3res = ((uint64_t)ch << 32) | cl;
	rdmsr_carefully(MSR_IA32_CORE_C6_RESIDENCY, &cl, &ch);
	c6res = ((uint64_t)ch << 32) | cl;
	rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch);
	c7res = ((uint64_t)ch << 32) | cl;
#endif

	if (pmInitDone) {
		/*
		 * Handle the case where ml_set_maxbusdelay() or ml_set_maxintdelay()
		 * was called prior to the CPU PM kext being registered.  We do
		 * this here since we know the values will first be used at this
		 * point: idle is where the decisions using them are made.
		 */
		if (earlyMaxBusDelay != DELAY_UNSET) {
			ml_set_maxbusdelay((uint32_t)(earlyMaxBusDelay & 0xFFFFFFFF));
		}
		if (earlyMaxIntDelay != DELAY_UNSET) {
			ml_set_maxintdelay(earlyMaxIntDelay);
		}
	}

	if (pmInitDone
	    && pmDispatch != NULL
	    && pmDispatch->MachineIdle != NULL) {
		(*pmDispatch->MachineIdle)(0x7FFFFFFFFFFFFFFFULL);
	} else {
		/*
		 * If no power management, re-enable interrupts and halt.
		 * This will keep the CPU from spinning through the scheduler
		 * and will allow at least some minimal power savings (but it
		 * may cause problems in some MP configurations w.r.t. the APIC
		 * stopping during a GV3 transition).
		 */
		pal_hlt();
		/* Once woken, re-disable interrupts. */
		pal_cli();
	}

	/*
	 * Mark the CPU as running again.
	 */
	MARK_CPU_ACTIVE(cnum);
	DBGLOG(cpu_handle, cnum, MP_UNIDLE);
	my_cpu->lcpu.state = LCPU_RUN;
	uint64_t ixtime = my_cpu->cpu_ixtime = mach_absolute_time();
	itime = ixtime - ctime;
	my_cpu->cpu_idle_exits++;
	my_cpu->cpu_itime_total += itime;
	machine_classify_interval(itime, &my_cpu->cpu_itimes[0], &cpu_itime_bins[0], CPU_ITIME_BINS);
#if CST_DEMOTION_DEBUG
	cl = ch = 0;
	rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch);
	c3res = (((uint64_t)ch << 32) | cl) - c3res;
	rdmsr_carefully(MSR_IA32_CORE_C6_RESIDENCY, &cl, &ch);
	c6res = (((uint64_t)ch << 32) | cl) - c6res;
	rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch);
	c7res = (((uint64_t)ch << 32) | cl) - c7res;

	uint64_t ndelta = itime - tmrCvt(c3res + c6res + c7res, tscFCvtt2n);
	KERNEL_DEBUG_CONSTANT(0xcead0000, ndelta, itime, c7res, c6res, c3res);
	if ((itime > 1000000) && (ndelta > 250000)) {
		KERNEL_DEBUG_CONSTANT(0xceae0000, ndelta, itime, c7res, c6res, c3res);
	}
#endif

machine_idle_exit:
	/*
	 * Re-enable interrupts.
	 */
	pal_sti();

	if (do_process_pending_timers) {
		TCOAL_DEBUG(0xBBBB0000 | DBG_FUNC_START, ctime, esdeadline, ehdeadline, idle_pending_timers_processed, 0);

		/* Adjust to reflect that this isn't truly a package idle exit */
		__sync_fetch_and_sub(&my_cpu->lcpu.package->num_idle, 1);
		lapic_timer_swi(); /* Trigger software timer interrupt */
		__sync_fetch_and_add(&my_cpu->lcpu.package->num_idle, 1);

		TCOAL_DEBUG(0xBBBB0000 | DBG_FUNC_END, ctime, esdeadline, idle_pending_timers_processed, 0, 0);
	}
#if CST_DEMOTION_DEBUG
	uint64_t nwakeups = my_cpu->cpu_wakeups_issued_total;

	if ((nwakeups == cwakeups) && (topoParms.nLThreadsPerPackage == my_cpu->lcpu.package->num_idle)) {
		KERNEL_DEBUG_CONSTANT(0xceaa0000, cwakeups, 0, 0, 0, 0);
	}
#endif
}
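
/*
 * Note on interrupt state: machine_idle() is entered with interrupts
 * disabled and re-enables them via pal_sti() on every exit path above,
 * whether the idle period was delegated to the PM kext or spent in a
 * plain hlt.
 */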

/*
 * Called when the CPU is to be halted.  It will choose the best C-State
 * to be in.
 */
void
pmCPUHalt(uint32_t reason)
{
	cpu_data_t  *cpup   = current_cpu_datap();

	switch (reason) {
	case PM_HALT_DEBUG:
		cpup->lcpu.state = LCPU_PAUSE;
		pal_stop_cpu(FALSE);
		break;

	case PM_HALT_PANIC:
		cpup->lcpu.state = LCPU_PAUSE;
		pal_stop_cpu(TRUE);
		break;

	case PM_HALT_NORMAL:
	case PM_HALT_SLEEP:
	default:
		pal_cli();

		if (pmInitDone
		    && pmDispatch != NULL
		    && pmDispatch->pmCPUHalt != NULL) {
			/*
			 * Halt the CPU (and put it in a low power state).
			 */
			(*pmDispatch->pmCPUHalt)();

			/*
			 * We've exited halt, so get the CPU schedulable again:
			 * - by calling the fast init routine for a slave, or
			 * - by returning if we're the master processor.
			 */
			if (cpup->cpu_number != master_cpu) {
				i386_init_slave_fast();
				panic("init_slave_fast returned");
			}
		} else {
			/*
			 * If there is no power management and a processor is taken
			 * off-line, write back and invalidate the cache and halt it
			 * (it will not be able to be brought back on-line without
			 * resetting the CPU).
			 */
			__asm__ volatile ("wbinvd");
			cpup->lcpu.state = LCPU_HALT;
			pal_stop_cpu(FALSE);

			panic("back from Halt");
		}

		break;
	}
}

void
pmMarkAllCPUsOff(void)
{
	if (pmInitDone
	    && pmDispatch != NULL
	    && pmDispatch->markAllCPUsOff != NULL) {
		(*pmDispatch->markAllCPUsOff)();
	}
}

static void
pmInitComplete(void)
{
	if (earlyTopology
	    && pmDispatch != NULL
	    && pmDispatch->pmCPUStateInit != NULL) {
		(*pmDispatch->pmCPUStateInit)();
		earlyTopology = FALSE;
	}
	pmInitDone = 1;
}

x86_lcpu_t *
pmGetLogicalCPU(int cpu)
{
	return cpu_to_lcpu(cpu);
}

x86_lcpu_t *
pmGetMyLogicalCPU(void)
{
	cpu_data_t  *cpup   = current_cpu_datap();

	return &cpup->lcpu;
}

static x86_core_t *
pmGetCore(int cpu)
{
	return cpu_to_core(cpu);
}

static x86_core_t *
pmGetMyCore(void)
{
	cpu_data_t  *cpup   = current_cpu_datap();

	return cpup->lcpu.core;
}

static x86_die_t *
pmGetDie(int cpu)
{
	return cpu_to_die(cpu);
}

static x86_die_t *
pmGetMyDie(void)
{
	cpu_data_t  *cpup   = current_cpu_datap();

	return cpup->lcpu.die;
}

static x86_pkg_t *
pmGetPackage(int cpu)
{
	return cpu_to_package(cpu);
}

static x86_pkg_t *
pmGetMyPackage(void)
{
	cpu_data_t  *cpup   = current_cpu_datap();

	return cpup->lcpu.package;
}

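/*
 * Acquire (lock != 0) or release (lock == 0) the x86 topology spinlock on
 * behalf of the PM kext.  Exported below as callbacks->LockCPUTopology, so
 * a caller brackets a topology walk with LockCPUTopology(1) ...
 * LockCPUTopology(0).
 */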
static void
pmLockCPUTopology(int lock)
{
	if (lock) {
		mp_safe_spin_lock(&x86_topo_lock);
	} else {
		simple_unlock(&x86_topo_lock);
	}
}

/*
 * Called to get the next deadline that has been set by the
 * power management code.
 * Note: a return of 0 from AICPM and this routine signifies
 * that no deadline is set.
 */
uint64_t
pmCPUGetDeadline(cpu_data_t *cpu)
{
	uint64_t    deadline        = 0;

	if (pmInitDone
	    && pmDispatch != NULL
	    && pmDispatch->GetDeadline != NULL) {
		deadline = (*pmDispatch->GetDeadline)(&cpu->lcpu);
	}

	return deadline;
}

/*
 * Called to determine if the supplied deadline or the power management
 * deadline is sooner.  Returns whichever one is first.
 */
uint64_t
pmCPUSetDeadline(cpu_data_t *cpu, uint64_t deadline)
{
	if (pmInitDone
	    && pmDispatch != NULL
	    && pmDispatch->SetDeadline != NULL) {
		deadline = (*pmDispatch->SetDeadline)(&cpu->lcpu, deadline);
	}

	return deadline;
}

/*
 * Called when a power management deadline expires.
 */
void
pmCPUDeadline(cpu_data_t *cpu)
{
	if (pmInitDone
	    && pmDispatch != NULL
	    && pmDispatch->Deadline != NULL) {
		(*pmDispatch->Deadline)(&cpu->lcpu);
	}
}

/*
 * Called to get a CPU out of idle.
 */
boolean_t
pmCPUExitIdle(cpu_data_t *cpu)
{
	boolean_t           do_ipi;

	if (pmInitDone
	    && pmDispatch != NULL
	    && pmDispatch->exitIdle != NULL) {
		do_ipi = (*pmDispatch->exitIdle)(&cpu->lcpu);
	} else {
		do_ipi = TRUE;
	}

	return do_ipi;
}

kern_return_t
pmCPUExitHalt(int cpu)
{
	kern_return_t       rc      = KERN_INVALID_ARGUMENT;

	if (pmInitDone
	    && pmDispatch != NULL
	    && pmDispatch->exitHalt != NULL) {
		rc = pmDispatch->exitHalt(cpu_to_lcpu(cpu));
	}

	return rc;
}

kern_return_t
pmCPUExitHaltToOff(int cpu)
{
	kern_return_t       rc      = KERN_SUCCESS;

	if (pmInitDone
	    && pmDispatch != NULL
	    && pmDispatch->exitHaltToOff != NULL) {
		rc = pmDispatch->exitHaltToOff(cpu_to_lcpu(cpu));
	}

	return rc;
}

/*
 * Called to initialize the power management structures for the CPUs.
 */
void
pmCPUStateInit(void)
{
	if (pmDispatch != NULL && pmDispatch->pmCPUStateInit != NULL) {
		(*pmDispatch->pmCPUStateInit)();
	} else {
		earlyTopology = TRUE;
	}
}

/*
 * Called when a CPU is being restarted after being powered off (as in S3).
 */
void
pmCPUMarkRunning(cpu_data_t *cpu)
{
	cpu_data_t  *cpup   = current_cpu_datap();

	if (pmInitDone
	    && pmDispatch != NULL
	    && pmDispatch->markCPURunning != NULL) {
		(*pmDispatch->markCPURunning)(&cpu->lcpu);
	} else {
		cpup->lcpu.state = LCPU_RUN;
	}
}

/*
 * Called to get/set CPU power management state.
 */
int
pmCPUControl(uint32_t cmd, void *datap)
{
	int         rc      = -1;

	if (pmDispatch != NULL
	    && pmDispatch->pmCPUControl != NULL) {
		rc = (*pmDispatch->pmCPUControl)(cmd, datap);
	}

	return rc;
}

/*
 * Called to save the timer state used by power management prior
 * to "sleeping".
 */
void
pmTimerSave(void)
{
	if (pmDispatch != NULL
	    && pmDispatch->pmTimerStateSave != NULL) {
		(*pmDispatch->pmTimerStateSave)();
	}
}

/*
 * Called to restore the timer state used by power management after
 * waking from "sleep".
 */
void
pmTimerRestore(void)
{
	if (pmDispatch != NULL
	    && pmDispatch->pmTimerStateRestore != NULL) {
		(*pmDispatch->pmTimerStateRestore)();
	}
}

/*
 * Set the worst-case time for the C4 to C2 transition.
 * No longer does anything.
 */
void
ml_set_maxsnoop(__unused uint32_t maxdelay)
{
}


/*
 * Get the worst-case time for the C4 to C2 transition.  Returns nanoseconds.
 */
unsigned
ml_get_maxsnoop(void)
{
	uint64_t    max_snoop       = 0;

	if (pmInitDone
	    && pmDispatch != NULL
	    && pmDispatch->getMaxSnoop != NULL) {
		max_snoop = pmDispatch->getMaxSnoop();
	}

	return (unsigned)(max_snoop & 0xffffffff);
}


uint32_t
ml_get_maxbusdelay(void)
{
	uint64_t    max_delay       = 0;

	if (pmInitDone
	    && pmDispatch != NULL
	    && pmDispatch->getMaxBusDelay != NULL) {
		max_delay = pmDispatch->getMaxBusDelay();
	}

	return (uint32_t)(max_delay & 0xffffffff);
}

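/*
 * If the PM kext has not yet registered, the two setters below park the
 * requested value in earlyMaxBusDelay/earlyMaxIntDelay; machine_idle()
 * replays any parked value once pmInitDone is set.
 */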
/*
 * Advertise a memory access latency tolerance of "mdelay" ns.
 */
void
ml_set_maxbusdelay(uint32_t mdelay)
{
	uint64_t    maxdelay        = mdelay;

	if (pmDispatch != NULL
	    && pmDispatch->setMaxBusDelay != NULL) {
		earlyMaxBusDelay = DELAY_UNSET;
		pmDispatch->setMaxBusDelay(maxdelay);
	} else {
		earlyMaxBusDelay = maxdelay;
	}
}

uint64_t
ml_get_maxintdelay(void)
{
	uint64_t    max_delay       = 0;

	if (pmDispatch != NULL
	    && pmDispatch->getMaxIntDelay != NULL) {
		max_delay = pmDispatch->getMaxIntDelay();
	}

	return max_delay;
}

/*
 * Set the maximum delay allowed for an interrupt.
 */
void
ml_set_maxintdelay(uint64_t mdelay)
{
	if (pmDispatch != NULL
	    && pmDispatch->setMaxIntDelay != NULL) {
		earlyMaxIntDelay = DELAY_UNSET;
		pmDispatch->setMaxIntDelay(mdelay);
	} else {
		earlyMaxIntDelay = mdelay;
	}
}

boolean_t
ml_get_interrupt_prewake_applicable(void)
{
	boolean_t applicable = FALSE;

	if (pmInitDone
	    && pmDispatch != NULL
	    && pmDispatch->pmInterruptPrewakeApplicable != NULL) {
		applicable = pmDispatch->pmInterruptPrewakeApplicable();
	}

	return applicable;
}

/*
 * Put a CPU into "safe" mode with respect to power.
 *
 * Some systems cannot operate at a continuous "normal" speed without
 * exceeding the thermal design.  This is called per-CPU to place the
 * CPUs into a "safe" operating mode.
 */
void
pmSafeMode(x86_lcpu_t *lcpu, uint32_t flags)
{
	if (pmDispatch != NULL
	    && pmDispatch->pmCPUSafeMode != NULL) {
		pmDispatch->pmCPUSafeMode(lcpu, flags);
	} else {
		/*
		 * Do something reasonable if the KEXT isn't present.
		 *
		 * We only look at the PAUSE and RESUME flags.  The other flag(s)
		 * will not make any sense without the KEXT, so just ignore them.
		 *
		 * We set the CPU's state to indicate that it's halted.  If this
		 * is the CPU we're currently running on, then spin until the
		 * state becomes non-halted.
		 */
		if (flags & PM_SAFE_FL_PAUSE) {
			lcpu->state = LCPU_PAUSE;
			if (lcpu == x86_lcpu()) {
				while (lcpu->state == LCPU_PAUSE) {
					cpu_pause();
				}
			}
		}

		/*
		 * Clear the halted flag for the specified CPU; that will
		 * get it out of its spin loop.
		 */
		if (flags & PM_SAFE_FL_RESUME) {
			lcpu->state = LCPU_RUN;
		}
	}
}

static uint32_t         saved_run_count = 0;

void
machine_run_count(uint32_t count)
{
	if (pmDispatch != NULL
	    && pmDispatch->pmSetRunCount != NULL) {
		pmDispatch->pmSetRunCount(count);
	} else {
		saved_run_count = count;
	}
}

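/*
 * Translate the pset/preferred arguments into the CPU-number ranges the PM
 * kext understands; -1 stands for "no constraint" on input and, when
 * returned by pmChooseCPU, for "no CPU chosen" (mapped to a NULL
 * processor_t below).
 */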
processor_t
machine_choose_processor(processor_set_t pset,
    processor_t preferred)
{
	int         startCPU;
	int         endCPU;
	int         preferredCPU;
	int         chosenCPU;

	if (!pmInitDone) {
		return preferred;
	}

	if (pset == NULL) {
		startCPU = -1;
		endCPU = -1;
	} else {
		startCPU = pset->cpu_set_low;
		endCPU = pset->cpu_set_hi;
	}

	if (preferred == NULL) {
		preferredCPU = -1;
	} else {
		preferredCPU = preferred->cpu_id;
	}

	if (pmDispatch != NULL
	    && pmDispatch->pmChooseCPU != NULL) {
		chosenCPU = pmDispatch->pmChooseCPU(startCPU, endCPU, preferredCPU);

		if (chosenCPU == -1) {
			return NULL;
		}
		return cpu_datap(chosenCPU)->cpu_processor;
	}

	return preferred;
}

static int
pmThreadGetUrgency(uint64_t *rt_period, uint64_t *rt_deadline)
{
#pragma unused(rt_period, rt_deadline)
	return THREAD_URGENCY_NONE;
}

#if     DEBUG
uint32_t        urgency_stats[64][THREAD_URGENCY_MAX];
#endif

#define         URGENCY_NOTIFICATION_ASSERT_NS (5 * 1000 * 1000)
uint64_t        urgency_notification_assert_abstime_threshold, urgency_notification_max_recorded;
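
/*
 * The abstime threshold is expected to be derived from
 * URGENCY_NOTIFICATION_ASSERT_NS elsewhere, e.g. (a sketch, assuming the
 * standard conversion routine):
 *
 *	nanoseconds_to_absolutetime(URGENCY_NOTIFICATION_ASSERT_NS,
 *	    &urgency_notification_assert_abstime_threshold);
 *
 * A threshold of 0 leaves the latency assertion below disabled.
 */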

void
thread_tell_urgency(thread_urgency_t urgency,
    uint64_t rt_period,
    uint64_t rt_deadline,
    uint64_t sched_latency,
    thread_t nthread)
{
	uint64_t        urgency_notification_time_start = 0, delta;
	boolean_t       urgency_assert = (urgency_notification_assert_abstime_threshold != 0);
	assert(get_preemption_level() > 0 || ml_get_interrupts_enabled() == FALSE);
#if     DEBUG
	urgency_stats[cpu_number() % 64][urgency]++;
#endif
	if (!pmInitDone
	    || pmDispatch == NULL
	    || pmDispatch->pmThreadTellUrgency == NULL) {
		return;
	}

	SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, rt_deadline, sched_latency, 0);

	if (__improbable(urgency_assert == TRUE)) {
		urgency_notification_time_start = mach_absolute_time();
	}

	current_cpu_datap()->cpu_nthread = nthread;
	pmDispatch->pmThreadTellUrgency(urgency, rt_period, rt_deadline);

	if (__improbable(urgency_assert == TRUE)) {
		delta = mach_absolute_time() - urgency_notification_time_start;

		if (__improbable(delta > urgency_notification_max_recorded)) {
			/* This is not synchronized, but it doesn't matter
			 * if we (rarely) miss an event, as it is statistically
			 * unlikely that it will never recur.
			 */
			urgency_notification_max_recorded = delta;

			if (__improbable((delta > urgency_notification_assert_abstime_threshold) && !machine_timeout_suspended())) {
				panic("Urgency notification callout %p exceeded threshold, 0x%llx abstime units", pmDispatch->pmThreadTellUrgency, delta);
			}
		}
	}

	SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, rt_deadline, 0, 0);
}

void
machine_thread_going_on_core(__unused thread_t      new_thread,
    __unused thread_urgency_t           urgency,
    __unused uint64_t      sched_latency,
    __unused uint64_t      same_pri_latency,
    __unused uint64_t      dispatch_time)
{
}

void
machine_thread_going_off_core(thread_t old_thread, boolean_t thread_terminating,
    uint64_t last_dispatch, boolean_t thread_runnable)
{
	if (!pmInitDone
	    || pmDispatch == NULL
	    || pmDispatch->pmThreadGoingOffCore == NULL) {
		return;
	}

	pmDispatch->pmThreadGoingOffCore(old_thread, thread_terminating,
	    last_dispatch, thread_runnable);
}

void
machine_max_runnable_latency(__unused uint64_t bg_max_latency,
    __unused uint64_t default_max_latency,
    __unused uint64_t realtime_max_latency)
{
}

void
machine_work_interval_notify(__unused thread_t thread,
    __unused struct kern_work_interval_args *kwi_args)
{
}


void
machine_switch_perfcontrol_context(__unused perfcontrol_event event,
    __unused uint64_t timestamp,
    __unused uint32_t flags,
    __unused uint64_t new_thread_same_pri_latency,
    __unused thread_t old,
    __unused thread_t new)
{
}

void
machine_switch_perfcontrol_state_update(__unused perfcontrol_event event,
    __unused uint64_t timestamp,
    __unused uint32_t flags,
    __unused thread_t thread)
{
}

void
active_rt_threads(boolean_t active)
{
	if (!pmInitDone
	    || pmDispatch == NULL
	    || pmDispatch->pmActiveRTThreads == NULL) {
		return;
	}

	pmDispatch->pmActiveRTThreads(active);
}

static uint32_t
pmGetSavedRunCount(void)
{
	return saved_run_count;
}

/*
 * Returns the root of the package tree.
 */
x86_pkg_t *
pmGetPkgRoot(void)
{
	return x86_pkgs;
}

static boolean_t
pmCPUGetHibernate(int cpu)
{
	return cpu_datap(cpu)->cpu_hibernate;
}

processor_t
pmLCPUtoProcessor(int lcpu)
{
	return cpu_datap(lcpu)->cpu_processor;
}

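/*
 * Resynchronize timer deadlines on "cpu", registering the handler with the
 * PM interrupt machinery on first use.  Runs timer_resync_deadlines()
 * directly when called on the target CPU; otherwise it kicks the target
 * with a PM interrupt so that CPU resyncs itself.
 */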
static void
pmReSyncDeadlines(int cpu)
{
	static boolean_t    registered      = FALSE;

	if (!registered) {
		PM_interrupt_register(&timer_resync_deadlines);
		registered = TRUE;
	}

	if ((uint32_t)cpu == current_cpu_datap()->lcpu.cpu_num) {
		timer_resync_deadlines();
	} else {
		cpu_PM_interrupt(cpu);
	}
}

static void
pmSendIPI(int cpu)
{
	lapic_send_ipi(cpu, LAPIC_PM_INTERRUPT);
}

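/*
 * Snapshot the shared rtc_nanotime info, using the generation counter as a
 * seqcount: a generation of 0 indicates an update in progress, and a
 * snapshot is retried while its generation does not match the live one.
 */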
static void
pmGetNanotimeInfo(pm_rtc_nanotime_t *rtc_nanotime)
{
	/*
	 * Make sure that nanotime didn't change while we were reading it.
	 */
	do {
		rtc_nanotime->generation = pal_rtc_nanotime_info.generation; /* must be first */
		rtc_nanotime->tsc_base = pal_rtc_nanotime_info.tsc_base;
		rtc_nanotime->ns_base = pal_rtc_nanotime_info.ns_base;
		rtc_nanotime->scale = pal_rtc_nanotime_info.scale;
		rtc_nanotime->shift = pal_rtc_nanotime_info.shift;
	} while (pal_rtc_nanotime_info.generation != 0
	    && rtc_nanotime->generation != pal_rtc_nanotime_info.generation);
}

uint32_t
pmTimerQueueMigrate(int target_cpu)
{
	/* Call the etimer code to do this. */
	return (target_cpu != cpu_number())
	       ? timer_queue_migrate_cpu(target_cpu)
	       : 0;
}


/*
 * Called by the power management kext to register itself and to get the
 * callbacks it might need into other kernel functions.  This interface
 * is versioned to allow for slight mismatches between the kext and the
 * kernel.
 */
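/*
 * Illustrative kext-side registration sequence (a sketch only; the
 * "ourDispatch"/"ourCallbacks" names are hypothetical, not part of this
 * interface):
 *
 *	static pmDispatch_t  ourDispatch  = { ... };
 *	static pmCallBacks_t ourCallbacks;
 *
 *	pmKextRegister(PM_DISPATCH_VERSION, &ourDispatch, &ourCallbacks);
 *
 * Once the kext finishes initializing, it calls ourCallbacks.initComplete(),
 * which is wired to pmInitComplete() above and sets pmInitDone.
 */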
void
pmKextRegister(uint32_t version, pmDispatch_t *cpuFuncs,
    pmCallBacks_t *callbacks)
{
	if (callbacks != NULL && version == PM_DISPATCH_VERSION) {
		callbacks->setRTCPop            = setPop;
		callbacks->resyncDeadlines      = pmReSyncDeadlines;
		callbacks->initComplete         = pmInitComplete;
		callbacks->GetLCPU              = pmGetLogicalCPU;
		callbacks->GetCore              = pmGetCore;
		callbacks->GetDie               = pmGetDie;
		callbacks->GetPackage           = pmGetPackage;
		callbacks->GetMyLCPU            = pmGetMyLogicalCPU;
		callbacks->GetMyCore            = pmGetMyCore;
		callbacks->GetMyDie             = pmGetMyDie;
		callbacks->GetMyPackage         = pmGetMyPackage;
		callbacks->GetPkgRoot           = pmGetPkgRoot;
		callbacks->LockCPUTopology      = pmLockCPUTopology;
		callbacks->GetHibernate         = pmCPUGetHibernate;
		callbacks->LCPUtoProcessor      = pmLCPUtoProcessor;
		callbacks->ThreadBind           = thread_bind;
		callbacks->GetSavedRunCount     = pmGetSavedRunCount;
		callbacks->GetNanotimeInfo      = pmGetNanotimeInfo;
		callbacks->ThreadGetUrgency     = pmThreadGetUrgency;
		callbacks->RTCClockAdjust       = rtc_clock_adjust;
		callbacks->timerQueueMigrate    = pmTimerQueueMigrate;
		callbacks->topoParms            = &topoParms;
		callbacks->pmSendIPI            = pmSendIPI;
		callbacks->InterruptPending     = lapic_is_interrupt_pending;
		callbacks->IsInterrupting       = lapic_is_interrupting;
		callbacks->InterruptStats       = lapic_interrupt_counts;
		callbacks->DisableApicTimer     = lapic_disable_timer;
	} else {
		panic("Version mis-match between Kernel and CPU PM");
	}

	if (cpuFuncs != NULL) {
		if (pmDispatch) {
			panic("Attempt to re-register power management interface--AICPM present in xcpm mode? %p->%p", pmDispatch, cpuFuncs);
		}

		pmDispatch = cpuFuncs;

		if (earlyTopology
		    && pmDispatch->pmCPUStateInit != NULL) {
			(*pmDispatch->pmCPUStateInit)();
			earlyTopology = FALSE;
		}

		if (pmDispatch->pmIPIHandler != NULL) {
			lapic_set_pm_func((i386_intr_func_t)pmDispatch->pmIPIHandler);
		}
	}
}

/*
 * Unregisters the power management functions from the kext.
 */
void
pmUnRegister(pmDispatch_t *cpuFuncs)
{
	if (cpuFuncs != NULL && pmDispatch == cpuFuncs) {
		pmDispatch = NULL;
	}
}

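/*
 * Track per-package idle occupancy: num_idle is incremented on idle entry
 * and decremented on exit.  Since __sync_fetch_and_sub returns the value
 * prior to the decrement, an exit that observes num_idle equal to the
 * number of logical threads in the package is a full package-idle exit.
 */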
void
machine_track_platform_idle(boolean_t entry)
{
	cpu_data_t              *my_cpu         = current_cpu_datap();

	if (entry) {
		(void)__sync_fetch_and_add(&my_cpu->lcpu.package->num_idle, 1);
	} else {
		uint32_t nidle = __sync_fetch_and_sub(&my_cpu->lcpu.package->num_idle, 1);
		if (nidle == topoParms.nLThreadsPerPackage) {
			my_cpu->lcpu.package->package_idle_exits++;
		}
	}
}
1037