xref: /xnu-11215.41.3/osfmk/i386/pmCPU.c (revision 33de042d024d46de5ff4e89f2471de6608e37fa4)
1 /*
2  * Copyright (c) 2004-2011 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*
30  * CPU-specific power management support.
31  *
32  * Implements the "wrappers" to the KEXT.
33  */
34 #include <i386/asm.h>
35 #include <i386/machine_cpu.h>
36 #include <i386/mp.h>
37 #include <i386/machine_routines.h>
38 #include <i386/proc_reg.h>
39 #include <i386/pmap.h>
40 #include <i386/misc_protos.h>
41 #include <kern/machine.h>
42 #include <kern/pms.h>
43 #include <kern/processor.h>
44 #include <kern/timer_queue.h>
45 #include <i386/cpu_threads.h>
46 #include <i386/pmCPU.h>
47 #include <i386/cpuid.h>
48 #include <i386/rtclock_protos.h>
49 #include <kern/sched_prim.h>
50 #include <i386/lapic.h>
51 #include <i386/pal_routines.h>
52 #include <sys/kdebug.h>
53 #include <i386/tsc.h>
54 
55 #include <kern/sched_urgency.h>
56 
/* Sentinel meaning "no early max-delay value has been recorded". */
#define DELAY_UNSET             0xFFFFFFFFFFFFFFFFULL

/*
 * Histogram bucket upper bounds (nanoseconds) used to classify idle
 * intervals: power-of-two spaced from 16us up to 32768us.  Intervals at or
 * beyond the last bound are not counted (see machine_classify_interval()).
 */
uint64_t cpu_itime_bins[CPU_ITIME_BINS] = {16 * NSEC_PER_USEC, 32 * NSEC_PER_USEC, 64 * NSEC_PER_USEC, 128 * NSEC_PER_USEC, 256 * NSEC_PER_USEC, 512 * NSEC_PER_USEC, 1024 * NSEC_PER_USEC, 2048 * NSEC_PER_USEC, 4096 * NSEC_PER_USEC, 8192 * NSEC_PER_USEC, 16384 * NSEC_PER_USEC, 32768 * NSEC_PER_USEC};
/* Run-interval classification shares the same bucket bounds as idle. */
uint64_t *cpu_rtime_bins = &cpu_itime_bins[0];
61 
/*
 * The following is set when the KEXT loads and initializes.
 */
pmDispatch_t    *pmDispatch     = NULL; /* kext dispatch table; NULL until pmKextRegister() */

uint32_t                pmInitDone              = 0;            /* set by pmInitComplete() when the kext finishes init */
static boolean_t        earlyTopology           = FALSE;        /* pmCPUStateInit() ran before the kext registered */
static uint64_t         earlyMaxBusDelay        = DELAY_UNSET;  /* bus delay requested before kext registration */
static uint64_t         earlyMaxIntDelay        = DELAY_UNSET;  /* interrupt delay requested before kext registration */
71 
72 /*
73  * Initialize the Cstate change code.
74  */
75 void
power_management_init(void)76 power_management_init(void)
77 {
78 	if (pmDispatch != NULL && pmDispatch->cstateInit != NULL) {
79 		(*pmDispatch->cstateInit)();
80 	}
81 }
82 
/*
 * Bump the histogram bucket that 'interval' falls into.  binvals holds
 * ascending upper bounds; the first bound strictly greater than the
 * interval wins.  Intervals >= the last bound are silently dropped.
 */
static inline void
machine_classify_interval(uint64_t interval, uint64_t *bins, uint64_t *binvals, uint32_t nbins)
{
	for (uint32_t idx = 0; idx < nbins; idx++) {
		if (interval >= binvals[idx]) {
			continue;
		}
		bins[idx]++;
		return;
	}
}
94 
/* Count of idle entries diverted to service imminent pending timers. */
uint64_t        idle_pending_timers_processed;
/* If the hard timer deadline is within this many ns at idle entry,
 * process pending timers instead of idling (default 5 ms). */
uint32_t        idle_entry_timer_processing_hdeadline_threshold = 5000000;
97 
/*
 * Called when the CPU is idle.  It calls into the power management kext
 * to determine the best way to idle the CPU.
 *
 * Interrupts are re-enabled on the way out (pal_sti()); the non-kext path
 * briefly re-enables them around the hlt.
 */
void
machine_idle(void)
{
	cpu_data_t              *my_cpu         = current_cpu_datap();
	__unused uint32_t       cnum = my_cpu->cpu_number;
	uint64_t                ctime, rtime, itime;
#if CST_DEMOTION_DEBUG
	processor_t             cproc = my_cpu->cpu_processor;
	uint64_t                cwakeups = my_cpu->cpu_wakeups_issued_total;
#endif /* CST_DEMOTION_DEBUG */
	uint64_t esdeadline, ehdeadline;
	boolean_t do_process_pending_timers = FALSE;

	ctime = mach_absolute_time();
	esdeadline = my_cpu->rtclock_timer.queue.earliest_soft_deadline;
	ehdeadline = my_cpu->rtclock_timer.deadline;
/* Determine if pending timers exist */
	if ((ctime >= esdeadline) && (ctime < ehdeadline) &&
	    ((ehdeadline - ctime) < idle_entry_timer_processing_hdeadline_threshold)) {
		/*
		 * A hard deadline is imminent (within threshold): skip the idle
		 * entry entirely and go service the timer queue instead.
		 */
		idle_pending_timers_processed++;
		do_process_pending_timers = TRUE;
		goto machine_idle_exit;
	} else {
		TCOAL_DEBUG(0xCCCC0000, ctime, my_cpu->rtclock_timer.queue.earliest_soft_deadline, my_cpu->rtclock_timer.deadline, idle_pending_timers_processed, 0);
	}

	my_cpu->lcpu.state = LCPU_IDLE;
	DBGLOG(cpu_handle, cpu_number(), MP_IDLE);
	MARK_CPU_IDLE(cnum);

	/* Account the run interval that just ended (time since last idle exit). */
	rtime = ctime - my_cpu->cpu_ixtime;

	my_cpu->cpu_rtime_total += rtime;
	machine_classify_interval(rtime, &my_cpu->cpu_rtimes[0], &cpu_rtime_bins[0], CPU_RTIME_BINS);
#if CST_DEMOTION_DEBUG
	/* Snapshot core C-state residency counters before idling. */
	uint32_t cl = 0, ch = 0;
	uint64_t c3res, c6res, c7res;
	rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch);
	c3res = ((uint64_t)ch << 32) | cl;
	rdmsr_carefully(MSR_IA32_CORE_C6_RESIDENCY, &cl, &ch);
	c6res = ((uint64_t)ch << 32) | cl;
	rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch);
	c7res = ((uint64_t)ch << 32) | cl;
#endif

	if (pmInitDone) {
		/*
		 * Handle case where ml_set_maxbusdelay() or ml_set_maxintdelay()
		 * were called prior to the CPU PM kext being registered.  We do
		 * this here since we know at this point the values will be first
		 * used since idle is where the decisions using these values is made.
		 */
		if (earlyMaxBusDelay != DELAY_UNSET) {
			ml_set_maxbusdelay((uint32_t)(earlyMaxBusDelay & 0xFFFFFFFF));
		}
		if (earlyMaxIntDelay != DELAY_UNSET) {
			ml_set_maxintdelay(earlyMaxIntDelay);
		}
	}

	if (pmInitDone
	    && pmDispatch != NULL
	    && pmDispatch->MachineIdle != NULL) {
		(*pmDispatch->MachineIdle)(0x7FFFFFFFFFFFFFFFULL);
	} else {
		/*
		 * If no power management, re-enable interrupts and halt.
		 * This will keep the CPU from spinning through the scheduler
		 * and will allow at least some minimal power savings (but it
		 * cause problems in some MP configurations w.r.t. the APIC
		 * stopping during a GV3 transition).
		 */
		pal_hlt();
		/* Once woken, re-disable interrupts. */
		pal_cli();
	}

	/*
	 * Mark the CPU as running again.
	 */
	MARK_CPU_ACTIVE(cnum);
	DBGLOG(cpu_handle, cnum, MP_UNIDLE);
	my_cpu->lcpu.state = LCPU_RUN;
	/* Record the idle-exit timestamp and account the idle interval. */
	uint64_t ixtime = my_cpu->cpu_ixtime = mach_absolute_time();
	itime = ixtime - ctime;
	my_cpu->cpu_idle_exits++;
	my_cpu->cpu_itime_total += itime;
	machine_classify_interval(itime, &my_cpu->cpu_itimes[0], &cpu_itime_bins[0], CPU_ITIME_BINS);
#if CST_DEMOTION_DEBUG
	/* Compute C-state residency deltas accumulated over the idle period. */
	cl = ch = 0;
	rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch);
	c3res = (((uint64_t)ch << 32) | cl) - c3res;
	rdmsr_carefully(MSR_IA32_CORE_C6_RESIDENCY, &cl, &ch);
	c6res = (((uint64_t)ch << 32) | cl) - c6res;
	rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch);
	c7res = (((uint64_t)ch << 32) | cl) - c7res;

	/* ndelta: idle time not covered by C3/C6/C7 residency. */
	uint64_t ndelta = itime - tmrCvt(c3res + c6res + c7res, tscFCvtt2n);
	KERNEL_DEBUG_CONSTANT(0xcead0000, ndelta, itime, c7res, c6res, c3res);
	if ((itime > 1000000) && (ndelta > 250000)) {
		KERNEL_DEBUG_CONSTANT(0xceae0000, ndelta, itime, c7res, c6res, c3res);
	}
#endif

machine_idle_exit:
	/*
	 * Re-enable interrupts.
	 */

	pal_sti();

	if (do_process_pending_timers) {
		TCOAL_DEBUG(0xBBBB0000 | DBG_FUNC_START, ctime, esdeadline, ehdeadline, idle_pending_timers_processed, 0);

		/* Adjust to reflect that this isn't truly a package idle exit */
		__sync_fetch_and_sub(&my_cpu->lcpu.package->num_idle, 1);
		lapic_timer_swi(); /* Trigger software timer interrupt */
		__sync_fetch_and_add(&my_cpu->lcpu.package->num_idle, 1);

		TCOAL_DEBUG(0xBBBB0000 | DBG_FUNC_END, ctime, esdeadline, idle_pending_timers_processed, 0, 0);
	}
#if CST_DEMOTION_DEBUG
	uint64_t nwakeups = my_cpu->cpu_wakeups_issued_total;

	/* No wakeups issued and the whole package is idle: record the event. */
	if ((nwakeups == cwakeups) && (topoParms.nLThreadsPerPackage == my_cpu->lcpu.package->num_idle)) {
		KERNEL_DEBUG_CONSTANT(0xceaa0000, cwakeups, 0, 0, 0, 0);
	}
#endif
}
231 
/*
 * Called when the CPU is to be halted.  It will choose the best C-State
 * to be in.
 *
 * reason selects the halt flavor:
 *   PM_HALT_DEBUG / PM_HALT_PANIC - pause the CPU in place via pal_stop_cpu().
 *   PM_HALT_NORMAL / PM_HALT_SLEEP (and default) - halt through the PM kext
 *   when registered; otherwise flush caches and stop permanently.
 */
void
pmCPUHalt(uint32_t reason)
{
	cpu_data_t  *cpup   = current_cpu_datap();

	switch (reason) {
	case PM_HALT_DEBUG:
		cpup->lcpu.state = LCPU_PAUSE;
		pal_stop_cpu(FALSE);
		break;

	case PM_HALT_PANIC:
		cpup->lcpu.state = LCPU_PAUSE;
		pal_stop_cpu(TRUE);     /* TRUE flags the panic-path stop */
		break;

	case PM_HALT_NORMAL:
	case PM_HALT_SLEEP:
	default:
		pal_cli();      /* interrupts off before halting */

		if (pmInitDone
		    && pmDispatch != NULL
		    && pmDispatch->pmCPUHalt != NULL) {
			/*
			 * Halt the CPU (and put it in a low power state).
			 */
			(*pmDispatch->pmCPUHalt)();

			/*
			 * We've exited halt, so get the CPU schedulable again.
			 * - by calling the fast init routine for a slave, or
			 * - by returning if we're the master processor.
			 */
			if (cpup->cpu_number != master_cpu) {
				i386_init_slave_fast();
				panic("init_slave_fast returned");
			}
		} else {
			/*
			 * If no power management and a processor is taken off-line,
			 * then invalidate the cache and halt it (it will not be able
			 * to be brought back on-line without resetting the CPU).
			 */
			__asm__ volatile ("wbinvd");
			cpup->lcpu.state = LCPU_HALT;
			pal_stop_cpu(FALSE);

			panic("back from Halt");
		}

		break;
	}
}
290 
291 void
pmMarkAllCPUsOff(void)292 pmMarkAllCPUsOff(void)
293 {
294 	if (pmInitDone
295 	    && pmDispatch != NULL
296 	    && pmDispatch->markAllCPUsOff != NULL) {
297 		(*pmDispatch->markAllCPUsOff)();
298 	}
299 }
300 
301 static void
pmInitComplete(void)302 pmInitComplete(void)
303 {
304 	if (earlyTopology
305 	    && pmDispatch != NULL
306 	    && pmDispatch->pmCPUStateInit != NULL) {
307 		(*pmDispatch->pmCPUStateInit)();
308 		earlyTopology = FALSE;
309 	}
310 	pmInitDone = 1;
311 }
312 
313 x86_lcpu_t *
pmGetLogicalCPU(int cpu)314 pmGetLogicalCPU(int cpu)
315 {
316 	return cpu_to_lcpu(cpu);
317 }
318 
319 x86_lcpu_t *
pmGetMyLogicalCPU(void)320 pmGetMyLogicalCPU(void)
321 {
322 	cpu_data_t  *cpup   = current_cpu_datap();
323 
324 	return &cpup->lcpu;
325 }
326 
327 static x86_core_t *
pmGetCore(int cpu)328 pmGetCore(int cpu)
329 {
330 	return cpu_to_core(cpu);
331 }
332 
333 static x86_core_t *
pmGetMyCore(void)334 pmGetMyCore(void)
335 {
336 	cpu_data_t  *cpup   = current_cpu_datap();
337 
338 	return cpup->lcpu.core;
339 }
340 
341 static x86_die_t *
pmGetDie(int cpu)342 pmGetDie(int cpu)
343 {
344 	return cpu_to_die(cpu);
345 }
346 
347 static x86_die_t *
pmGetMyDie(void)348 pmGetMyDie(void)
349 {
350 	cpu_data_t  *cpup   = current_cpu_datap();
351 
352 	return cpup->lcpu.die;
353 }
354 
355 static x86_pkg_t *
pmGetPackage(int cpu)356 pmGetPackage(int cpu)
357 {
358 	return cpu_to_package(cpu);
359 }
360 
361 static x86_pkg_t *
pmGetMyPackage(void)362 pmGetMyPackage(void)
363 {
364 	cpu_data_t  *cpup   = current_cpu_datap();
365 
366 	return cpup->lcpu.package;
367 }
368 
369 static void
pmLockCPUTopology(int lock)370 pmLockCPUTopology(int lock)
371 {
372 	if (lock) {
373 		mp_safe_spin_lock(&x86_topo_lock);
374 	} else {
375 		simple_unlock(&x86_topo_lock);
376 	}
377 }
378 
379 /*
380  * Called to get the next deadline that has been set by the
381  * power management code.
382  * Note: a return of 0 from AICPM and this routine signifies
383  * that no deadline is set.
384  */
385 uint64_t
pmCPUGetDeadline(cpu_data_t * cpu)386 pmCPUGetDeadline(cpu_data_t *cpu)
387 {
388 	uint64_t    deadline        = 0;
389 
390 	if (pmInitDone
391 	    && pmDispatch != NULL
392 	    && pmDispatch->GetDeadline != NULL) {
393 		deadline = (*pmDispatch->GetDeadline)(&cpu->lcpu);
394 	}
395 
396 	return deadline;
397 }
398 
399 /*
400  * Called to determine if the supplied deadline or the power management
401  * deadline is sooner.  Returns which ever one is first.
402  */
403 
404 uint64_t
pmCPUSetDeadline(cpu_data_t * cpu,uint64_t deadline)405 pmCPUSetDeadline(cpu_data_t *cpu, uint64_t deadline)
406 {
407 	if (pmInitDone
408 	    && pmDispatch != NULL
409 	    && pmDispatch->SetDeadline != NULL) {
410 		deadline = (*pmDispatch->SetDeadline)(&cpu->lcpu, deadline);
411 	}
412 
413 	return deadline;
414 }
415 
416 /*
417  * Called when a power management deadline expires.
418  */
419 void
pmCPUDeadline(cpu_data_t * cpu)420 pmCPUDeadline(cpu_data_t *cpu)
421 {
422 	if (pmInitDone
423 	    && pmDispatch != NULL
424 	    && pmDispatch->Deadline != NULL) {
425 		(*pmDispatch->Deadline)(&cpu->lcpu);
426 	}
427 }
428 
429 /*
430  * Called to get a CPU out of idle.
431  */
432 boolean_t
pmCPUExitIdle(cpu_data_t * cpu)433 pmCPUExitIdle(cpu_data_t *cpu)
434 {
435 	boolean_t           do_ipi;
436 
437 	if (pmInitDone
438 	    && pmDispatch != NULL
439 	    && pmDispatch->exitIdle != NULL) {
440 		do_ipi = (*pmDispatch->exitIdle)(&cpu->lcpu);
441 	} else {
442 		do_ipi = TRUE;
443 	}
444 
445 	return do_ipi;
446 }
447 
448 kern_return_t
pmCPUExitHalt(int cpu)449 pmCPUExitHalt(int cpu)
450 {
451 	kern_return_t       rc      = KERN_INVALID_ARGUMENT;
452 
453 	if (pmInitDone
454 	    && pmDispatch != NULL
455 	    && pmDispatch->exitHalt != NULL) {
456 		rc = pmDispatch->exitHalt(cpu_to_lcpu(cpu));
457 	}
458 
459 	return rc;
460 }
461 
462 kern_return_t
pmCPUExitHaltToOff(int cpu)463 pmCPUExitHaltToOff(int cpu)
464 {
465 	kern_return_t       rc      = KERN_SUCCESS;
466 
467 	if (pmInitDone
468 	    && pmDispatch != NULL
469 	    && pmDispatch->exitHaltToOff != NULL) {
470 		rc = pmDispatch->exitHaltToOff(cpu_to_lcpu(cpu));
471 	}
472 
473 	return rc;
474 }
475 
476 /*
477  * Called to initialize the power management structures for the CPUs.
478  */
479 void
pmCPUStateInit(void)480 pmCPUStateInit(void)
481 {
482 	if (pmDispatch != NULL && pmDispatch->pmCPUStateInit != NULL) {
483 		(*pmDispatch->pmCPUStateInit)();
484 	} else {
485 		earlyTopology = TRUE;
486 	}
487 }
488 
489 /*
490  * Called when a CPU is being restarted after being powered off (as in S3).
491  */
492 void
pmCPUMarkRunning(cpu_data_t * cpu)493 pmCPUMarkRunning(cpu_data_t *cpu)
494 {
495 	cpu_data_t  *cpup   = current_cpu_datap();
496 
497 	if (pmInitDone
498 	    && pmDispatch != NULL
499 	    && pmDispatch->markCPURunning != NULL) {
500 		(*pmDispatch->markCPURunning)(&cpu->lcpu);
501 	} else {
502 		cpup->lcpu.state = LCPU_RUN;
503 	}
504 }
505 
506 /*
507  * Called to get/set CPU power management state.
508  */
509 int
pmCPUControl(uint32_t cmd,void * datap)510 pmCPUControl(uint32_t cmd, void *datap)
511 {
512 	int         rc      = -1;
513 
514 	if (pmDispatch != NULL
515 	    && pmDispatch->pmCPUControl != NULL) {
516 		rc = (*pmDispatch->pmCPUControl)(cmd, datap);
517 	}
518 
519 	return rc;
520 }
521 
522 /*
523  * Called to save the timer state used by power management prior
524  * to "sleeping".
525  */
526 void
pmTimerSave(void)527 pmTimerSave(void)
528 {
529 	if (pmDispatch != NULL
530 	    && pmDispatch->pmTimerStateSave != NULL) {
531 		(*pmDispatch->pmTimerStateSave)();
532 	}
533 }
534 
535 /*
536  * Called to restore the timer state used by power management after
537  * waking from "sleep".
538  */
539 void
pmTimerRestore(void)540 pmTimerRestore(void)
541 {
542 	if (pmDispatch != NULL
543 	    && pmDispatch->pmTimerStateRestore != NULL) {
544 		(*pmDispatch->pmTimerStateRestore)();
545 	}
546 }
547 
/*
 * Set the worst-case time for the C4 to C2 transition.
 * No longer does anything -- retained only for interface compatibility.
 */
void
ml_set_maxsnoop(__unused uint32_t maxdelay)
{
}
556 
557 
558 /*
559  * Get the worst-case time for the C4 to C2 transition.  Returns nanoseconds.
560  */
561 unsigned
ml_get_maxsnoop(void)562 ml_get_maxsnoop(void)
563 {
564 	uint64_t    max_snoop       = 0;
565 
566 	if (pmInitDone
567 	    && pmDispatch != NULL
568 	    && pmDispatch->getMaxSnoop != NULL) {
569 		max_snoop = pmDispatch->getMaxSnoop();
570 	}
571 
572 	return (unsigned)(max_snoop & 0xffffffff);
573 }
574 
575 
576 uint32_t
ml_get_maxbusdelay(void)577 ml_get_maxbusdelay(void)
578 {
579 	uint64_t    max_delay       = 0;
580 
581 	if (pmInitDone
582 	    && pmDispatch != NULL
583 	    && pmDispatch->getMaxBusDelay != NULL) {
584 		max_delay = pmDispatch->getMaxBusDelay();
585 	}
586 
587 	return (uint32_t)(max_delay & 0xffffffff);
588 }
589 
590 /*
591  * Advertise a memory access latency tolerance of "mdelay" ns
592  */
593 void
ml_set_maxbusdelay(uint32_t mdelay)594 ml_set_maxbusdelay(uint32_t mdelay)
595 {
596 	uint64_t    maxdelay        = mdelay;
597 
598 	if (pmDispatch != NULL
599 	    && pmDispatch->setMaxBusDelay != NULL) {
600 		earlyMaxBusDelay = DELAY_UNSET;
601 		pmDispatch->setMaxBusDelay(maxdelay);
602 	} else {
603 		earlyMaxBusDelay = maxdelay;
604 	}
605 }
606 
607 uint64_t
ml_get_maxintdelay(void)608 ml_get_maxintdelay(void)
609 {
610 	uint64_t    max_delay       = 0;
611 
612 	if (pmDispatch != NULL
613 	    && pmDispatch->getMaxIntDelay != NULL) {
614 		max_delay = pmDispatch->getMaxIntDelay();
615 	}
616 
617 	return max_delay;
618 }
619 
620 /*
621  * Set the maximum delay allowed for an interrupt.
622  */
623 void
ml_set_maxintdelay(uint64_t mdelay)624 ml_set_maxintdelay(uint64_t mdelay)
625 {
626 	if (pmDispatch != NULL
627 	    && pmDispatch->setMaxIntDelay != NULL) {
628 		earlyMaxIntDelay = DELAY_UNSET;
629 		pmDispatch->setMaxIntDelay(mdelay);
630 	} else {
631 		earlyMaxIntDelay = mdelay;
632 	}
633 }
634 
635 boolean_t
ml_get_interrupt_prewake_applicable()636 ml_get_interrupt_prewake_applicable()
637 {
638 	boolean_t applicable = FALSE;
639 
640 	if (pmInitDone
641 	    && pmDispatch != NULL
642 	    && pmDispatch->pmInterruptPrewakeApplicable != NULL) {
643 		applicable = pmDispatch->pmInterruptPrewakeApplicable();
644 	}
645 
646 	return applicable;
647 }
648 
/*
 * Put a CPU into "safe" mode with respect to power.
 *
 * Some systems cannot operate at a continuous "normal" speed without
 * exceeding the thermal design.  This is called per-CPU to place the
 * CPUs into a "safe" operating mode.
 *
 * lcpu:  the logical CPU to pause/resume.
 * flags: PM_SAFE_FL_PAUSE and/or PM_SAFE_FL_RESUME; other flags are only
 *        meaningful to the kext and are ignored in the fallback path.
 */
void
pmSafeMode(x86_lcpu_t *lcpu, uint32_t flags)
{
	if (pmDispatch != NULL
	    && pmDispatch->pmCPUSafeMode != NULL) {
		pmDispatch->pmCPUSafeMode(lcpu, flags);
	} else {
		/*
		 * Do something reasonable if the KEXT isn't present.
		 *
		 * We only look at the PAUSE and RESUME flags.  The other flag(s)
		 * will not make any sense without the KEXT, so just ignore them.
		 *
		 * We set the CPU's state to indicate that it's halted.  If this
		 * is the CPU we're currently running on, then spin until the
		 * state becomes non-halted.
		 */
		if (flags & PM_SAFE_FL_PAUSE) {
			lcpu->state = LCPU_PAUSE;
			if (lcpu == x86_lcpu()) {
				/* Spin here until another CPU resumes us. */
				while (lcpu->state == LCPU_PAUSE) {
					cpu_pause();
				}
			}
		}

		/*
		 * Clear the halted flag for the specified CPU, that will
		 * get it out of its spin loop.
		 */
		if (flags & PM_SAFE_FL_RESUME) {
			lcpu->state = LCPU_RUN;
		}
	}
}
691 
/* Fallback run-count storage used by machine_run_count() when the PM kext
 * hasn't registered; read back via pmGetSavedRunCount(). */
static uint32_t         saved_run_count = 0;
693 
694 void
machine_run_count(uint32_t count)695 machine_run_count(uint32_t count)
696 {
697 	if (pmDispatch != NULL
698 	    && pmDispatch->pmSetRunCount != NULL) {
699 		pmDispatch->pmSetRunCount(count);
700 	} else {
701 		saved_run_count = count;
702 	}
703 }
704 
705 processor_t
machine_choose_processor(processor_set_t pset,processor_t preferred)706 machine_choose_processor(processor_set_t pset,
707     processor_t preferred)
708 {
709 	int         startCPU;
710 	int         endCPU;
711 	int         preferredCPU;
712 	int         chosenCPU;
713 
714 	if (!pmInitDone) {
715 		return preferred;
716 	}
717 
718 	if (pset == NULL) {
719 		startCPU = -1;
720 		endCPU = -1;
721 	} else {
722 		startCPU = pset->cpu_set_low;
723 		endCPU = pset->cpu_set_hi;
724 	}
725 
726 	if (preferred == NULL) {
727 		preferredCPU = -1;
728 	} else {
729 		preferredCPU = preferred->cpu_id;
730 	}
731 
732 	if (pmDispatch != NULL
733 	    && pmDispatch->pmChooseCPU != NULL) {
734 		chosenCPU = pmDispatch->pmChooseCPU(startCPU, endCPU, preferredCPU);
735 
736 		if (chosenCPU == -1) {
737 			return NULL;
738 		}
739 		return cpu_datap(chosenCPU)->cpu_processor;
740 	}
741 
742 	return preferred;
743 }
744 
745 static int
pmThreadGetUrgency(uint64_t * rt_period,uint64_t * rt_deadline)746 pmThreadGetUrgency(uint64_t *rt_period, uint64_t *rt_deadline)
747 {
748 #pragma unused(rt_period, rt_deadline)
749 	return THREAD_URGENCY_NONE;
750 }
751 
#if     DEBUG
/* Per-CPU (modulo 64) histogram of urgency notifications by urgency class. */
uint32_t        urgency_stats[64][THREAD_URGENCY_MAX];
#endif

/* Watchdog limit for time spent in the kext's urgency callout: 5 ms (ns). */
#define         URGENCY_NOTIFICATION_ASSERT_NS (5 * 1000 * 1000)
/* Threshold in abstime units (0 disarms the watchdog) and the longest
 * callout duration observed so far. */
uint64_t        urgency_notification_assert_abstime_threshold, urgency_notification_max_recorded;
758 
/*
 * Notify the PM kext of a change in thread urgency (e.g. a realtime
 * thread going on-core), passing its realtime period/deadline.  Times the
 * callout and panics if it exceeds the armed watchdog threshold.
 *
 * Must be called with preemption disabled or interrupts masked (asserted
 * below).  No-op until the kext registers an urgency callout.
 */
void
thread_tell_urgency(thread_urgency_t urgency,
    uint64_t rt_period,
    uint64_t rt_deadline,
    uint64_t sched_latency,
    thread_t nthread)
{
	uint64_t        urgency_notification_time_start = 0, delta;
	boolean_t       urgency_assert = (urgency_notification_assert_abstime_threshold != 0);
	assert(get_preemption_level() > 0 || ml_get_interrupts_enabled() == FALSE);
#if     DEBUG
	urgency_stats[cpu_number() % 64][urgency]++;
#endif
	if (!pmInitDone
	    || pmDispatch == NULL
	    || pmDispatch->pmThreadTellUrgency == NULL) {
		return;
	}

	SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, rt_deadline, sched_latency, 0);

	/* Timestamp the callout only when the watchdog threshold is armed. */
	if (__improbable((urgency_assert == TRUE))) {
		urgency_notification_time_start = mach_absolute_time();
	}

	current_cpu_datap()->cpu_nthread = nthread;
	pmDispatch->pmThreadTellUrgency(urgency, rt_period, rt_deadline);

	if (__improbable((urgency_assert == TRUE))) {
		delta = mach_absolute_time() - urgency_notification_time_start;

		if (__improbable(delta > urgency_notification_max_recorded)) {
			/* This is not synchronized, but it doesn't matter
			 * if we (rarely) miss an event, as it is statistically
			 * unlikely that it will never recur.
			 */
			urgency_notification_max_recorded = delta;

			if (__improbable((delta > urgency_notification_assert_abstime_threshold) && !machine_timeout_suspended())) {
				panic("Urgency notification callout %p exceeded threshold, 0x%llx abstime units", pmDispatch->pmThreadTellUrgency, delta);
			}
		}
	}

	SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, rt_deadline, 0, 0);
}
805 
/*
 * Scheduler hook: a thread is being dispatched onto a core.
 * No-op on this platform.
 */
void
machine_thread_going_on_core(__unused thread_t      new_thread,
    __unused thread_urgency_t           urgency,
    __unused uint64_t      sched_latency,
    __unused uint64_t      same_pri_latency,
    __unused uint64_t      dispatch_time)
{
}
814 
815 void
machine_thread_going_off_core(thread_t old_thread,boolean_t thread_terminating,uint64_t last_dispatch,boolean_t thread_runnable)816 machine_thread_going_off_core(thread_t old_thread, boolean_t thread_terminating,
817     uint64_t last_dispatch, boolean_t thread_runnable)
818 {
819 	if (!pmInitDone
820 	    || pmDispatch == NULL
821 	    || pmDispatch->pmThreadGoingOffCore == NULL) {
822 		return;
823 	}
824 
825 	pmDispatch->pmThreadGoingOffCore(old_thread, thread_terminating,
826 	    last_dispatch, thread_runnable);
827 }
828 
/*
 * Scheduler hook reporting maximum runnable latencies per band.
 * No-op on this platform.
 */
void
machine_max_runnable_latency(__unused uint64_t bg_max_latency,
    __unused uint64_t default_max_latency,
    __unused uint64_t realtime_max_latency)
{
}
835 
/*
 * Work-interval notification hook.  No-op on this platform.
 */
void
machine_work_interval_notify(__unused thread_t thread,
    __unused struct kern_work_interval_args* kwi_args)
{
}
841 
842 
/*
 * Perf-control hook invoked on context switch.  No-op on this platform.
 */
void
machine_switch_perfcontrol_context(__unused perfcontrol_event event,
    __unused uint64_t timestamp,
    __unused uint32_t flags,
    __unused uint64_t new_thread_same_pri_latency,
    __unused thread_t old,
    __unused thread_t new)
{
}
852 
/*
 * Perf-control state-update hook.  No-op on this platform.
 */
void
machine_switch_perfcontrol_state_update(__unused perfcontrol_event event,
    __unused uint64_t timestamp,
    __unused uint32_t flags,
    __unused thread_t thread)
{
}
860 
861 void
active_rt_threads(boolean_t active)862 active_rt_threads(boolean_t active)
863 {
864 	if (!pmInitDone
865 	    || pmDispatch == NULL
866 	    || pmDispatch->pmActiveRTThreads == NULL) {
867 		return;
868 	}
869 
870 	pmDispatch->pmActiveRTThreads(active);
871 }
872 
/* Kext callback: return the run count stashed by machine_run_count()
 * while no kext callback was available to receive it. */
static uint32_t
pmGetSavedRunCount(void)
{
	return saved_run_count;
}
878 
/*
 * Returns the root of the package tree (the x86_pkgs list head).
 */
x86_pkg_t *
pmGetPkgRoot(void)
{
	return x86_pkgs;
}
887 
888 static boolean_t
pmCPUGetHibernate(int cpu)889 pmCPUGetHibernate(int cpu)
890 {
891 	return cpu_datap(cpu)->cpu_hibernate;
892 }
893 
894 processor_t
pmLCPUtoProcessor(int lcpu)895 pmLCPUtoProcessor(int lcpu)
896 {
897 	return cpu_datap(lcpu)->cpu_processor;
898 }
899 
/*
 * Kext callback: re-sync timer deadlines on the given CPU.  On first use,
 * registers the timer-resync handler for the PM interrupt so remote CPUs
 * can be poked.
 *
 * NOTE(review): the one-shot registration check is not synchronized --
 * presumably callers are serialized; verify before relying on concurrent use.
 */
static void
pmReSyncDeadlines(int cpu)
{
	static boolean_t    registered      = FALSE;

	if (!registered) {
		PM_interrupt_register(&timer_resync_deadlines);
		registered = TRUE;
	}

	/* Resync directly if it's our own CPU, otherwise interrupt the target. */
	if ((uint32_t)cpu == current_cpu_datap()->lcpu.cpu_num) {
		timer_resync_deadlines();
	} else {
		cpu_PM_interrupt(cpu);
	}
}
916 
/* Kext callback: send the LAPIC power-management IPI to the given CPU. */
static void
pmSendIPI(int cpu)
{
	lapic_send_ipi(cpu, LAPIC_PM_INTERRUPT);
}
922 
/*
 * Copy a consistent snapshot of the kernel's rtc nanotime parameters for
 * the PM kext.  The generation field is read first and re-checked after
 * the other fields; the copy is retried while the live generation is
 * non-zero and differs from the snapshot.  (NOTE(review): a live
 * generation of 0 terminates the loop -- presumably 0 marks an update in
 * progress per the rtc_nanotime protocol; confirm against rtclock.)
 */
static void
pmGetNanotimeInfo(pm_rtc_nanotime_t *rtc_nanotime)
{
	/*
	 * Make sure that nanotime didn't change while we were reading it.
	 */
	do {
		rtc_nanotime->generation = pal_rtc_nanotime_info.generation; /* must be first */
		rtc_nanotime->tsc_base = pal_rtc_nanotime_info.tsc_base;
		rtc_nanotime->ns_base = pal_rtc_nanotime_info.ns_base;
		rtc_nanotime->scale = pal_rtc_nanotime_info.scale;
		rtc_nanotime->shift = pal_rtc_nanotime_info.shift;
	} while (pal_rtc_nanotime_info.generation != 0
	    && rtc_nanotime->generation != pal_rtc_nanotime_info.generation);
}
938 
939 uint32_t
pmTimerQueueMigrate(int target_cpu)940 pmTimerQueueMigrate(int target_cpu)
941 {
942 	/* Call the etimer code to do this. */
943 	return (target_cpu != cpu_number())
944 	       ? timer_queue_migrate_cpu(target_cpu)
945 	       : 0;
946 }
947 
948 
/*
 * Called by the power management kext to register itself and to get the
 * callbacks it might need into other kernel functions.  This interface
 * is versioned to allow for slight mis-matches between the kext and the
 * kernel.
 *
 * Panics on a version mismatch (or NULL callbacks), and on an attempt to
 * register a second dispatch table.
 */
void
pmKextRegister(uint32_t version, pmDispatch_t *cpuFuncs,
    pmCallBacks_t *callbacks)
{
	if (callbacks != NULL && version == PM_DISPATCH_VERSION) {
		/* Hand the kext the kernel entry points it is allowed to call. */
		callbacks->setRTCPop            = setPop;
		callbacks->resyncDeadlines      = pmReSyncDeadlines;
		callbacks->initComplete         = pmInitComplete;
		callbacks->GetLCPU              = pmGetLogicalCPU;
		callbacks->GetCore              = pmGetCore;
		callbacks->GetDie               = pmGetDie;
		callbacks->GetPackage           = pmGetPackage;
		callbacks->GetMyLCPU            = pmGetMyLogicalCPU;
		callbacks->GetMyCore            = pmGetMyCore;
		callbacks->GetMyDie             = pmGetMyDie;
		callbacks->GetMyPackage         = pmGetMyPackage;
		callbacks->GetPkgRoot           = pmGetPkgRoot;
		callbacks->LockCPUTopology      = pmLockCPUTopology;
		callbacks->GetHibernate         = pmCPUGetHibernate;
		callbacks->LCPUtoProcessor      = pmLCPUtoProcessor;
		callbacks->ThreadBind           = thread_bind;
		callbacks->GetSavedRunCount     = pmGetSavedRunCount;
		callbacks->GetNanotimeInfo      = pmGetNanotimeInfo;
		callbacks->ThreadGetUrgency     = pmThreadGetUrgency;
		callbacks->RTCClockAdjust       = rtc_clock_adjust;
		callbacks->timerQueueMigrate    = pmTimerQueueMigrate;
		callbacks->topoParms            = &topoParms;
		callbacks->pmSendIPI            = pmSendIPI;
		callbacks->InterruptPending     = lapic_is_interrupt_pending;
		callbacks->IsInterrupting       = lapic_is_interrupting;
		callbacks->InterruptStats       = lapic_interrupt_counts;
		callbacks->DisableApicTimer     = lapic_disable_timer;
	} else {
		panic("Version mis-match between Kernel and CPU PM");
	}

	if (cpuFuncs != NULL) {
		if (pmDispatch) {
			panic("Attempt to re-register power management interface--AICPM present in xcpm mode? %p->%p", pmDispatch, cpuFuncs);
		}

		pmDispatch = cpuFuncs;

		/* Replay topology init that ran before the kext registered. */
		if (earlyTopology
		    && pmDispatch->pmCPUStateInit != NULL) {
			(*pmDispatch->pmCPUStateInit)();
			earlyTopology = FALSE;
		}

		/* Route the LAPIC PM interrupt to the kext's handler. */
		if (pmDispatch->pmIPIHandler != NULL) {
			lapic_set_pm_func((i386_intr_func_t)pmDispatch->pmIPIHandler);
		}
	}
}
1009 
1010 /*
1011  * Unregisters the power management functions from the kext.
1012  */
1013 void
pmUnRegister(pmDispatch_t * cpuFuncs)1014 pmUnRegister(pmDispatch_t *cpuFuncs)
1015 {
1016 	if (cpuFuncs != NULL && pmDispatch == cpuFuncs) {
1017 		pmDispatch = NULL;
1018 	}
1019 }
1020 
1021 void
machine_track_platform_idle(boolean_t entry)1022 machine_track_platform_idle(boolean_t entry)
1023 {
1024 	cpu_data_t              *my_cpu         = current_cpu_datap();
1025 
1026 	if (entry) {
1027 		(void)__sync_fetch_and_add(&my_cpu->lcpu.package->num_idle, 1);
1028 	} else {
1029 		uint32_t nidle = __sync_fetch_and_sub(&my_cpu->lcpu.package->num_idle, 1);
1030 		if (nidle == topoParms.nLThreadsPerPackage) {
1031 			my_cpu->lcpu.package->package_idle_exits++;
1032 		}
1033 	}
1034 }
1035