1 /*
2 * Copyright (c) 2004-2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * CPU-specific power management support.
31 *
32 * Implements the "wrappers" to the KEXT.
33 */
34 #include <i386/asm.h>
35 #include <i386/machine_cpu.h>
36 #include <i386/mp.h>
37 #include <i386/machine_routines.h>
38 #include <i386/proc_reg.h>
39 #include <i386/pmap.h>
40 #include <i386/misc_protos.h>
41 #include <kern/machine.h>
42 #include <kern/pms.h>
43 #include <kern/processor.h>
44 #include <kern/timer_queue.h>
45 #include <i386/cpu_threads.h>
46 #include <i386/pmCPU.h>
47 #include <i386/cpuid.h>
48 #include <i386/rtclock_protos.h>
49 #include <kern/sched_prim.h>
50 #include <i386/lapic.h>
51 #include <i386/pal_routines.h>
52 #include <sys/kdebug.h>
53 #include <i386/tsc.h>
54
55 #include <kern/sched_urgency.h>
56
/* Sentinel meaning "no early max-delay value has been stashed". */
#define DELAY_UNSET 0xFFFFFFFFFFFFFFFFULL

/*
 * Histogram bin boundaries (ns) used to classify idle/run intervals:
 * powers of two from 16us up to ~32ms.  The run-interval histogram
 * shares the same boundary table.
 */
uint64_t cpu_itime_bins[CPU_ITIME_BINS] = {16 * NSEC_PER_USEC, 32 * NSEC_PER_USEC, 64 * NSEC_PER_USEC, 128 * NSEC_PER_USEC, 256 * NSEC_PER_USEC, 512 * NSEC_PER_USEC, 1024 * NSEC_PER_USEC, 2048 * NSEC_PER_USEC, 4096 * NSEC_PER_USEC, 8192 * NSEC_PER_USEC, 16384 * NSEC_PER_USEC, 32768 * NSEC_PER_USEC};
uint64_t *cpu_rtime_bins = &cpu_itime_bins[0];

/*
 * The following is set when the KEXT loads and initializes.
 */
pmDispatch_t *pmDispatch = NULL;

/* Nonzero once the PM kext has completed initialization (pmInitComplete). */
uint32_t pmInitDone = 0;
/* TRUE if pmCPUStateInit() ran before the PM kext registered. */
static boolean_t earlyTopology = FALSE;
/* Max-delay values set before the kext registered; applied at first idle. */
static uint64_t earlyMaxBusDelay = DELAY_UNSET;
static uint64_t earlyMaxIntDelay = DELAY_UNSET;
71
72 /*
73 * Initialize the Cstate change code.
74 */
75 void
power_management_init(void)76 power_management_init(void)
77 {
78 if (pmDispatch != NULL && pmDispatch->cstateInit != NULL) {
79 (*pmDispatch->cstateInit)();
80 }
81 }
82
/*
 * Increment the histogram bucket for the first bin boundary that
 * "interval" falls below.  Intervals at or beyond the largest
 * boundary are deliberately left uncounted.
 */
static inline void
machine_classify_interval(uint64_t interval, uint64_t *bins, uint64_t *binvals, uint32_t nbins)
{
	uint32_t bin = 0;

	while (bin < nbins) {
		if (interval < binvals[bin]) {
			bins[bin] += 1;
			return;
		}
		bin++;
	}
}
94
/* Count of idle entries short-circuited to process imminent timers instead. */
uint64_t idle_pending_timers_processed;
/* Hard-deadline proximity threshold for that short-circuit (abstime units). */
uint32_t idle_entry_timer_processing_hdeadline_threshold = 5000000;
97
98 /*
99 * Called when the CPU is idle. It calls into the power management kext
100 * to determine the best way to idle the CPU.
101 */
102 void
machine_idle(void)103 machine_idle(void)
104 {
105 cpu_data_t *my_cpu = current_cpu_datap();
106 __unused uint32_t cnum = my_cpu->cpu_number;
107 uint64_t ctime, rtime, itime;
108 #if CST_DEMOTION_DEBUG
109 processor_t cproc = my_cpu->cpu_processor;
110 uint64_t cwakeups = my_cpu->cpu_wakeups_issued_total;
111 #endif /* CST_DEMOTION_DEBUG */
112 uint64_t esdeadline, ehdeadline;
113 boolean_t do_process_pending_timers = FALSE;
114
115 ctime = mach_absolute_time();
116 esdeadline = my_cpu->rtclock_timer.queue.earliest_soft_deadline;
117 ehdeadline = my_cpu->rtclock_timer.deadline;
118 /* Determine if pending timers exist */
119 if ((ctime >= esdeadline) && (ctime < ehdeadline) &&
120 ((ehdeadline - ctime) < idle_entry_timer_processing_hdeadline_threshold)) {
121 idle_pending_timers_processed++;
122 do_process_pending_timers = TRUE;
123 goto machine_idle_exit;
124 } else {
125 TCOAL_DEBUG(0xCCCC0000, ctime, my_cpu->rtclock_timer.queue.earliest_soft_deadline, my_cpu->rtclock_timer.deadline, idle_pending_timers_processed, 0);
126 }
127
128 my_cpu->lcpu.state = LCPU_IDLE;
129 DBGLOG(cpu_handle, cpu_number(), MP_IDLE);
130 MARK_CPU_IDLE(cnum);
131
132 rtime = ctime - my_cpu->cpu_ixtime;
133
134 my_cpu->cpu_rtime_total += rtime;
135 machine_classify_interval(rtime, &my_cpu->cpu_rtimes[0], &cpu_rtime_bins[0], CPU_RTIME_BINS);
136 #if CST_DEMOTION_DEBUG
137 uint32_t cl = 0, ch = 0;
138 uint64_t c3res, c6res, c7res;
139 rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch);
140 c3res = ((uint64_t)ch << 32) | cl;
141 rdmsr_carefully(MSR_IA32_CORE_C6_RESIDENCY, &cl, &ch);
142 c6res = ((uint64_t)ch << 32) | cl;
143 rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch);
144 c7res = ((uint64_t)ch << 32) | cl;
145 #endif
146
147 if (pmInitDone) {
148 /*
149 * Handle case where ml_set_maxbusdelay() or ml_set_maxintdelay()
150 * were called prior to the CPU PM kext being registered. We do
151 * this here since we know at this point the values will be first
152 * used since idle is where the decisions using these values is made.
153 */
154 if (earlyMaxBusDelay != DELAY_UNSET) {
155 ml_set_maxbusdelay((uint32_t)(earlyMaxBusDelay & 0xFFFFFFFF));
156 }
157 if (earlyMaxIntDelay != DELAY_UNSET) {
158 ml_set_maxintdelay(earlyMaxIntDelay);
159 }
160 }
161
162 if (pmInitDone
163 && pmDispatch != NULL
164 && pmDispatch->MachineIdle != NULL) {
165 (*pmDispatch->MachineIdle)(0x7FFFFFFFFFFFFFFFULL);
166 } else {
167 /*
168 * If no power management, re-enable interrupts and halt.
169 * This will keep the CPU from spinning through the scheduler
170 * and will allow at least some minimal power savings (but it
171 * cause problems in some MP configurations w.r.t. the APIC
172 * stopping during a GV3 transition).
173 */
174 pal_hlt();
175 /* Once woken, re-disable interrupts. */
176 pal_cli();
177 }
178
179 /*
180 * Mark the CPU as running again.
181 */
182 MARK_CPU_ACTIVE(cnum);
183 DBGLOG(cpu_handle, cnum, MP_UNIDLE);
184 my_cpu->lcpu.state = LCPU_RUN;
185 uint64_t ixtime = my_cpu->cpu_ixtime = mach_absolute_time();
186 itime = ixtime - ctime;
187 my_cpu->cpu_idle_exits++;
188 my_cpu->cpu_itime_total += itime;
189 machine_classify_interval(itime, &my_cpu->cpu_itimes[0], &cpu_itime_bins[0], CPU_ITIME_BINS);
190 #if CST_DEMOTION_DEBUG
191 cl = ch = 0;
192 rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch);
193 c3res = (((uint64_t)ch << 32) | cl) - c3res;
194 rdmsr_carefully(MSR_IA32_CORE_C6_RESIDENCY, &cl, &ch);
195 c6res = (((uint64_t)ch << 32) | cl) - c6res;
196 rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch);
197 c7res = (((uint64_t)ch << 32) | cl) - c7res;
198
199 uint64_t ndelta = itime - tmrCvt(c3res + c6res + c7res, tscFCvtt2n);
200 KERNEL_DEBUG_CONSTANT(0xcead0000, ndelta, itime, c7res, c6res, c3res);
201 if ((itime > 1000000) && (ndelta > 250000)) {
202 KERNEL_DEBUG_CONSTANT(0xceae0000, ndelta, itime, c7res, c6res, c3res);
203 }
204 #endif
205
206 machine_idle_exit:
207 /*
208 * Re-enable interrupts.
209 */
210
211 pal_sti();
212
213 if (do_process_pending_timers) {
214 TCOAL_DEBUG(0xBBBB0000 | DBG_FUNC_START, ctime, esdeadline, ehdeadline, idle_pending_timers_processed, 0);
215
216 /* Adjust to reflect that this isn't truly a package idle exit */
217 __sync_fetch_and_sub(&my_cpu->lcpu.package->num_idle, 1);
218 lapic_timer_swi(); /* Trigger software timer interrupt */
219 __sync_fetch_and_add(&my_cpu->lcpu.package->num_idle, 1);
220
221 TCOAL_DEBUG(0xBBBB0000 | DBG_FUNC_END, ctime, esdeadline, idle_pending_timers_processed, 0, 0);
222 }
223 #if CST_DEMOTION_DEBUG
224 uint64_t nwakeups = my_cpu->cpu_wakeups_issued_total;
225
226 if ((nwakeups == cwakeups) && (topoParms.nLThreadsPerPackage == my_cpu->lcpu.package->num_idle)) {
227 KERNEL_DEBUG_CONSTANT(0xceaa0000, cwakeups, 0, 0, 0, 0);
228 }
229 #endif
230 }
231
232 /*
233 * Called when the CPU is to be halted. It will choose the best C-State
234 * to be in.
235 */
236 void
pmCPUHalt(uint32_t reason)237 pmCPUHalt(uint32_t reason)
238 {
239 cpu_data_t *cpup = current_cpu_datap();
240
241 switch (reason) {
242 case PM_HALT_DEBUG:
243 cpup->lcpu.state = LCPU_PAUSE;
244 pal_stop_cpu(FALSE);
245 break;
246
247 case PM_HALT_PANIC:
248 cpup->lcpu.state = LCPU_PAUSE;
249 pal_stop_cpu(TRUE);
250 break;
251
252 case PM_HALT_NORMAL:
253 case PM_HALT_SLEEP:
254 default:
255 pal_cli();
256
257 if (pmInitDone
258 && pmDispatch != NULL
259 && pmDispatch->pmCPUHalt != NULL) {
260 /*
261 * Halt the CPU (and put it in a low power state.
262 */
263 (*pmDispatch->pmCPUHalt)();
264
265 /*
266 * We've exited halt, so get the CPU schedulable again.
267 * - by calling the fast init routine for a slave, or
268 * - by returning if we're the master processor.
269 */
270 if (cpup->cpu_number != master_cpu) {
271 i386_init_slave_fast();
272 panic("init_slave_fast returned");
273 }
274 } else {
275 /*
276 * If no power managment and a processor is taken off-line,
277 * then invalidate the cache and halt it (it will not be able
278 * to be brought back on-line without resetting the CPU).
279 */
280 __asm__ volatile ("wbinvd");
281 cpup->lcpu.state = LCPU_HALT;
282 pal_stop_cpu(FALSE);
283
284 panic("back from Halt");
285 }
286
287 break;
288 }
289 }
290
291 void
pmMarkAllCPUsOff(void)292 pmMarkAllCPUsOff(void)
293 {
294 if (pmInitDone
295 && pmDispatch != NULL
296 && pmDispatch->markAllCPUsOff != NULL) {
297 (*pmDispatch->markAllCPUsOff)();
298 }
299 }
300
301 static void
pmInitComplete(void)302 pmInitComplete(void)
303 {
304 if (earlyTopology
305 && pmDispatch != NULL
306 && pmDispatch->pmCPUStateInit != NULL) {
307 (*pmDispatch->pmCPUStateInit)();
308 earlyTopology = FALSE;
309 }
310 pmInitDone = 1;
311 }
312
313 x86_lcpu_t *
pmGetLogicalCPU(int cpu)314 pmGetLogicalCPU(int cpu)
315 {
316 return cpu_to_lcpu(cpu);
317 }
318
319 x86_lcpu_t *
pmGetMyLogicalCPU(void)320 pmGetMyLogicalCPU(void)
321 {
322 cpu_data_t *cpup = current_cpu_datap();
323
324 return &cpup->lcpu;
325 }
326
327 static x86_core_t *
pmGetCore(int cpu)328 pmGetCore(int cpu)
329 {
330 return cpu_to_core(cpu);
331 }
332
333 static x86_core_t *
pmGetMyCore(void)334 pmGetMyCore(void)
335 {
336 cpu_data_t *cpup = current_cpu_datap();
337
338 return cpup->lcpu.core;
339 }
340
341 static x86_die_t *
pmGetDie(int cpu)342 pmGetDie(int cpu)
343 {
344 return cpu_to_die(cpu);
345 }
346
347 static x86_die_t *
pmGetMyDie(void)348 pmGetMyDie(void)
349 {
350 cpu_data_t *cpup = current_cpu_datap();
351
352 return cpup->lcpu.die;
353 }
354
355 static x86_pkg_t *
pmGetPackage(int cpu)356 pmGetPackage(int cpu)
357 {
358 return cpu_to_package(cpu);
359 }
360
361 static x86_pkg_t *
pmGetMyPackage(void)362 pmGetMyPackage(void)
363 {
364 cpu_data_t *cpup = current_cpu_datap();
365
366 return cpup->lcpu.package;
367 }
368
369 static void
pmLockCPUTopology(int lock)370 pmLockCPUTopology(int lock)
371 {
372 if (lock) {
373 mp_safe_spin_lock(&x86_topo_lock);
374 } else {
375 simple_unlock(&x86_topo_lock);
376 }
377 }
378
379 /*
380 * Called to get the next deadline that has been set by the
381 * power management code.
382 * Note: a return of 0 from AICPM and this routine signifies
383 * that no deadline is set.
384 */
385 uint64_t
pmCPUGetDeadline(cpu_data_t * cpu)386 pmCPUGetDeadline(cpu_data_t *cpu)
387 {
388 uint64_t deadline = 0;
389
390 if (pmInitDone
391 && pmDispatch != NULL
392 && pmDispatch->GetDeadline != NULL) {
393 deadline = (*pmDispatch->GetDeadline)(&cpu->lcpu);
394 }
395
396 return deadline;
397 }
398
399 /*
400 * Called to determine if the supplied deadline or the power management
401 * deadline is sooner. Returns which ever one is first.
402 */
403
404 uint64_t
pmCPUSetDeadline(cpu_data_t * cpu,uint64_t deadline)405 pmCPUSetDeadline(cpu_data_t *cpu, uint64_t deadline)
406 {
407 if (pmInitDone
408 && pmDispatch != NULL
409 && pmDispatch->SetDeadline != NULL) {
410 deadline = (*pmDispatch->SetDeadline)(&cpu->lcpu, deadline);
411 }
412
413 return deadline;
414 }
415
416 /*
417 * Called when a power management deadline expires.
418 */
419 void
pmCPUDeadline(cpu_data_t * cpu)420 pmCPUDeadline(cpu_data_t *cpu)
421 {
422 if (pmInitDone
423 && pmDispatch != NULL
424 && pmDispatch->Deadline != NULL) {
425 (*pmDispatch->Deadline)(&cpu->lcpu);
426 }
427 }
428
429 /*
430 * Called to get a CPU out of idle.
431 */
432 boolean_t
pmCPUExitIdle(cpu_data_t * cpu)433 pmCPUExitIdle(cpu_data_t *cpu)
434 {
435 boolean_t do_ipi;
436
437 if (pmInitDone
438 && pmDispatch != NULL
439 && pmDispatch->exitIdle != NULL) {
440 do_ipi = (*pmDispatch->exitIdle)(&cpu->lcpu);
441 } else {
442 do_ipi = TRUE;
443 }
444
445 return do_ipi;
446 }
447
448 kern_return_t
pmCPUExitHalt(int cpu)449 pmCPUExitHalt(int cpu)
450 {
451 kern_return_t rc = KERN_INVALID_ARGUMENT;
452
453 if (pmInitDone
454 && pmDispatch != NULL
455 && pmDispatch->exitHalt != NULL) {
456 rc = pmDispatch->exitHalt(cpu_to_lcpu(cpu));
457 }
458
459 return rc;
460 }
461
462 kern_return_t
pmCPUExitHaltToOff(int cpu)463 pmCPUExitHaltToOff(int cpu)
464 {
465 kern_return_t rc = KERN_SUCCESS;
466
467 if (pmInitDone
468 && pmDispatch != NULL
469 && pmDispatch->exitHaltToOff != NULL) {
470 rc = pmDispatch->exitHaltToOff(cpu_to_lcpu(cpu));
471 }
472
473 return rc;
474 }
475
476 /*
477 * Called to initialize the power management structures for the CPUs.
478 */
479 void
pmCPUStateInit(void)480 pmCPUStateInit(void)
481 {
482 if (pmDispatch != NULL && pmDispatch->pmCPUStateInit != NULL) {
483 (*pmDispatch->pmCPUStateInit)();
484 } else {
485 earlyTopology = TRUE;
486 }
487 }
488
489 /*
490 * Called when a CPU is being restarted after being powered off (as in S3).
491 */
492 void
pmCPUMarkRunning(cpu_data_t * cpu)493 pmCPUMarkRunning(cpu_data_t *cpu)
494 {
495 cpu_data_t *cpup = current_cpu_datap();
496
497 if (pmInitDone
498 && pmDispatch != NULL
499 && pmDispatch->markCPURunning != NULL) {
500 (*pmDispatch->markCPURunning)(&cpu->lcpu);
501 } else {
502 cpup->lcpu.state = LCPU_RUN;
503 }
504 }
505
506 /*
507 * Called to get/set CPU power management state.
508 */
509 int
pmCPUControl(uint32_t cmd,void * datap)510 pmCPUControl(uint32_t cmd, void *datap)
511 {
512 int rc = -1;
513
514 if (pmDispatch != NULL
515 && pmDispatch->pmCPUControl != NULL) {
516 rc = (*pmDispatch->pmCPUControl)(cmd, datap);
517 }
518
519 return rc;
520 }
521
522 /*
523 * Called to save the timer state used by power management prior
524 * to "sleeping".
525 */
526 void
pmTimerSave(void)527 pmTimerSave(void)
528 {
529 if (pmDispatch != NULL
530 && pmDispatch->pmTimerStateSave != NULL) {
531 (*pmDispatch->pmTimerStateSave)();
532 }
533 }
534
535 /*
536 * Called to restore the timer state used by power management after
537 * waking from "sleep".
538 */
539 void
pmTimerRestore(void)540 pmTimerRestore(void)
541 {
542 if (pmDispatch != NULL
543 && pmDispatch->pmTimerStateRestore != NULL) {
544 (*pmDispatch->pmTimerStateRestore)();
545 }
546 }
547
548 /*
549 * Set the worst-case time for the C4 to C2 transition.
550 * No longer does anything.
551 */
552 void
ml_set_maxsnoop(__unused uint32_t maxdelay)553 ml_set_maxsnoop(__unused uint32_t maxdelay)
554 {
555 }
556
557
558 /*
559 * Get the worst-case time for the C4 to C2 transition. Returns nanoseconds.
560 */
561 unsigned
ml_get_maxsnoop(void)562 ml_get_maxsnoop(void)
563 {
564 uint64_t max_snoop = 0;
565
566 if (pmInitDone
567 && pmDispatch != NULL
568 && pmDispatch->getMaxSnoop != NULL) {
569 max_snoop = pmDispatch->getMaxSnoop();
570 }
571
572 return (unsigned)(max_snoop & 0xffffffff);
573 }
574
575
576 uint32_t
ml_get_maxbusdelay(void)577 ml_get_maxbusdelay(void)
578 {
579 uint64_t max_delay = 0;
580
581 if (pmInitDone
582 && pmDispatch != NULL
583 && pmDispatch->getMaxBusDelay != NULL) {
584 max_delay = pmDispatch->getMaxBusDelay();
585 }
586
587 return (uint32_t)(max_delay & 0xffffffff);
588 }
589
590 /*
591 * Advertise a memory access latency tolerance of "mdelay" ns
592 */
593 void
ml_set_maxbusdelay(uint32_t mdelay)594 ml_set_maxbusdelay(uint32_t mdelay)
595 {
596 uint64_t maxdelay = mdelay;
597
598 if (pmDispatch != NULL
599 && pmDispatch->setMaxBusDelay != NULL) {
600 earlyMaxBusDelay = DELAY_UNSET;
601 pmDispatch->setMaxBusDelay(maxdelay);
602 } else {
603 earlyMaxBusDelay = maxdelay;
604 }
605 }
606
607 uint64_t
ml_get_maxintdelay(void)608 ml_get_maxintdelay(void)
609 {
610 uint64_t max_delay = 0;
611
612 if (pmDispatch != NULL
613 && pmDispatch->getMaxIntDelay != NULL) {
614 max_delay = pmDispatch->getMaxIntDelay();
615 }
616
617 return max_delay;
618 }
619
620 /*
621 * Set the maximum delay allowed for an interrupt.
622 */
623 void
ml_set_maxintdelay(uint64_t mdelay)624 ml_set_maxintdelay(uint64_t mdelay)
625 {
626 if (pmDispatch != NULL
627 && pmDispatch->setMaxIntDelay != NULL) {
628 earlyMaxIntDelay = DELAY_UNSET;
629 pmDispatch->setMaxIntDelay(mdelay);
630 } else {
631 earlyMaxIntDelay = mdelay;
632 }
633 }
634
635 boolean_t
ml_get_interrupt_prewake_applicable()636 ml_get_interrupt_prewake_applicable()
637 {
638 boolean_t applicable = FALSE;
639
640 if (pmInitDone
641 && pmDispatch != NULL
642 && pmDispatch->pmInterruptPrewakeApplicable != NULL) {
643 applicable = pmDispatch->pmInterruptPrewakeApplicable();
644 }
645
646 return applicable;
647 }
648
649 /*
650 * Put a CPU into "safe" mode with respect to power.
651 *
652 * Some systems cannot operate at a continuous "normal" speed without
653 * exceeding the thermal design. This is called per-CPU to place the
654 * CPUs into a "safe" operating mode.
655 */
656 void
pmSafeMode(x86_lcpu_t * lcpu,uint32_t flags)657 pmSafeMode(x86_lcpu_t *lcpu, uint32_t flags)
658 {
659 if (pmDispatch != NULL
660 && pmDispatch->pmCPUSafeMode != NULL) {
661 pmDispatch->pmCPUSafeMode(lcpu, flags);
662 } else {
663 /*
664 * Do something reasonable if the KEXT isn't present.
665 *
666 * We only look at the PAUSE and RESUME flags. The other flag(s)
667 * will not make any sense without the KEXT, so just ignore them.
668 *
669 * We set the CPU's state to indicate that it's halted. If this
670 * is the CPU we're currently running on, then spin until the
671 * state becomes non-halted.
672 */
673 if (flags & PM_SAFE_FL_PAUSE) {
674 lcpu->state = LCPU_PAUSE;
675 if (lcpu == x86_lcpu()) {
676 while (lcpu->state == LCPU_PAUSE) {
677 cpu_pause();
678 }
679 }
680 }
681
682 /*
683 * Clear the halted flag for the specified CPU, that will
684 * get it out of its spin loop.
685 */
686 if (flags & PM_SAFE_FL_RESUME) {
687 lcpu->state = LCPU_RUN;
688 }
689 }
690 }
691
692 static uint32_t saved_run_count = 0;
693
694 void
machine_run_count(uint32_t count)695 machine_run_count(uint32_t count)
696 {
697 if (pmDispatch != NULL
698 && pmDispatch->pmSetRunCount != NULL) {
699 pmDispatch->pmSetRunCount(count);
700 } else {
701 saved_run_count = count;
702 }
703 }
704
705 processor_t
machine_choose_processor(processor_set_t pset,processor_t preferred)706 machine_choose_processor(processor_set_t pset,
707 processor_t preferred)
708 {
709 int startCPU;
710 int endCPU;
711 int preferredCPU;
712 int chosenCPU;
713
714 if (!pmInitDone) {
715 return preferred;
716 }
717
718 if (pset == NULL) {
719 startCPU = -1;
720 endCPU = -1;
721 } else {
722 startCPU = pset->cpu_set_low;
723 endCPU = pset->cpu_set_hi;
724 }
725
726 if (preferred == NULL) {
727 preferredCPU = -1;
728 } else {
729 preferredCPU = preferred->cpu_id;
730 }
731
732 if (pmDispatch != NULL
733 && pmDispatch->pmChooseCPU != NULL) {
734 chosenCPU = pmDispatch->pmChooseCPU(startCPU, endCPU, preferredCPU);
735
736 if (chosenCPU == -1) {
737 return NULL;
738 }
739 return cpu_datap(chosenCPU)->cpu_processor;
740 }
741
742 return preferred;
743 }
744
/*
 * Default ThreadGetUrgency callback handed to the PM kext (see
 * pmKextRegister): reports no urgency and leaves the out-parameters
 * untouched.
 */
static int
pmThreadGetUrgency(uint64_t *rt_period, uint64_t *rt_deadline)
{
#pragma unused(rt_period, rt_deadline)
	return THREAD_URGENCY_NONE;
}
751
#if DEBUG
/* Debug histogram of urgency notifications, indexed by CPU (mod 64). */
uint32_t urgency_stats[64][THREAD_URGENCY_MAX];
#endif

/* Default panic threshold for the urgency callout's latency: 5 ms. */
#define URGENCY_NOTIFICATION_ASSERT_NS (5 * 1000 * 1000)
/* Threshold (abstime; 0 disables timing) and worst latency seen so far. */
uint64_t urgency_notification_assert_abstime_threshold, urgency_notification_max_recorded;
758
/*
 * Notify the PM kext of a change in thread urgency so it can factor it
 * into performance decisions.  Must run with preemption disabled or
 * interrupts off.  When a latency threshold is configured, the callout
 * is timed and the kernel panics if it exceeds the threshold.
 */
void
thread_tell_urgency(thread_urgency_t urgency,
    uint64_t rt_period,
    uint64_t rt_deadline,
    uint64_t sched_latency,
    thread_t nthread)
{
	uint64_t urgency_notification_time_start = 0, delta;
	/* Only time the callout when a panic threshold is configured. */
	boolean_t urgency_assert = (urgency_notification_assert_abstime_threshold != 0);
	assert(get_preemption_level() > 0 || ml_get_interrupts_enabled() == FALSE);
#if DEBUG
	urgency_stats[cpu_number() % 64][urgency]++;
#endif
	if (!pmInitDone
	    || pmDispatch == NULL
	    || pmDispatch->pmThreadTellUrgency == NULL) {
		return;
	}

	SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, rt_deadline, sched_latency, 0);

	if (__improbable((urgency_assert == TRUE))) {
		urgency_notification_time_start = mach_absolute_time();
	}

	current_cpu_datap()->cpu_nthread = nthread;
	pmDispatch->pmThreadTellUrgency(urgency, rt_period, rt_deadline);

	if (__improbable((urgency_assert == TRUE))) {
		delta = mach_absolute_time() - urgency_notification_time_start;

		if (__improbable(delta > urgency_notification_max_recorded)) {
			/* This is not synchronized, but it doesn't matter
			 * if we (rarely) miss an event, as it is statistically
			 * unlikely that it will never recur.
			 */
			urgency_notification_max_recorded = delta;

			if (__improbable((delta > urgency_notification_assert_abstime_threshold) && !machine_timeout_suspended())) {
				panic("Urgency notification callout %p exceeded threshold, 0x%llx abstime units", pmDispatch->pmThreadTellUrgency, delta);
			}
		}
	}

	SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, rt_deadline, 0, 0);
}
805
/* Stub: no implementation in this file; all arguments are ignored. */
void
machine_thread_going_on_core(__unused thread_t new_thread,
    __unused thread_urgency_t urgency,
    __unused uint64_t sched_latency,
    __unused uint64_t same_pri_latency,
    __unused uint64_t dispatch_time)
{
}
814
815 void
machine_thread_going_off_core(thread_t old_thread,boolean_t thread_terminating,uint64_t last_dispatch,boolean_t thread_runnable)816 machine_thread_going_off_core(thread_t old_thread, boolean_t thread_terminating,
817 uint64_t last_dispatch, boolean_t thread_runnable)
818 {
819 if (!pmInitDone
820 || pmDispatch == NULL
821 || pmDispatch->pmThreadGoingOffCore == NULL) {
822 return;
823 }
824
825 pmDispatch->pmThreadGoingOffCore(old_thread, thread_terminating,
826 last_dispatch, thread_runnable);
827 }
828
/* Stub: no implementation in this file; all arguments are ignored. */
void
machine_max_runnable_latency(__unused uint64_t bg_max_latency,
    __unused uint64_t default_max_latency,
    __unused uint64_t realtime_max_latency)
{
}
835
/* Stub: no implementation in this file; all arguments are ignored. */
void
machine_work_interval_notify(__unused thread_t thread,
    __unused struct kern_work_interval_args* kwi_args)
{
}
841
842
/* Stub: no implementation in this file; all arguments are ignored. */
void
machine_switch_perfcontrol_context(__unused perfcontrol_event event,
    __unused uint64_t timestamp,
    __unused uint32_t flags,
    __unused uint64_t new_thread_same_pri_latency,
    __unused thread_t old,
    __unused thread_t new)
{
}
852
/* Stub: no implementation in this file; all arguments are ignored. */
void
machine_switch_perfcontrol_state_update(__unused perfcontrol_event event,
    __unused uint64_t timestamp,
    __unused uint32_t flags,
    __unused thread_t thread)
{
}
860
861 void
active_rt_threads(boolean_t active)862 active_rt_threads(boolean_t active)
863 {
864 if (!pmInitDone
865 || pmDispatch == NULL
866 || pmDispatch->pmActiveRTThreads == NULL) {
867 return;
868 }
869
870 pmDispatch->pmActiveRTThreads(active);
871 }
872
873 static uint32_t
pmGetSavedRunCount(void)874 pmGetSavedRunCount(void)
875 {
876 return saved_run_count;
877 }
878
879 /*
880 * Returns the root of the package tree.
881 */
882 x86_pkg_t *
pmGetPkgRoot(void)883 pmGetPkgRoot(void)
884 {
885 return x86_pkgs;
886 }
887
888 static boolean_t
pmCPUGetHibernate(int cpu)889 pmCPUGetHibernate(int cpu)
890 {
891 return cpu_datap(cpu)->cpu_hibernate;
892 }
893
894 processor_t
pmLCPUtoProcessor(int lcpu)895 pmLCPUtoProcessor(int lcpu)
896 {
897 return cpu_datap(lcpu)->cpu_processor;
898 }
899
/*
 * Re-evaluate timer deadlines on "cpu": directly if it's the calling
 * CPU, otherwise via a PM interrupt whose handler does the resync.
 */
static void
pmReSyncDeadlines(int cpu)
{
	static boolean_t registered = FALSE;

	/*
	 * Lazily register the resync handler on first use.
	 * NOTE(review): not synchronized — presumably first use happens
	 * before concurrent callers, or duplicate registration is benign;
	 * confirm against PM_interrupt_register().
	 */
	if (!registered) {
		PM_interrupt_register(&timer_resync_deadlines);
		registered = TRUE;
	}

	if ((uint32_t)cpu == current_cpu_datap()->lcpu.cpu_num) {
		timer_resync_deadlines();
	} else {
		cpu_PM_interrupt(cpu);
	}
}
916
/* Send a power-management LAPIC interrupt to "cpu". */
static void
pmSendIPI(int cpu)
{
	lapic_send_ipi(cpu, LAPIC_PM_INTERRUPT);
}
922
/*
 * Copy a consistent snapshot of the PAL rtc nanotime info for the PM
 * kext, using a seqlock-style read: re-read until the generation is
 * stable and nonzero (a zero generation indicates an update in
 * progress).
 */
static void
pmGetNanotimeInfo(pm_rtc_nanotime_t *rtc_nanotime)
{
	/*
	 * Make sure that nanotime didn't change while we were reading it.
	 */
	do {
		rtc_nanotime->generation = pal_rtc_nanotime_info.generation; /* must be first */
		rtc_nanotime->tsc_base = pal_rtc_nanotime_info.tsc_base;
		rtc_nanotime->ns_base = pal_rtc_nanotime_info.ns_base;
		rtc_nanotime->scale = pal_rtc_nanotime_info.scale;
		rtc_nanotime->shift = pal_rtc_nanotime_info.shift;
	} while (pal_rtc_nanotime_info.generation != 0
	    && rtc_nanotime->generation != pal_rtc_nanotime_info.generation);
}
938
939 uint32_t
pmTimerQueueMigrate(int target_cpu)940 pmTimerQueueMigrate(int target_cpu)
941 {
942 /* Call the etimer code to do this. */
943 return (target_cpu != cpu_number())
944 ? timer_queue_migrate_cpu(target_cpu)
945 : 0;
946 }
947
948
949 /*
950 * Called by the power management kext to register itself and to get the
951 * callbacks it might need into other kernel functions. This interface
952 * is versioned to allow for slight mis-matches between the kext and the
953 * kernel.
954 */
955 void
pmKextRegister(uint32_t version,pmDispatch_t * cpuFuncs,pmCallBacks_t * callbacks)956 pmKextRegister(uint32_t version, pmDispatch_t *cpuFuncs,
957 pmCallBacks_t *callbacks)
958 {
959 if (callbacks != NULL && version == PM_DISPATCH_VERSION) {
960 callbacks->setRTCPop = setPop;
961 callbacks->resyncDeadlines = pmReSyncDeadlines;
962 callbacks->initComplete = pmInitComplete;
963 callbacks->GetLCPU = pmGetLogicalCPU;
964 callbacks->GetCore = pmGetCore;
965 callbacks->GetDie = pmGetDie;
966 callbacks->GetPackage = pmGetPackage;
967 callbacks->GetMyLCPU = pmGetMyLogicalCPU;
968 callbacks->GetMyCore = pmGetMyCore;
969 callbacks->GetMyDie = pmGetMyDie;
970 callbacks->GetMyPackage = pmGetMyPackage;
971 callbacks->GetPkgRoot = pmGetPkgRoot;
972 callbacks->LockCPUTopology = pmLockCPUTopology;
973 callbacks->GetHibernate = pmCPUGetHibernate;
974 callbacks->LCPUtoProcessor = pmLCPUtoProcessor;
975 callbacks->ThreadBind = thread_bind;
976 callbacks->GetSavedRunCount = pmGetSavedRunCount;
977 callbacks->GetNanotimeInfo = pmGetNanotimeInfo;
978 callbacks->ThreadGetUrgency = pmThreadGetUrgency;
979 callbacks->RTCClockAdjust = rtc_clock_adjust;
980 callbacks->timerQueueMigrate = pmTimerQueueMigrate;
981 callbacks->topoParms = &topoParms;
982 callbacks->pmSendIPI = pmSendIPI;
983 callbacks->InterruptPending = lapic_is_interrupt_pending;
984 callbacks->IsInterrupting = lapic_is_interrupting;
985 callbacks->InterruptStats = lapic_interrupt_counts;
986 callbacks->DisableApicTimer = lapic_disable_timer;
987 } else {
988 panic("Version mis-match between Kernel and CPU PM");
989 }
990
991 if (cpuFuncs != NULL) {
992 if (pmDispatch) {
993 panic("Attempt to re-register power management interface--AICPM present in xcpm mode? %p->%p", pmDispatch, cpuFuncs);
994 }
995
996 pmDispatch = cpuFuncs;
997
998 if (earlyTopology
999 && pmDispatch->pmCPUStateInit != NULL) {
1000 (*pmDispatch->pmCPUStateInit)();
1001 earlyTopology = FALSE;
1002 }
1003
1004 if (pmDispatch->pmIPIHandler != NULL) {
1005 lapic_set_pm_func((i386_intr_func_t)pmDispatch->pmIPIHandler);
1006 }
1007 }
1008 }
1009
1010 /*
1011 * Unregisters the power management functions from the kext.
1012 */
1013 void
pmUnRegister(pmDispatch_t * cpuFuncs)1014 pmUnRegister(pmDispatch_t *cpuFuncs)
1015 {
1016 if (cpuFuncs != NULL && pmDispatch == cpuFuncs) {
1017 pmDispatch = NULL;
1018 }
1019 }
1020
1021 void
machine_track_platform_idle(boolean_t entry)1022 machine_track_platform_idle(boolean_t entry)
1023 {
1024 cpu_data_t *my_cpu = current_cpu_datap();
1025
1026 if (entry) {
1027 (void)__sync_fetch_and_add(&my_cpu->lcpu.package->num_idle, 1);
1028 } else {
1029 uint32_t nidle = __sync_fetch_and_sub(&my_cpu->lcpu.package->num_idle, 1);
1030 if (nidle == topoParms.nLThreadsPerPackage) {
1031 my_cpu->lcpu.package->package_idle_exits++;
1032 }
1033 }
1034 }
1035