/* * Copyright (c) 2011-2021 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in * compliance with the License. The rights granted to you under the License * may not be used to create, or enable the creation or redistribution of, * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * This file manages the timers used for on-CPU samples and PET. * * Each timer configured by a tool is represented by a kptimer structure. * The timer calls present in each structure are used to schedule CPU-local * timers. As each timer fires, that CPU samples itself and schedules another * timer to fire at the next deadline. The first timer to fire across all CPUs * determines that deadline. This causes the timers to fire at a consistent * cadence. * * Traditional PET uses a timer call to wake up its sampling thread and take * on-CPU samples. * * Synchronization for start and stop is provided by the ktrace subsystem lock. 
* Global state is stored in a single struct, to ease debugging. */ #include #include /* current_thread() */ #include #include #include #include #include #include #include #include #if defined(__x86_64__) #include #endif /* defined(__x86_64__) */ #include #include #include #include #include #include #include #define KPTIMER_PET_INACTIVE (999) #define KPTIMER_MAX (8) struct kptimer { uint32_t kt_actionid; uint64_t kt_period_abs; /* * The `kt_cur_deadline` field represents when the timer should next fire. * It's used to synchronize between timers firing on each CPU. In the timer * handler, each CPU will take the `kt_lock` and see if the * `kt_cur_deadline` still needs to be updated for the timer fire. If so, * it updates it and logs the timer fire event under the lock. */ lck_spin_t kt_lock; uint64_t kt_cur_deadline; #if DEVELOPMENT || DEBUG /* * To be set by the timer leader as a debugging aid for timeouts, if kperf * happens to be on-CPU when they occur. */ uint64_t kt_fire_time; #endif /* DEVELOPMENT || DEBUG */ }; static struct { struct kptimer *g_timers; uint64_t *g_cpu_deadlines; unsigned int g_ntimers; unsigned int g_pet_timerid; bool g_setup:1; bool g_pet_active:1; bool g_started:1; struct timer_call g_pet_timer; } kptimer = { .g_pet_timerid = KPTIMER_PET_INACTIVE, }; SECURITY_READ_ONLY_LATE(static uint64_t) kptimer_minperiods_mtu[KTPL_MAX]; /* * Enforce a minimum timer period to prevent interrupt storms. */ const uint64_t kptimer_minperiods_ns[KTPL_MAX] = { #if defined(__x86_64__) [KTPL_FG] = 20 * NSEC_PER_USEC, /* The minimum timer period in xnu, period. 
*/ [KTPL_BG] = 1 * NSEC_PER_MSEC, [KTPL_FG_PET] = 2 * NSEC_PER_MSEC, [KTPL_BG_PET] = 5 * NSEC_PER_MSEC, #elif defined(__arm64__) [KTPL_FG] = 50 * NSEC_PER_USEC, [KTPL_BG] = 1 * NSEC_PER_MSEC, [KTPL_FG_PET] = 2 * NSEC_PER_MSEC, [KTPL_BG_PET] = 10 * NSEC_PER_MSEC, #else #error unexpected architecture #endif }; static void kptimer_pet_handler(void * __unused param1, void * __unused param2); static void kptimer_stop_cpu(processor_t processor); void kptimer_init(void) { for (int i = 0; i < KTPL_MAX; i++) { nanoseconds_to_absolutetime(kptimer_minperiods_ns[i], &kptimer_minperiods_mtu[i]); } } static void kptimer_set_cpu_deadline(int cpuid, int timerid, uint64_t deadline) { kptimer.g_cpu_deadlines[(cpuid * KPTIMER_MAX) + timerid] = deadline; } static void kptimer_setup(void) { if (kptimer.g_setup) { return; } static lck_grp_t kptimer_lock_grp; lck_grp_init(&kptimer_lock_grp, "kptimer", LCK_GRP_ATTR_NULL); const size_t timers_size = KPTIMER_MAX * sizeof(struct kptimer); kptimer.g_timers = zalloc_permanent_tag(timers_size, ZALIGN(struct kptimer), VM_KERN_MEMORY_DIAG); for (int i = 0; i < KPTIMER_MAX; i++) { lck_spin_init(&kptimer.g_timers[i].kt_lock, &kptimer_lock_grp, LCK_ATTR_NULL); } const size_t deadlines_size = machine_info.logical_cpu_max * KPTIMER_MAX * sizeof(kptimer.g_cpu_deadlines[0]); kptimer.g_cpu_deadlines = zalloc_permanent_tag(deadlines_size, ZALIGN_64, VM_KERN_MEMORY_DIAG); for (int i = 0; i < KPTIMER_MAX; i++) { for (int j = 0; j < machine_info.logical_cpu_max; j++) { kptimer_set_cpu_deadline(j, i, EndOfAllTime); } } timer_call_setup(&kptimer.g_pet_timer, kptimer_pet_handler, NULL); kptimer.g_setup = true; } void kptimer_reset(void) { kptimer_stop(); kptimer_set_pet_timerid(KPTIMER_PET_INACTIVE); for (unsigned int i = 0; i < kptimer.g_ntimers; i++) { kptimer.g_timers[i].kt_period_abs = 0; kptimer.g_timers[i].kt_actionid = 0; for (int j = 0; j < machine_info.logical_cpu_max; j++) { kptimer_set_cpu_deadline(j, i, EndOfAllTime); } } } #pragma mark - deadline 
/*
 * Read the per-CPU copy of timer `timerid`'s next deadline for CPU `cpuid`.
 */
static uint64_t
kptimer_get_cpu_deadline(int cpuid, int timerid)
{
	return kptimer.g_cpu_deadlines[(cpuid * KPTIMER_MAX) + timerid];
}

/*
 * Sample the current CPU's thread and task on behalf of timer `timerid`,
 * running action `actionid`.  `flags` is OR-ed into the sample flags
 * (SAMPLE_FLAG_PEND_USER is always set).  Runs from timer/IPI context.
 */
static void
kptimer_sample_curcpu(unsigned int actionid, unsigned int timerid,
    uint32_t flags)
{
	struct kperf_sample *intbuf = kperf_intr_sample_buffer();
#if DEVELOPMENT || DEBUG
	/* Debugging aid: record when this sample was taken. */
	intbuf->sample_time = mach_absolute_time();
#endif /* DEVELOPMENT || DEBUG */

	BUF_DATA(PERF_TM_HNDLR | DBG_FUNC_START);

	thread_t thread = current_thread();
	task_t task = get_threadtask(thread);
	struct kperf_context ctx = {
		.cur_thread = thread,
		.cur_task = task,
		.cur_pid = task_pid(task),
		.trigger_type = TRIGGER_TYPE_TIMER,
		.trigger_id = timerid,
	};

	(void)kperf_sample(intbuf, &ctx, actionid,
	    SAMPLE_FLAG_PEND_USER | flags);

	BUF_INFO(PERF_TM_HNDLR | DBG_FUNC_END);
}

/* Acquire a timer's deadline-synchronization spinlock. */
static void
kptimer_lock(struct kptimer *timer)
{
	lck_spin_lock(&timer->kt_lock);
}

/* Release a timer's deadline-synchronization spinlock. */
static void
kptimer_unlock(struct kptimer *timer)
{
	lck_spin_unlock(&timer->kt_lock);
}

/*
 * If the deadline expired in the past, find the next deadline to program,
 * locked into the cadence provided by the period.
 */
static inline uint64_t
dead_reckon_deadline(uint64_t now, uint64_t deadline, uint64_t period)
{
	if (deadline < now) {
		uint64_t time_since = now - deadline;
		/* Round up to the next multiple of `period` past `deadline`. */
		uint64_t extra_time = period - (time_since % period);
		return now + extra_time;
	}
	return deadline;
}

/*
 * Handle one CPU's expiry of `timer` (slot `timerid`), whose per-CPU deadline
 * was `deadline`.  The first CPU to observe the fire advances the shared
 * `kt_cur_deadline` and emits the PERF_TM_FIRE event; later CPUs resynchronize
 * to that shared deadline.  Returns the next deadline this CPU should program.
 */
static uint64_t
kptimer_fire(struct kptimer *timer, unsigned int timerid,
    uint64_t deadline, int __unused cpuid, uint64_t now)
{
	bool first = false;
	uint64_t next_deadline = deadline + timer->kt_period_abs;

	/*
	 * It's not straightforward to replace this lock with a compare-exchange,
	 * since the PERF_TM_FIRE event must be emitted *before* any subsequent
	 * PERF_TM_HNDLR events, so tools can understand the handlers are responding
	 * to this timer fire.
	 */
	kptimer_lock(timer);
	if (timer->kt_cur_deadline < next_deadline) {
		/* This CPU is the first to see the fire. */
		first = true;
		next_deadline = dead_reckon_deadline(now, next_deadline,
		    timer->kt_period_abs);
		timer->kt_cur_deadline = next_deadline;
		BUF_DATA(PERF_TM_FIRE, timerid, timerid == kptimer.g_pet_timerid,
		    timer->kt_period_abs, timer->kt_actionid);
#if DEVELOPMENT || DEBUG
		/*
		 * Debugging aid to see the last time this timer fired.
		 */
		timer->kt_fire_time = mach_absolute_time();
#endif /* DEVELOPMENT || DEBUG */
		if (timerid == kptimer.g_pet_timerid && kppet_get_lightweight_pet()) {
			os_atomic_inc(&kppet_gencount, relaxed);
		}
	} else {
		/*
		 * In case this CPU has missed several timer fires, get it back on track
		 * by synchronizing with the latest timer fire.
		 */
		next_deadline = timer->kt_cur_deadline;
	}
	kptimer_unlock(timer);

	if (!first && !kperf_action_has_non_system(timer->kt_actionid)) {
		/*
		 * The first timer to fire will sample the system, so there's
		 * no need to run other timers if those are the only samplers
		 * for this action.
		 */
		return next_deadline;
	}

	kptimer_sample_curcpu(timer->kt_actionid, timerid,
	    first ? SAMPLE_FLAG_SYSTEM : 0);

	return next_deadline;
}

/*
 * Determine which of the timers fired.
 */
void
kptimer_expire(processor_t processor, int cpuid, uint64_t now)
{
	uint64_t min_deadline = UINT64_MAX;

	/* Bail out (or panic) if sampling is being torn down or was never on. */
	enum kperf_sampling status = os_atomic_load(&kperf_status, acquire);
	switch (status) {
	case KPERF_SAMPLING_ON:
		break;
	case KPERF_SAMPLING_SHUTDOWN:
		kptimer_stop_cpu(processor);
		return;
	case KPERF_SAMPLING_OFF:
		panic("kperf: timer fired at %llu, but sampling is disabled", now);
	default:
		panic("kperf: unknown sampling state 0x%x", status);
	}

	/* Fire every active timer whose per-CPU deadline has passed. */
	for (unsigned int i = 0; i < kptimer.g_ntimers; i++) {
		struct kptimer *timer = &kptimer.g_timers[i];
		if (timer->kt_period_abs == 0) {
			continue;
		}
		uint64_t cpudeadline = kptimer_get_cpu_deadline(cpuid, i);
		if (now > cpudeadline) {
			uint64_t deadline = kptimer_fire(timer, i, cpudeadline, cpuid, now);
			if (deadline == 0) {
				kptimer_set_cpu_deadline(cpuid, i, EndOfAllTime);
			} else {
				kptimer_set_cpu_deadline(cpuid, i, deadline);
				if (deadline < min_deadline) {
					min_deadline = deadline;
				}
			}
		}
	}

	/* Re-arm this CPU's running timer for the soonest upcoming deadline. */
	if (min_deadline < UINT64_MAX) {
		running_timer_enter(processor, RUNNING_TIMER_KPERF, NULL,
		    min_deadline, mach_absolute_time());
	}
}

#pragma mark - start/stop

/*
 * Run `fn` on every CPU (including this one); per-arch broadcast mechanism.
 * On non-x86, waits via `xcsync`, which each CPU acknowledges through
 * kptimer_broadcast_ack().
 */
static void
kptimer_broadcast(void (*fn)(void *))
{
	ktrace_assert_lock_held();

#if defined(__x86_64__)
	(void)mp_cpus_call(CPUMASK_ALL, ASYNC, fn, NULL);
#else /* defined(__x86_64__) */
	_Atomic uint32_t xcsync = 0;
	cpu_broadcast_xcall((uint32_t *)&xcsync, TRUE /* include self */, fn,
	    &xcsync);
#endif /* !defined(__x86_64__) */
}

/*
 * Acknowledge this CPU's participation in a kptimer_broadcast(); wakes the
 * broadcaster when the last CPU checks in (non-x86 only).
 */
static void
kptimer_broadcast_ack(void *arg)
{
#if defined(__x86_64__)
#pragma unused(arg)
#else /* defined(__x86_64__) */
	_Atomic uint32_t *xcsync = arg;
	int pending = os_atomic_dec(xcsync, relaxed);
	if (pending == 0) {
		thread_wakeup(xcsync);
	}
#endif /* !defined(__x86_64__) */
}

/*
 * IPI target: take an on-CPU PET sample of the current CPU, without the
 * system-wide flag (the PET leader samples the system).
 */
static void
kptimer_sample_pet_remote(void * __unused arg)
{
	if (!kperf_is_sampling()) {
		return;
	}
	struct kptimer *timer = &kptimer.g_timers[kptimer.g_pet_timerid];
	kptimer_sample_curcpu(timer->kt_actionid, kptimer.g_pet_timerid, 0);
}

#if !defined(__x86_64__)
/*
 * NOTE(review): the include target was lost in extraction — the use of
 * cpu_signal()/SIGPkppet below suggests an arm CPU-internal header; confirm
 * against the original tree.
 */
#include
void
kperf_signal_handler(void); void kperf_signal_handler(void) { kptimer_sample_pet_remote(NULL); } #endif /* !defined(__x86_64__) */ #include _Atomic uint64_t mycounter = 0; static void kptimer_broadcast_pet(void) { atomic_fetch_add(&mycounter, 1); #if defined(__x86_64__) (void)mp_cpus_call(CPUMASK_OTHERS, NOSYNC, kptimer_sample_pet_remote, NULL); #else /* defined(__x86_64__) */ int curcpu = cpu_number(); for (int i = 0; i < machine_info.logical_cpu_max; i++) { if (i != curcpu) { cpu_signal(cpu_datap(i), SIGPkppet, NULL, NULL); } } #endif /* !defined(__x86_64__) */ } static void kptimer_pet_handler(void * __unused param1, void * __unused param2) { if (!kptimer.g_pet_active) { return; } struct kptimer *timer = &kptimer.g_timers[kptimer.g_pet_timerid]; BUF_DATA(PERF_TM_FIRE, kptimer.g_pet_timerid, 1, timer->kt_period_abs, timer->kt_actionid); /* * To get the on-CPU samples as close to this timer fire as possible, first * broadcast to them to sample themselves. */ kptimer_broadcast_pet(); /* * Wakeup the PET thread afterwards so it's not inadvertently sampled (it's a * high-priority kernel thread). If the scheduler needs to IPI to run it, * that IPI will be handled after the IPIs issued during the broadcast. */ kppet_wake_thread(); /* * Finally, sample this CPU, who's stacks and state have been preserved while * running this handler. Make sure to include system measurements. */ kptimer_sample_curcpu(timer->kt_actionid, kptimer.g_pet_timerid, SAMPLE_FLAG_SYSTEM); BUF_INFO(PERF_TM_FIRE | DBG_FUNC_END); /* * The PET thread will re-arm the timer when it's done. 
 */
}

/*
 * Called by the PET thread when it finishes a sampling pass that took
 * `sampledur_abs` mach time units: re-arm the PET timer so the next fire
 * lands one (period - sample duration) from now, clamped to the PET minimum.
 */
void
kptimer_pet_enter(uint64_t sampledur_abs)
{
	if (!kperf_is_sampling()) {
		return;
	}

	uint64_t period_abs = kptimer.g_timers[kptimer.g_pet_timerid].kt_period_abs;
	uint64_t orig_period_abs = period_abs;

	/* Credit the time already spent sampling against the next period. */
	if (period_abs > sampledur_abs) {
		period_abs -= sampledur_abs;
	}
	period_abs = MAX(kptimer_min_period_abs(true), period_abs);
	uint64_t deadline_abs = mach_absolute_time() + period_abs;

	BUF_INFO(PERF_PET_SCHED, orig_period_abs, period_abs, sampledur_abs,
	    deadline_abs);

	timer_call_enter(&kptimer.g_pet_timer, deadline_abs, TIMER_CALL_SYS_CRITICAL);
}

/*
 * Compute the earliest upcoming deadline across all active timers, updating
 * this processor's per-CPU deadline copies along the way.  Returns UINT64_MAX
 * if no timer is active.
 */
static uint64_t
kptimer_earliest_deadline(processor_t processor, uint64_t now)
{
	uint64_t min_deadline = UINT64_MAX;
	for (unsigned int i = 0; i < kptimer.g_ntimers; i++) {
		struct kptimer *timer = &kptimer.g_timers[i];
		uint64_t cur_deadline = timer->kt_cur_deadline;
		if (cur_deadline == 0) {
			/* Timer is not running. */
			continue;
		}
		cur_deadline = dead_reckon_deadline(now, cur_deadline,
		    timer->kt_period_abs);
		kptimer_set_cpu_deadline(processor->cpu_id, i, cur_deadline);
		if (cur_deadline < min_deadline) {
			min_deadline = cur_deadline;
		}
	}
	return min_deadline;
}

void kptimer_running_setup(processor_t processor, uint64_t now);

/*
 * Prepare (but do not arm) this processor's kperf running timer for the
 * earliest active deadline.
 */
void
kptimer_running_setup(processor_t processor, uint64_t now)
{
	uint64_t deadline = kptimer_earliest_deadline(processor, now);
	if (deadline < UINT64_MAX) {
		running_timer_setup(processor, RUNNING_TIMER_KPERF, NULL, deadline,
		    now);
	}
}

/*
 * Arm this processor's kperf running timer for the earliest active deadline.
 */
static void
kptimer_start_cpu(processor_t processor)
{
	uint64_t now = mach_absolute_time();
	uint64_t deadline = kptimer_earliest_deadline(processor, now);
	if (deadline < UINT64_MAX) {
		running_timer_enter(processor, RUNNING_TIMER_KPERF, NULL, deadline,
		    now);
	}
}

/*
 * Broadcast target: start this CPU's timer and acknowledge the broadcast.
 */
static void
kptimer_start_remote(void *arg)
{
	kptimer_start_cpu(current_processor());
	kptimer_broadcast_ack(arg);
}

/*
 * Clear all of this processor's per-CPU deadlines and cancel its running
 * timer.
 */
static void
kptimer_stop_cpu(processor_t processor)
{
	for (unsigned int i = 0; i < kptimer.g_ntimers; i++) {
		kptimer_set_cpu_deadline(processor->cpu_id, i, EndOfAllTime);
	}
	running_timer_cancel(processor, RUNNING_TIMER_KPERF);
}
/*
 * Stop the kperf timers on the current CPU only.
 */
void
kptimer_stop_curcpu(void)
{
	kptimer_stop_cpu(current_processor());
}

/*
 * Broadcast target: stop this CPU's timers and acknowledge the broadcast.
 * Runs with interrupts disabled (cross-call context).
 */
static void
kptimer_stop_remote(void * __unused arg)
{
	assert(ml_get_interrupts_enabled() == FALSE);
	kptimer_stop_cpu(current_processor());
	kptimer_broadcast_ack(arg);
}

/*
 * Called when a CPU is brought online.  Handles the cases where the kperf timer may have
 * been either enabled or disabled while the CPU was offline (preventing the enabling/disabling
 * IPIs from reaching this CPU).
 */
void
kptimer_curcpu_up(void)
{
	enum kperf_sampling status = os_atomic_load(&kperf_status, acquire);
	processor_t processor = current_processor();

	assert(ml_get_interrupts_enabled() == FALSE);

	/*
	 * If the CPU was taken offline, THEN kperf was enabled, this CPU would have missed
	 * the enabling IPI, so fix that here.  Also, if the CPU was taken offline (after having
	 * enabled kperf), recompute the deadline (since we may have missed a timer update) and
	 * keep the timer enabled.
	 */
	if (status == KPERF_SAMPLING_ON) {
		kptimer_start_cpu(processor);
	} else {
		/*
		 * Similarly, If the CPU is resuming after having previously armed the kperf timer
		 * before going down, and kperf is currently disabled, disable the kperf running
		 * timer on this CPU.
		 */
		kptimer_stop_cpu(processor);
	}
}

/*
 * Start all configured timers: arm the PET timer call if PET is in use, set
 * initial shared deadlines for the rest, and broadcast to every CPU to arm
 * its local running timer.  Caller holds the ktrace lock.
 */
void
kptimer_start(void)
{
	ktrace_assert_lock_held();

	if (kptimer.g_started) {
		return;
	}

	uint64_t now = mach_absolute_time();
	unsigned int ntimers_active = 0;
	kptimer.g_started = true;
	for (unsigned int i = 0; i < kptimer.g_ntimers; i++) {
		struct kptimer *timer = &kptimer.g_timers[i];
		if (timer->kt_period_abs == 0 || timer->kt_actionid == 0) {
			/*
			 * No period or action means the timer is inactive.
			 */
			continue;
		} else if (!kppet_get_lightweight_pet() &&
		    i == kptimer.g_pet_timerid) {
			/* Non-lightweight PET is driven by its own timer call. */
			kptimer.g_pet_active = true;
			timer_call_enter(&kptimer.g_pet_timer,
			    now + timer->kt_period_abs, TIMER_CALL_SYS_CRITICAL);
		} else {
			timer->kt_cur_deadline = now + timer->kt_period_abs;
			ntimers_active++;
		}
	}
	if (ntimers_active > 0) {
		kptimer_broadcast(kptimer_start_remote);
	}
}

/*
 * Stop all timers: cancel the PET timer call and broadcast to every CPU to
 * cancel its local running timer.  Caller holds the ktrace lock.
 */
void
kptimer_stop(void)
{
	ktrace_assert_lock_held();

	if (!kptimer.g_started) {
		return;
	}

	int intrs_en = ml_set_interrupts_enabled(FALSE);

	if (kptimer.g_pet_active) {
		kptimer.g_pet_active = false;
		timer_call_cancel(&kptimer.g_pet_timer);
	}
	kptimer.g_started = false;
	kptimer_broadcast(kptimer_stop_remote);
	for (unsigned int i = 0; i < kptimer.g_ntimers; i++) {
		kptimer.g_timers[i].kt_cur_deadline = 0;
	}

	ml_set_interrupts_enabled(intrs_en);
}

#pragma mark - accessors

/*
 * Return timer `timerid`'s period (mach time units) via `period_abs`.
 * Returns EINVAL for an out-of-range timer ID.
 */
int
kptimer_get_period(unsigned int timerid, uint64_t *period_abs)
{
	if (timerid >= kptimer.g_ntimers) {
		return EINVAL;
	}
	*period_abs = kptimer.g_timers[timerid].kt_period_abs;
	return 0;
}

/*
 * Set timer `timerid`'s period, clamping any non-zero period to the current
 * minimum.  Returns EINVAL for an out-of-range timer ID or EBUSY if the
 * timers are running.
 */
int
kptimer_set_period(unsigned int timerid, uint64_t period_abs)
{
	if (timerid >= kptimer.g_ntimers) {
		return EINVAL;
	}
	if (kptimer.g_started) {
		return EBUSY;
	}

	bool pet = kptimer.g_pet_timerid == timerid;
	uint64_t min_period = kptimer_min_period_abs(pet);
	if (period_abs != 0 && period_abs < min_period) {
		period_abs = min_period;
	}
	/* Keep (non-lightweight) PET configured with this timer's action. */
	if (pet && !kppet_get_lightweight_pet()) {
		kppet_config(kptimer.g_timers[timerid].kt_actionid);
	}

	kptimer.g_timers[timerid].kt_period_abs = period_abs;
	return 0;
}

/*
 * Return timer `timerid`'s action ID via `actionid`.  Returns EINVAL for an
 * out-of-range timer ID.
 */
int
kptimer_get_action(unsigned int timerid, unsigned int *actionid)
{
	if (timerid >= kptimer.g_ntimers) {
		return EINVAL;
	}
	*actionid = kptimer.g_timers[timerid].kt_actionid;
	return 0;
}

/*
 * Set timer `timerid`'s action.  Returns EINVAL for an out-of-range timer ID
 * or EBUSY if the timers are running.
 */
int
kptimer_set_action(unsigned int timerid, unsigned int actionid)
{
	if (timerid >= kptimer.g_ntimers) {
		return EINVAL;
	}
	if (kptimer.g_started) {
		return EBUSY;
	}

	kptimer.g_timers[timerid].kt_actionid = actionid;
	/* Keep (non-lightweight) PET configured with this timer's action. */
	if (kptimer.g_pet_timerid == timerid && !kppet_get_lightweight_pet()) {
		kppet_config(actionid);
	}
	return 0;
}

/*
 * Return the number of timer slots exposed to tools.
 */
unsigned int
kptimer_get_count(void)
{
	return kptimer.g_ntimers;
}

/*
 * Set the number of timer slots, performing one-time setup if needed.
 * Returns EBUSY if the timers are running, EINVAL if `count` exceeds
 * KPTIMER_MAX.
 */
int
kptimer_set_count(unsigned int count)
{
	kptimer_setup();
	if (kptimer.g_started) {
		return EBUSY;
	}
	if (count > KPTIMER_MAX) {
		return EINVAL;
	}
	kptimer.g_ntimers = count;
	return 0;
}

/*
 * Return the minimum allowed period (mach time units) for a timer, which
 * depends on whether it drives PET and whether ktrace is in background mode.
 */
uint64_t
kptimer_min_period_abs(bool pet)
{
	enum kptimer_period_limit limit = 0;
	if (ktrace_background_active()) {
		limit = pet ? KTPL_BG_PET : KTPL_BG;
	} else {
		limit = pet ? KTPL_FG_PET : KTPL_FG;
	}
	return kptimer_minperiods_mtu[limit];
}

/*
 * Return the ID of the timer driving PET (KPTIMER_PET_INACTIVE if none).
 */
uint32_t
kptimer_get_pet_timerid(void)
{
	return kptimer.g_pet_timerid;
}

/*
 * Designate timer `petid` as the PET timer, reconfiguring PET with that
 * timer's action and raising its period to the PET minimum.  An out-of-range
 * `petid` (e.g. KPTIMER_PET_INACTIVE) deactivates PET.  Returns EBUSY if the
 * timers are running.
 */
int
kptimer_set_pet_timerid(uint32_t petid)
{
	if (kptimer.g_started) {
		return EBUSY;
	}
	if (petid >= kptimer.g_ntimers) {
		kppet_config(0);
	} else {
		kppet_config(kptimer.g_timers[petid].kt_actionid);
		uint64_t period_abs = MAX(kptimer_min_period_abs(true),
		    kptimer.g_timers[petid].kt_period_abs);
		kptimer.g_timers[petid].kt_period_abs = period_abs;
	}

	kptimer.g_pet_timerid = petid;
	return 0;
}