/* * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in * compliance with the License. The rights granted to you under the License * may not be used to create, or enable the creation or redistribution of, * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ #include #include #include #include #include #include #include #include #include #include #include #include #include static KALLOC_TYPE_DEFINE(thread_qos_override_zone, struct thread_qos_override, KT_DEFAULT); #ifdef MACH_BSD extern int proc_selfpid(void); extern char * proc_name_address(void *p); extern void rethrottle_thread(void * uthread); #endif /* MACH_BSD */ #define QOS_EXTRACT(q) ((q) & 0xff) #define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0 #define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 3 TUNABLE(uint32_t, qos_override_mode, "qos_override_mode", QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE); static void proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset); const int thread_default_iotier_override = THROTTLE_LEVEL_END; const struct thread_requested_policy default_thread_requested_policy = { .thrp_iotier_kevent_override = thread_default_iotier_override }; /* * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit * to threads that don't have a QoS class set. */ const qos_policy_params_t thread_qos_policy_params = { /* * This table defines the starting base priority of the thread, * which will be modified by the thread importance and the task max priority * before being applied. */ .qos_pri[THREAD_QOS_UNSPECIFIED] = 0, /* not consulted */ .qos_pri[THREAD_QOS_USER_INTERACTIVE] = BASEPRI_BACKGROUND, /* i.e. 46 */ .qos_pri[THREAD_QOS_USER_INITIATED] = BASEPRI_USER_INITIATED, .qos_pri[THREAD_QOS_LEGACY] = BASEPRI_DEFAULT, .qos_pri[THREAD_QOS_UTILITY] = BASEPRI_UTILITY, .qos_pri[THREAD_QOS_BACKGROUND] = MAXPRI_THROTTLE, .qos_pri[THREAD_QOS_MAINTENANCE] = MAXPRI_THROTTLE, /* * This table defines the highest IO priority that a thread marked with this * QoS class can have. 
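 *
 * As a minimal illustration (hypothetical caller, reading the tables in this
 * file directly), a THREAD_QOS_UTILITY thread starts from:
 *
 *     int base = thread_qos_policy_params.qos_pri[THREAD_QOS_UTILITY];     // BASEPRI_UTILITY
 *     int tier = thread_qos_policy_params.qos_iotier[THREAD_QOS_UTILITY];  // THROTTLE_LEVEL_TIER1
 *
 * before thread importance and the task max priority are folded in by
 * thread_recompute_priority().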
*/ .qos_iotier[THREAD_QOS_UNSPECIFIED] = THROTTLE_LEVEL_TIER0, .qos_iotier[THREAD_QOS_USER_INTERACTIVE] = THROTTLE_LEVEL_TIER0, .qos_iotier[THREAD_QOS_USER_INITIATED] = THROTTLE_LEVEL_TIER0, .qos_iotier[THREAD_QOS_LEGACY] = THROTTLE_LEVEL_TIER0, .qos_iotier[THREAD_QOS_UTILITY] = THROTTLE_LEVEL_TIER1, .qos_iotier[THREAD_QOS_BACKGROUND] = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */ .qos_iotier[THREAD_QOS_MAINTENANCE] = THROTTLE_LEVEL_TIER3, /* * This table defines the highest QoS level that * a thread marked with this QoS class can have. */ .qos_through_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED), .qos_through_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0), .qos_through_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1), .qos_through_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1), .qos_through_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2), .qos_through_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5), .qos_through_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5), .qos_latency_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED), .qos_latency_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(LATENCY_QOS_TIER_0), .qos_latency_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(LATENCY_QOS_TIER_1), .qos_latency_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(LATENCY_QOS_TIER_1), .qos_latency_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(LATENCY_QOS_TIER_3), .qos_latency_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(LATENCY_QOS_TIER_3), .qos_latency_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(LATENCY_QOS_TIER_3), }; static void thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode); static int thread_qos_scaled_relative_priority(int qos, int qos_relprio); static void proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info); static void proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token); static void proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token); static void thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token); static int thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2); static int proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2); static void thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token); static void thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token); boolean_t thread_has_qos_policy(thread_t thread) { return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? 
TRUE : FALSE; } static void thread_remove_qos_policy_locked(thread_t thread, task_pend_token_t pend_token) { __unused int prev_qos = thread->requested_policy.thrp_qos; DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos); proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO, THREAD_QOS_UNSPECIFIED, 0, pend_token); } kern_return_t thread_remove_qos_policy(thread_t thread) { struct task_pend_token pend_token = {}; thread_mtx_lock(thread); if (!thread->active) { thread_mtx_unlock(thread); return KERN_TERMINATED; } thread_remove_qos_policy_locked(thread, &pend_token); thread_mtx_unlock(thread); thread_policy_update_complete_unlocked(thread, &pend_token); return KERN_SUCCESS; } boolean_t thread_is_static_param(thread_t thread) { if (thread->static_param) { DTRACE_PROC1(qos__legacy__denied, thread_t, thread); return TRUE; } return FALSE; } /* * Relative priorities can range between 0REL and -15REL. These * map to QoS-specific ranges, to create non-overlapping priority * ranges. */ static int thread_qos_scaled_relative_priority(int qos, int qos_relprio) { int next_lower_qos; /* Fast path, since no validation or scaling is needed */ if (qos_relprio == 0) { return 0; } switch (qos) { case THREAD_QOS_USER_INTERACTIVE: next_lower_qos = THREAD_QOS_USER_INITIATED; break; case THREAD_QOS_USER_INITIATED: next_lower_qos = THREAD_QOS_LEGACY; break; case THREAD_QOS_LEGACY: next_lower_qos = THREAD_QOS_UTILITY; break; case THREAD_QOS_UTILITY: next_lower_qos = THREAD_QOS_BACKGROUND; break; case THREAD_QOS_MAINTENANCE: case THREAD_QOS_BACKGROUND: next_lower_qos = 0; break; default: panic("Unrecognized QoS %d", qos); return 0; } int prio_range_max = thread_qos_policy_params.qos_pri[qos]; int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0; /* * We now have the valid range that the scaled relative priority can map to. Note * that the lower bound is exclusive, but the upper bound is inclusive. If the * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard * remainder. 
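 *
 * Worked example with that same (21,31] range, i.e. prio_range_max = 31 and
 * prio_range_min = 21:
 *
 *     qos_relprio = -8  : -(((31 - 21) * 8)  >> 4) = -(80 >> 4)  = -5  => base pri 26
 *     qos_relprio = -15 : -(((31 - 21) * 15) >> 4) = -(150 >> 4) = -9  => base pri 22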
*/ int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4); return scaled_relprio; } /* * flag set by -qos-policy-allow boot-arg to allow * testing thread qos policy from userspace */ static TUNABLE(bool, allow_qos_policy_set, "-qos-policy-allow", false); kern_return_t thread_policy_set( thread_t thread, thread_policy_flavor_t flavor, thread_policy_t policy_info, mach_msg_type_number_t count) { thread_qos_policy_data_t req_qos; kern_return_t kr; req_qos.qos_tier = THREAD_QOS_UNSPECIFIED; if (thread == THREAD_NULL) { return KERN_INVALID_ARGUMENT; } if (!allow_qos_policy_set) { if (thread_is_static_param(thread)) { return KERN_POLICY_STATIC; } if (flavor == THREAD_QOS_POLICY) { return KERN_INVALID_ARGUMENT; } if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) { if (count < THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT) { return KERN_INVALID_ARGUMENT; } thread_time_constraint_with_priority_policy_t info = (thread_time_constraint_with_priority_policy_t)policy_info; if (info->priority != BASEPRI_RTQUEUES) { return KERN_INVALID_ARGUMENT; } } } if (flavor == THREAD_TIME_CONSTRAINT_POLICY || flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) { thread_work_interval_flags_t th_wi_flags = os_atomic_load( &thread->th_work_interval_flags, relaxed); if ((th_wi_flags & TH_WORK_INTERVAL_FLAGS_HAS_WORKLOAD_ID) && !(th_wi_flags & TH_WORK_INTERVAL_FLAGS_RT_ALLOWED)) { /* Fail requests to become realtime for threads having joined workintervals * with workload ID that don't have the rt-allowed flag. */ return KERN_INVALID_POLICY; } } /* Threads without static_param set reset their QoS when other policies are applied. */ if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) { /* Store the existing tier, if we fail this call it is used to reset back. */ req_qos.qos_tier = thread->requested_policy.thrp_qos; req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio; kr = thread_remove_qos_policy(thread); if (kr != KERN_SUCCESS) { return kr; } } kr = thread_policy_set_internal(thread, flavor, policy_info, count); if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) { if (kr != KERN_SUCCESS) { /* Reset back to our original tier as the set failed. */ (void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT); } } return kr; } static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, period) == offsetof(thread_time_constraint_policy_data_t, period)); static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, computation) == offsetof(thread_time_constraint_policy_data_t, computation)); static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, constraint) == offsetof(thread_time_constraint_policy_data_t, constraint)); static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, preemptible) == offsetof(thread_time_constraint_policy_data_t, preemptible)); kern_return_t thread_policy_set_internal( thread_t thread, thread_policy_flavor_t flavor, thread_policy_t policy_info, mach_msg_type_number_t count) { kern_return_t result = KERN_SUCCESS; struct task_pend_token pend_token = {}; thread_mtx_lock(thread); if (!thread->active) { thread_mtx_unlock(thread); return KERN_TERMINATED; } switch (flavor) { case THREAD_EXTENDED_POLICY: { boolean_t timeshare = TRUE; if (count >= THREAD_EXTENDED_POLICY_COUNT) { thread_extended_policy_t info; info = (thread_extended_policy_t)policy_info; timeshare = info->timeshare; } sched_mode_t mode = (timeshare == TRUE) ? 
TH_MODE_TIMESHARE : TH_MODE_FIXED; spl_t s = splsched(); thread_lock(thread); thread_set_user_sched_mode_and_recompute_pri(thread, mode); thread_unlock(thread); splx(s); /* * The thread may be demoted with RT_DISALLOWED but has just * changed its sched mode to TIMESHARE or FIXED. Make sure to * undemote the thread so the new sched mode takes effect. */ thread_rt_evaluate(thread); pend_token.tpt_update_thread_sfi = 1; break; } case THREAD_TIME_CONSTRAINT_POLICY: case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY: { thread_time_constraint_with_priority_policy_t info; mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ? THREAD_TIME_CONSTRAINT_POLICY_COUNT : THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT); if (count < min_count) { result = KERN_INVALID_ARGUMENT; break; } info = (thread_time_constraint_with_priority_policy_t)policy_info; if (info->constraint < info->computation || info->computation > max_rt_quantum || info->computation < min_rt_quantum) { result = KERN_INVALID_ARGUMENT; break; } if (info->computation < (info->constraint / 2)) { info->computation = (info->constraint / 2); if (info->computation > max_rt_quantum) { info->computation = max_rt_quantum; } } if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) { if ((info->priority < BASEPRI_RTQUEUES) || (info->priority > MAXPRI)) { result = KERN_INVALID_ARGUMENT; break; } } spl_t s = splsched(); thread_lock(thread); thread->realtime.period = info->period; thread->realtime.computation = info->computation; thread->realtime.constraint = info->constraint; thread->realtime.preemptible = info->preemptible; /* * If the thread has a work interval driven policy, the priority * offset has been set by the work interval. */ if (!thread->requested_policy.thrp_wi_driven) { if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) { thread->realtime.priority_offset = (uint8_t)(info->priority - BASEPRI_RTQUEUES); } else { thread->realtime.priority_offset = 0; } } thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME); thread_unlock(thread); splx(s); thread_rt_evaluate(thread); pend_token.tpt_update_thread_sfi = 1; break; } case THREAD_PRECEDENCE_POLICY: { thread_precedence_policy_t info; if (count < THREAD_PRECEDENCE_POLICY_COUNT) { result = KERN_INVALID_ARGUMENT; break; } info = (thread_precedence_policy_t)policy_info; spl_t s = splsched(); thread_lock(thread); thread->importance = info->importance; thread_recompute_priority(thread); thread_unlock(thread); splx(s); break; } case THREAD_AFFINITY_POLICY: { extern boolean_t affinity_sets_enabled; thread_affinity_policy_t info; if (!affinity_sets_enabled) { result = KERN_INVALID_POLICY; break; } if (!thread_affinity_is_supported()) { result = KERN_NOT_SUPPORTED; break; } if (count < THREAD_AFFINITY_POLICY_COUNT) { result = KERN_INVALID_ARGUMENT; break; } info = (thread_affinity_policy_t) policy_info; /* * Unlock the thread mutex here and * return directly after calling thread_affinity_set(). * This is necessary for correct lock ordering because * thread_affinity_set() takes the task lock. 
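 *
 * Sketch of the inversion this avoids (assuming the task lock is ordered
 * outside the thread mutex):
 *
 *     thread_mtx_lock(thread);
 *     task_lock(task);      // would nest the task lock inside the thread mutex
 *
 * Dropping the thread mutex first keeps that ordering legal.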
*/ thread_mtx_unlock(thread); return thread_affinity_set(thread, info->affinity_tag); } #if !defined(XNU_TARGET_OS_OSX) case THREAD_BACKGROUND_POLICY: { thread_background_policy_t info; if (count < THREAD_BACKGROUND_POLICY_COUNT) { result = KERN_INVALID_ARGUMENT; break; } if (get_threadtask(thread) != current_task()) { result = KERN_PROTECTION_FAILURE; break; } info = (thread_background_policy_t) policy_info; int enable; if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG) { enable = TASK_POLICY_ENABLE; } else { enable = TASK_POLICY_DISABLE; } int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL; proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token); break; } #endif /* !defined(XNU_TARGET_OS_OSX) */ case THREAD_THROUGHPUT_QOS_POLICY: { thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info; thread_throughput_qos_t tqos; if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) { result = KERN_INVALID_ARGUMENT; break; } if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS) { break; } tqos = qos_extract(info->thread_throughput_qos_tier); proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token); break; } case THREAD_LATENCY_QOS_POLICY: { thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info; thread_latency_qos_t lqos; if (count < THREAD_LATENCY_QOS_POLICY_COUNT) { result = KERN_INVALID_ARGUMENT; break; } if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS) { break; } lqos = qos_extract(info->thread_latency_qos_tier); proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token); break; } case THREAD_QOS_POLICY: { thread_qos_policy_t info = (thread_qos_policy_t)policy_info; if (count < THREAD_QOS_POLICY_COUNT) { result = KERN_INVALID_ARGUMENT; break; } if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) { result = KERN_INVALID_ARGUMENT; break; } if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) { result = KERN_INVALID_ARGUMENT; break; } if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) { result = KERN_INVALID_ARGUMENT; break; } proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO, info->qos_tier, -info->tier_importance, &pend_token); break; } default: result = KERN_INVALID_ARGUMENT; break; } thread_mtx_unlock(thread); thread_policy_update_complete_unlocked(thread, &pend_token); return result; } /* * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO. * Both result in FIXED mode scheduling. */ static sched_mode_t convert_policy_to_sched_mode(integer_t policy) { switch (policy) { case POLICY_TIMESHARE: return TH_MODE_TIMESHARE; case POLICY_RR: case POLICY_FIFO: return TH_MODE_FIXED; default: panic("unexpected sched policy: %d", policy); return TH_MODE_NONE; } } /* * Called either with the thread mutex locked * or from the pthread kext in a 'safe place'. */ static kern_return_t thread_set_mode_and_absolute_pri_internal(thread_t thread, sched_mode_t mode, integer_t priority, task_pend_token_t pend_token) { kern_return_t kr = KERN_SUCCESS; spl_t s = splsched(); thread_lock(thread); /* This path isn't allowed to change a thread out of realtime. 
*/ if ((thread->sched_mode == TH_MODE_REALTIME) || (thread->saved_mode == TH_MODE_REALTIME)) { kr = KERN_FAILURE; goto unlock; } if (thread->policy_reset) { kr = KERN_SUCCESS; goto unlock; } sched_mode_t old_mode = thread->sched_mode; integer_t old_base_pri = thread->base_pri; integer_t old_sched_pri = thread->sched_pri; /* * Reverse engineer and apply the correct importance value * from the requested absolute priority value. * * TODO: Store the absolute priority value instead */ if (priority >= thread->max_priority) { priority = thread->max_priority - thread->task_priority; } else if (priority >= MINPRI_KERNEL) { priority -= MINPRI_KERNEL; } else if (priority >= MINPRI_RESERVED) { priority -= MINPRI_RESERVED; } else { priority -= BASEPRI_DEFAULT; } priority += thread->task_priority; if (priority > thread->max_priority) { priority = thread->max_priority; } else if (priority < MINPRI) { priority = MINPRI; } thread->importance = priority - thread->task_priority; thread_set_user_sched_mode_and_recompute_pri(thread, mode); if (mode != old_mode) { pend_token->tpt_update_thread_sfi = 1; } if (thread->base_pri != old_base_pri || thread->sched_pri != old_sched_pri) { pend_token->tpt_update_turnstile = 1; } unlock: thread_unlock(thread); splx(s); return kr; } void thread_freeze_base_pri(thread_t thread) { assert(thread == current_thread()); spl_t s = splsched(); thread_lock(thread); assert((thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN) == 0); thread->sched_flags |= TH_SFLAG_BASE_PRI_FROZEN; thread_unlock(thread); splx(s); } bool thread_unfreeze_base_pri(thread_t thread) { assert(thread == current_thread()); integer_t base_pri; ast_t ast = 0; spl_t s = splsched(); thread_lock(thread); assert(thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN); thread->sched_flags &= ~TH_SFLAG_BASE_PRI_FROZEN; base_pri = thread->req_base_pri; if (base_pri != thread->base_pri) { /* * This function returns "true" if the base pri change * is the most likely cause for the preemption. */ sched_set_thread_base_priority(thread, base_pri); ast = ast_peek(AST_PREEMPT); } thread_unlock(thread); splx(s); return ast != 0; } uint8_t thread_workq_pri_for_qos(thread_qos_t qos) { assert(qos < THREAD_QOS_LAST); return (uint8_t)thread_qos_policy_params.qos_pri[qos]; } thread_qos_t thread_workq_qos_for_pri(int priority) { thread_qos_t qos; if (priority > thread_qos_policy_params.qos_pri[THREAD_QOS_USER_INTERACTIVE]) { // indicate that workq should map >UI threads to workq's // internal notation for above-UI work. return THREAD_QOS_UNSPECIFIED; } for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) { // map a given priority up to the next nearest qos band. 
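	/*
	 * For illustration, assuming the usual band values from the qos_pri table
	 * above (user-interactive 46, user-initiated 37, legacy 31, utility 20,
	 * background/maintenance 4): a priority of 25 fails this test at
	 * user-interactive and user-initiated (37 and 31 are not below 25) and
	 * passes at legacy, because qos_pri[THREAD_QOS_UTILITY] == 20 < 25, so the
	 * loop returns THREAD_QOS_LEGACY.
	 */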
if (thread_qos_policy_params.qos_pri[qos - 1] < priority) { return qos; } } return THREAD_QOS_MAINTENANCE; } /* * private interface for pthread workqueues * * Set scheduling policy & absolute priority for thread * May be called with spinlocks held * Thread mutex lock is not held */ void thread_reset_workq_qos(thread_t thread, uint32_t qos) { struct task_pend_token pend_token = {}; assert(qos < THREAD_QOS_LAST); spl_t s = splsched(); thread_lock(thread); proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token); proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0, &pend_token); assert(pend_token.tpt_update_sockets == 0); thread_unlock(thread); splx(s); thread_policy_update_complete_unlocked(thread, &pend_token); } /* * private interface for pthread workqueues * * Set scheduling policy & absolute priority for thread * May be called with spinlocks held * Thread mutex lock is held */ void thread_set_workq_override(thread_t thread, uint32_t qos) { struct task_pend_token pend_token = {}; assert(qos < THREAD_QOS_LAST); spl_t s = splsched(); thread_lock(thread); proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_WORKQ_OVERRIDE, qos, 0, &pend_token); assert(pend_token.tpt_update_sockets == 0); thread_unlock(thread); splx(s); thread_policy_update_complete_unlocked(thread, &pend_token); } /* * private interface for pthread workqueues * * Set scheduling policy & absolute priority for thread * May be called with spinlocks held * Thread mutex lock is not held */ void thread_set_workq_pri(thread_t thread, thread_qos_t qos, integer_t priority, integer_t policy) { struct task_pend_token pend_token = {}; sched_mode_t mode = convert_policy_to_sched_mode(policy); assert(qos < THREAD_QOS_LAST); assert(thread->static_param); if (!thread->static_param || !thread->active) { return; } spl_t s = splsched(); thread_lock(thread); proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token); proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0, &pend_token); thread_unlock(thread); splx(s); /* Concern: this doesn't hold the mutex... */ __assert_only kern_return_t kr; kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token); assert(kr == KERN_SUCCESS); assert(pend_token.tpt_update_sockets == 0); thread_policy_update_complete_unlocked(thread, &pend_token); } /* * thread_set_mode_and_absolute_pri: * * Set scheduling policy & absolute priority for thread, for deprecated * thread_set_policy and thread_policy interfaces. * * Called with nothing locked. 
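 *
 * Usage sketch (illustrative priority value only):
 *
 *     kern_return_t kr = thread_set_mode_and_absolute_pri(thread,
 *         POLICY_FIFO, BASEPRI_DEFAULT + 10);
 *
 * POLICY_RR and POLICY_FIFO both select TH_MODE_FIXED and POLICY_TIMESHARE
 * selects TH_MODE_TIMESHARE (see convert_policy_to_sched_mode() above); the
 * call fails if the thread is currently realtime.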
*/ kern_return_t thread_set_mode_and_absolute_pri(thread_t thread, integer_t policy, integer_t priority) { kern_return_t kr = KERN_SUCCESS; struct task_pend_token pend_token = {}; sched_mode_t mode = convert_policy_to_sched_mode(policy); thread_mtx_lock(thread); if (!thread->active) { kr = KERN_TERMINATED; goto unlock; } if (thread_is_static_param(thread)) { kr = KERN_POLICY_STATIC; goto unlock; } /* Setting legacy policies on threads kills the current QoS */ if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) { thread_remove_qos_policy_locked(thread, &pend_token); } kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token); unlock: thread_mtx_unlock(thread); thread_policy_update_complete_unlocked(thread, &pend_token); return kr; } /* * Set the thread's requested mode and recompute priority * Called with thread mutex and thread locked * * TODO: Mitigate potential problems caused by moving thread to end of runq * whenever its priority is recomputed * Only remove when it actually changes? Attempt to re-insert at appropriate location? */ static void thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode) { if (thread->policy_reset) { return; } boolean_t removed = thread_run_queue_remove(thread); sched_set_thread_mode_user(thread, mode); thread_recompute_priority(thread); if (removed) { thread_run_queue_reinsert(thread, SCHED_TAILQ); } } /* called at splsched with thread lock locked */ static void thread_update_qos_cpu_time_locked(thread_t thread) { task_t task = get_threadtask(thread); uint64_t timer_sum, timer_delta; /* * This is only as accurate the thread's last context switch or user/kernel * transition (unless precise user/kernel time is disabled). * * TODO: Consider running an update operation here to update it first. * Maybe doable with interrupts disabled from current thread. * If the thread is on a different core, may not be easy to get right. */ timer_sum = recount_thread_time_mach(thread); timer_delta = timer_sum - thread->vtimer_qos_save; thread->vtimer_qos_save = timer_sum; uint64_t* task_counter = NULL; /* Update the task-level effective and requested qos stats atomically, because we don't have the task lock. */ switch (thread->effective_policy.thep_qos) { case THREAD_QOS_UNSPECIFIED: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default; break; case THREAD_QOS_MAINTENANCE: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance; break; case THREAD_QOS_BACKGROUND: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background; break; case THREAD_QOS_UTILITY: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility; break; case THREAD_QOS_LEGACY: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy; break; case THREAD_QOS_USER_INITIATED: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated; break; case THREAD_QOS_USER_INTERACTIVE: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break; default: panic("unknown effective QoS: %d", thread->effective_policy.thep_qos); } OSAddAtomic64(timer_delta, task_counter); /* Update the task-level qos stats atomically, because we don't have the task lock. 
*/ switch (thread->requested_policy.thrp_qos) { case THREAD_QOS_UNSPECIFIED: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default; break; case THREAD_QOS_MAINTENANCE: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance; break; case THREAD_QOS_BACKGROUND: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background; break; case THREAD_QOS_UTILITY: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility; break; case THREAD_QOS_LEGACY: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy; break; case THREAD_QOS_USER_INITIATED: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated; break; case THREAD_QOS_USER_INTERACTIVE: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break; default: panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos); } OSAddAtomic64(timer_delta, task_counter); } /* * called with no thread locks held * may hold task lock */ void thread_update_qos_cpu_time(thread_t thread) { thread_mtx_lock(thread); spl_t s = splsched(); thread_lock(thread); thread_update_qos_cpu_time_locked(thread); thread_unlock(thread); splx(s); thread_mtx_unlock(thread); } /* * Calculate base priority from thread attributes, and set it on the thread * * Called with thread_lock and thread mutex held. */ void thread_recompute_priority( thread_t thread) { integer_t priority; integer_t adj_priority; bool wi_priority = false; if (thread->policy_reset) { return; } if (thread->sched_mode == TH_MODE_REALTIME) { uint8_t i = thread->realtime.priority_offset; assert((i >= 0) && (i < NRTQS)); priority = BASEPRI_RTQUEUES + i; sched_set_thread_base_priority(thread, priority); if (thread->realtime.deadline == RT_DEADLINE_NONE) { /* Make sure the thread has a valid deadline */ uint64_t ctime = mach_absolute_time(); thread->realtime.deadline = thread->realtime.constraint + ctime; KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SET_RT_DEADLINE) | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), thread->realtime.deadline, thread->realtime.computation, 1); } return; /* * A thread may have joined a RT work interval but then never * changed its sched mode or have been demoted. RT work * intervals will have RT priorities - ignore the priority if * the thread isn't RT. */ } else if (thread->effective_policy.thep_wi_driven && work_interval_get_priority(thread) < BASEPRI_RTQUEUES) { priority = work_interval_get_priority(thread); wi_priority = true; } else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) { int qos = thread->effective_policy.thep_qos; int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent; int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */ int qos_scaled_relprio; assert(qos >= 0 && qos < THREAD_QOS_LAST); assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE); priority = thread_qos_policy_params.qos_pri[qos]; qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio); if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) { /* Bump priority 46 to 47 when in a frontmost app */ qos_scaled_relprio += 1; } /* TODO: factor in renice priority here? */ priority += qos_scaled_relprio; } else { if (thread->importance > MAXPRI) { priority = MAXPRI; } else if (thread->importance < -MAXPRI) { priority = -MAXPRI; } else { priority = thread->importance; } priority += thread->task_priority; } /* Boost the priority of threads which are RT demoted. 
*/ if (sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) { priority = MAX(priority, MAXPRI_USER); } priority = MAX(priority, thread->user_promotion_basepri); /* * Clamp priority back into the allowed range for this task. * The initial priority value could be out of this range due to: * Task clamped to BG or Utility (max-pri is 4, or 20) * Task is user task (max-pri is 63) * Task is kernel task (max-pri is 95) * Note that thread->importance is user-settable to any integer * via THREAD_PRECEDENCE_POLICY. */ adj_priority = priority; adj_priority = MIN(adj_priority, thread->max_priority); adj_priority = MAX(adj_priority, MINPRI); /* Allow workload driven priorities to exceed max_priority. */ if (wi_priority) { adj_priority = MAX(adj_priority, priority); } /* Allow priority to exceed max_priority for promotions. */ if (thread->effective_policy.thep_promote_above_task) { adj_priority = MAX(adj_priority, thread->user_promotion_basepri); } priority = adj_priority; assert3u(priority, <=, MAXPRI); if (thread->saved_mode == TH_MODE_REALTIME && sched_thread_mode_has_demotion(thread, TH_SFLAG_FAILSAFE)) { priority = DEPRESSPRI; } if (thread->effective_policy.thep_terminated == TRUE) { /* * We temporarily want to override the expected priority to * ensure that the thread exits in a timely manner. * Note that this is allowed to exceed thread->max_priority * so that the thread is no longer clamped to background * during the final exit phase. */ if (priority < thread->task_priority) { priority = thread->task_priority; } if (priority < BASEPRI_DEFAULT) { priority = BASEPRI_DEFAULT; } } #if !defined(XNU_TARGET_OS_OSX) /* No one can have a base priority less than MAXPRI_THROTTLE */ if (priority < MAXPRI_THROTTLE) { priority = MAXPRI_THROTTLE; } #endif /* !defined(XNU_TARGET_OS_OSX) */ sched_set_thread_base_priority(thread, priority); } /* Called with the task lock held, but not the thread mutex or spinlock */ void thread_policy_update_tasklocked( thread_t thread, integer_t priority, integer_t max_priority, task_pend_token_t pend_token) { thread_mtx_lock(thread); if (!thread->active || thread->policy_reset) { thread_mtx_unlock(thread); return; } spl_t s = splsched(); thread_lock(thread); __unused integer_t old_max_priority = thread->max_priority; assert(priority >= INT16_MIN && priority <= INT16_MAX); thread->task_priority = (int16_t)priority; assert(max_priority >= INT16_MIN && max_priority <= INT16_MAX); thread->max_priority = (int16_t)max_priority; /* * When backgrounding a thread, realtime and fixed priority threads * should be demoted to timeshare background threads. 
* * TODO: Do this inside the thread policy update routine in order to avoid double * remove/reinsert for a runnable thread */ if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) { sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED); } else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) { sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED); } thread_policy_update_spinlocked(thread, true, pend_token); thread_unlock(thread); splx(s); thread_mtx_unlock(thread); } /* * Reset thread to default state in preparation for termination * Called with thread mutex locked * * Always called on current thread, so we don't need a run queue remove */ void thread_policy_reset( thread_t thread) { spl_t s; assert(thread == current_thread()); s = splsched(); thread_lock(thread); if (thread->sched_flags & TH_SFLAG_FAILSAFE) { sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE); } if (thread->sched_flags & TH_SFLAG_THROTTLED) { sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED); } if (thread->sched_flags & TH_SFLAG_RT_DISALLOWED) { sched_thread_mode_undemote(thread, TH_SFLAG_RT_DISALLOWED); } /* At this point, the various demotions should be inactive */ assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)); assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK)); /* Reset thread back to task-default basepri and mode */ sched_mode_t newmode = SCHED(initial_thread_sched_mode)(get_threadtask(thread)); sched_set_thread_mode(thread, newmode); thread->importance = 0; /* Prevent further changes to thread base priority or mode */ thread->policy_reset = 1; sched_set_thread_base_priority(thread, thread->task_priority); thread_unlock(thread); splx(s); } kern_return_t thread_policy_get( thread_t thread, thread_policy_flavor_t flavor, thread_policy_t policy_info, mach_msg_type_number_t *count, boolean_t *get_default) { kern_return_t result = KERN_SUCCESS; if (thread == THREAD_NULL) { return KERN_INVALID_ARGUMENT; } thread_mtx_lock(thread); if (!thread->active) { thread_mtx_unlock(thread); return KERN_TERMINATED; } switch (flavor) { case THREAD_EXTENDED_POLICY: { boolean_t timeshare = TRUE; if (!(*get_default)) { spl_t s = splsched(); thread_lock(thread); if ((thread->sched_mode != TH_MODE_REALTIME) && (thread->saved_mode != TH_MODE_REALTIME)) { if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) { timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0; } else { timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0; } } else { *get_default = TRUE; } thread_unlock(thread); splx(s); } if (*count >= THREAD_EXTENDED_POLICY_COUNT) { thread_extended_policy_t info; info = (thread_extended_policy_t)policy_info; info->timeshare = timeshare; } break; } case THREAD_TIME_CONSTRAINT_POLICY: case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY: { thread_time_constraint_with_priority_policy_t info; mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ? 
THREAD_TIME_CONSTRAINT_POLICY_COUNT : THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT); if (*count < min_count) { result = KERN_INVALID_ARGUMENT; break; } info = (thread_time_constraint_with_priority_policy_t)policy_info; if (!(*get_default)) { spl_t s = splsched(); thread_lock(thread); if ((thread->sched_mode == TH_MODE_REALTIME) || (thread->saved_mode == TH_MODE_REALTIME)) { info->period = thread->realtime.period; info->computation = thread->realtime.computation; info->constraint = thread->realtime.constraint; info->preemptible = thread->realtime.preemptible; if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) { info->priority = thread->realtime.priority_offset + BASEPRI_RTQUEUES; } } else { *get_default = TRUE; } thread_unlock(thread); splx(s); } if (*get_default) { info->period = 0; info->computation = default_timeshare_computation; info->constraint = default_timeshare_constraint; info->preemptible = TRUE; if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) { info->priority = BASEPRI_RTQUEUES; } } break; } case THREAD_PRECEDENCE_POLICY: { thread_precedence_policy_t info; if (*count < THREAD_PRECEDENCE_POLICY_COUNT) { result = KERN_INVALID_ARGUMENT; break; } info = (thread_precedence_policy_t)policy_info; if (!(*get_default)) { spl_t s = splsched(); thread_lock(thread); info->importance = thread->importance; thread_unlock(thread); splx(s); } else { info->importance = 0; } break; } case THREAD_AFFINITY_POLICY: { thread_affinity_policy_t info; if (!thread_affinity_is_supported()) { result = KERN_NOT_SUPPORTED; break; } if (*count < THREAD_AFFINITY_POLICY_COUNT) { result = KERN_INVALID_ARGUMENT; break; } info = (thread_affinity_policy_t)policy_info; if (!(*get_default)) { info->affinity_tag = thread_affinity_get(thread); } else { info->affinity_tag = THREAD_AFFINITY_TAG_NULL; } break; } case THREAD_POLICY_STATE: { thread_policy_state_t info; if (*count < THREAD_POLICY_STATE_COUNT) { result = KERN_INVALID_ARGUMENT; break; } /* Only root can get this info */ if (!task_is_privileged(current_task())) { result = KERN_PROTECTION_FAILURE; break; } info = (thread_policy_state_t)(void*)policy_info; if (!(*get_default)) { info->flags = 0; spl_t s = splsched(); thread_lock(thread); info->flags |= (thread->static_param ? 
THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0); info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy); info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy); info->thps_user_promotions = 0; info->thps_user_promotion_basepri = thread->user_promotion_basepri; info->thps_ipc_overrides = thread->kevent_overrides; proc_get_thread_policy_bitfield(thread, info); thread_unlock(thread); splx(s); } else { info->requested = 0; info->effective = 0; info->pending = 0; } break; } case THREAD_REQUESTED_STATE_POLICY: { if (*count < THREAD_REQUESTED_STATE_POLICY_COUNT) { result = KERN_INVALID_ARGUMENT; break; } thread_requested_qos_policy_t info = (thread_requested_qos_policy_t) policy_info; struct thread_requested_policy *req_policy = &thread->requested_policy; info->thrq_base_qos = req_policy->thrp_qos; info->thrq_qos_relprio = req_policy->thrp_qos_relprio; info->thrq_qos_override = req_policy->thrp_qos_override; info->thrq_qos_promote = req_policy->thrp_qos_promote; info->thrq_qos_kevent_override = req_policy->thrp_qos_kevent_override; info->thrq_qos_workq_override = req_policy->thrp_qos_workq_override; info->thrq_qos_wlsvc_override = req_policy->thrp_qos_wlsvc_override; break; } case THREAD_LATENCY_QOS_POLICY: { thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info; thread_latency_qos_t plqos; if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) { result = KERN_INVALID_ARGUMENT; break; } if (*get_default) { plqos = 0; } else { plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL); } info->thread_latency_qos_tier = qos_latency_policy_package(plqos); } break; case THREAD_THROUGHPUT_QOS_POLICY: { thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info; thread_throughput_qos_t ptqos; if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) { result = KERN_INVALID_ARGUMENT; break; } if (*get_default) { ptqos = 0; } else { ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL); } info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos); } break; case THREAD_QOS_POLICY: { thread_qos_policy_t info = (thread_qos_policy_t)policy_info; if (*count < THREAD_QOS_POLICY_COUNT) { result = KERN_INVALID_ARGUMENT; break; } if (!(*get_default)) { int relprio_value = 0; info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO, &relprio_value); info->tier_importance = -relprio_value; } else { info->qos_tier = THREAD_QOS_UNSPECIFIED; info->tier_importance = 0; } break; } default: result = KERN_INVALID_ARGUMENT; break; } thread_mtx_unlock(thread); return result; } void thread_policy_create(thread_t thread) { KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START, thread_tid(thread), theffective_0(thread), theffective_1(thread), thread->base_pri, 0); /* We pass a pend token but ignore it */ struct task_pend_token pend_token = {}; thread_policy_update_internal_spinlocked(thread, true, &pend_token); KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END, thread_tid(thread), theffective_0(thread), theffective_1(thread), thread->base_pri, 0); } static void thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token) { KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | 
DBG_FUNC_START), thread_tid(thread), theffective_0(thread), theffective_1(thread), thread->base_pri, 0); thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token); KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END, thread_tid(thread), theffective_0(thread), theffective_1(thread), thread->base_pri, 0); } /* * One thread state update function TO RULE THEM ALL * * This function updates the thread effective policy fields * and pushes the results to the relevant subsystems. * * Called with thread spinlock locked, task may be locked, thread mutex may be locked */ static void thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token) { /* * Step 1: * Gather requested policy and effective task state */ const struct thread_requested_policy requested = thread->requested_policy; const struct task_effective_policy task_effective = get_threadtask(thread)->effective_policy; /* * Step 2: * Calculate new effective policies from requested policy, task and thread state * Rules: * Don't change requested, it won't take effect */ struct thread_effective_policy next = {}; next.thep_wi_driven = requested.thrp_wi_driven; next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent; uint32_t next_qos = requested.thrp_qos; if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) { next_qos = MAX(requested.thrp_qos_override, next_qos); next_qos = MAX(requested.thrp_qos_promote, next_qos); next_qos = MAX(requested.thrp_qos_kevent_override, next_qos); next_qos = MAX(requested.thrp_qos_wlsvc_override, next_qos); next_qos = MAX(requested.thrp_qos_workq_override, next_qos); } if (task_effective.tep_darwinbg && task_effective.tep_adaptive_bg && requested.thrp_qos_promote > THREAD_QOS_BACKGROUND) { /* * This thread is turnstile-boosted higher than the adaptive clamp * by a synchronous waiter. Allow that to override the adaptive * clamp temporarily for this thread only. */ next.thep_promote_above_task = true; next_qos = requested.thrp_qos_promote; } next.thep_qos = next_qos; /* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */ if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) { if (next.thep_qos != THREAD_QOS_UNSPECIFIED) { next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos); } else { next.thep_qos = task_effective.tep_qos_clamp; } next.thep_wi_driven = 0; } /* * Extract outbound-promotion QoS before applying task ceiling or BG clamp * This allows QoS promotions to work properly even after the process is unclamped. */ next.thep_qos_promote = next.thep_qos; /* The ceiling only applies to threads that are in the QoS world */ /* TODO: is it appropriate for this to limit a turnstile-boosted thread's QoS? */ if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED && next.thep_qos != THREAD_QOS_UNSPECIFIED) { next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos); } /* * The QoS relative priority is only applicable when the original programmer's * intended (requested) QoS is in effect. When the QoS is clamped (e.g. * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored, * since otherwise it would be lower than unclamped threads. Similarly, in the * presence of boosting, the programmer doesn't know what other actors * are boosting the thread. 
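 *
 * Concretely (sketch): a thread requesting USER_INITIATED with relprio -13 in
 * a task clamped to UTILITY ends up with thep_qos == THREAD_QOS_UTILITY, so
 * the requested.thrp_qos == next.thep_qos test below fails and
 * thep_qos_relprio stays 0 instead of sinking the thread below unclamped
 * UTILITY threads.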
*/ if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) && (requested.thrp_qos == next.thep_qos) && (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) { next.thep_qos_relprio = requested.thrp_qos_relprio; } else { next.thep_qos_relprio = 0; } /* Calculate DARWIN_BG */ bool wants_darwinbg = false; bool wants_all_sockets_bg = false; /* Do I want my existing sockets to be bg */ if (task_effective.tep_darwinbg && !next.thep_promote_above_task) { wants_darwinbg = true; } /* * If DARWIN_BG has been requested at either level, it's engaged. * darwinbg threads always create bg sockets, * but only some types of darwinbg change the sockets * after they're created */ if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg) { wants_all_sockets_bg = wants_darwinbg = true; } if (requested.thrp_pidbind_bg) { wants_all_sockets_bg = wants_darwinbg = true; } if (next.thep_qos == THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_MAINTENANCE) { wants_darwinbg = true; } /* Calculate side effects of DARWIN_BG */ if (wants_darwinbg) { next.thep_darwinbg = 1; next.thep_wi_driven = 0; } if (next.thep_darwinbg || task_effective.tep_new_sockets_bg) { next.thep_new_sockets_bg = 1; } /* Don't use task_effective.tep_all_sockets_bg here */ if (wants_all_sockets_bg) { next.thep_all_sockets_bg = 1; } /* darwinbg implies background QOS (or lower) */ if (next.thep_darwinbg && (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) { next.thep_qos = THREAD_QOS_BACKGROUND; next.thep_qos_relprio = 0; } /* Calculate IO policy */ int iopol = THROTTLE_LEVEL_TIER0; /* Factor in the task's IO policy */ if (next.thep_darwinbg) { iopol = MAX(iopol, task_effective.tep_bg_iotier); } if (!next.thep_promote_above_task) { iopol = MAX(iopol, task_effective.tep_io_tier); } /* Look up the associated IO tier value for the QoS class */ iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]); iopol = MAX(iopol, requested.thrp_int_iotier); iopol = MAX(iopol, requested.thrp_ext_iotier); /* Apply the kevent iotier override */ iopol = MIN(iopol, requested.thrp_iotier_kevent_override); next.thep_io_tier = iopol; /* * If a QoS override is causing IO to go into a lower tier, we also set * the passive bit so that a thread doesn't end up stuck in its own throttle * window when the override goes away. 
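 *
 * Example of the tier math above (sketch): a BACKGROUND thread
 * (qos_iotier TIER2) overridden up to USER_INITIATED gets
 * next.thep_qos == THREAD_QOS_USER_INITIATED, so next_qos_iotier below is
 * TIER0 while req_qos_iotier is TIER2; the override counts as active and the
 * passive bit is set alongside the less-throttled tier.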
*/ int next_qos_iotier = thread_qos_policy_params.qos_iotier[next.thep_qos]; int req_qos_iotier = thread_qos_policy_params.qos_iotier[requested.thrp_qos]; bool qos_io_override_active = (next_qos_iotier < req_qos_iotier); /* Calculate Passive IO policy */ if (requested.thrp_ext_iopassive || requested.thrp_int_iopassive || qos_io_override_active || task_effective.tep_io_passive) { next.thep_io_passive = 1; } /* Calculate timer QOS */ uint32_t latency_qos = requested.thrp_latency_qos; if (!next.thep_promote_above_task) { latency_qos = MAX(latency_qos, task_effective.tep_latency_qos); } latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]); next.thep_latency_qos = latency_qos; /* Calculate throughput QOS */ uint32_t through_qos = requested.thrp_through_qos; if (!next.thep_promote_above_task) { through_qos = MAX(through_qos, task_effective.tep_through_qos); } through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]); next.thep_through_qos = through_qos; if (task_effective.tep_terminated || requested.thrp_terminated) { /* Shoot down the throttles that slow down exit or response to SIGTERM */ next.thep_terminated = 1; next.thep_darwinbg = 0; next.thep_io_tier = THROTTLE_LEVEL_TIER0; next.thep_qos = THREAD_QOS_UNSPECIFIED; next.thep_latency_qos = LATENCY_QOS_TIER_UNSPECIFIED; next.thep_through_qos = THROUGHPUT_QOS_TIER_UNSPECIFIED; next.thep_wi_driven = 0; } /* * Step 3: * Swap out old policy for new policy */ struct thread_effective_policy prev = thread->effective_policy; thread_update_qos_cpu_time_locked(thread); /* This is the point where the new values become visible to other threads */ thread->effective_policy = next; /* * Step 4: * Pend updates that can't be done while holding the thread lock */ if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg) { pend_token->tpt_update_sockets = 1; } /* TODO: Doesn't this only need to be done if the throttle went up? */ if (prev.thep_io_tier != next.thep_io_tier) { pend_token->tpt_update_throttle = 1; } /* * Check for the attributes that sfi_thread_classify() consults, * and trigger SFI re-evaluation. */ if (prev.thep_qos != next.thep_qos || prev.thep_darwinbg != next.thep_darwinbg) { pend_token->tpt_update_thread_sfi = 1; } integer_t old_base_pri = thread->base_pri; /* * Step 5: * Update other subsystems as necessary if something has changed */ /* Check for the attributes that thread_recompute_priority() consults */ if (prev.thep_qos != next.thep_qos || prev.thep_qos_relprio != next.thep_qos_relprio || prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent || prev.thep_promote_above_task != next.thep_promote_above_task || prev.thep_terminated != next.thep_terminated || prev.thep_wi_driven != next.thep_wi_driven || pend_token->tpt_force_recompute_pri == 1 || recompute_priority) { thread_recompute_priority(thread); } /* * Check if the thread is waiting on a turnstile and needs priority propagation. */ if (pend_token->tpt_update_turnstile && ((old_base_pri == thread->base_pri) || !thread_get_waiting_turnstile(thread))) { /* * Reset update turnstile pend token since either * the thread priority did not change or thread is * not blocked on a turnstile. */ pend_token->tpt_update_turnstile = 0; } } /* * Initiate a thread policy state transition on a thread with its TID * Useful if you cannot guarantee the thread won't get terminated * Precondition: No locks are held * Will take task lock - using the non-tid variant is faster * if you already have a thread ref. 
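 *
 * Usage sketch (hypothetical flavor/value, no locks held):
 *
 *     proc_set_thread_policy_with_tid(task, tid, TASK_POLICY_ATTRIBUTE,
 *         TASK_POLICY_QOS_OVERRIDE, THREAD_QOS_USER_INITIATED);
 *
 * With a thread reference already in hand, proc_set_thread_policy() below
 * performs the same transition without the task_findtid() lookup.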
*/ void proc_set_thread_policy_with_tid(task_t task, uint64_t tid, int category, int flavor, int value) { /* takes task lock, returns ref'ed thread or NULL */ thread_t thread = task_findtid(task, tid); if (thread == THREAD_NULL) { return; } proc_set_thread_policy(thread, category, flavor, value); thread_deallocate(thread); } /* * Initiate a thread policy transition on a thread * This path supports networking transitions (i.e. darwinbg transitions) * Precondition: No locks are held */ void proc_set_thread_policy(thread_t thread, int category, int flavor, int value) { proc_set_thread_policy_ext(thread, category, flavor, value, 0); } void proc_set_thread_policy_ext(thread_t thread, int category, int flavor, int value, int value2) { struct task_pend_token pend_token = {}; thread_mtx_lock(thread); proc_set_thread_policy_locked(thread, category, flavor, value, value2, &pend_token); thread_mtx_unlock(thread); thread_policy_update_complete_unlocked(thread, &pend_token); } /* * Do the things that can't be done while holding a thread mutex. * These are set up to call back into thread policy to get the latest value, * so they don't have to be synchronized with the update. * The only required semantic is 'call this sometime after updating effective policy' * * Precondition: Thread mutex is not held * * This may be called with the task lock held, but in that case it won't be * called with tpt_update_sockets set. */ void thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token) { #ifdef MACH_BSD if (pend_token->tpt_update_sockets) { proc_apply_task_networkbg(task_pid(get_threadtask(thread)), thread); } #endif /* MACH_BSD */ if (pend_token->tpt_update_throttle) { rethrottle_thread(get_bsdthread_info(thread)); } if (pend_token->tpt_update_thread_sfi) { sfi_reevaluate(thread); } if (pend_token->tpt_update_turnstile) { turnstile_update_thread_priority_chain(thread); } } /* * Set and update thread policy * Thread mutex might be held */ static void proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token) { spl_t s = splsched(); thread_lock(thread); proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token); thread_unlock(thread); splx(s); } /* * Set and update thread policy * Thread spinlock is held */ static void proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token) { KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START, thread_tid(thread), threquested_0(thread), threquested_1(thread), value, 0); thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2, pend_token); thread_policy_update_spinlocked(thread, false, pend_token); KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END, thread_tid(thread), threquested_0(thread), threquested_1(thread), tpending(pend_token), 0); } /* * Set the requested state for a specific flavor to a specific value. 
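 *
 * Only the requested policy bits are written here; a typical caller (sketch,
 * mirroring proc_set_thread_policy_ext() above) drives the whole pipeline:
 *
 *     struct task_pend_token pend_token = {};
 *     thread_mtx_lock(thread);
 *     proc_set_thread_policy_locked(thread, category, flavor, value, value2, &pend_token);
 *     thread_mtx_unlock(thread);
 *     thread_policy_update_complete_unlocked(thread, &pend_token);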
*/ static void thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token) { int tier, passive; struct thread_requested_policy requested = thread->requested_policy; switch (flavor) { /* Category: EXTERNAL and INTERNAL, thread and task */ case TASK_POLICY_DARWIN_BG: if (category == TASK_POLICY_EXTERNAL) { requested.thrp_ext_darwinbg = value; } else { requested.thrp_int_darwinbg = value; } pend_token->tpt_update_turnstile = 1; break; case TASK_POLICY_IOPOL: proc_iopol_to_tier(value, &tier, &passive); if (category == TASK_POLICY_EXTERNAL) { requested.thrp_ext_iotier = tier; requested.thrp_ext_iopassive = passive; } else { requested.thrp_int_iotier = tier; requested.thrp_int_iopassive = passive; } break; case TASK_POLICY_IO: if (category == TASK_POLICY_EXTERNAL) { requested.thrp_ext_iotier = value; } else { requested.thrp_int_iotier = value; } break; case TASK_POLICY_PASSIVE_IO: if (category == TASK_POLICY_EXTERNAL) { requested.thrp_ext_iopassive = value; } else { requested.thrp_int_iopassive = value; } break; /* Category: ATTRIBUTE, thread only */ case TASK_POLICY_PIDBIND_BG: assert(category == TASK_POLICY_ATTRIBUTE); requested.thrp_pidbind_bg = value; pend_token->tpt_update_turnstile = 1; break; case TASK_POLICY_LATENCY_QOS: assert(category == TASK_POLICY_ATTRIBUTE); requested.thrp_latency_qos = value; break; case TASK_POLICY_THROUGH_QOS: assert(category == TASK_POLICY_ATTRIBUTE); requested.thrp_through_qos = value; break; case TASK_POLICY_QOS_OVERRIDE: assert(category == TASK_POLICY_ATTRIBUTE); requested.thrp_qos_override = value; pend_token->tpt_update_turnstile = 1; break; case TASK_POLICY_QOS_AND_RELPRIO: assert(category == TASK_POLICY_ATTRIBUTE); requested.thrp_qos = value; requested.thrp_qos_relprio = value2; pend_token->tpt_update_turnstile = 1; DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio); break; case TASK_POLICY_QOS_WORKQ_OVERRIDE: assert(category == TASK_POLICY_ATTRIBUTE); requested.thrp_qos_workq_override = value; pend_token->tpt_update_turnstile = 1; break; case TASK_POLICY_QOS_PROMOTE: assert(category == TASK_POLICY_ATTRIBUTE); requested.thrp_qos_promote = value; break; case TASK_POLICY_QOS_KEVENT_OVERRIDE: assert(category == TASK_POLICY_ATTRIBUTE); requested.thrp_qos_kevent_override = value; pend_token->tpt_update_turnstile = 1; break; case TASK_POLICY_QOS_SERVICER_OVERRIDE: assert(category == TASK_POLICY_ATTRIBUTE); requested.thrp_qos_wlsvc_override = value; pend_token->tpt_update_turnstile = 1; break; case TASK_POLICY_TERMINATED: assert(category == TASK_POLICY_ATTRIBUTE); requested.thrp_terminated = value; break; case TASK_POLICY_IOTIER_KEVENT_OVERRIDE: assert(category == TASK_POLICY_ATTRIBUTE); requested.thrp_iotier_kevent_override = value; break; case TASK_POLICY_WI_DRIVEN: assert(category == TASK_POLICY_ATTRIBUTE); assert(thread == current_thread()); const bool set_policy = value; const sched_mode_t mode = value2; requested.thrp_wi_driven = set_policy ? 1 : 0; /* * No sched mode change for REALTIME (threads must explicitly * opt-in), however the priority_offset needs to be updated. */ if (mode == TH_MODE_REALTIME) { const int pri = work_interval_get_priority(thread); assert3u(pri, >=, BASEPRI_RTQUEUES); thread->realtime.priority_offset = set_policy ? 
(uint8_t)(pri - BASEPRI_RTQUEUES) : 0; } else { sched_set_thread_mode_user(thread, mode); if (set_policy) { thread->static_param = true; } } break; default: panic("unknown task policy: %d %d %d", category, flavor, value); break; } thread->requested_policy = requested; } /* * Gets what you set. Effective values may be different. * Precondition: No locks are held */ int proc_get_thread_policy(thread_t thread, int category, int flavor) { int value = 0; thread_mtx_lock(thread); value = proc_get_thread_policy_locked(thread, category, flavor, NULL); thread_mtx_unlock(thread); return value; } static int proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2) { int value = 0; spl_t s = splsched(); thread_lock(thread); value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2); thread_unlock(thread); splx(s); return value; } /* * Gets what you set. Effective values may be different. */ static int thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2) { int value = 0; struct thread_requested_policy requested = thread->requested_policy; switch (flavor) { case TASK_POLICY_DARWIN_BG: if (category == TASK_POLICY_EXTERNAL) { value = requested.thrp_ext_darwinbg; } else { value = requested.thrp_int_darwinbg; } break; case TASK_POLICY_IOPOL: if (category == TASK_POLICY_EXTERNAL) { value = proc_tier_to_iopol(requested.thrp_ext_iotier, requested.thrp_ext_iopassive); } else { value = proc_tier_to_iopol(requested.thrp_int_iotier, requested.thrp_int_iopassive); } break; case TASK_POLICY_IO: if (category == TASK_POLICY_EXTERNAL) { value = requested.thrp_ext_iotier; } else { value = requested.thrp_int_iotier; } break; case TASK_POLICY_PASSIVE_IO: if (category == TASK_POLICY_EXTERNAL) { value = requested.thrp_ext_iopassive; } else { value = requested.thrp_int_iopassive; } break; case TASK_POLICY_QOS: assert(category == TASK_POLICY_ATTRIBUTE); value = requested.thrp_qos; break; case TASK_POLICY_QOS_OVERRIDE: assert(category == TASK_POLICY_ATTRIBUTE); value = requested.thrp_qos_override; break; case TASK_POLICY_LATENCY_QOS: assert(category == TASK_POLICY_ATTRIBUTE); value = requested.thrp_latency_qos; break; case TASK_POLICY_THROUGH_QOS: assert(category == TASK_POLICY_ATTRIBUTE); value = requested.thrp_through_qos; break; case TASK_POLICY_QOS_WORKQ_OVERRIDE: assert(category == TASK_POLICY_ATTRIBUTE); value = requested.thrp_qos_workq_override; break; case TASK_POLICY_QOS_AND_RELPRIO: assert(category == TASK_POLICY_ATTRIBUTE); assert(value2 != NULL); value = requested.thrp_qos; *value2 = requested.thrp_qos_relprio; break; case TASK_POLICY_QOS_PROMOTE: assert(category == TASK_POLICY_ATTRIBUTE); value = requested.thrp_qos_promote; break; case TASK_POLICY_QOS_KEVENT_OVERRIDE: assert(category == TASK_POLICY_ATTRIBUTE); value = requested.thrp_qos_kevent_override; break; case TASK_POLICY_QOS_SERVICER_OVERRIDE: assert(category == TASK_POLICY_ATTRIBUTE); value = requested.thrp_qos_wlsvc_override; break; case TASK_POLICY_TERMINATED: assert(category == TASK_POLICY_ATTRIBUTE); value = requested.thrp_terminated; break; case TASK_POLICY_IOTIER_KEVENT_OVERRIDE: assert(category == TASK_POLICY_ATTRIBUTE); value = requested.thrp_iotier_kevent_override; break; case TASK_POLICY_WI_DRIVEN: assert(category == TASK_POLICY_ATTRIBUTE); value = requested.thrp_wi_driven; break; default: panic("unknown policy_flavor %d", flavor); break; } return value; } /* * Gets what is actually in effect, for subsystems which pull policy instead of receive updates. 
* * NOTE: This accessor does not take the task or thread lock. * Notifications of state updates need to be externally synchronized with state queries. * This routine *MUST* remain interrupt safe, as it is potentially invoked * within the context of a timer interrupt. * * TODO: I think we can get away with architecting this such that we don't need to look at the task ever. * Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates. * I don't think that cost is worth not having the right answer. */ int proc_get_effective_thread_policy(thread_t thread, int flavor) { int value = 0; switch (flavor) { case TASK_POLICY_DARWIN_BG: /* * This call is used within the timer layer, as well as * prioritizing requests to the graphics system. * It also informs SFI and originator-bg-state. * Returns 1 for background mode, 0 for normal mode */ value = thread->effective_policy.thep_darwinbg ? 1 : 0; break; case TASK_POLICY_IO: /* * The I/O system calls here to find out what throttling tier to apply to an operation. * Returns THROTTLE_LEVEL_* values */ value = thread->effective_policy.thep_io_tier; if (thread->iotier_override != THROTTLE_LEVEL_NONE) { value = MIN(value, thread->iotier_override); } break; case TASK_POLICY_PASSIVE_IO: /* * The I/O system calls here to find out whether an operation should be passive. * (i.e. not cause operations with lower throttle tiers to be throttled) * Returns 1 for passive mode, 0 for normal mode * * If an override is causing IO to go into a lower tier, we also set * the passive bit so that a thread doesn't end up stuck in its own throttle * window when the override goes away. */ value = thread->effective_policy.thep_io_passive ? 1 : 0; if (thread->iotier_override != THROTTLE_LEVEL_NONE && thread->iotier_override < thread->effective_policy.thep_io_tier) { value = 1; } break; case TASK_POLICY_ALL_SOCKETS_BG: /* * do_background_socket() calls this to determine whether * it should change the thread's sockets * Returns 1 for background mode, 0 for normal mode * This consults both thread and task so un-DBGing a thread while the task is BG * doesn't get you out of the network throttle. */ value = (thread->effective_policy.thep_all_sockets_bg || get_threadtask(thread)->effective_policy.tep_all_sockets_bg) ? 1 : 0; break; case TASK_POLICY_NEW_SOCKETS_BG: /* * socreate() calls this to determine if it should mark a new socket as background * Returns 1 for background mode, 0 for normal mode */ value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0; break; case TASK_POLICY_LATENCY_QOS: /* * timer arming calls into here to find out the timer coalescing level * Returns a latency QoS tier (0-6) */ value = thread->effective_policy.thep_latency_qos; break; case TASK_POLICY_THROUGH_QOS: /* * This value is passed into the urgency callout from the scheduler * to the performance management subsystem. * * Returns a throughput QoS tier (0-6) */ value = thread->effective_policy.thep_through_qos; break; case TASK_POLICY_QOS: /* * This is communicated to the performance management layer and SFI. * * Returns a QoS policy tier */ value = thread->effective_policy.thep_qos; break; default: panic("unknown thread policy flavor %d", flavor); break; } return value; } /* * (integer_t) casts limit the number of bits we can fit here * this interface is deprecated and replaced by the _EXT struct ? 
*/ static void proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info) { uint64_t bits = 0; struct thread_requested_policy requested = thread->requested_policy; bits |= (requested.thrp_int_darwinbg ? POLICY_REQ_INT_DARWIN_BG : 0); bits |= (requested.thrp_ext_darwinbg ? POLICY_REQ_EXT_DARWIN_BG : 0); bits |= (requested.thrp_int_iotier ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0); bits |= (requested.thrp_ext_iotier ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0); bits |= (requested.thrp_int_iopassive ? POLICY_REQ_INT_PASSIVE_IO : 0); bits |= (requested.thrp_ext_iopassive ? POLICY_REQ_EXT_PASSIVE_IO : 0); bits |= (requested.thrp_qos ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0); bits |= (requested.thrp_qos_override ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT) : 0); bits |= (requested.thrp_pidbind_bg ? POLICY_REQ_PIDBIND_BG : 0); bits |= (requested.thrp_latency_qos ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0); bits |= (requested.thrp_through_qos ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0); info->requested = (integer_t) bits; bits = 0; struct thread_effective_policy effective = thread->effective_policy; bits |= (effective.thep_darwinbg ? POLICY_EFF_DARWIN_BG : 0); bits |= (effective.thep_io_tier ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0); bits |= (effective.thep_io_passive ? POLICY_EFF_IO_PASSIVE : 0); bits |= (effective.thep_all_sockets_bg ? POLICY_EFF_ALL_SOCKETS_BG : 0); bits |= (effective.thep_new_sockets_bg ? POLICY_EFF_NEW_SOCKETS_BG : 0); bits |= (effective.thep_qos ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0); bits |= (effective.thep_latency_qos ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0); bits |= (effective.thep_through_qos ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0); info->effective = (integer_t)bits; bits = 0; info->pending = 0; } /* * Sneakily trace either the task and thread requested * or just the thread requested, depending on if we have enough room. * We do have room on LP64. On LP32, we have to split it between two uintptr_t's. * * LP32 LP64 * threquested_0(thread) thread[0] task[0] * threquested_1(thread) thread[1] thread[0] * */ uintptr_t threquested_0(thread_t thread) { static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated"); uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy; return raw[0]; } uintptr_t threquested_1(thread_t thread) { #if defined __LP64__ return *(uintptr_t*)&get_threadtask(thread)->requested_policy; #else uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy; return raw[1]; #endif } uintptr_t theffective_0(thread_t thread) { static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated"); uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy; return raw[0]; } uintptr_t theffective_1(thread_t thread) { #if defined __LP64__ return *(uintptr_t*)&get_threadtask(thread)->effective_policy; #else uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy; return raw[1]; #endif } /* * Set an override on the thread which is consulted with a * higher priority than the task/thread policy. 
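 * (Lower THROTTLE_LEVEL_* values denote more aggressive, i.e. higher-priority,
 * I/O; an existing TIER1 override therefore wins over a requested TIER2 via
 * the MIN() in the compare-and-swap loop below.)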
This should * only be set for temporary grants until the thread * returns to the userspace boundary * * We use atomic operations to swap in the override, with * the assumption that the thread itself can * read the override and clear it on return to userspace. * * No locking is performed, since it is acceptable to see * a stale override for one loop through throttle_lowpri_io(). * However a thread reference must be held on the thread. */ void set_thread_iotier_override(thread_t thread, int policy) { int current_override; /* Let most aggressive I/O policy win until user boundary */ do { current_override = thread->iotier_override; if (current_override != THROTTLE_LEVEL_NONE) { policy = MIN(current_override, policy); } if (current_override == policy) { /* no effective change */ return; } } while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override)); /* * Since the thread may be currently throttled, * re-evaluate tiers and potentially break out * of an msleep */ rethrottle_thread(get_bsdthread_info(thread)); } /* * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks, * semaphores, dispatch_sync) may result in priority inversions where a higher priority * (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower * priority thread. In these cases, we attempt to propagate the priority token, as long * as the subsystem informs us of the relationships between the threads. The userspace * synchronization subsystem should maintain the information of owner->resource and * resource->waiters itself. */ /* * This helper canonicalizes the resource/resource_type given the current qos_override_mode * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need * to be handled specially in the future, but for now it's fine to slam * *resource to USER_ADDR_NULL even if it was previously a wildcard. */ static void canonicalize_resource_and_type(user_addr_t *resource, int *resource_type) { if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) { /* Map all input resource/type to a single one */ *resource = USER_ADDR_NULL; *resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN; } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) { /* no transform */ } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) { /* Map all mutex overrides to a single one, to avoid memory overhead */ if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) { *resource = USER_ADDR_NULL; } } } /* This helper routine finds an existing override if known. 
Locking should be done by caller */ static struct thread_qos_override * find_qos_override(thread_t thread, user_addr_t resource, int resource_type) { struct thread_qos_override *override; override = thread->overrides; while (override) { if (override->override_resource == resource && override->override_resource_type == resource_type) { return override; } override = override->override_next; } return NULL; } static void find_and_decrement_qos_override(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset, struct thread_qos_override **free_override_list) { struct thread_qos_override *override, *override_prev; override_prev = NULL; override = thread->overrides; while (override) { struct thread_qos_override *override_next = override->override_next; if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) && (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) { if (reset) { override->override_contended_resource_count = 0; } else { override->override_contended_resource_count--; } if (override->override_contended_resource_count == 0) { if (override_prev == NULL) { thread->overrides = override_next; } else { override_prev->override_next = override_next; } /* Add to out-param for later zfree */ override->override_next = *free_override_list; *free_override_list = override; } else { override_prev = override; } if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) { return; } } else { override_prev = override; } override = override_next; } } /* This helper recalculates the current requested override using the policy selected at boot */ static int calculate_requested_qos_override(thread_t thread) { if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) { return THREAD_QOS_UNSPECIFIED; } /* iterate over all overrides and calculate MAX */ struct thread_qos_override *override; int qos_override = THREAD_QOS_UNSPECIFIED; override = thread->overrides; while (override) { qos_override = MAX(qos_override, override->override_qos); override = override->override_next; } return qos_override; } /* * Returns: * - 0 on success * - EINVAL if some invalid input was passed */ static int proc_thread_qos_add_override_internal(thread_t thread, int override_qos, boolean_t first_override_for_resource, user_addr_t resource, int resource_type) { struct task_pend_token pend_token = {}; int rc = 0; thread_mtx_lock(thread); KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START, thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0); DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread), uint64_t, thread->requested_policy.thrp_qos, uint64_t, thread->effective_policy.thep_qos, int, override_qos, boolean_t, first_override_for_resource); struct thread_qos_override *override; struct thread_qos_override *override_new = NULL; int new_qos_override, prev_qos_override; int new_effective_qos; canonicalize_resource_and_type(&resource, &resource_type); override = find_qos_override(thread, resource, resource_type); if (first_override_for_resource && !override) { /* We need to allocate a new object. 
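 * (Illustrative note: the zone allocation may block, so it is not performed
 * with the thread mutex held.)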
Drop the thread lock and * recheck afterwards in case someone else added the override */ thread_mtx_unlock(thread); override_new = zalloc(thread_qos_override_zone); thread_mtx_lock(thread); override = find_qos_override(thread, resource, resource_type); } if (first_override_for_resource && override) { /* Someone else already allocated while the thread lock was dropped */ override->override_contended_resource_count++; } else if (!override && override_new) { override = override_new; override_new = NULL; override->override_next = thread->overrides; /* since first_override_for_resource was TRUE */ override->override_contended_resource_count = 1; override->override_resource = resource; override->override_resource_type = (int16_t)resource_type; override->override_qos = THREAD_QOS_UNSPECIFIED; thread->overrides = override; } if (override) { if (override->override_qos == THREAD_QOS_UNSPECIFIED) { override->override_qos = (int16_t)override_qos; } else { override->override_qos = MAX(override->override_qos, (int16_t)override_qos); } } /* Determine how to combine the various overrides into a single current * requested override */ new_qos_override = calculate_requested_qos_override(thread); prev_qos_override = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL); if (new_qos_override != prev_qos_override) { proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token); } new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS); thread_mtx_unlock(thread); thread_policy_update_complete_unlocked(thread, &pend_token); if (override_new) { zfree(thread_qos_override_zone, override_new); } DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override, int, new_qos_override, int, new_effective_qos, int, rc); KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END, new_qos_override, resource, resource_type, 0, 0); return rc; } int proc_thread_qos_add_override(task_t task, thread_t thread, uint64_t tid, int override_qos, boolean_t first_override_for_resource, user_addr_t resource, int resource_type) { boolean_t has_thread_reference = FALSE; int rc = 0; if (thread == THREAD_NULL) { thread = task_findtid(task, tid); /* returns referenced thread */ if (thread == THREAD_NULL) { KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE, tid, 0, 0xdead, 0, 0); return ESRCH; } has_thread_reference = TRUE; } else { assert(get_threadtask(thread) == task); } rc = proc_thread_qos_add_override_internal(thread, override_qos, first_override_for_resource, resource, resource_type); if (has_thread_reference) { thread_deallocate(thread); } return rc; } static void proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset) { struct task_pend_token pend_token = {}; struct thread_qos_override *deferred_free_override_list = NULL; int new_qos_override, prev_qos_override, new_effective_qos; thread_mtx_lock(thread); canonicalize_resource_and_type(&resource, &resource_type); find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list); KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START, thread_tid(thread), resource, reset, 0, 0); DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread), uint64_t, thread->requested_policy.thrp_qos, uint64_t, 
thread->effective_policy.thep_qos); /* Determine how to combine the various overrides into a single current requested override */ new_qos_override = calculate_requested_qos_override(thread); spl_t s = splsched(); thread_lock(thread); /* * The override chain and therefore the value of the current override is locked with thread mutex, * so we can do a get/set without races. However, the rest of thread policy is locked under the spinlock. * This means you can't change the current override from a spinlock-only setter. */ prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL); if (new_qos_override != prev_qos_override) { proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token); } new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS); thread_unlock(thread); splx(s); thread_mtx_unlock(thread); thread_policy_update_complete_unlocked(thread, &pend_token); while (deferred_free_override_list) { struct thread_qos_override *override_next = deferred_free_override_list->override_next; zfree(thread_qos_override_zone, deferred_free_override_list); deferred_free_override_list = override_next; } DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override, int, new_qos_override, int, new_effective_qos); KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END, thread_tid(thread), 0, 0, 0, 0); } int proc_thread_qos_remove_override(task_t task, thread_t thread, uint64_t tid, user_addr_t resource, int resource_type) { boolean_t has_thread_reference = FALSE; if (thread == THREAD_NULL) { thread = task_findtid(task, tid); /* returns referenced thread */ if (thread == THREAD_NULL) { KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE, tid, 0, 0xdead, 0, 0); return ESRCH; } has_thread_reference = TRUE; } else { assert(task == get_threadtask(thread)); } proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE); if (has_thread_reference) { thread_deallocate(thread); } return 0; } /* Deallocate before thread termination */ void proc_thread_qos_deallocate(thread_t thread) { /* This thread must have no more IPC overrides. */ assert(thread->kevent_overrides == 0); assert(thread->requested_policy.thrp_qos_kevent_override == THREAD_QOS_UNSPECIFIED); assert(thread->requested_policy.thrp_qos_wlsvc_override == THREAD_QOS_UNSPECIFIED); /* * Clear out any lingering override objects. 
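 * (The override list is detached while the thread mutex is held and the
 * entries are freed afterwards, once the mutex has been dropped.)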
*/ struct thread_qos_override *override; thread_mtx_lock(thread); override = thread->overrides; thread->overrides = NULL; thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED; /* We don't need to re-evaluate thread policy here because the thread has already exited */ thread_mtx_unlock(thread); while (override) { struct thread_qos_override *override_next = override->override_next; zfree(thread_qos_override_zone, override); override = override_next; } } /* * Set up the primordial thread's QoS */ void task_set_main_thread_qos(task_t task, thread_t thread) { struct task_pend_token pend_token = {}; assert(get_threadtask(thread) == task); thread_mtx_lock(thread); KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START, thread_tid(thread), threquested_0(thread), threquested_1(thread), thread->requested_policy.thrp_qos, 0); thread_qos_t primordial_qos = task_compute_main_thread_qos(task); proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO, primordial_qos, 0, &pend_token); thread_mtx_unlock(thread); thread_policy_update_complete_unlocked(thread, &pend_token); KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END, thread_tid(thread), threquested_0(thread), threquested_1(thread), primordial_qos, 0); } /* * KPI for pthread kext * * Return a good guess at what the initial manager QoS will be * Dispatch can override this in userspace if it so chooses */ thread_qos_t task_get_default_manager_qos(task_t task) { thread_qos_t primordial_qos = task_compute_main_thread_qos(task); if (primordial_qos == THREAD_QOS_LEGACY) { primordial_qos = THREAD_QOS_USER_INITIATED; } return primordial_qos; } /* * Check if the kernel promotion on the thread has changed * and apply it. * * thread locked on entry and exit */ boolean_t thread_recompute_kernel_promotion_locked(thread_t thread) { boolean_t needs_update = FALSE; uint8_t kern_promotion_schedpri = (uint8_t)thread_get_inheritor_turnstile_sched_priority(thread); /* * For now just assert that kern_promotion_schedpri <= MAXPRI_PROMOTE. * TURNSTILE_KERNEL_PROMOTE adds threads on the waitq already capped to MAXPRI_PROMOTE * and propagates the priority through the chain with the same cap, because as of now it does * not differentiate on the kernel primitive. * * If this assumption should change with the adoption of a kernel primitive that does not * cap the priority when adding/propagating, * then here is the place to put the generic cap for all kernel primitives * (converts the assert to kern_promotion_schedpri = MIN(priority, MAXPRI_PROMOTE)) */ assert(kern_promotion_schedpri <= MAXPRI_PROMOTE); if (kern_promotion_schedpri != thread->kern_promotion_schedpri) { KDBG(MACHDBG_CODE( DBG_MACH_SCHED, MACH_TURNSTILE_KERNEL_CHANGE) | DBG_FUNC_NONE, thread_tid(thread), kern_promotion_schedpri, thread->kern_promotion_schedpri); needs_update = TRUE; thread->kern_promotion_schedpri = kern_promotion_schedpri; thread_recompute_sched_pri(thread, SETPRI_DEFAULT); } return needs_update; } /* * Check if the user promotion on the thread has changed * and apply it. * * thread locked on entry, might drop the thread lock * and reacquire it.
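 * (The thread lock is dropped around thread_policy_update_complete_unlocked(),
 * so state observed before the call may be stale when it returns.)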
*/ boolean_t thread_recompute_user_promotion_locked(thread_t thread) { boolean_t needs_update = FALSE; struct task_pend_token pend_token = {}; uint8_t user_promotion_basepri = MIN((uint8_t)thread_get_inheritor_turnstile_base_priority(thread), MAXPRI_USER); int old_base_pri = thread->base_pri; thread_qos_t qos_promotion; /* Check if user promotion has changed */ if (thread->user_promotion_basepri == user_promotion_basepri) { return needs_update; } else { KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (TURNSTILE_CODE(TURNSTILE_PRIORITY_OPERATIONS, (THREAD_USER_PROMOTION_CHANGE))) | DBG_FUNC_NONE, thread_tid(thread), user_promotion_basepri, thread->user_promotion_basepri, 0, 0); KDBG(MACHDBG_CODE( DBG_MACH_SCHED, MACH_TURNSTILE_USER_CHANGE) | DBG_FUNC_NONE, thread_tid(thread), user_promotion_basepri, thread->user_promotion_basepri); } /* Update the user promotion base pri */ thread->user_promotion_basepri = user_promotion_basepri; pend_token.tpt_force_recompute_pri = 1; if (user_promotion_basepri <= MAXPRI_THROTTLE) { qos_promotion = THREAD_QOS_UNSPECIFIED; } else { qos_promotion = thread_user_promotion_qos_for_pri(user_promotion_basepri); } proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_PROMOTE, qos_promotion, 0, &pend_token); if (thread_get_waiting_turnstile(thread) && thread->base_pri != old_base_pri) { needs_update = TRUE; } thread_unlock(thread); thread_policy_update_complete_unlocked(thread, &pend_token); thread_lock(thread); return needs_update; } /* * Convert the thread's user promotion base priority to a QoS for threads in the QoS world. * For priorities above the UI QoS base priority, the QoS is capped at UI. */ thread_qos_t thread_user_promotion_qos_for_pri(int priority) { thread_qos_t qos; for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) { if (thread_qos_policy_params.qos_pri[qos] <= priority) { return qos; } } return THREAD_QOS_MAINTENANCE; } /* * Set the thread's QoS Kevent override * Owned by the Kevent subsystem * * May be called with spinlocks held, but not spinlocks * that may deadlock against the thread lock, the throttle lock, or the SFI lock. * * One 'add' must be balanced by one 'drop'. * Between 'add' and 'drop', the override QoS value may be updated with an 'update'. * Before the thread is deallocated, there must be 0 remaining overrides.
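 *
 * A minimal illustrative usage sketch (hypothetical caller):
 *
 *     thread_add_kevent_override(thread, THREAD_QOS_USER_INITIATED);
 *     ... service the event ...
 *     thread_update_kevent_override(thread, THREAD_QOS_USER_INTERACTIVE);
 *     ... finish servicing ...
 *     thread_drop_kevent_override(thread);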
*/ static void thread_kevent_override(thread_t thread, uint32_t qos_override, boolean_t is_new_override) { struct task_pend_token pend_token = {}; boolean_t needs_update; spl_t s = splsched(); thread_lock(thread); uint32_t old_override = thread->requested_policy.thrp_qos_kevent_override; assert(qos_override > THREAD_QOS_UNSPECIFIED); assert(qos_override < THREAD_QOS_LAST); if (is_new_override) { if (thread->kevent_overrides++ == 0) { /* This add is the first override for this thread */ assert(old_override == THREAD_QOS_UNSPECIFIED); } else { /* There are already other overrides in effect for this thread */ assert(old_override > THREAD_QOS_UNSPECIFIED); } } else { /* There must be at least one override (the previous add call) in effect */ assert(thread->kevent_overrides > 0); assert(old_override > THREAD_QOS_UNSPECIFIED); } /* * We can't allow lowering if there are several IPC overrides because * the caller can't possibly know the whole truth */ if (thread->kevent_overrides == 1) { needs_update = qos_override != old_override; } else { needs_update = qos_override > old_override; } if (needs_update) { proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_KEVENT_OVERRIDE, qos_override, 0, &pend_token); assert(pend_token.tpt_update_sockets == 0); } thread_unlock(thread); splx(s); thread_policy_update_complete_unlocked(thread, &pend_token); } void thread_add_kevent_override(thread_t thread, uint32_t qos_override) { thread_kevent_override(thread, qos_override, TRUE); } void thread_update_kevent_override(thread_t thread, uint32_t qos_override) { thread_kevent_override(thread, qos_override, FALSE); } void thread_drop_kevent_override(thread_t thread) { struct task_pend_token pend_token = {}; spl_t s = splsched(); thread_lock(thread); assert(thread->kevent_overrides > 0); if (--thread->kevent_overrides == 0) { /* * There are no more overrides for this thread, so we should * clear out the saturated override value */ proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_KEVENT_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0, &pend_token); } thread_unlock(thread); splx(s); thread_policy_update_complete_unlocked(thread, &pend_token); } /* * Set the thread's QoS Workloop Servicer override * Owned by the Kevent subsystem * * May be called with spinlocks held, but not spinlocks * that may deadlock against the thread lock, the throttle lock, or the SFI lock. * * One 'add' must be balanced by one 'drop'. * Between 'add' and 'drop', the override QoS value may be updated with an 'update'. * Before the thread is deallocated, there must be 0 remaining overrides.
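 *
 * Unlike the kevent override above, at most one servicer override is in effect
 * at a time (see the asserts below); a 'drop' simply resets the value to
 * THREAD_QOS_UNSPECIFIED via thread_drop_servicer_override().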
*/ static void thread_servicer_override(thread_t thread, uint32_t qos_override, boolean_t is_new_override) { struct task_pend_token pend_token = {}; spl_t s = splsched(); thread_lock(thread); if (is_new_override) { assert(!thread->requested_policy.thrp_qos_wlsvc_override); } else { assert(thread->requested_policy.thrp_qos_wlsvc_override); } proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_SERVICER_OVERRIDE, qos_override, 0, &pend_token); thread_unlock(thread); splx(s); assert(pend_token.tpt_update_sockets == 0); thread_policy_update_complete_unlocked(thread, &pend_token); } void thread_add_servicer_override(thread_t thread, uint32_t qos_override) { assert(qos_override > THREAD_QOS_UNSPECIFIED); assert(qos_override < THREAD_QOS_LAST); thread_servicer_override(thread, qos_override, TRUE); } void thread_update_servicer_override(thread_t thread, uint32_t qos_override) { assert(qos_override > THREAD_QOS_UNSPECIFIED); assert(qos_override < THREAD_QOS_LAST); thread_servicer_override(thread, qos_override, FALSE); } void thread_drop_servicer_override(thread_t thread) { thread_servicer_override(thread, THREAD_QOS_UNSPECIFIED, FALSE); } void thread_update_servicer_iotier_override(thread_t thread, uint8_t iotier_override) { struct task_pend_token pend_token = {}; uint8_t current_iotier; /* Check if the update is needed */ current_iotier = (uint8_t)thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_IOTIER_KEVENT_OVERRIDE, NULL); if (iotier_override == current_iotier) { return; } spl_t s = splsched(); thread_lock(thread); proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_IOTIER_KEVENT_OVERRIDE, iotier_override, 0, &pend_token); thread_unlock(thread); splx(s); assert(pend_token.tpt_update_sockets == 0); thread_policy_update_complete_unlocked(thread, &pend_token); } /* Get current requested qos / relpri, may be called from spinlock context */ thread_qos_t thread_get_requested_qos(thread_t thread, int *relpri) { int relprio_value = 0; thread_qos_t qos; qos = (thread_qos_t)proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO, &relprio_value); if (relpri) { *relpri = -relprio_value; } return qos; } /* * This function will promote the thread priority * since exec could block other threads calling * proc_find on the proc. This boost must be removed * via call to thread_clear_exec_promotion. * * This should be replaced with a generic 'priority inheriting gate' mechanism (24194397) */ void thread_set_exec_promotion(thread_t thread) { spl_t s = splsched(); thread_lock(thread); sched_thread_promote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0); thread_unlock(thread); splx(s); } /* * This function will clear the exec thread * promotion set on the thread by thread_set_exec_promotion. */ void thread_clear_exec_promotion(thread_t thread) { spl_t s = splsched(); thread_lock(thread); sched_thread_unpromote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0); thread_unlock(thread); splx(s); } #if CONFIG_SCHED_RT_ALLOW /* * flag set by -rt-allow-policy-enable boot-arg to restrict use of * THREAD_TIME_CONSTRAINT_POLICY and THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY * to threads that have joined a workinterval with WORK_INTERVAL_WORKLOAD_ID_RT_ALLOWED. 
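 *
 * (For example, with the default rt_allow_limit_* tunables defined below, a
 * thread subject to the RT CPU limit may run for roughly 70% of each 10ms
 * window, i.e. about 7ms, before the ledger blocks it.)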
*/ static TUNABLE( bool, rt_allow_policy_enabled, "-rt-allow_policy-enable", #if XNU_TARGET_OS_XR true #else false #endif /* XNU_TARGET_OS_XR */ ); /* * When the RT allow policy is enabled and a thread is allowed to become RT, * sometimes (if the process's RT allow policy is restricted) the thread will * have a CPU limit enforced. The following two tunables determine the * parameters for that CPU limit. */ /* % of the interval allowed to run. */ TUNABLE_DEV_WRITEABLE(uint8_t, rt_allow_limit_percent, "rt_allow_limit_percent", 70); /* The length of the interval in milliseconds. */ TUNABLE_DEV_WRITEABLE(uint16_t, rt_allow_limit_interval_ms, "rt_allow_limit_interval", 10); static bool thread_has_rt(thread_t thread) { return thread->sched_mode == TH_MODE_REALTIME || thread->saved_mode == TH_MODE_REALTIME; } /* * Set a CPU limit on a thread based on the RT allow policy. This will be picked * up by the target thread via the ledger AST. */ static void thread_rt_set_cpulimit(thread_t thread) { /* Force reasonable values for the cpu limit. */ const uint8_t percent = MAX(MIN(rt_allow_limit_percent, 99), 1); const uint16_t interval_ms = MAX(rt_allow_limit_interval_ms, 1); thread->t_ledger_req_percentage = percent; thread->t_ledger_req_interval_ms = interval_ms; thread->t_ledger_req_action = THREAD_CPULIMIT_BLOCK; thread->sched_flags |= TH_SFLAG_RT_CPULIMIT; } /* Similar to the above but removes any CPU limit. */ static void thread_rt_clear_cpulimit(thread_t thread) { thread->sched_flags &= ~TH_SFLAG_RT_CPULIMIT; thread->t_ledger_req_percentage = 0; thread->t_ledger_req_interval_ms = 0; thread->t_ledger_req_action = THREAD_CPULIMIT_DISABLE; } /* * Evaluate RT policy for a thread, demoting and undemoting as needed. */ void thread_rt_evaluate(thread_t thread) { task_t task = get_threadtask(thread); bool platform_binary = false; /* If the RT allow policy is not enabled - nothing to do. */ if (!rt_allow_policy_enabled) { return; } /* User threads only. */ if (task == kernel_task) { return; } /* Check for platform binary. */ platform_binary = (task_ro_flags_get(task) & TFRO_PLATFORM) != 0; spl_t s = splsched(); thread_lock(thread); const thread_work_interval_flags_t wi_flags = os_atomic_load(&thread->th_work_interval_flags, relaxed); /* * RT threads which are not joined to a work interval which allows RT * threads are demoted. Once those conditions no longer hold, the thread is * undemoted. */ if (thread_has_rt(thread) && (wi_flags & TH_WORK_INTERVAL_FLAGS_RT_ALLOWED) == 0) { if (!sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) { KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RT_DISALLOWED_WORK_INTERVAL), thread_tid(thread)); sched_thread_mode_demote(thread, TH_SFLAG_RT_DISALLOWED); } } else { if (sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) { sched_thread_mode_undemote(thread, TH_SFLAG_RT_DISALLOWED); } } /* * RT threads get a CPU limit unless they're part of a platform binary * task. If the thread is no longer RT, any existing CPU limit should be * removed. */ bool set_ast = false; if (!platform_binary && thread_has_rt(thread) && (thread->sched_flags & TH_SFLAG_RT_CPULIMIT) == 0) { thread_rt_set_cpulimit(thread); set_ast = true; } if (!platform_binary && !thread_has_rt(thread) && (thread->sched_flags & TH_SFLAG_RT_CPULIMIT) != 0) { thread_rt_clear_cpulimit(thread); set_ast = true; } thread_unlock(thread); splx(s); if (set_ast) { /* Ensure the target thread picks up any CPU limit change.
*/ act_set_astledger(thread); } } #else void thread_rt_evaluate(__unused thread_t thread) { } #endif /* CONFIG_SCHED_RT_ALLOW */