/*
 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
 * The original header names were lost in extraction; the set below is
 * reconstructed from the identifiers this file uses.
 */
#include <mach/boolean.h>
#include <mach/policy.h>
#include <mach/thread_switch.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>
#include <kern/ipc_kobject.h>
#include <kern/processor.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/spl.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/policy_internal.h>
#include <kern/ast.h>
#include <kern/kalloc.h>
#include <kern/timer_call.h>
#include <kern/waitq.h>
#include <machine/machine_routines.h>
#include <sys/kdebug.h>

static void thread_depress_abstime(uint64_t interval);
static void thread_depress_ms(mach_msg_timeout_t interval);

/*
 * Called from commpage to take a delayed preemption when exiting
 * the "Preemption Free Zone" (PFZ).
 */
kern_return_t
pfz_exit(
	__unused struct pfz_exit_args *args)
{
	/* For now, nothing special to do.  We'll pick up the ASTs on kernel exit. */
	return KERN_SUCCESS;
}

/*
 * swtch and swtch_pri both attempt to context switch (logic in
 * thread_block no-ops the context switch if nothing would happen).
 * A boolean is returned that indicates whether there is anything
 * else runnable.  That's no excuse to spin, though.
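 *
 * Illustrative only (a hypothetical userspace sketch, not part of this
 * file): callers reach these paths through the Mach traps declared in
 * <mach/mach_traps.h>, e.g. a spin-then-yield loop:
 *
 *	while (!my_try_lock(lock))        // my_try_lock: hypothetical primitive
 *		(void)swtch_pri(0);       // yield; TRUE means others are runnable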
 */
static void
swtch_continue(void)
{
	processor_t     myprocessor;
	boolean_t       result;

	disable_preemption();
	myprocessor = current_processor();
	result = SCHED(thread_should_yield)(myprocessor, current_thread());
	enable_preemption();

	ml_delay_on_yield();

	thread_syscall_return(result);
	/*NOTREACHED*/
}

boolean_t
swtch(
	__unused struct swtch_args *args)
{
	processor_t     myprocessor;

	disable_preemption();
	myprocessor = current_processor();
	if (!SCHED(thread_should_yield)(myprocessor, current_thread())) {
		mp_enable_preemption();

		return FALSE;
	}
	enable_preemption();

	thread_yield_with_continuation((thread_continue_t)swtch_continue, NULL);
}

static void
swtch_pri_continue(void)
{
	processor_t     myprocessor;
	boolean_t       result;

	thread_depress_abort(current_thread());

	disable_preemption();
	myprocessor = current_processor();
	result = SCHED(thread_should_yield)(myprocessor, current_thread());
	mp_enable_preemption();

	ml_delay_on_yield();

	thread_syscall_return(result);
	/*NOTREACHED*/
}

boolean_t
swtch_pri(
	__unused struct swtch_pri_args *args)
{
	processor_t     myprocessor;

	disable_preemption();
	myprocessor = current_processor();
	if (!SCHED(thread_should_yield)(myprocessor, current_thread())) {
		mp_enable_preemption();

		return FALSE;
	}
	enable_preemption();

	thread_depress_abstime(thread_depress_time);

	thread_yield_with_continuation((thread_continue_t)swtch_pri_continue, NULL);
}

static void
thread_switch_continue(void *parameter, __unused int ret)
{
	thread_t self = current_thread();
	int option = (int)(intptr_t)parameter;

	if (option == SWITCH_OPTION_DEPRESS || option == SWITCH_OPTION_OSLOCK_DEPRESS) {
		thread_depress_abort(self);
	}

	ml_delay_on_yield();

	thread_syscall_return(KERN_SUCCESS);
	/*NOTREACHED*/
}

/*
 * thread_switch:
 *
 * Context switch.  User may supply thread hint.
 */
kern_return_t
thread_switch(
	struct thread_switch_args *args)
{
	thread_t                thread = THREAD_NULL;
	thread_t                self = current_thread();
	mach_port_name_t        thread_name = args->thread_name;
	int                     option = args->option;
	mach_msg_timeout_t      option_time = args->option_time;
	uint32_t                scale_factor = NSEC_PER_MSEC;
	boolean_t               depress_option = FALSE;
	boolean_t               wait_option = FALSE;
	wait_interrupt_t        interruptible = THREAD_ABORTSAFE;
	port_intrans_options_t  ptt_options = PORT_INTRANS_THREAD_NOT_CURRENT_THREAD;

	/*
	 * Validate and process option.
	 *
	 * OSLock boosting only applies to other threads
	 * in your same task (even if you have a port for
	 * a thread in another task).
	 */
	switch (option) {
	case SWITCH_OPTION_NONE:
		break;
	case SWITCH_OPTION_WAIT:
		wait_option = TRUE;
		break;
	case SWITCH_OPTION_DEPRESS:
		depress_option = TRUE;
		break;
	case SWITCH_OPTION_DISPATCH_CONTENTION:
		scale_factor = NSEC_PER_USEC;
		wait_option = TRUE;
		interruptible |= THREAD_WAIT_NOREPORT;
		break;
	case SWITCH_OPTION_OSLOCK_DEPRESS:
		depress_option = TRUE;
		interruptible |= THREAD_WAIT_NOREPORT;
		ptt_options |= PORT_INTRANS_THREAD_IN_CURRENT_TASK;
		break;
	case SWITCH_OPTION_OSLOCK_WAIT:
		wait_option = TRUE;
		interruptible |= THREAD_WAIT_NOREPORT;
		ptt_options |= PORT_INTRANS_THREAD_IN_CURRENT_TASK;
		break;
	default:
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * Translate the port name if supplied.
	 */
	if (thread_name != MACH_PORT_NULL) {
		thread = port_name_to_thread(thread_name, ptt_options);
	}

	if (option == SWITCH_OPTION_OSLOCK_DEPRESS || option == SWITCH_OPTION_OSLOCK_WAIT) {
		if (thread != THREAD_NULL) {
			/*
			 * Attempt to kick the lock owner up to our same IO throttling tier.
			 * If the thread is currently blocked in throttle_lowpri_io(),
			 * it will immediately break out.
			 *
			 * TODO: SFI break out?
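			 *
			 * For illustration only, a hypothetical userspace sketch
			 * of how a lock implementation might reach this branch
			 * via the thread_switch() trap:
			 *
			 *	kr = thread_switch(owner_port,
			 *	    SWITCH_OPTION_OSLOCK_WAIT, timeout_ms);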
			 */
			int new_policy = proc_get_effective_thread_policy(self, TASK_POLICY_IO);
			set_thread_iotier_override(thread, new_policy);
		}
	}

	/*
	 * Try to handoff if supplied.
	 */
	if (thread != THREAD_NULL) {
		spl_t s = splsched();

		/* This may return a different thread if the target is pushing on something */
		thread_t pulled_thread = thread_run_queue_remove_for_handoff(thread);

		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_THREAD_SWITCH) | DBG_FUNC_NONE,
		    thread_tid(thread), thread->state,
		    pulled_thread ? TRUE : FALSE, 0, 0);

		if (pulled_thread != THREAD_NULL) {
			/* We can't be dropping the last ref here */
			thread_deallocate_safe(thread);

			if (wait_option) {
				assert_wait_timeout((event_t)assert_wait_timeout, interruptible,
				    option_time, scale_factor);
			} else if (depress_option) {
				thread_depress_ms(option_time);
			}

			thread_run(self, thread_switch_continue, (void *)(intptr_t)option, pulled_thread);
			__builtin_unreachable();
		}

		splx(s);

		thread_deallocate(thread);
	}

	if (wait_option) {
		assert_wait_timeout((event_t)assert_wait_timeout, interruptible, option_time, scale_factor);
	} else {
		disable_preemption();
		bool should_yield = SCHED(thread_should_yield)(current_processor(), current_thread());
		enable_preemption();

		if (should_yield == false) {
			/* Early-return if yielding to the scheduler will not be beneficial */
			return KERN_SUCCESS;
		}

		if (depress_option) {
			thread_depress_ms(option_time);
		}
	}

	thread_yield_with_continuation(thread_switch_continue, (void *)(intptr_t)option);
	__builtin_unreachable();
}

void
thread_yield_with_continuation(
	thread_continue_t       continuation,
	void                    *parameter)
{
	assert(continuation);
	thread_block_reason(continuation, parameter, AST_YIELD);
	__builtin_unreachable();
}

/*
 * This function is called after an assert_wait(), therefore it must not
 * cause another wait until after the thread_run() or thread_block().
 *
 * The following are the calling conventions for thread ref deallocation:
 *
 * 1) If no continuation is provided, then the thread ref is consumed
 *    (thread_handoff_deallocate convention).
 *
 * 2) If a continuation is provided with option THREAD_HANDOFF_SETRUN_NEEDED,
 *    then the thread ref is always consumed.
 *
 * 3) If a continuation is provided with option THREAD_HANDOFF_NONE, then the
 *    thread ref is not consumed and it is up to the continuation to
 *    deallocate the thread reference.
 */
static wait_result_t
thread_handoff_internal(thread_t thread, thread_continue_t continuation,
    void *parameter, thread_handoff_option_t option)
{
	thread_t self = current_thread();

	/*
	 * Try to handoff if supplied.
	 */
	if (thread != THREAD_NULL) {
		spl_t s = splsched();

		thread_t pulled_thread = thread_prepare_for_handoff(thread, option);

		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_THREAD_SWITCH) | DBG_FUNC_NONE,
		    thread_tid(thread), thread->state,
		    pulled_thread ? TRUE : FALSE, 0, 0);
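		/*
		 * pulled_thread is the thread actually selected to run next;
		 * it may be THREAD_NULL if the target could not be pulled
		 * from its run queue, in which case we fall back to an
		 * ordinary block below.
		 */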
		/* Deallocate thread ref if needed */
		if (continuation == NULL || (option & THREAD_HANDOFF_SETRUN_NEEDED)) {
			/* Use the safe version of thread deallocate */
			thread_deallocate_safe(thread);
		}

		if (pulled_thread != THREAD_NULL) {
			int result = thread_run(self, continuation, parameter, pulled_thread);
			splx(s);
			return result;
		}

		splx(s);
	}

	int result = thread_block_parameter(continuation, parameter);
	return result;
}

void
thread_handoff_parameter(thread_t thread, thread_continue_t continuation,
    void *parameter, thread_handoff_option_t option)
{
	thread_handoff_internal(thread, continuation, parameter, option);
	panic("NULL continuation passed to %s", __func__);
	__builtin_unreachable();
}

wait_result_t
thread_handoff_deallocate(thread_t thread, thread_handoff_option_t option)
{
	return thread_handoff_internal(thread, NULL, NULL, option);
}

/*
 * Thread depression
 *
 * This mechanism drops a thread to priority 0 in order for it to yield to
 * all other runnable threads on the system.  It can be canceled or timed out,
 * whereupon the thread goes back to where it was.
 *
 * Note that TH_SFLAG_DEPRESS and TH_SFLAG_POLLDEPRESS are never set at the
 * same time.  DEPRESS always defers to POLLDEPRESS.
 *
 * DEPRESS only lasts across a single thread_block call, and never returns
 * to userspace.
 * POLLDEPRESS can be active anywhere up until thread termination.
 */
void
thread_depress_timer_setup(thread_t self)
{
	self->depress_timer = kalloc_type(struct timer_call, Z_ZERO | Z_WAITOK | Z_NOFAIL);
	timer_call_setup(self->depress_timer, thread_depress_expire, self);
}

/*
 * Depress thread's priority to lowest possible for the specified interval,
 * with an interval of zero resulting in no timeout being scheduled.
 *
 * Must block with AST_YIELD afterwards to take effect.
 */
void
thread_depress_abstime(uint64_t interval)
{
	thread_t self = current_thread();

	spl_t s = splsched();
	thread_lock(self);

	assert((self->sched_flags & TH_SFLAG_DEPRESS) == 0);

	if ((self->sched_flags & TH_SFLAG_POLLDEPRESS) == 0) {
		self->sched_flags |= TH_SFLAG_DEPRESS;
		thread_recompute_sched_pri(self, SETPRI_LAZY);

		if (interval != 0) {
			uint64_t deadline;

			clock_absolutetime_interval_to_deadline(interval, &deadline);
			if (!timer_call_enter(self->depress_timer, deadline, TIMER_CALL_USER_CRITICAL)) {
				self->depress_timer_active++;
			}
		}
	}

	thread_unlock(self);
	splx(s);
}

void
thread_depress_ms(mach_msg_timeout_t interval)
{
	uint64_t abstime;

	clock_interval_to_absolutetime_interval(interval, NSEC_PER_MSEC, &abstime);
	thread_depress_abstime(abstime);
}

/*
 * Priority depression expiration.
 */
void
thread_depress_expire(void *p0, __unused void *p1)
{
	thread_t thread = (thread_t)p0;

	spl_t s = splsched();
	thread_lock(thread);

	assert((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != TH_SFLAG_DEPRESSED_MASK);

	if (--thread->depress_timer_active == 0) {
		thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;

		if ((thread->state & TH_RUN) == TH_RUN) {
			thread->last_basepri_change_time = mach_absolute_time();
		}

		thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
	}

	thread_unlock(thread);
	splx(s);
}

/*
 * Prematurely abort priority depression if there is one.
 */
kern_return_t
thread_depress_abort(thread_t thread)
{
	kern_return_t result = KERN_NOT_DEPRESSED;

	spl_t s = splsched();
	thread_lock(thread);

	assert((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != TH_SFLAG_DEPRESSED_MASK);

	/*
	 * User-triggered depress-aborts should not get out
	 * of the poll-depress, but they should cancel a regular depress.
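	 *
	 * For example, a depress set up on the swtch_pri() path is
	 * cancelled here, while a poll-depress installed by
	 * thread_poll_yield() survives until its timer expires.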
	 */
	if ((thread->sched_flags & TH_SFLAG_POLLDEPRESS) == 0) {
		result = thread_depress_abort_locked(thread);
	}

	thread_unlock(thread);
	splx(s);

	return result;
}

/*
 * Prematurely abort priority depression or poll depression if one is active.
 * Called with the thread locked.
 */
kern_return_t
thread_depress_abort_locked(thread_t thread)
{
	if ((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) == 0) {
		return KERN_NOT_DEPRESSED;
	}

	assert((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != TH_SFLAG_DEPRESSED_MASK);

	thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;

	if ((thread->state & TH_RUN) == TH_RUN) {
		thread->last_basepri_change_time = mach_absolute_time();
	}

	thread_recompute_sched_pri(thread, SETPRI_LAZY);

	if (timer_call_cancel(thread->depress_timer)) {
		thread->depress_timer_active--;
	}

	return KERN_SUCCESS;
}

/*
 * Invoked as part of a polling operation like a no-timeout port receive.
 *
 * Forces a fixpri thread to yield if it has been detected polling without
 * blocking for too long.
 */
void
thread_poll_yield(thread_t self)
{
	assert(self == current_thread());
	assert((self->sched_flags & TH_SFLAG_DEPRESS) == 0);

	if (self->sched_mode != TH_MODE_FIXED) {
		return;
	}

	spl_t s = splsched();

	uint64_t abstime = mach_absolute_time();
	uint64_t total_computation = abstime - self->computation_epoch + self->computation_metered;

	if (total_computation >= max_poll_computation) {
		thread_lock(self);

		self->computation_epoch = abstime;
		self->computation_metered = 0;

		uint64_t yield_expiration = abstime +
		    (total_computation >> sched_poll_yield_shift);

		if (!timer_call_enter(self->depress_timer, yield_expiration, TIMER_CALL_USER_CRITICAL)) {
			self->depress_timer_active++;
		}

		self->sched_flags |= TH_SFLAG_POLLDEPRESS;
		thread_recompute_sched_pri(self, SETPRI_DEFAULT);

		thread_unlock(self);
	}

	splx(s);
}

/*
 * Kernel-internal interface to yield for a specified period.
 *
 * WARNING: Will still yield to priority 0 even if the thread is holding a contended lock!
 */
void
thread_yield_internal(mach_msg_timeout_t ms)
{
	thread_t self = current_thread();

	assert((self->sched_flags & TH_SFLAG_DEPRESSED_MASK) != TH_SFLAG_DEPRESSED_MASK);

	processor_t     myprocessor;

	disable_preemption();
	myprocessor = current_processor();
	if (!SCHED(thread_should_yield)(myprocessor, self)) {
		mp_enable_preemption();

		return;
	}
	enable_preemption();

	thread_depress_ms(ms);

	thread_block_reason(THREAD_CONTINUE_NULL, NULL, AST_YIELD);

	thread_depress_abort(self);
}

/*
 * This yields to a possible non-urgent preemption pending on the current processor.
 *
 * This is useful when doing a long computation in the kernel without returning to userspace.
 *
 * As opposed to other yielding mechanisms, this does not drop the priority of the current thread.
 */
void
thread_yield_to_preemption(void)
{
	/*
	 * ast_pending() should ideally be called with interrupts disabled, but
	 * the check here is fine because csw_check() will do the right thing.
	 */
	ast_t *pending_ast = ast_pending();
	ast_t ast = AST_NONE;
	processor_t p;

	if (*pending_ast & AST_PREEMPT) {
		thread_t self = current_thread();

		spl_t s = splsched();

		p = current_processor();
		thread_lock(self);
		ast = csw_check(self, p, AST_YIELD);
		ast_on(ast);
		thread_unlock(self);

		if (ast != AST_NONE) {
			(void)thread_block_reason(THREAD_CONTINUE_NULL, NULL, ast);
		}

		splx(s);
	}
}
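
/*
 * Illustrative only: a long-running kernel computation might use the
 * yield primitives above roughly as follows (hypothetical caller sketch;
 * process_item() is not a real function):
 *
 *	for (i = 0; i < nitems; i++) {
 *		process_item(&items[i]);
 *		if ((i % 1024) == 0)
 *			thread_yield_to_preemption();   // take any pending preemption
 *	}
 */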