xref: /xnu-8796.101.5/osfmk/kern/thread_policy.c (revision aca3beaa3dfbd42498b42c5e5ce20a938e6554e5)
1 /*
2  * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <mach/mach_types.h>
30 #include <mach/thread_act_server.h>
31 
32 #include <kern/kern_types.h>
33 #include <kern/processor.h>
34 #include <kern/thread.h>
35 #include <kern/affinity.h>
36 #include <kern/work_interval.h>
37 #include <mach/task_policy.h>
38 #include <kern/sfi.h>
39 #include <kern/policy_internal.h>
40 #include <sys/errno.h>
41 #include <sys/ulock.h>
42 
43 #include <mach/machine/sdt.h>
44 
45 static KALLOC_TYPE_DEFINE(thread_qos_override_zone,
46     struct thread_qos_override, KT_DEFAULT);
47 
48 #ifdef MACH_BSD
49 extern int      proc_selfpid(void);
50 extern char *   proc_name_address(void *p);
51 extern void     rethrottle_thread(void * uthread);
52 #endif /* MACH_BSD */
53 
54 #define QOS_EXTRACT(q)        ((q) & 0xff)
55 
56 #define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
57 #define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
58 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
59 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 3
60 
61 TUNABLE(uint32_t, qos_override_mode, "qos_override_mode",
62     QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE);
63 
64 static void
65 proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset);
66 
67 const int thread_default_iotier_override  = THROTTLE_LEVEL_END;
68 
69 const struct thread_requested_policy default_thread_requested_policy = {
70 	.thrp_iotier_kevent_override = thread_default_iotier_override
71 };
72 
/*
 * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
 * to threads that don't have a QoS class set.
 */
const qos_policy_params_t thread_qos_policy_params = {
	/*
	 * This table defines the starting base priority of the thread,
	 * which will be modified by the thread importance and the task max priority
	 * before being applied.
	 */
	.qos_pri[THREAD_QOS_UNSPECIFIED]                = 0, /* not consulted */
	.qos_pri[THREAD_QOS_USER_INTERACTIVE]           = BASEPRI_BACKGROUND, /* i.e. 46 */
	.qos_pri[THREAD_QOS_USER_INITIATED]             = BASEPRI_USER_INITIATED,
	.qos_pri[THREAD_QOS_LEGACY]                     = BASEPRI_DEFAULT,
	.qos_pri[THREAD_QOS_UTILITY]                    = BASEPRI_UTILITY,
	.qos_pri[THREAD_QOS_BACKGROUND]                 = MAXPRI_THROTTLE,
	.qos_pri[THREAD_QOS_MAINTENANCE]                = MAXPRI_THROTTLE,

	/*
	 * This table defines the highest IO priority that a thread marked with this
	 * QoS class can have.
	 */
	.qos_iotier[THREAD_QOS_UNSPECIFIED]             = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_USER_INTERACTIVE]        = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_USER_INITIATED]          = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_LEGACY]                  = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_UTILITY]                 = THROTTLE_LEVEL_TIER1,
	.qos_iotier[THREAD_QOS_BACKGROUND]              = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */
	.qos_iotier[THREAD_QOS_MAINTENANCE]             = THROTTLE_LEVEL_TIER3,

	/*
	 * These two tables define the highest throughput-QoS and latency-QoS
	 * tiers that a thread marked with this QoS class can have.
	 */

	.qos_through_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
	.qos_through_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
	.qos_through_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
	.qos_through_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
	.qos_through_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
	.qos_through_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
	.qos_through_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),

	.qos_latency_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
	.qos_latency_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(LATENCY_QOS_TIER_0),
	.qos_latency_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(LATENCY_QOS_TIER_1),
	.qos_latency_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(LATENCY_QOS_TIER_1),
	.qos_latency_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(LATENCY_QOS_TIER_3),
	.qos_latency_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(LATENCY_QOS_TIER_3),
	.qos_latency_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(LATENCY_QOS_TIER_3),
};
124 
125 static void
126 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);
127 
128 static int
129 thread_qos_scaled_relative_priority(int qos, int qos_relprio);
130 
131 static void
132 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);
133 
134 static void
135 proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
136 
137 static void
138 proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
139 
140 static void
141 thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
142 
143 static int
144 thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);
145 
146 static int
147 proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);
148 
149 static void
150 thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
151 
152 static void
153 thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
154 
155 boolean_t
thread_has_qos_policy(thread_t thread)156 thread_has_qos_policy(thread_t thread)
157 {
158 	return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
159 }
160 
161 
162 static void
thread_remove_qos_policy_locked(thread_t thread,task_pend_token_t pend_token)163 thread_remove_qos_policy_locked(thread_t thread,
164     task_pend_token_t pend_token)
165 {
166 	__unused int prev_qos = thread->requested_policy.thrp_qos;
167 
168 	DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);
169 
170 	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
171 	    THREAD_QOS_UNSPECIFIED, 0, pend_token);
172 }
173 
174 kern_return_t
thread_remove_qos_policy(thread_t thread)175 thread_remove_qos_policy(thread_t thread)
176 {
177 	struct task_pend_token pend_token = {};
178 
179 	thread_mtx_lock(thread);
180 	if (!thread->active) {
181 		thread_mtx_unlock(thread);
182 		return KERN_TERMINATED;
183 	}
184 
185 	thread_remove_qos_policy_locked(thread, &pend_token);
186 
187 	thread_mtx_unlock(thread);
188 
189 	thread_policy_update_complete_unlocked(thread, &pend_token);
190 
191 	return KERN_SUCCESS;
192 }
193 
194 
195 boolean_t
thread_is_static_param(thread_t thread)196 thread_is_static_param(thread_t thread)
197 {
198 	if (thread->static_param) {
199 		DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
200 		return TRUE;
201 	}
202 	return FALSE;
203 }
204 
205 /*
206  * Relative priorities can range between 0REL and -15REL. These
207  * map to QoS-specific ranges, to create non-overlapping priority
208  * ranges.
209  */
210 static int
thread_qos_scaled_relative_priority(int qos,int qos_relprio)211 thread_qos_scaled_relative_priority(int qos, int qos_relprio)
212 {
213 	int next_lower_qos;
214 
215 	/* Fast path, since no validation or scaling is needed */
216 	if (qos_relprio == 0) {
217 		return 0;
218 	}
219 
220 	switch (qos) {
221 	case THREAD_QOS_USER_INTERACTIVE:
222 		next_lower_qos = THREAD_QOS_USER_INITIATED;
223 		break;
224 	case THREAD_QOS_USER_INITIATED:
225 		next_lower_qos = THREAD_QOS_LEGACY;
226 		break;
227 	case THREAD_QOS_LEGACY:
228 		next_lower_qos = THREAD_QOS_UTILITY;
229 		break;
230 	case THREAD_QOS_UTILITY:
231 		next_lower_qos = THREAD_QOS_BACKGROUND;
232 		break;
233 	case THREAD_QOS_MAINTENANCE:
234 	case THREAD_QOS_BACKGROUND:
235 		next_lower_qos = 0;
236 		break;
237 	default:
238 		panic("Unrecognized QoS %d", qos);
239 		return 0;
240 	}
241 
242 	int prio_range_max = thread_qos_policy_params.qos_pri[qos];
243 	int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0;
244 
245 	/*
246 	 * We now have the valid range that the scaled relative priority can map to. Note
247 	 * that the lower bound is exclusive, but the upper bound is inclusive. If the
248 	 * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
249 	 * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
250 	 * remainder.
251 	 */
252 	int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4);
253 
254 	return scaled_relprio;
255 }
256 
/*
 * flag set by -qos-policy-allow boot-arg to allow
 * testing thread qos policy from userspace
 */
static TUNABLE(bool, allow_qos_policy_set, "-qos-policy-allow", false);

/*
 * MIG entry point for setting a thread scheduling policy.
 *
 * Validates the request (static-param threads and direct QoS sets are
 * rejected unless the -qos-policy-allow boot-arg is set), strips any
 * existing QoS before applying the legacy policy, and restores the saved
 * QoS if the underlying set fails.
 *
 * Returns KERN_SUCCESS, or a failure code from validation /
 * thread_policy_set_internal().  Called with nothing locked.
 */
kern_return_t
thread_policy_set(
	thread_t                                thread,
	thread_policy_flavor_t  flavor,
	thread_policy_t                 policy_info,
	mach_msg_type_number_t  count)
{
	thread_qos_policy_data_t req_qos;
	kern_return_t kr;

	/* UNSPECIFIED doubles as "no QoS was saved for rollback". */
	req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;

	if (thread == THREAD_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (!allow_qos_policy_set) {
		if (thread_is_static_param(thread)) {
			return KERN_POLICY_STATIC;
		}

		if (flavor == THREAD_QOS_POLICY) {
			return KERN_INVALID_ARGUMENT;
		}

		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			if (count < THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT) {
				return KERN_INVALID_ARGUMENT;
			}
			thread_time_constraint_with_priority_policy_t info = (thread_time_constraint_with_priority_policy_t)policy_info;
			/* Without the boot-arg, only the base RT priority is permitted. */
			if (info->priority != BASEPRI_RTQUEUES) {
				return KERN_INVALID_ARGUMENT;
			}
		}
	}

	if (flavor == THREAD_TIME_CONSTRAINT_POLICY || flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
		thread_work_interval_flags_t th_wi_flags = os_atomic_load(
			&thread->th_work_interval_flags, relaxed);
		if ((th_wi_flags & TH_WORK_INTERVAL_FLAGS_HAS_WORKLOAD_ID) &&
		    !(th_wi_flags & TH_WORK_INTERVAL_FLAGS_RT_ALLOWED)) {
			/* Fail requests to become realtime for threads having joined workintervals
			 * with workload ID that don't have the rt-allowed flag. */
			return KERN_INVALID_POLICY;
		}
	}

	/* Threads without static_param set reset their QoS when other policies are applied. */
	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		/* Store the existing tier, if we fail this call it is used to reset back. */
		req_qos.qos_tier = thread->requested_policy.thrp_qos;
		req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;

		kr = thread_remove_qos_policy(thread);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
	}

	kr = thread_policy_set_internal(thread, flavor, policy_info, count);

	if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
		if (kr != KERN_SUCCESS) {
			/* Reset back to our original tier as the set failed. */
			(void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
		}
	}

	return kr;
}
333 
/*
 * The WITH_PRIORITY flavor must be a prefix-compatible superset of the plain
 * time-constraint flavor, so thread_policy_set_internal() can service both
 * through a single thread_time_constraint_with_priority_policy_t pointer.
 */
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, period) == offsetof(thread_time_constraint_policy_data_t, period));
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, computation) == offsetof(thread_time_constraint_policy_data_t, computation));
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, constraint) == offsetof(thread_time_constraint_policy_data_t, constraint));
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, preemptible) == offsetof(thread_time_constraint_policy_data_t, preemptible));
338 
/*
 * Apply a thread scheduling policy without the userspace-facing checks done
 * by thread_policy_set() (QoS removal, static-param enforcement).
 *
 * Takes the thread mutex; flavors that touch scheduler state additionally
 * take the thread (spin) lock at splsched.  Deferred side effects are
 * collected in pend_token and applied after all locks are dropped via
 * thread_policy_update_complete_unlocked().
 *
 * Returns KERN_TERMINATED for inactive threads, KERN_INVALID_ARGUMENT for
 * bad flavors/counts/values, otherwise the per-flavor result.
 */
kern_return_t
thread_policy_set_internal(
	thread_t                     thread,
	thread_policy_flavor_t       flavor,
	thread_policy_t              policy_info,
	mach_msg_type_number_t       count)
{
	kern_return_t result = KERN_SUCCESS;
	struct task_pend_token pend_token = {};

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return KERN_TERMINATED;
	}

	switch (flavor) {
	case THREAD_EXTENDED_POLICY:
	{
		/* With no (or short) payload, this flavor defaults to timeshare. */
		boolean_t timeshare = TRUE;

		if (count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t info;

			info = (thread_extended_policy_t)policy_info;
			timeshare = info->timeshare;
		}

		sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;

		spl_t s = splsched();
		thread_lock(thread);

		thread_set_user_sched_mode_and_recompute_pri(thread, mode);

		thread_unlock(thread);
		splx(s);

		/*
		 * The thread may be demoted with RT_DISALLOWED but has just
		 * changed its sched mode to TIMESHARE or FIXED. Make sure to
		 * undemote the thread so the new sched mode takes effect.
		 */
		thread_rt_evaluate(thread);

		pend_token.tpt_update_thread_sfi = 1;

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
	{
		/*
		 * Layout-compatible with the plain flavor (see static_asserts
		 * above), so one struct pointer serves both.
		 */
		thread_time_constraint_with_priority_policy_t info;

		mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
		    THREAD_TIME_CONSTRAINT_POLICY_COUNT :
		    THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);

		if (count < min_count) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_with_priority_policy_t)policy_info;


		/* Computation must fit within the constraint and the RT quantum bounds. */
		if (info->constraint < info->computation ||
		    info->computation > max_rt_quantum ||
		    info->computation < min_rt_quantum) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Raise computation to at least half the constraint, capped at max_rt_quantum. */
		if (info->computation < (info->constraint / 2)) {
			info->computation = (info->constraint / 2);
			if (info->computation > max_rt_quantum) {
				info->computation = max_rt_quantum;
			}
		}

		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			if ((info->priority < BASEPRI_RTQUEUES) || (info->priority > MAXPRI)) {
				result = KERN_INVALID_ARGUMENT;
				break;
			}
		}

		spl_t s = splsched();
		thread_lock(thread);

		thread->realtime.period          = info->period;
		thread->realtime.computation     = info->computation;
		thread->realtime.constraint      = info->constraint;
		thread->realtime.preemptible     = info->preemptible;

		/*
		 * If the thread has a work interval driven policy, the priority
		 * offset has been set by the work interval.
		 */
		if (!thread->requested_policy.thrp_wi_driven) {
			if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
				thread->realtime.priority_offset = (uint8_t)(info->priority - BASEPRI_RTQUEUES);
			} else {
				thread->realtime.priority_offset = 0;
			}
		}

		thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);

		thread_unlock(thread);
		splx(s);

		thread_rt_evaluate(thread);

		pend_token.tpt_update_thread_sfi = 1;

		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t info;

		if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}
		info = (thread_precedence_policy_t)policy_info;

		spl_t s = splsched();
		thread_lock(thread);

		/* Importance feeds directly into base-priority computation. */
		thread->importance = info->importance;

		thread_recompute_priority(thread);

		thread_unlock(thread);
		splx(s);

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		extern boolean_t affinity_sets_enabled;
		thread_affinity_policy_t info;

		if (!affinity_sets_enabled) {
			result = KERN_INVALID_POLICY;
			break;
		}

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t) policy_info;
		/*
		 * Unlock the thread mutex here and
		 * return directly after calling thread_affinity_set().
		 * This is necessary for correct lock ordering because
		 * thread_affinity_set() takes the task lock.
		 */
		thread_mtx_unlock(thread);
		return thread_affinity_set(thread, info->affinity_tag);
	}

#if !defined(XNU_TARGET_OS_OSX)
	case THREAD_BACKGROUND_POLICY:
	{
		thread_background_policy_t info;

		if (count < THREAD_BACKGROUND_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Only threads of the calling task may be backgrounded this way. */
		if (get_threadtask(thread) != current_task()) {
			result = KERN_PROTECTION_FAILURE;
			break;
		}

		info = (thread_background_policy_t) policy_info;

		int enable;

		if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG) {
			enable = TASK_POLICY_ENABLE;
		} else {
			enable = TASK_POLICY_DISABLE;
		}

		/* Self-sets are INTERNAL; sets on other threads are EXTERNAL. */
		int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;

		proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token);

		break;
	}
#endif /* !defined(XNU_TARGET_OS_OSX) */

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		thread_throughput_qos_t tqos;

		if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS) {
			break;
		}

		tqos = qos_extract(info->thread_throughput_qos_tier);

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);

		break;
	}

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		thread_latency_qos_t lqos;

		if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS) {
			break;
		}

		lqos = qos_extract(info->thread_latency_qos_tier);

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);

		break;
	}

	case THREAD_QOS_POLICY:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* tier_importance is a relative priority in [THREAD_QOS_MIN_TIER_IMPORTANCE, 0]. */
		if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Stored internally as a non-negative relpri (negated here). */
		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
		    info->qos_tier, -info->tier_importance, &pend_token);

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	/* Apply deferred side effects (SFI re-evaluation, etc.) with no locks held. */
	thread_policy_update_complete_unlocked(thread, &pend_token);

	return result;
}
631 
632 /*
633  * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
634  * Both result in FIXED mode scheduling.
635  */
636 static sched_mode_t
convert_policy_to_sched_mode(integer_t policy)637 convert_policy_to_sched_mode(integer_t policy)
638 {
639 	switch (policy) {
640 	case POLICY_TIMESHARE:
641 		return TH_MODE_TIMESHARE;
642 	case POLICY_RR:
643 	case POLICY_FIFO:
644 		return TH_MODE_FIXED;
645 	default:
646 		panic("unexpected sched policy: %d", policy);
647 		return TH_MODE_NONE;
648 	}
649 }
650 
/*
 * Called either with the thread mutex locked
 * or from the pthread kext in a 'safe place'.
 *
 * Converts an absolute priority request into an importance offset relative
 * to the task priority, clamps it, and installs the new user sched mode.
 * Returns KERN_FAILURE if the thread is (or was, via saved_mode) realtime.
 */
static kern_return_t
thread_set_mode_and_absolute_pri_internal(thread_t              thread,
    sched_mode_t          mode,
    integer_t             priority,
    task_pend_token_t     pend_token)
{
	kern_return_t kr = KERN_SUCCESS;

	spl_t s = splsched();
	thread_lock(thread);

	/* This path isn't allowed to change a thread out of realtime. */
	if ((thread->sched_mode == TH_MODE_REALTIME) ||
	    (thread->saved_mode == TH_MODE_REALTIME)) {
		kr = KERN_FAILURE;
		goto unlock;
	}

	/* policy_reset threads silently ignore policy changes. */
	if (thread->policy_reset) {
		kr = KERN_SUCCESS;
		goto unlock;
	}

	sched_mode_t old_mode = thread->sched_mode;

	/*
	 * Reverse engineer and apply the correct importance value
	 * from the requested absolute priority value.
	 *
	 * TODO: Store the absolute priority value instead
	 */

	if (priority >= thread->max_priority) {
		priority = thread->max_priority - thread->task_priority;
	} else if (priority >= MINPRI_KERNEL) {
		priority -=  MINPRI_KERNEL;
	} else if (priority >= MINPRI_RESERVED) {
		priority -=  MINPRI_RESERVED;
	} else {
		priority -= BASEPRI_DEFAULT;
	}

	priority += thread->task_priority;

	/* Clamp the rebased priority into [MINPRI, max_priority]. */
	if (priority > thread->max_priority) {
		priority = thread->max_priority;
	} else if (priority < MINPRI) {
		priority = MINPRI;
	}

	thread->importance = priority - thread->task_priority;

	thread_set_user_sched_mode_and_recompute_pri(thread, mode);

	/* A mode change needs an SFI re-evaluation once locks are dropped. */
	if (mode != old_mode) {
		pend_token->tpt_update_thread_sfi = 1;
	}

unlock:
	thread_unlock(thread);
	splx(s);

	return kr;
}
719 
720 void
thread_freeze_base_pri(thread_t thread)721 thread_freeze_base_pri(thread_t thread)
722 {
723 	assert(thread == current_thread());
724 
725 	spl_t s = splsched();
726 	thread_lock(thread);
727 
728 	assert((thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN) == 0);
729 	thread->sched_flags |= TH_SFLAG_BASE_PRI_FROZEN;
730 
731 	thread_unlock(thread);
732 	splx(s);
733 }
734 
735 bool
thread_unfreeze_base_pri(thread_t thread)736 thread_unfreeze_base_pri(thread_t thread)
737 {
738 	assert(thread == current_thread());
739 	integer_t base_pri;
740 	ast_t ast = 0;
741 
742 	spl_t s = splsched();
743 	thread_lock(thread);
744 
745 	assert(thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN);
746 	thread->sched_flags &= ~TH_SFLAG_BASE_PRI_FROZEN;
747 
748 	base_pri = thread->req_base_pri;
749 	if (base_pri != thread->base_pri) {
750 		/*
751 		 * This function returns "true" if the base pri change
752 		 * is the most likely cause for the preemption.
753 		 */
754 		sched_set_thread_base_priority(thread, base_pri);
755 		ast = ast_peek(AST_PREEMPT);
756 	}
757 
758 	thread_unlock(thread);
759 	splx(s);
760 
761 	return ast != 0;
762 }
763 
764 uint8_t
thread_workq_pri_for_qos(thread_qos_t qos)765 thread_workq_pri_for_qos(thread_qos_t qos)
766 {
767 	assert(qos < THREAD_QOS_LAST);
768 	return (uint8_t)thread_qos_policy_params.qos_pri[qos];
769 }
770 
771 thread_qos_t
thread_workq_qos_for_pri(int priority)772 thread_workq_qos_for_pri(int priority)
773 {
774 	thread_qos_t qos;
775 	if (priority > thread_qos_policy_params.qos_pri[THREAD_QOS_USER_INTERACTIVE]) {
776 		// indicate that workq should map >UI threads to workq's
777 		// internal notation for above-UI work.
778 		return THREAD_QOS_UNSPECIFIED;
779 	}
780 	for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
781 		// map a given priority up to the next nearest qos band.
782 		if (thread_qos_policy_params.qos_pri[qos - 1] < priority) {
783 			return qos;
784 		}
785 	}
786 	return THREAD_QOS_MAINTENANCE;
787 }
788 
/*
 * private interface for pthread workqueues
 *
 * Reset the thread's requested QoS (with zero relative priority) and clear
 * any workq QoS override.
 * May be called with spinlocks held
 * Thread mutex lock is not held
 */
void
thread_reset_workq_qos(thread_t thread, uint32_t qos)
{
	struct task_pend_token pend_token = {};

	assert(qos < THREAD_QOS_LAST);

	spl_t s = splsched();
	thread_lock(thread);

	/* Install the new base QoS... */
	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
	/* ...and drop any leftover workq override. */
	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0,
	    &pend_token);

	/* This path must never pend a sockets update (can't be done from here). */
	assert(pend_token.tpt_update_sockets == 0);

	thread_unlock(thread);
	splx(s);

	thread_policy_update_complete_unlocked(thread, &pend_token);
}
819 
/*
 * private interface for pthread workqueues
 *
 * Apply (or clear, with THREAD_QOS_UNSPECIFIED) a workq QoS override on
 * the thread.
 * May be called with spinlocks held
 * Thread mutex lock is held
 */
void
thread_set_workq_override(thread_t thread, uint32_t qos)
{
	struct task_pend_token pend_token = {};

	assert(qos < THREAD_QOS_LAST);

	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_WORKQ_OVERRIDE, qos, 0, &pend_token);

	/* This path must never pend a sockets update (can't be done from here). */
	assert(pend_token.tpt_update_sockets == 0);

	thread_unlock(thread);
	splx(s);

	thread_policy_update_complete_unlocked(thread, &pend_token);
}
847 
/*
 * private interface for pthread workqueues
 *
 * Set scheduling policy & absolute priority for thread
 * May be called with spinlocks held
 * Thread mutex lock is not held
 *
 * Sets the base QoS, clears any workq override, then applies the sched
 * mode/priority.  Only valid on active workq (static_param) threads.
 */
void
thread_set_workq_pri(thread_t  thread,
    thread_qos_t qos,
    integer_t priority,
    integer_t policy)
{
	struct task_pend_token pend_token = {};
	sched_mode_t mode = convert_policy_to_sched_mode(policy);

	assert(qos < THREAD_QOS_LAST);
	assert(thread->static_param);

	/* Silently bail in release builds if misused or the thread died. */
	if (!thread->static_param || !thread->active) {
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED,
	    0, &pend_token);

	thread_unlock(thread);
	splx(s);

	/* Concern: this doesn't hold the mutex... */

	__assert_only kern_return_t kr;
	kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority,
	    &pend_token);
	assert(kr == KERN_SUCCESS);

	if (pend_token.tpt_update_thread_sfi) {
		sfi_reevaluate(thread);
	}
}
894 
/*
 * thread_set_mode_and_absolute_pri:
 *
 * Set scheduling policy & absolute priority for thread, for deprecated
 * thread_set_policy and thread_policy interfaces.
 *
 * Called with nothing locked.
 *
 * Returns KERN_TERMINATED, KERN_POLICY_STATIC, or the result of
 * thread_set_mode_and_absolute_pri_internal().
 */
kern_return_t
thread_set_mode_and_absolute_pri(thread_t   thread,
    integer_t  policy,
    integer_t  priority)
{
	kern_return_t kr = KERN_SUCCESS;
	struct task_pend_token pend_token = {};

	sched_mode_t mode = convert_policy_to_sched_mode(policy);

	thread_mtx_lock(thread);

	if (!thread->active) {
		kr = KERN_TERMINATED;
		goto unlock;
	}

	if (thread_is_static_param(thread)) {
		kr = KERN_POLICY_STATIC;
		goto unlock;
	}

	/* Setting legacy policies on threads kills the current QoS */
	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		thread_remove_qos_policy_locked(thread, &pend_token);
	}

	kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);

unlock:
	thread_mtx_unlock(thread);

	/* Apply deferred side effects with no locks held. */
	thread_policy_update_complete_unlocked(thread, &pend_token);

	return kr;
}
939 
/*
 * Set the thread's requested mode and recompute priority
 * Called with thread mutex and thread locked
 *
 * The thread is pulled off its run queue (if queued) before the mode and
 * priority change, and reinserted afterward.
 *
 * TODO: Mitigate potential problems caused by moving thread to end of runq
 * whenever its priority is recomputed
 *      Only remove when it actually changes? Attempt to re-insert at appropriate location?
 */
static void
thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
{
	/* policy_reset threads ignore mode changes. */
	if (thread->policy_reset) {
		return;
	}

	boolean_t removed = thread_run_queue_remove(thread);

	sched_set_thread_mode_user(thread, mode);

	thread_recompute_priority(thread);

	if (removed) {
		thread_run_queue_reinsert(thread, SCHED_TAILQ);
	}
}
965 
/* called at splsched with thread lock locked */
static void
thread_update_qos_cpu_time_locked(thread_t thread)
{
	task_t task = get_threadtask(thread);
	uint64_t timer_sum, timer_delta;

	/*
	 * This is only as accurate the thread's last context switch or user/kernel
	 * transition (unless precise user/kernel time is disabled).
	 *
	 * TODO: Consider running an update operation here to update it first.
	 *       Maybe doable with interrupts disabled from current thread.
	 *       If the thread is on a different core, may not be easy to get right.
	 */

	/* Charge only the CPU time accumulated since the previous sample point */
	timer_sum = recount_thread_time_mach(thread);
	timer_delta = timer_sum - thread->vtimer_qos_save;

	thread->vtimer_qos_save = timer_sum;

	uint64_t* task_counter = NULL;

	/* Update the task-level effective and requested qos stats atomically, because we don't have the task lock. */
	switch (thread->effective_policy.thep_qos) {
	case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default; break;
	case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance; break;
	case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background; break;
	case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility; break;
	case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy; break;
	case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated; break;
	case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break;
	default:
		panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
	}

	/* Fold the delta into the per-effective-QoS bucket selected above */
	OSAddAtomic64(timer_delta, task_counter);

	/* Update the task-level qos stats atomically, because we don't have the task lock. */
	switch (thread->requested_policy.thrp_qos) {
	case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default; break;
	case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance; break;
	case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background; break;
	case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility; break;
	case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy; break;
	case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated; break;
	case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break;
	default:
		panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos);
	}

	/* Same delta, charged again under the requested-QoS bucket */
	OSAddAtomic64(timer_delta, task_counter);
}
1019 
1020 /*
1021  * called with no thread locks held
1022  * may hold task lock
1023  */
1024 void
thread_update_qos_cpu_time(thread_t thread)1025 thread_update_qos_cpu_time(thread_t thread)
1026 {
1027 	thread_mtx_lock(thread);
1028 
1029 	spl_t s = splsched();
1030 	thread_lock(thread);
1031 
1032 	thread_update_qos_cpu_time_locked(thread);
1033 
1034 	thread_unlock(thread);
1035 	splx(s);
1036 
1037 	thread_mtx_unlock(thread);
1038 }
1039 
1040 /*
1041  * Calculate base priority from thread attributes, and set it on the thread
1042  *
1043  * Called with thread_lock and thread mutex held.
1044  */
1045 extern boolean_t vps_dynamic_priority_enabled;
1046 
void
thread_recompute_priority(
	thread_t                thread)
{
	integer_t               priority;
	integer_t               adj_priority;

	/* Threads being torn down keep their reset priority (see thread_policy_reset) */
	if (thread->policy_reset) {
		return;
	}

	if (thread->sched_mode == TH_MODE_REALTIME) {
		/* RT base priority is encoded directly as an offset above BASEPRI_RTQUEUES */
		uint8_t i = thread->realtime.priority_offset;
		assert((i >= 0) && (i < NRTQS));
		priority = BASEPRI_RTQUEUES + i;

		sched_set_thread_base_priority(thread, priority);
		if (thread->realtime.deadline == RT_DEADLINE_NONE) {
			/* Make sure the thread has a valid deadline */
			uint64_t ctime = mach_absolute_time();
			thread->realtime.deadline = thread->realtime.constraint + ctime;
			KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SET_RT_DEADLINE) | DBG_FUNC_NONE,
			    (uintptr_t)thread_tid(thread), thread->realtime.deadline, thread->realtime.computation, 1);
		}
		/* RT threads skip all of the clamping/boosting logic below */
		return;

		/*
		 * A thread may have joined a RT work interval but then never
		 * changed its sched mode or have been demoted. RT work
		 * intervals will have RT priorities - ignore the priority if
		 * the thread isn't RT.
		 */
	} else if (thread->effective_policy.thep_wi_driven &&
	    work_interval_get_priority(thread) < BASEPRI_RTQUEUES) {
		priority = work_interval_get_priority(thread);
	} else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
		int qos = thread->effective_policy.thep_qos;
		int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
		int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
		int qos_scaled_relprio;

		assert(qos >= 0 && qos < THREAD_QOS_LAST);
		assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);

		priority = thread_qos_policy_params.qos_pri[qos];
		qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);

		if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) {
			/* Bump priority 46 to 47 when in a frontmost app */
			qos_scaled_relprio += 1;
		}

		/* TODO: factor in renice priority here? */

		priority += qos_scaled_relprio;
	} else {
		/* No QoS in effect: legacy importance-based priority, clamped to +/-MAXPRI */
		if (thread->importance > MAXPRI) {
			priority = MAXPRI;
		} else if (thread->importance < -MAXPRI) {
			priority = -MAXPRI;
		} else {
			priority = thread->importance;
		}

		priority += thread->task_priority;
	}

	/* Boost the priority of threads which are RT demoted. */
	if (sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) {
		priority = MAX(priority, MAXPRI_USER);
	}

	priority = MAX(priority, thread->user_promotion_basepri);

	/*
	 * Clamp priority back into the allowed range for this task.
	 *  The initial priority value could be out of this range due to:
	 *      Task clamped to BG or Utility (max-pri is 4, or 20)
	 *      Task is user task (max-pri is 63)
	 *      Task is kernel task (max-pri is 95)
	 * Note that thread->importance is user-settable to any integer
	 * via THREAD_PRECEDENCE_POLICY.
	 */
	adj_priority = priority;
	adj_priority = MIN(adj_priority, thread->max_priority);
	adj_priority = MAX(adj_priority, MINPRI);

	/* Allow workload driven priorities to exceed max_priority. */
	if (thread->effective_policy.thep_wi_driven) {
		adj_priority = MAX(adj_priority, priority);
	}

	/* Allow priority to exceed max_priority for promotions. */
	if (thread->effective_policy.thep_promote_above_task) {
		adj_priority = MAX(adj_priority, thread->user_promotion_basepri);
	}
	priority = adj_priority;
	assert3u(priority, <=, MAXPRI);

	/* A failsafe-demoted (formerly RT) thread runs depressed instead */
	if (thread->saved_mode == TH_MODE_REALTIME &&
	    sched_thread_mode_has_demotion(thread, TH_SFLAG_FAILSAFE)) {
		priority = DEPRESSPRI;
	}

	if (thread->effective_policy.thep_terminated == TRUE) {
		/*
		 * We temporarily want to override the expected priority to
		 * ensure that the thread exits in a timely manner.
		 * Note that this is allowed to exceed thread->max_priority
		 * so that the thread is no longer clamped to background
		 * during the final exit phase.
		 */
		if (priority < thread->task_priority) {
			priority = thread->task_priority;
		}
		if (priority < BASEPRI_DEFAULT) {
			priority = BASEPRI_DEFAULT;
		}
	}

#if !defined(XNU_TARGET_OS_OSX)
	/* No one can have a base priority less than MAXPRI_THROTTLE */
	if (priority < MAXPRI_THROTTLE) {
		priority = MAXPRI_THROTTLE;
	}
#endif /* !defined(XNU_TARGET_OS_OSX) */

	sched_set_thread_base_priority(thread, priority);
}
1176 
/* Called with the task lock held, but not the thread mutex or spinlock */
void
thread_policy_update_tasklocked(
	thread_t           thread,
	integer_t          priority,
	integer_t          max_priority,
	task_pend_token_t  pend_token)
{
	thread_mtx_lock(thread);

	if (!thread->active || thread->policy_reset) {
		thread_mtx_unlock(thread);
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	/*
	 * NOTE(review): tagged __unused yet read below for the throttle
	 * transition checks -- presumably the tag exists for configurations
	 * where those checks compile out; confirm against build variants.
	 * The snapshot must be taken before max_priority is overwritten.
	 */
	__unused
	integer_t old_max_priority = thread->max_priority;

	assert(priority >= INT16_MIN && priority <= INT16_MAX);
	thread->task_priority = (int16_t)priority;

	assert(max_priority >= INT16_MIN && max_priority <= INT16_MAX);
	thread->max_priority = (int16_t)max_priority;

	/*
	 * When backgrounding a thread, realtime and fixed priority threads
	 * should be demoted to timeshare background threads.
	 *
	 * TODO: Do this inside the thread policy update routine in order to avoid double
	 * remove/reinsert for a runnable thread
	 */
	if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
		sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED);
	} else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
		sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
	}

	thread_policy_update_spinlocked(thread, true, pend_token);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);
}
1224 
1225 /*
1226  * Reset thread to default state in preparation for termination
1227  * Called with thread mutex locked
1228  *
1229  * Always called on current thread, so we don't need a run queue remove
1230  */
1231 void
thread_policy_reset(thread_t thread)1232 thread_policy_reset(
1233 	thread_t                thread)
1234 {
1235 	spl_t           s;
1236 
1237 	assert(thread == current_thread());
1238 
1239 	s = splsched();
1240 	thread_lock(thread);
1241 
1242 	if (thread->sched_flags & TH_SFLAG_FAILSAFE) {
1243 		sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
1244 	}
1245 
1246 	if (thread->sched_flags & TH_SFLAG_THROTTLED) {
1247 		sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1248 	}
1249 
1250 	if (thread->sched_flags & TH_SFLAG_RT_DISALLOWED) {
1251 		sched_thread_mode_undemote(thread, TH_SFLAG_RT_DISALLOWED);
1252 	}
1253 
1254 	/* At this point, the various demotions should be inactive */
1255 	assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
1256 	assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK));
1257 
1258 	/* Reset thread back to task-default basepri and mode  */
1259 	sched_mode_t newmode = SCHED(initial_thread_sched_mode)(get_threadtask(thread));
1260 
1261 	sched_set_thread_mode(thread, newmode);
1262 
1263 	thread->importance = 0;
1264 
1265 	/* Prevent further changes to thread base priority or mode */
1266 	thread->policy_reset = 1;
1267 
1268 	sched_set_thread_base_priority(thread, thread->task_priority);
1269 
1270 	thread_unlock(thread);
1271 	splx(s);
1272 }
1273 
/*
 * thread_policy_get:
 *
 * Return scheduling-policy information for a thread, or default values
 * when *get_default is TRUE on entry. The routine may itself set
 * *get_default to TRUE when the thread's live state doesn't match the
 * queried flavor (e.g. a non-RT thread queried for time-constraint
 * policy), so callers can observe which path was taken.
 *
 * Called with nothing locked; takes the thread mutex, plus the thread
 * spinlock (at splsched) for flavors that read scheduler state.
 */
kern_return_t
thread_policy_get(
	thread_t                                thread,
	thread_policy_flavor_t  flavor,
	thread_policy_t                 policy_info,
	mach_msg_type_number_t  *count,
	boolean_t                               *get_default)
{
	kern_return_t                   result = KERN_SUCCESS;

	if (thread == THREAD_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return KERN_TERMINATED;
	}

	switch (flavor) {
	case THREAD_EXTENDED_POLICY:
	{
		boolean_t               timeshare = TRUE;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			/* RT threads (current or saved mode) report the default instead */
			if ((thread->sched_mode != TH_MODE_REALTIME) &&
			    (thread->saved_mode != TH_MODE_REALTIME)) {
				/* A demoted thread reports its pre-demotion (saved) mode */
				if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
					timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
				} else {
					timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
				}
			} else {
				*get_default = TRUE;
			}

			thread_unlock(thread);
			splx(s);
		}

		if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t        info;

			info = (thread_extended_policy_t)policy_info;
			info->timeshare = timeshare;
		}

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
	{
		thread_time_constraint_with_priority_policy_t         info;

		/* The WITH_PRIORITY variant requires the larger buffer */
		mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
		    THREAD_TIME_CONSTRAINT_POLICY_COUNT :
		    THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);

		if (*count < min_count) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_with_priority_policy_t)policy_info;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			if ((thread->sched_mode == TH_MODE_REALTIME) ||
			    (thread->saved_mode == TH_MODE_REALTIME)) {
				info->period = thread->realtime.period;
				info->computation = thread->realtime.computation;
				info->constraint = thread->realtime.constraint;
				info->preemptible = thread->realtime.preemptible;
				if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
					info->priority = thread->realtime.priority_offset + BASEPRI_RTQUEUES;
				}
			} else {
				*get_default = TRUE;
			}

			thread_unlock(thread);
			splx(s);
		}

		if (*get_default) {
			info->period = 0;
			info->computation = default_timeshare_computation;
			info->constraint = default_timeshare_constraint;
			info->preemptible = TRUE;
			if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
				info->priority = BASEPRI_RTQUEUES;
			}
		}


		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t              info;

		if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_precedence_policy_t)policy_info;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			info->importance = thread->importance;

			thread_unlock(thread);
			splx(s);
		} else {
			info->importance = 0;
		}

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		thread_affinity_policy_t                info;

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (*count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t)policy_info;

		if (!(*get_default)) {
			info->affinity_tag = thread_affinity_get(thread);
		} else {
			info->affinity_tag = THREAD_AFFINITY_TAG_NULL;
		}

		break;
	}

	case THREAD_POLICY_STATE:
	{
		thread_policy_state_t           info;

		if (*count < THREAD_POLICY_STATE_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Only root can get this info */
		if (!task_is_privileged(current_task())) {
			result = KERN_PROTECTION_FAILURE;
			break;
		}

		info = (thread_policy_state_t)(void*)policy_info;

		if (!(*get_default)) {
			info->flags = 0;

			spl_t s = splsched();
			thread_lock(thread);

			info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0);

			/* Raw bit-image copies of the policy structs for debugging tools */
			info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy);
			info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy);

			info->thps_user_promotions          = 0;
			info->thps_user_promotion_basepri   = thread->user_promotion_basepri;
			info->thps_ipc_overrides            = thread->kevent_overrides;

			proc_get_thread_policy_bitfield(thread, info);

			thread_unlock(thread);
			splx(s);
		} else {
			info->requested = 0;
			info->effective = 0;
			info->pending = 0;
		}

		break;
	}

	case THREAD_REQUESTED_STATE_POLICY:
	{
		if (*count < THREAD_REQUESTED_STATE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/*
		 * NOTE(review): requested_policy is read here under the thread
		 * mutex only (no spinlock), unlike the flavors above --
		 * presumably writers hold the mutex too; confirm.
		 */
		thread_requested_qos_policy_t info = (thread_requested_qos_policy_t) policy_info;
		struct thread_requested_policy *req_policy = &thread->requested_policy;

		info->thrq_base_qos = req_policy->thrp_qos;
		info->thrq_qos_relprio = req_policy->thrp_qos_relprio;
		info->thrq_qos_override = req_policy->thrp_qos_override;
		info->thrq_qos_promote = req_policy->thrp_qos_promote;
		info->thrq_qos_kevent_override = req_policy->thrp_qos_kevent_override;
		info->thrq_qos_workq_override = req_policy->thrp_qos_workq_override;
		info->thrq_qos_wlsvc_override = req_policy->thrp_qos_wlsvc_override;

		break;
	}

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		thread_latency_qos_t plqos;

		if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			plqos = 0;
		} else {
			plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL);
		}

		info->thread_latency_qos_tier = qos_latency_policy_package(plqos);
	}
	break;

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		thread_throughput_qos_t ptqos;

		if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			ptqos = 0;
		} else {
			ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL);
		}

		info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);
	}
	break;

	case THREAD_QOS_POLICY:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (*count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (!(*get_default)) {
			int relprio_value = 0;
			info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
			    TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);

			/* relprio is stored inverted; flip it back for the caller */
			info->tier_importance = -relprio_value;
		} else {
			info->qos_tier = THREAD_QOS_UNSPECIFIED;
			info->tier_importance = 0;
		}

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	return result;
}
1568 
1569 void
thread_policy_create(thread_t thread)1570 thread_policy_create(thread_t thread)
1571 {
1572 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1573 	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1574 	    thread_tid(thread), theffective_0(thread),
1575 	    theffective_1(thread), thread->base_pri, 0);
1576 
1577 	/* We pass a pend token but ignore it */
1578 	struct task_pend_token pend_token = {};
1579 
1580 	thread_policy_update_internal_spinlocked(thread, true, &pend_token);
1581 
1582 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1583 	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1584 	    thread_tid(thread), theffective_0(thread),
1585 	    theffective_1(thread), thread->base_pri, 0);
1586 }
1587 
1588 static void
thread_policy_update_spinlocked(thread_t thread,bool recompute_priority,task_pend_token_t pend_token)1589 thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token)
1590 {
1591 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1592 	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
1593 	    thread_tid(thread), theffective_0(thread),
1594 	    theffective_1(thread), thread->base_pri, 0);
1595 
1596 	thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);
1597 
1598 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1599 	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
1600 	    thread_tid(thread), theffective_0(thread),
1601 	    theffective_1(thread), thread->base_pri, 0);
1602 }
1603 
1604 
1605 
/*
 * One thread state update function TO RULE THEM ALL
 *
 * This function updates the thread effective policy fields
 * and pushes the results to the relevant subsystems.
 *
 * Called with thread spinlock locked, task may be locked, thread mutex may be locked
 */
static void
thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority,
    task_pend_token_t pend_token)
{
	/*
	 * Step 1:
	 *  Gather requested policy and effective task state
	 */

	/* Snapshot by value so the computation below sees a consistent input */
	const struct thread_requested_policy requested = thread->requested_policy;
	const struct task_effective_policy task_effective = get_threadtask(thread)->effective_policy;

	/*
	 * Step 2:
	 *  Calculate new effective policies from requested policy, task and thread state
	 *  Rules:
	 *      Don't change requested, it won't take effect
	 */

	struct thread_effective_policy next = {};

	next.thep_wi_driven = requested.thrp_wi_driven;

	next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;

	/* Effective QoS starts from the base request, raised by any active overrides/promotions */
	uint32_t next_qos = requested.thrp_qos;

	if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		next_qos = MAX(requested.thrp_qos_override, next_qos);
		next_qos = MAX(requested.thrp_qos_promote, next_qos);
		next_qos = MAX(requested.thrp_qos_kevent_override, next_qos);
		next_qos = MAX(requested.thrp_qos_wlsvc_override, next_qos);
		next_qos = MAX(requested.thrp_qos_workq_override, next_qos);
	}

	if (task_effective.tep_darwinbg && task_effective.tep_adaptive_bg &&
	    requested.thrp_qos_promote > THREAD_QOS_BACKGROUND) {
		/*
		 * This thread is turnstile-boosted higher than the adaptive clamp
		 * by a synchronous waiter. Allow that to override the adaptive
		 * clamp temporarily for this thread only.
		 */
		next.thep_promote_above_task = true;
		next_qos = requested.thrp_qos_promote;
	}

	next.thep_qos = next_qos;

	/* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
	if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
		if (next.thep_qos != THREAD_QOS_UNSPECIFIED) {
			next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
		} else {
			next.thep_qos = task_effective.tep_qos_clamp;
		}
		next.thep_wi_driven = 0;
	}

	/*
	 * Extract outbound-promotion QoS before applying task ceiling or BG clamp
	 * This allows QoS promotions to work properly even after the process is unclamped.
	 */
	next.thep_qos_promote = next.thep_qos;

	/* The ceiling only applies to threads that are in the QoS world */
	/* TODO: is it appropriate for this to limit a turnstile-boosted thread's QoS? */
	if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
	    next.thep_qos != THREAD_QOS_UNSPECIFIED) {
		next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
	}

	/*
	 * The QoS relative priority is only applicable when the original programmer's
	 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
	 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
	 * since otherwise it would be lower than unclamped threads. Similarly, in the
	 * presence of boosting, the programmer doesn't know what other actors
	 * are boosting the thread.
	 */
	if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
	    (requested.thrp_qos == next.thep_qos) &&
	    (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
		next.thep_qos_relprio = requested.thrp_qos_relprio;
	} else {
		next.thep_qos_relprio = 0;
	}

	/* Calculate DARWIN_BG */
	bool wants_darwinbg        = false;
	bool wants_all_sockets_bg  = false; /* Do I want my existing sockets to be bg */

	if (task_effective.tep_darwinbg && !next.thep_promote_above_task) {
		wants_darwinbg = true;
	}

	/*
	 * If DARWIN_BG has been requested at either level, it's engaged.
	 * darwinbg threads always create bg sockets,
	 * but only some types of darwinbg change the sockets
	 * after they're created
	 */
	if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg) {
		wants_all_sockets_bg = wants_darwinbg = true;
	}

	if (requested.thrp_pidbind_bg) {
		wants_all_sockets_bg = wants_darwinbg = true;
	}

	/* BG/maintenance QoS implies darwinbg even without an explicit request */
	if (next.thep_qos == THREAD_QOS_BACKGROUND ||
	    next.thep_qos == THREAD_QOS_MAINTENANCE) {
		wants_darwinbg = true;
	}

	/* Calculate side effects of DARWIN_BG */

	if (wants_darwinbg) {
		next.thep_darwinbg = 1;
		next.thep_wi_driven = 0;
	}

	if (next.thep_darwinbg || task_effective.tep_new_sockets_bg) {
		next.thep_new_sockets_bg = 1;
	}

	/* Don't use task_effective.tep_all_sockets_bg here */
	if (wants_all_sockets_bg) {
		next.thep_all_sockets_bg = 1;
	}

	/* darwinbg implies background QOS (or lower) */
	if (next.thep_darwinbg &&
	    (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
		next.thep_qos = THREAD_QOS_BACKGROUND;
		next.thep_qos_relprio = 0;
	}

	/* Calculate IO policy */

	int iopol = THROTTLE_LEVEL_TIER0;

	/* Factor in the task's IO policy */
	if (next.thep_darwinbg) {
		iopol = MAX(iopol, task_effective.tep_bg_iotier);
	}

	if (!next.thep_promote_above_task) {
		iopol = MAX(iopol, task_effective.tep_io_tier);
	}

	/* Look up the associated IO tier value for the QoS class */
	iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);

	iopol = MAX(iopol, requested.thrp_int_iotier);
	iopol = MAX(iopol, requested.thrp_ext_iotier);

	/* Apply the kevent iotier override */
	iopol = MIN(iopol, requested.thrp_iotier_kevent_override);

	next.thep_io_tier = iopol;

	/*
	 * If a QoS override is causing IO to go into a lower tier, we also set
	 * the passive bit so that a thread doesn't end up stuck in its own throttle
	 * window when the override goes away.
	 */

	int next_qos_iotier = thread_qos_policy_params.qos_iotier[next.thep_qos];
	int req_qos_iotier = thread_qos_policy_params.qos_iotier[requested.thrp_qos];
	bool qos_io_override_active = (next_qos_iotier < req_qos_iotier);

	/* Calculate Passive IO policy */
	if (requested.thrp_ext_iopassive ||
	    requested.thrp_int_iopassive ||
	    qos_io_override_active ||
	    task_effective.tep_io_passive) {
		next.thep_io_passive = 1;
	}

	/* Calculate timer QOS */
	uint32_t latency_qos = requested.thrp_latency_qos;

	if (!next.thep_promote_above_task) {
		latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
	}

	latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);

	next.thep_latency_qos = latency_qos;

	/* Calculate throughput QOS */
	uint32_t through_qos = requested.thrp_through_qos;

	if (!next.thep_promote_above_task) {
		through_qos = MAX(through_qos, task_effective.tep_through_qos);
	}

	through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);

	next.thep_through_qos = through_qos;

	if (task_effective.tep_terminated || requested.thrp_terminated) {
		/* Shoot down the throttles that slow down exit or response to SIGTERM */
		next.thep_terminated    = 1;
		next.thep_darwinbg      = 0;
		next.thep_io_tier       = THROTTLE_LEVEL_TIER0;
		next.thep_qos           = THREAD_QOS_UNSPECIFIED;
		next.thep_latency_qos   = LATENCY_QOS_TIER_UNSPECIFIED;
		next.thep_through_qos   = THROUGHPUT_QOS_TIER_UNSPECIFIED;
		next.thep_wi_driven     = 0;
	}

	/*
	 * Step 3:
	 *  Swap out old policy for new policy
	 */

	struct thread_effective_policy prev = thread->effective_policy;

	/* Settle CPU-time accounting under the outgoing QoS before it changes */
	thread_update_qos_cpu_time_locked(thread);

	/* This is the point where the new values become visible to other threads */
	thread->effective_policy = next;

	/*
	 * Step 4:
	 *  Pend updates that can't be done while holding the thread lock
	 */

	if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg) {
		pend_token->tpt_update_sockets = 1;
	}

	/* TODO: Doesn't this only need to be done if the throttle went up? */
	if (prev.thep_io_tier != next.thep_io_tier) {
		pend_token->tpt_update_throttle = 1;
	}

	/*
	 * Check for the attributes that sfi_thread_classify() consults,
	 *  and trigger SFI re-evaluation.
	 */
	if (prev.thep_qos != next.thep_qos ||
	    prev.thep_darwinbg != next.thep_darwinbg) {
		pend_token->tpt_update_thread_sfi = 1;
	}

	integer_t old_base_pri = thread->base_pri;

	/*
	 * Step 5:
	 *  Update other subsystems as necessary if something has changed
	 */

	/* Check for the attributes that thread_recompute_priority() consults */
	if (prev.thep_qos != next.thep_qos ||
	    prev.thep_qos_relprio != next.thep_qos_relprio ||
	    prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent ||
	    prev.thep_promote_above_task != next.thep_promote_above_task ||
	    prev.thep_terminated != next.thep_terminated ||
	    prev.thep_wi_driven != next.thep_wi_driven ||
	    pend_token->tpt_force_recompute_pri == 1 ||
	    recompute_priority) {
		thread_recompute_priority(thread);
	}

	/*
	 * Check if the thread is waiting on a turnstile and needs priority propagation.
	 */
	if (pend_token->tpt_update_turnstile &&
	    ((old_base_pri == thread->base_pri) ||
	    !thread_get_waiting_turnstile(thread))) {
		/*
		 * Reset update turnstile pend token since either
		 * the thread priority did not change or thread is
		 * not blocked on a turnstile.
		 */
		pend_token->tpt_update_turnstile = 0;
	}
}
1894 
1895 
1896 /*
1897  * Initiate a thread policy state transition on a thread with its TID
1898  * Useful if you cannot guarantee the thread won't get terminated
1899  * Precondition: No locks are held
1900  * Will take task lock - using the non-tid variant is faster
1901  * if you already have a thread ref.
1902  */
1903 void
proc_set_thread_policy_with_tid(task_t task,uint64_t tid,int category,int flavor,int value)1904 proc_set_thread_policy_with_tid(task_t     task,
1905     uint64_t   tid,
1906     int        category,
1907     int        flavor,
1908     int        value)
1909 {
1910 	/* takes task lock, returns ref'ed thread or NULL */
1911 	thread_t thread = task_findtid(task, tid);
1912 
1913 	if (thread == THREAD_NULL) {
1914 		return;
1915 	}
1916 
1917 	proc_set_thread_policy(thread, category, flavor, value);
1918 
1919 	thread_deallocate(thread);
1920 }
1921 
1922 /*
1923  * Initiate a thread policy transition on a thread
1924  * This path supports networking transitions (i.e. darwinbg transitions)
1925  * Precondition: No locks are held
1926  */
1927 void
proc_set_thread_policy(thread_t thread,int category,int flavor,int value)1928 proc_set_thread_policy(thread_t   thread,
1929     int        category,
1930     int        flavor,
1931     int        value)
1932 {
1933 	proc_set_thread_policy_ext(thread, category, flavor, value, 0);
1934 }
1935 
1936 void
proc_set_thread_policy_ext(thread_t thread,int category,int flavor,int value,int value2)1937 proc_set_thread_policy_ext(thread_t   thread,
1938     int        category,
1939     int        flavor,
1940     int        value,
1941     int        value2)
1942 {
1943 	struct task_pend_token pend_token = {};
1944 
1945 	thread_mtx_lock(thread);
1946 
1947 	proc_set_thread_policy_locked(thread, category, flavor, value, value2, &pend_token);
1948 
1949 	thread_mtx_unlock(thread);
1950 
1951 	thread_policy_update_complete_unlocked(thread, &pend_token);
1952 }
1953 
1954 /*
1955  * Do the things that can't be done while holding a thread mutex.
1956  * These are set up to call back into thread policy to get the latest value,
1957  * so they don't have to be synchronized with the update.
1958  * The only required semantic is 'call this sometime after updating effective policy'
1959  *
1960  * Precondition: Thread mutex is not held
1961  *
1962  * This may be called with the task lock held, but in that case it won't be
1963  * called with tpt_update_sockets set.
1964  */
1965 void
thread_policy_update_complete_unlocked(thread_t thread,task_pend_token_t pend_token)1966 thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
1967 {
1968 #ifdef MACH_BSD
1969 	if (pend_token->tpt_update_sockets) {
1970 		proc_apply_task_networkbg(task_pid(get_threadtask(thread)), thread);
1971 	}
1972 #endif /* MACH_BSD */
1973 
1974 	if (pend_token->tpt_update_throttle) {
1975 		rethrottle_thread(get_bsdthread_info(thread));
1976 	}
1977 
1978 	if (pend_token->tpt_update_thread_sfi) {
1979 		sfi_reevaluate(thread);
1980 	}
1981 
1982 	if (pend_token->tpt_update_turnstile) {
1983 		turnstile_update_thread_priority_chain(thread);
1984 	}
1985 }
1986 
1987 /*
1988  * Set and update thread policy
1989  * Thread mutex might be held
1990  */
1991 static void
proc_set_thread_policy_locked(thread_t thread,int category,int flavor,int value,int value2,task_pend_token_t pend_token)1992 proc_set_thread_policy_locked(thread_t          thread,
1993     int               category,
1994     int               flavor,
1995     int               value,
1996     int               value2,
1997     task_pend_token_t pend_token)
1998 {
1999 	spl_t s = splsched();
2000 	thread_lock(thread);
2001 
2002 	proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
2003 
2004 	thread_unlock(thread);
2005 	splx(s);
2006 }
2007 
2008 /*
2009  * Set and update thread policy
2010  * Thread spinlock is held
2011  */
2012 static void
proc_set_thread_policy_spinlocked(thread_t thread,int category,int flavor,int value,int value2,task_pend_token_t pend_token)2013 proc_set_thread_policy_spinlocked(thread_t          thread,
2014     int               category,
2015     int               flavor,
2016     int               value,
2017     int               value2,
2018     task_pend_token_t pend_token)
2019 {
2020 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2021 	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
2022 	    thread_tid(thread), threquested_0(thread),
2023 	    threquested_1(thread), value, 0);
2024 
2025 	thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
2026 
2027 	thread_policy_update_spinlocked(thread, false, pend_token);
2028 
2029 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2030 	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
2031 	    thread_tid(thread), threquested_0(thread),
2032 	    threquested_1(thread), tpending(pend_token), 0);
2033 }
2034 
2035 /*
2036  * Set the requested state for a specific flavor to a specific value.
2037  */
2038 static void
thread_set_requested_policy_spinlocked(thread_t thread,int category,int flavor,int value,int value2,task_pend_token_t pend_token)2039 thread_set_requested_policy_spinlocked(thread_t     thread,
2040     int               category,
2041     int               flavor,
2042     int               value,
2043     int               value2,
2044     task_pend_token_t pend_token)
2045 {
2046 	int tier, passive;
2047 
2048 	struct thread_requested_policy requested = thread->requested_policy;
2049 
2050 	switch (flavor) {
2051 	/* Category: EXTERNAL and INTERNAL, thread and task */
2052 
2053 	case TASK_POLICY_DARWIN_BG:
2054 		if (category == TASK_POLICY_EXTERNAL) {
2055 			requested.thrp_ext_darwinbg = value;
2056 		} else {
2057 			requested.thrp_int_darwinbg = value;
2058 		}
2059 		break;
2060 
2061 	case TASK_POLICY_IOPOL:
2062 		proc_iopol_to_tier(value, &tier, &passive);
2063 		if (category == TASK_POLICY_EXTERNAL) {
2064 			requested.thrp_ext_iotier  = tier;
2065 			requested.thrp_ext_iopassive = passive;
2066 		} else {
2067 			requested.thrp_int_iotier  = tier;
2068 			requested.thrp_int_iopassive = passive;
2069 		}
2070 		break;
2071 
2072 	case TASK_POLICY_IO:
2073 		if (category == TASK_POLICY_EXTERNAL) {
2074 			requested.thrp_ext_iotier = value;
2075 		} else {
2076 			requested.thrp_int_iotier = value;
2077 		}
2078 		break;
2079 
2080 	case TASK_POLICY_PASSIVE_IO:
2081 		if (category == TASK_POLICY_EXTERNAL) {
2082 			requested.thrp_ext_iopassive = value;
2083 		} else {
2084 			requested.thrp_int_iopassive = value;
2085 		}
2086 		break;
2087 
2088 	/* Category: ATTRIBUTE, thread only */
2089 
2090 	case TASK_POLICY_PIDBIND_BG:
2091 		assert(category == TASK_POLICY_ATTRIBUTE);
2092 		requested.thrp_pidbind_bg = value;
2093 		break;
2094 
2095 	case TASK_POLICY_LATENCY_QOS:
2096 		assert(category == TASK_POLICY_ATTRIBUTE);
2097 		requested.thrp_latency_qos = value;
2098 		break;
2099 
2100 	case TASK_POLICY_THROUGH_QOS:
2101 		assert(category == TASK_POLICY_ATTRIBUTE);
2102 		requested.thrp_through_qos = value;
2103 		break;
2104 
2105 	case TASK_POLICY_QOS_OVERRIDE:
2106 		assert(category == TASK_POLICY_ATTRIBUTE);
2107 		requested.thrp_qos_override = value;
2108 		pend_token->tpt_update_turnstile = 1;
2109 		break;
2110 
2111 	case TASK_POLICY_QOS_AND_RELPRIO:
2112 		assert(category == TASK_POLICY_ATTRIBUTE);
2113 		requested.thrp_qos = value;
2114 		requested.thrp_qos_relprio = value2;
2115 		pend_token->tpt_update_turnstile = 1;
2116 		DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
2117 		break;
2118 
2119 	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
2120 		assert(category == TASK_POLICY_ATTRIBUTE);
2121 		requested.thrp_qos_workq_override = value;
2122 		pend_token->tpt_update_turnstile = 1;
2123 		break;
2124 
2125 	case TASK_POLICY_QOS_PROMOTE:
2126 		assert(category == TASK_POLICY_ATTRIBUTE);
2127 		requested.thrp_qos_promote = value;
2128 		break;
2129 
2130 	case TASK_POLICY_QOS_KEVENT_OVERRIDE:
2131 		assert(category == TASK_POLICY_ATTRIBUTE);
2132 		requested.thrp_qos_kevent_override = value;
2133 		pend_token->tpt_update_turnstile = 1;
2134 		break;
2135 
2136 	case TASK_POLICY_QOS_SERVICER_OVERRIDE:
2137 		assert(category == TASK_POLICY_ATTRIBUTE);
2138 		requested.thrp_qos_wlsvc_override = value;
2139 		pend_token->tpt_update_turnstile = 1;
2140 		break;
2141 
2142 	case TASK_POLICY_TERMINATED:
2143 		assert(category == TASK_POLICY_ATTRIBUTE);
2144 		requested.thrp_terminated = value;
2145 		break;
2146 
2147 	case TASK_POLICY_IOTIER_KEVENT_OVERRIDE:
2148 		assert(category == TASK_POLICY_ATTRIBUTE);
2149 		requested.thrp_iotier_kevent_override = value;
2150 		break;
2151 
2152 	case TASK_POLICY_WI_DRIVEN:
2153 		assert(category == TASK_POLICY_ATTRIBUTE);
2154 		assert(thread == current_thread());
2155 
2156 		const bool set_policy = value;
2157 		const sched_mode_t mode = value2;
2158 
2159 		requested.thrp_wi_driven = set_policy ? 1 : 0;
2160 
2161 		/*
2162 		 * No sched mode change for REALTIME (threads must explicitly
2163 		 * opt-in), however the priority_offset needs to be updated.
2164 		 */
2165 		if (mode == TH_MODE_REALTIME) {
2166 			const int pri = work_interval_get_priority(thread);
2167 			assert3u(pri, >=, BASEPRI_RTQUEUES);
2168 			thread->realtime.priority_offset = set_policy ?
2169 			    (uint8_t)(pri - BASEPRI_RTQUEUES) : 0;
2170 		} else {
2171 			sched_set_thread_mode_user(thread, mode);
2172 			if (set_policy) {
2173 				thread->static_param = true;
2174 			}
2175 		}
2176 		break;
2177 
2178 	default:
2179 		panic("unknown task policy: %d %d %d", category, flavor, value);
2180 		break;
2181 	}
2182 
2183 	thread->requested_policy = requested;
2184 }
2185 
2186 /*
2187  * Gets what you set. Effective values may be different.
2188  * Precondition: No locks are held
2189  */
2190 int
proc_get_thread_policy(thread_t thread,int category,int flavor)2191 proc_get_thread_policy(thread_t   thread,
2192     int        category,
2193     int        flavor)
2194 {
2195 	int value = 0;
2196 	thread_mtx_lock(thread);
2197 	value = proc_get_thread_policy_locked(thread, category, flavor, NULL);
2198 	thread_mtx_unlock(thread);
2199 	return value;
2200 }
2201 
2202 static int
proc_get_thread_policy_locked(thread_t thread,int category,int flavor,int * value2)2203 proc_get_thread_policy_locked(thread_t   thread,
2204     int        category,
2205     int        flavor,
2206     int*       value2)
2207 {
2208 	int value = 0;
2209 
2210 	spl_t s = splsched();
2211 	thread_lock(thread);
2212 
2213 	value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2);
2214 
2215 	thread_unlock(thread);
2216 	splx(s);
2217 
2218 	return value;
2219 }
2220 
2221 /*
2222  * Gets what you set. Effective values may be different.
2223  */
2224 static int
thread_get_requested_policy_spinlocked(thread_t thread,int category,int flavor,int * value2)2225 thread_get_requested_policy_spinlocked(thread_t thread,
2226     int      category,
2227     int      flavor,
2228     int*     value2)
2229 {
2230 	int value = 0;
2231 
2232 	struct thread_requested_policy requested = thread->requested_policy;
2233 
2234 	switch (flavor) {
2235 	case TASK_POLICY_DARWIN_BG:
2236 		if (category == TASK_POLICY_EXTERNAL) {
2237 			value = requested.thrp_ext_darwinbg;
2238 		} else {
2239 			value = requested.thrp_int_darwinbg;
2240 		}
2241 		break;
2242 	case TASK_POLICY_IOPOL:
2243 		if (category == TASK_POLICY_EXTERNAL) {
2244 			value = proc_tier_to_iopol(requested.thrp_ext_iotier,
2245 			    requested.thrp_ext_iopassive);
2246 		} else {
2247 			value = proc_tier_to_iopol(requested.thrp_int_iotier,
2248 			    requested.thrp_int_iopassive);
2249 		}
2250 		break;
2251 	case TASK_POLICY_IO:
2252 		if (category == TASK_POLICY_EXTERNAL) {
2253 			value = requested.thrp_ext_iotier;
2254 		} else {
2255 			value = requested.thrp_int_iotier;
2256 		}
2257 		break;
2258 	case TASK_POLICY_PASSIVE_IO:
2259 		if (category == TASK_POLICY_EXTERNAL) {
2260 			value = requested.thrp_ext_iopassive;
2261 		} else {
2262 			value = requested.thrp_int_iopassive;
2263 		}
2264 		break;
2265 	case TASK_POLICY_QOS:
2266 		assert(category == TASK_POLICY_ATTRIBUTE);
2267 		value = requested.thrp_qos;
2268 		break;
2269 	case TASK_POLICY_QOS_OVERRIDE:
2270 		assert(category == TASK_POLICY_ATTRIBUTE);
2271 		value = requested.thrp_qos_override;
2272 		break;
2273 	case TASK_POLICY_LATENCY_QOS:
2274 		assert(category == TASK_POLICY_ATTRIBUTE);
2275 		value = requested.thrp_latency_qos;
2276 		break;
2277 	case TASK_POLICY_THROUGH_QOS:
2278 		assert(category == TASK_POLICY_ATTRIBUTE);
2279 		value = requested.thrp_through_qos;
2280 		break;
2281 	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
2282 		assert(category == TASK_POLICY_ATTRIBUTE);
2283 		value = requested.thrp_qos_workq_override;
2284 		break;
2285 	case TASK_POLICY_QOS_AND_RELPRIO:
2286 		assert(category == TASK_POLICY_ATTRIBUTE);
2287 		assert(value2 != NULL);
2288 		value = requested.thrp_qos;
2289 		*value2 = requested.thrp_qos_relprio;
2290 		break;
2291 	case TASK_POLICY_QOS_PROMOTE:
2292 		assert(category == TASK_POLICY_ATTRIBUTE);
2293 		value = requested.thrp_qos_promote;
2294 		break;
2295 	case TASK_POLICY_QOS_KEVENT_OVERRIDE:
2296 		assert(category == TASK_POLICY_ATTRIBUTE);
2297 		value = requested.thrp_qos_kevent_override;
2298 		break;
2299 	case TASK_POLICY_QOS_SERVICER_OVERRIDE:
2300 		assert(category == TASK_POLICY_ATTRIBUTE);
2301 		value = requested.thrp_qos_wlsvc_override;
2302 		break;
2303 	case TASK_POLICY_TERMINATED:
2304 		assert(category == TASK_POLICY_ATTRIBUTE);
2305 		value = requested.thrp_terminated;
2306 		break;
2307 	case TASK_POLICY_IOTIER_KEVENT_OVERRIDE:
2308 		assert(category == TASK_POLICY_ATTRIBUTE);
2309 		value = requested.thrp_iotier_kevent_override;
2310 		break;
2311 
2312 	case TASK_POLICY_WI_DRIVEN:
2313 		assert(category == TASK_POLICY_ATTRIBUTE);
2314 		value = requested.thrp_wi_driven;
2315 		break;
2316 
2317 	default:
2318 		panic("unknown policy_flavor %d", flavor);
2319 		break;
2320 	}
2321 
2322 	return value;
2323 }
2324 
2325 /*
2326  * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
2327  *
2328  * NOTE: This accessor does not take the task or thread lock.
2329  * Notifications of state updates need to be externally synchronized with state queries.
2330  * This routine *MUST* remain interrupt safe, as it is potentially invoked
2331  * within the context of a timer interrupt.
2332  *
2333  * TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
2334  *      Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
2335  *      I don't think that cost is worth not having the right answer.
2336  */
2337 int
proc_get_effective_thread_policy(thread_t thread,int flavor)2338 proc_get_effective_thread_policy(thread_t thread,
2339     int      flavor)
2340 {
2341 	int value = 0;
2342 
2343 	switch (flavor) {
2344 	case TASK_POLICY_DARWIN_BG:
2345 		/*
2346 		 * This call is used within the timer layer, as well as
2347 		 * prioritizing requests to the graphics system.
2348 		 * It also informs SFI and originator-bg-state.
2349 		 * Returns 1 for background mode, 0 for normal mode
2350 		 */
2351 
2352 		value = thread->effective_policy.thep_darwinbg ? 1 : 0;
2353 		break;
2354 	case TASK_POLICY_IO:
2355 		/*
2356 		 * The I/O system calls here to find out what throttling tier to apply to an operation.
2357 		 * Returns THROTTLE_LEVEL_* values
2358 		 */
2359 		value = thread->effective_policy.thep_io_tier;
2360 		if (thread->iotier_override != THROTTLE_LEVEL_NONE) {
2361 			value = MIN(value, thread->iotier_override);
2362 		}
2363 		break;
2364 	case TASK_POLICY_PASSIVE_IO:
2365 		/*
2366 		 * The I/O system calls here to find out whether an operation should be passive.
2367 		 * (i.e. not cause operations with lower throttle tiers to be throttled)
2368 		 * Returns 1 for passive mode, 0 for normal mode
2369 		 *
2370 		 * If an override is causing IO to go into a lower tier, we also set
2371 		 * the passive bit so that a thread doesn't end up stuck in its own throttle
2372 		 * window when the override goes away.
2373 		 */
2374 		value = thread->effective_policy.thep_io_passive ? 1 : 0;
2375 		if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
2376 		    thread->iotier_override < thread->effective_policy.thep_io_tier) {
2377 			value = 1;
2378 		}
2379 		break;
2380 	case TASK_POLICY_ALL_SOCKETS_BG:
2381 		/*
2382 		 * do_background_socket() calls this to determine whether
2383 		 * it should change the thread's sockets
2384 		 * Returns 1 for background mode, 0 for normal mode
2385 		 * This consults both thread and task so un-DBGing a thread while the task is BG
2386 		 * doesn't get you out of the network throttle.
2387 		 */
2388 		value = (thread->effective_policy.thep_all_sockets_bg ||
2389 		    get_threadtask(thread)->effective_policy.tep_all_sockets_bg) ? 1 : 0;
2390 		break;
2391 	case TASK_POLICY_NEW_SOCKETS_BG:
2392 		/*
2393 		 * socreate() calls this to determine if it should mark a new socket as background
2394 		 * Returns 1 for background mode, 0 for normal mode
2395 		 */
2396 		value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
2397 		break;
2398 	case TASK_POLICY_LATENCY_QOS:
2399 		/*
2400 		 * timer arming calls into here to find out the timer coalescing level
2401 		 * Returns a latency QoS tier (0-6)
2402 		 */
2403 		value = thread->effective_policy.thep_latency_qos;
2404 		break;
2405 	case TASK_POLICY_THROUGH_QOS:
2406 		/*
2407 		 * This value is passed into the urgency callout from the scheduler
2408 		 * to the performance management subsystem.
2409 		 *
2410 		 * Returns a throughput QoS tier (0-6)
2411 		 */
2412 		value = thread->effective_policy.thep_through_qos;
2413 		break;
2414 	case TASK_POLICY_QOS:
2415 		/*
2416 		 * This is communicated to the performance management layer and SFI.
2417 		 *
2418 		 * Returns a QoS policy tier
2419 		 */
2420 		value = thread->effective_policy.thep_qos;
2421 		break;
2422 	default:
2423 		panic("unknown thread policy flavor %d", flavor);
2424 		break;
2425 	}
2426 
2427 	return value;
2428 }
2429 
2430 
2431 /*
2432  * (integer_t) casts limit the number of bits we can fit here
2433  * this interface is deprecated and replaced by the _EXT struct ?
2434  */
2435 static void
proc_get_thread_policy_bitfield(thread_t thread,thread_policy_state_t info)2436 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
2437 {
2438 	uint64_t bits = 0;
2439 	struct thread_requested_policy requested = thread->requested_policy;
2440 
2441 	bits |= (requested.thrp_int_darwinbg    ? POLICY_REQ_INT_DARWIN_BG  : 0);
2442 	bits |= (requested.thrp_ext_darwinbg    ? POLICY_REQ_EXT_DARWIN_BG  : 0);
2443 	bits |= (requested.thrp_int_iotier      ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
2444 	bits |= (requested.thrp_ext_iotier      ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
2445 	bits |= (requested.thrp_int_iopassive   ? POLICY_REQ_INT_PASSIVE_IO : 0);
2446 	bits |= (requested.thrp_ext_iopassive   ? POLICY_REQ_EXT_PASSIVE_IO : 0);
2447 
2448 	bits |= (requested.thrp_qos             ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
2449 	bits |= (requested.thrp_qos_override    ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT)   : 0);
2450 
2451 	bits |= (requested.thrp_pidbind_bg      ? POLICY_REQ_PIDBIND_BG     : 0);
2452 
2453 	bits |= (requested.thrp_latency_qos     ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
2454 	bits |= (requested.thrp_through_qos     ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);
2455 
2456 	info->requested = (integer_t) bits;
2457 	bits = 0;
2458 
2459 	struct thread_effective_policy effective = thread->effective_policy;
2460 
2461 	bits |= (effective.thep_darwinbg        ? POLICY_EFF_DARWIN_BG      : 0);
2462 
2463 	bits |= (effective.thep_io_tier         ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
2464 	bits |= (effective.thep_io_passive      ? POLICY_EFF_IO_PASSIVE     : 0);
2465 	bits |= (effective.thep_all_sockets_bg  ? POLICY_EFF_ALL_SOCKETS_BG : 0);
2466 	bits |= (effective.thep_new_sockets_bg  ? POLICY_EFF_NEW_SOCKETS_BG : 0);
2467 
2468 	bits |= (effective.thep_qos             ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);
2469 
2470 	bits |= (effective.thep_latency_qos     ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
2471 	bits |= (effective.thep_through_qos     ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);
2472 
2473 	info->effective = (integer_t)bits;
2474 	bits = 0;
2475 
2476 	info->pending = 0;
2477 }
2478 
2479 /*
2480  * Sneakily trace either the task and thread requested
2481  * or just the thread requested, depending on if we have enough room.
2482  * We do have room on LP64. On LP32, we have to split it between two uintptr_t's.
2483  *
2484  *                                LP32            LP64
2485  * threquested_0(thread)          thread[0]       task[0]
2486  * threquested_1(thread)          thread[1]       thread[0]
2487  *
2488  */
2489 
2490 uintptr_t
threquested_0(thread_t thread)2491 threquested_0(thread_t thread)
2492 {
2493 	static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");
2494 
2495 	uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2496 
2497 	return raw[0];
2498 }
2499 
uintptr_t
threquested_1(thread_t thread)
{
#if defined __LP64__
	/* LP64: the whole thread word fit in threquested_0, so trace the task's here */
	return *(uintptr_t*)&get_threadtask(thread)->requested_policy;
#else
	/* LP32: the 64-bit thread policy spans two words; return the second one */
	uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
	return raw[1];
#endif
}
2510 
2511 uintptr_t
theffective_0(thread_t thread)2512 theffective_0(thread_t thread)
2513 {
2514 	static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");
2515 
2516 	uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2517 	return raw[0];
2518 }
2519 
uintptr_t
theffective_1(thread_t thread)
{
#if defined __LP64__
	/* LP64: the whole thread word fit in theffective_0, so trace the task's here */
	return *(uintptr_t*)&get_threadtask(thread)->effective_policy;
#else
	/* LP32: the 64-bit thread policy spans two words; return the second one */
	uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
	return raw[1];
#endif
}
2530 
2531 
2532 /*
2533  * Set an override on the thread which is consulted with a
2534  * higher priority than the task/thread policy. This should
2535  * only be set for temporary grants until the thread
2536  * returns to the userspace boundary
2537  *
2538  * We use atomic operations to swap in the override, with
2539  * the assumption that the thread itself can
2540  * read the override and clear it on return to userspace.
2541  *
2542  * No locking is performed, since it is acceptable to see
2543  * a stale override for one loop through throttle_lowpri_io().
2544  * However a thread reference must be held on the thread.
2545  */
2546 
2547 void
set_thread_iotier_override(thread_t thread,int policy)2548 set_thread_iotier_override(thread_t thread, int policy)
2549 {
2550 	int current_override;
2551 
2552 	/* Let most aggressive I/O policy win until user boundary */
2553 	do {
2554 		current_override = thread->iotier_override;
2555 
2556 		if (current_override != THROTTLE_LEVEL_NONE) {
2557 			policy = MIN(current_override, policy);
2558 		}
2559 
2560 		if (current_override == policy) {
2561 			/* no effective change */
2562 			return;
2563 		}
2564 	} while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));
2565 
2566 	/*
2567 	 * Since the thread may be currently throttled,
2568 	 * re-evaluate tiers and potentially break out
2569 	 * of an msleep
2570 	 */
2571 	rethrottle_thread(get_bsdthread_info(thread));
2572 }
2573 
2574 /*
2575  * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
2576  * semaphores, dispatch_sync) may result in priority inversions where a higher priority
2577  * (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower
2578  * priority thread. In these cases, we attempt to propagate the priority token, as long
2579  * as the subsystem informs us of the relationships between the threads. The userspace
2580  * synchronization subsystem should maintain the information of owner->resource and
2581  * resource->waiters itself.
2582  */
2583 
2584 /*
2585  * This helper canonicalizes the resource/resource_type given the current qos_override_mode
2586  * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
2587  * to be handled specially in the future, but for now it's fine to slam
2588  * *resource to USER_ADDR_NULL even if it was previously a wildcard.
2589  */
2590 static void
canonicalize_resource_and_type(user_addr_t * resource,int * resource_type)2591 canonicalize_resource_and_type(user_addr_t *resource, int *resource_type)
2592 {
2593 	if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2594 		/* Map all input resource/type to a single one */
2595 		*resource = USER_ADDR_NULL;
2596 		*resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
2597 	} else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
2598 		/* no transform */
2599 	} else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
2600 		/* Map all mutex overrides to a single one, to avoid memory overhead */
2601 		if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
2602 			*resource = USER_ADDR_NULL;
2603 		}
2604 	}
2605 }
2606 
2607 /* This helper routine finds an existing override if known. Locking should be done by caller */
2608 static struct thread_qos_override *
find_qos_override(thread_t thread,user_addr_t resource,int resource_type)2609 find_qos_override(thread_t thread,
2610     user_addr_t resource,
2611     int resource_type)
2612 {
2613 	struct thread_qos_override *override;
2614 
2615 	override = thread->overrides;
2616 	while (override) {
2617 		if (override->override_resource == resource &&
2618 		    override->override_resource_type == resource_type) {
2619 			return override;
2620 		}
2621 
2622 		override = override->override_next;
2623 	}
2624 
2625 	return NULL;
2626 }
2627 
/*
 * Decrement (or reset, when 'reset' is TRUE) the contended-resource count of
 * every override matching resource/resource_type (wildcards match all).
 * Overrides whose count drops to zero are unlinked from the thread's list and
 * chained onto *free_override_list so the caller can zfree them after
 * dropping locks. Locking should be done by caller.
 */
static void
find_and_decrement_qos_override(thread_t       thread,
    user_addr_t    resource,
    int            resource_type,
    boolean_t      reset,
    struct thread_qos_override **free_override_list)
{
	struct thread_qos_override *override, *override_prev;

	override_prev = NULL;
	override = thread->overrides;
	while (override) {
		/* Capture the successor before we potentially unlink this node */
		struct thread_qos_override *override_next = override->override_next;

		if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
		    (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) {
			if (reset) {
				override->override_contended_resource_count = 0;
			} else {
				override->override_contended_resource_count--;
			}

			if (override->override_contended_resource_count == 0) {
				/* Unlink: splice around this node, handling the head case */
				if (override_prev == NULL) {
					thread->overrides = override_next;
				} else {
					override_prev->override_next = override_next;
				}

				/* Add to out-param for later zfree */
				override->override_next = *free_override_list;
				*free_override_list = override;
			} else {
				override_prev = override;
			}

			/* A non-wildcard resource matches at most one entry; stop here */
			if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
				return;
			}
		} else {
			override_prev = override;
		}

		override = override_next;
	}
}
2674 
2675 /* This helper recalculates the current requested override using the policy selected at boot */
2676 static int
calculate_requested_qos_override(thread_t thread)2677 calculate_requested_qos_override(thread_t thread)
2678 {
2679 	if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2680 		return THREAD_QOS_UNSPECIFIED;
2681 	}
2682 
2683 	/* iterate over all overrides and calculate MAX */
2684 	struct thread_qos_override *override;
2685 	int qos_override = THREAD_QOS_UNSPECIFIED;
2686 
2687 	override = thread->overrides;
2688 	while (override) {
2689 		qos_override = MAX(qos_override, override->override_qos);
2690 		override = override->override_next;
2691 	}
2692 
2693 	return qos_override;
2694 }
2695 
2696 /*
2697  * Returns:
2698  * - 0 on success
2699  * - EINVAL if some invalid input was passed
2700  */
2701 static int
proc_thread_qos_add_override_internal(thread_t thread,int override_qos,boolean_t first_override_for_resource,user_addr_t resource,int resource_type)2702 proc_thread_qos_add_override_internal(thread_t         thread,
2703     int              override_qos,
2704     boolean_t        first_override_for_resource,
2705     user_addr_t      resource,
2706     int              resource_type)
2707 {
2708 	struct task_pend_token pend_token = {};
2709 	int rc = 0;
2710 
2711 	thread_mtx_lock(thread);
2712 
2713 	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
2714 	    thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);
2715 
2716 	DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
2717 	    uint64_t, thread->requested_policy.thrp_qos,
2718 	    uint64_t, thread->effective_policy.thep_qos,
2719 	    int, override_qos, boolean_t, first_override_for_resource);
2720 
2721 	struct thread_qos_override *override;
2722 	struct thread_qos_override *override_new = NULL;
2723 	int new_qos_override, prev_qos_override;
2724 	int new_effective_qos;
2725 
2726 	canonicalize_resource_and_type(&resource, &resource_type);
2727 
2728 	override = find_qos_override(thread, resource, resource_type);
2729 	if (first_override_for_resource && !override) {
2730 		/* We need to allocate a new object. Drop the thread lock and
2731 		 * recheck afterwards in case someone else added the override
2732 		 */
2733 		thread_mtx_unlock(thread);
2734 		override_new = zalloc(thread_qos_override_zone);
2735 		thread_mtx_lock(thread);
2736 		override = find_qos_override(thread, resource, resource_type);
2737 	}
2738 	if (first_override_for_resource && override) {
2739 		/* Someone else already allocated while the thread lock was dropped */
2740 		override->override_contended_resource_count++;
2741 	} else if (!override && override_new) {
2742 		override = override_new;
2743 		override_new = NULL;
2744 		override->override_next = thread->overrides;
2745 		/* since first_override_for_resource was TRUE */
2746 		override->override_contended_resource_count = 1;
2747 		override->override_resource = resource;
2748 		override->override_resource_type = (int16_t)resource_type;
2749 		override->override_qos = THREAD_QOS_UNSPECIFIED;
2750 		thread->overrides = override;
2751 	}
2752 
2753 	if (override) {
2754 		if (override->override_qos == THREAD_QOS_UNSPECIFIED) {
2755 			override->override_qos = (int16_t)override_qos;
2756 		} else {
2757 			override->override_qos = MAX(override->override_qos, (int16_t)override_qos);
2758 		}
2759 	}
2760 
2761 	/* Determine how to combine the various overrides into a single current
2762 	 * requested override
2763 	 */
2764 	new_qos_override = calculate_requested_qos_override(thread);
2765 
2766 	prev_qos_override = proc_get_thread_policy_locked(thread,
2767 	    TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
2768 
2769 	if (new_qos_override != prev_qos_override) {
2770 		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
2771 		    TASK_POLICY_QOS_OVERRIDE,
2772 		    new_qos_override, 0, &pend_token);
2773 	}
2774 
2775 	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
2776 
2777 	thread_mtx_unlock(thread);
2778 
2779 	thread_policy_update_complete_unlocked(thread, &pend_token);
2780 
2781 	if (override_new) {
2782 		zfree(thread_qos_override_zone, override_new);
2783 	}
2784 
2785 	DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
2786 	    int, new_qos_override, int, new_effective_qos, int, rc);
2787 
2788 	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
2789 	    new_qos_override, resource, resource_type, 0, 0);
2790 
2791 	return rc;
2792 }
2793 
2794 int
proc_thread_qos_add_override(task_t task,thread_t thread,uint64_t tid,int override_qos,boolean_t first_override_for_resource,user_addr_t resource,int resource_type)2795 proc_thread_qos_add_override(task_t           task,
2796     thread_t         thread,
2797     uint64_t         tid,
2798     int              override_qos,
2799     boolean_t        first_override_for_resource,
2800     user_addr_t      resource,
2801     int              resource_type)
2802 {
2803 	boolean_t has_thread_reference = FALSE;
2804 	int rc = 0;
2805 
2806 	if (thread == THREAD_NULL) {
2807 		thread = task_findtid(task, tid);
2808 		/* returns referenced thread */
2809 
2810 		if (thread == THREAD_NULL) {
2811 			KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
2812 			    tid, 0, 0xdead, 0, 0);
2813 			return ESRCH;
2814 		}
2815 		has_thread_reference = TRUE;
2816 	} else {
2817 		assert(get_threadtask(thread) == task);
2818 	}
2819 	rc = proc_thread_qos_add_override_internal(thread, override_qos,
2820 	    first_override_for_resource, resource, resource_type);
2821 	if (has_thread_reference) {
2822 		thread_deallocate(thread);
2823 	}
2824 
2825 	return rc;
2826 }
2827 
/*
 * Remove (or, with 'reset', clear all counts of) a per-resource QoS override
 * on a thread, then re-evaluate the combined requested override.
 * Called with no locks held; takes the thread mutex, and nests the thread
 * spinlock inside it for the policy get/set.
 */
static void
proc_thread_qos_remove_override_internal(thread_t       thread,
    user_addr_t    resource,
    int            resource_type,
    boolean_t      reset)
{
	struct task_pend_token pend_token = {};

	/* Overrides unlinked under the mutex are freed after all locks drop. */
	struct thread_qos_override *deferred_free_override_list = NULL;
	int new_qos_override, prev_qos_override, new_effective_qos;

	thread_mtx_lock(thread);

	canonicalize_resource_and_type(&resource, &resource_type);

	find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
	    thread_tid(thread), resource, reset, 0, 0);

	DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
	    uint64_t, thread->requested_policy.thrp_qos,
	    uint64_t, thread->effective_policy.thep_qos);

	/* Determine how to combine the various overrides into a single current requested override */
	new_qos_override = calculate_requested_qos_override(thread);

	spl_t s = splsched();
	thread_lock(thread);

	/*
	 * The override chain and therefore the value of the current override is locked with thread mutex,
	 * so we can do a get/set without races.  However, the rest of thread policy is locked under the spinlock.
	 * This means you can't change the current override from a spinlock-only setter.
	 */
	prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);

	if (new_qos_override != prev_qos_override) {
		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token);
	}

	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	/* Now that no locks are held, free the overrides unlinked above. */
	while (deferred_free_override_list) {
		struct thread_qos_override *override_next = deferred_free_override_list->override_next;

		zfree(thread_qos_override_zone, deferred_free_override_list);
		deferred_free_override_list = override_next;
	}

	DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
	    int, new_qos_override, int, new_effective_qos);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
	    thread_tid(thread), 0, 0, 0, 0);
}
2891 
2892 int
proc_thread_qos_remove_override(task_t task,thread_t thread,uint64_t tid,user_addr_t resource,int resource_type)2893 proc_thread_qos_remove_override(task_t      task,
2894     thread_t    thread,
2895     uint64_t    tid,
2896     user_addr_t resource,
2897     int         resource_type)
2898 {
2899 	boolean_t has_thread_reference = FALSE;
2900 
2901 	if (thread == THREAD_NULL) {
2902 		thread = task_findtid(task, tid);
2903 		/* returns referenced thread */
2904 
2905 		if (thread == THREAD_NULL) {
2906 			KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2907 			    tid, 0, 0xdead, 0, 0);
2908 			return ESRCH;
2909 		}
2910 		has_thread_reference = TRUE;
2911 	} else {
2912 		assert(task == get_threadtask(thread));
2913 	}
2914 
2915 	proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE);
2916 
2917 	if (has_thread_reference) {
2918 		thread_deallocate(thread);
2919 	}
2920 
2921 	return 0;
2922 }
2923 
2924 /* Deallocate before thread termination */
2925 void
proc_thread_qos_deallocate(thread_t thread)2926 proc_thread_qos_deallocate(thread_t thread)
2927 {
2928 	/* This thread must have no more IPC overrides. */
2929 	assert(thread->kevent_overrides == 0);
2930 	assert(thread->requested_policy.thrp_qos_kevent_override == THREAD_QOS_UNSPECIFIED);
2931 	assert(thread->requested_policy.thrp_qos_wlsvc_override == THREAD_QOS_UNSPECIFIED);
2932 
2933 	/*
2934 	 * Clear out any lingering override objects.
2935 	 */
2936 	struct thread_qos_override *override;
2937 
2938 	thread_mtx_lock(thread);
2939 	override = thread->overrides;
2940 	thread->overrides = NULL;
2941 	thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2942 	/* We don't need to re-evaluate thread policy here because the thread has already exited */
2943 	thread_mtx_unlock(thread);
2944 
2945 	while (override) {
2946 		struct thread_qos_override *override_next = override->override_next;
2947 
2948 		zfree(thread_qos_override_zone, override);
2949 		override = override_next;
2950 	}
2951 }
2952 
2953 /*
2954  * Set up the primordial thread's QoS
2955  */
2956 void
task_set_main_thread_qos(task_t task,thread_t thread)2957 task_set_main_thread_qos(task_t task, thread_t thread)
2958 {
2959 	struct task_pend_token pend_token = {};
2960 
2961 	assert(get_threadtask(thread) == task);
2962 
2963 	thread_mtx_lock(thread);
2964 
2965 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2966 	    (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
2967 	    thread_tid(thread), threquested_0(thread), threquested_1(thread),
2968 	    thread->requested_policy.thrp_qos, 0);
2969 
2970 	thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
2971 
2972 	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
2973 	    primordial_qos, 0, &pend_token);
2974 
2975 	thread_mtx_unlock(thread);
2976 
2977 	thread_policy_update_complete_unlocked(thread, &pend_token);
2978 
2979 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2980 	    (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
2981 	    thread_tid(thread), threquested_0(thread), threquested_1(thread),
2982 	    primordial_qos, 0);
2983 }
2984 
2985 /*
2986  * KPI for pthread kext
2987  *
2988  * Return a good guess at what the initial manager QoS will be
2989  * Dispatch can override this in userspace if it so chooses
2990  */
2991 thread_qos_t
task_get_default_manager_qos(task_t task)2992 task_get_default_manager_qos(task_t task)
2993 {
2994 	thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
2995 
2996 	if (primordial_qos == THREAD_QOS_LEGACY) {
2997 		primordial_qos = THREAD_QOS_USER_INITIATED;
2998 	}
2999 
3000 	return primordial_qos;
3001 }
3002 
3003 /*
3004  * Check if the kernel promotion on thread has changed
3005  * and apply it.
3006  *
3007  * thread locked on entry and exit
3008  */
3009 boolean_t
thread_recompute_kernel_promotion_locked(thread_t thread)3010 thread_recompute_kernel_promotion_locked(thread_t thread)
3011 {
3012 	boolean_t needs_update = FALSE;
3013 	uint8_t kern_promotion_schedpri = (uint8_t)thread_get_inheritor_turnstile_sched_priority(thread);
3014 
3015 	/*
3016 	 * For now just assert that kern_promotion_schedpri <= MAXPRI_PROMOTE.
3017 	 * TURNSTILE_KERNEL_PROMOTE adds threads on the waitq already capped to MAXPRI_PROMOTE
3018 	 * and propagates the priority through the chain with the same cap, because as of now it does
3019 	 * not differenciate on the kernel primitive.
3020 	 *
3021 	 * If this assumption will change with the adoption of a kernel primitive that does not
3022 	 * cap the when adding/propagating,
3023 	 * then here is the place to put the generic cap for all kernel primitives
3024 	 * (converts the assert to kern_promotion_schedpri = MIN(priority, MAXPRI_PROMOTE))
3025 	 */
3026 	assert(kern_promotion_schedpri <= MAXPRI_PROMOTE);
3027 
3028 	if (kern_promotion_schedpri != thread->kern_promotion_schedpri) {
3029 		KDBG(MACHDBG_CODE(
3030 			    DBG_MACH_SCHED, MACH_TURNSTILE_KERNEL_CHANGE) | DBG_FUNC_NONE,
3031 		    thread_tid(thread),
3032 		    kern_promotion_schedpri,
3033 		    thread->kern_promotion_schedpri);
3034 
3035 		needs_update = TRUE;
3036 		thread->kern_promotion_schedpri = kern_promotion_schedpri;
3037 		thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
3038 	}
3039 
3040 	return needs_update;
3041 }
3042 
3043 /*
3044  * Check if the user promotion on thread has changed
3045  * and apply it.
3046  *
3047  * thread locked on entry, might drop the thread lock
3048  * and reacquire it.
3049  */
3050 boolean_t
thread_recompute_user_promotion_locked(thread_t thread)3051 thread_recompute_user_promotion_locked(thread_t thread)
3052 {
3053 	boolean_t needs_update = FALSE;
3054 	struct task_pend_token pend_token = {};
3055 	uint8_t user_promotion_basepri = MIN((uint8_t)thread_get_inheritor_turnstile_base_priority(thread), MAXPRI_USER);
3056 	int old_base_pri = thread->base_pri;
3057 	thread_qos_t qos_promotion;
3058 
3059 	/* Check if user promotion has changed */
3060 	if (thread->user_promotion_basepri == user_promotion_basepri) {
3061 		return needs_update;
3062 	} else {
3063 		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3064 		    (TURNSTILE_CODE(TURNSTILE_PRIORITY_OPERATIONS, (THREAD_USER_PROMOTION_CHANGE))) | DBG_FUNC_NONE,
3065 		    thread_tid(thread),
3066 		    user_promotion_basepri,
3067 		    thread->user_promotion_basepri,
3068 		    0, 0);
3069 		KDBG(MACHDBG_CODE(
3070 			    DBG_MACH_SCHED, MACH_TURNSTILE_USER_CHANGE) | DBG_FUNC_NONE,
3071 		    thread_tid(thread),
3072 		    user_promotion_basepri,
3073 		    thread->user_promotion_basepri);
3074 	}
3075 
3076 	/* Update the user promotion base pri */
3077 	thread->user_promotion_basepri = user_promotion_basepri;
3078 	pend_token.tpt_force_recompute_pri = 1;
3079 
3080 	if (user_promotion_basepri <= MAXPRI_THROTTLE) {
3081 		qos_promotion = THREAD_QOS_UNSPECIFIED;
3082 	} else {
3083 		qos_promotion = thread_user_promotion_qos_for_pri(user_promotion_basepri);
3084 	}
3085 
3086 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3087 	    TASK_POLICY_QOS_PROMOTE, qos_promotion, 0, &pend_token);
3088 
3089 	if (thread_get_waiting_turnstile(thread) &&
3090 	    thread->base_pri != old_base_pri) {
3091 		needs_update = TRUE;
3092 	}
3093 
3094 	thread_unlock(thread);
3095 
3096 	thread_policy_update_complete_unlocked(thread, &pend_token);
3097 
3098 	thread_lock(thread);
3099 
3100 	return needs_update;
3101 }
3102 
3103 /*
3104  * Convert the thread user promotion base pri to qos for threads in qos world.
3105  * For priority above UI qos, the qos would be set to UI.
3106  */
3107 thread_qos_t
thread_user_promotion_qos_for_pri(int priority)3108 thread_user_promotion_qos_for_pri(int priority)
3109 {
3110 	thread_qos_t qos;
3111 	for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
3112 		if (thread_qos_policy_params.qos_pri[qos] <= priority) {
3113 			return qos;
3114 		}
3115 	}
3116 	return THREAD_QOS_MAINTENANCE;
3117 }
3118 
3119 /*
3120  * Set the thread's QoS Kevent override
3121  * Owned by the Kevent subsystem
3122  *
3123  * May be called with spinlocks held, but not spinlocks
3124  * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
3125  *
3126  * One 'add' must be balanced by one 'drop'.
3127  * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
3128  * Before the thread is deallocated, there must be 0 remaining overrides.
3129  */
3130 static void
thread_kevent_override(thread_t thread,uint32_t qos_override,boolean_t is_new_override)3131 thread_kevent_override(thread_t    thread,
3132     uint32_t    qos_override,
3133     boolean_t   is_new_override)
3134 {
3135 	struct task_pend_token pend_token = {};
3136 	boolean_t needs_update;
3137 
3138 	spl_t s = splsched();
3139 	thread_lock(thread);
3140 
3141 	uint32_t old_override = thread->requested_policy.thrp_qos_kevent_override;
3142 
3143 	assert(qos_override > THREAD_QOS_UNSPECIFIED);
3144 	assert(qos_override < THREAD_QOS_LAST);
3145 
3146 	if (is_new_override) {
3147 		if (thread->kevent_overrides++ == 0) {
3148 			/* This add is the first override for this thread */
3149 			assert(old_override == THREAD_QOS_UNSPECIFIED);
3150 		} else {
3151 			/* There are already other overrides in effect for this thread */
3152 			assert(old_override > THREAD_QOS_UNSPECIFIED);
3153 		}
3154 	} else {
3155 		/* There must be at least one override (the previous add call) in effect */
3156 		assert(thread->kevent_overrides > 0);
3157 		assert(old_override > THREAD_QOS_UNSPECIFIED);
3158 	}
3159 
3160 	/*
3161 	 * We can't allow lowering if there are several IPC overrides because
3162 	 * the caller can't possibly know the whole truth
3163 	 */
3164 	if (thread->kevent_overrides == 1) {
3165 		needs_update = qos_override != old_override;
3166 	} else {
3167 		needs_update = qos_override > old_override;
3168 	}
3169 
3170 	if (needs_update) {
3171 		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3172 		    TASK_POLICY_QOS_KEVENT_OVERRIDE,
3173 		    qos_override, 0, &pend_token);
3174 		assert(pend_token.tpt_update_sockets == 0);
3175 	}
3176 
3177 	thread_unlock(thread);
3178 	splx(s);
3179 
3180 	thread_policy_update_complete_unlocked(thread, &pend_token);
3181 }
3182 
3183 void
thread_add_kevent_override(thread_t thread,uint32_t qos_override)3184 thread_add_kevent_override(thread_t thread, uint32_t qos_override)
3185 {
3186 	thread_kevent_override(thread, qos_override, TRUE);
3187 }
3188 
3189 void
thread_update_kevent_override(thread_t thread,uint32_t qos_override)3190 thread_update_kevent_override(thread_t thread, uint32_t qos_override)
3191 {
3192 	thread_kevent_override(thread, qos_override, FALSE);
3193 }
3194 
3195 void
thread_drop_kevent_override(thread_t thread)3196 thread_drop_kevent_override(thread_t thread)
3197 {
3198 	struct task_pend_token pend_token = {};
3199 
3200 	spl_t s = splsched();
3201 	thread_lock(thread);
3202 
3203 	assert(thread->kevent_overrides > 0);
3204 
3205 	if (--thread->kevent_overrides == 0) {
3206 		/*
3207 		 * There are no more overrides for this thread, so we should
3208 		 * clear out the saturated override value
3209 		 */
3210 
3211 		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3212 		    TASK_POLICY_QOS_KEVENT_OVERRIDE, THREAD_QOS_UNSPECIFIED,
3213 		    0, &pend_token);
3214 	}
3215 
3216 	thread_unlock(thread);
3217 	splx(s);
3218 
3219 	thread_policy_update_complete_unlocked(thread, &pend_token);
3220 }
3221 
3222 /*
3223  * Set the thread's QoS Workloop Servicer override
3224  * Owned by the Kevent subsystem
3225  *
3226  * May be called with spinlocks held, but not spinlocks
3227  * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
3228  *
3229  * One 'add' must be balanced by one 'drop'.
3230  * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
3231  * Before the thread is deallocated, there must be 0 remaining overrides.
3232  */
3233 static void
thread_servicer_override(thread_t thread,uint32_t qos_override,boolean_t is_new_override)3234 thread_servicer_override(thread_t    thread,
3235     uint32_t    qos_override,
3236     boolean_t   is_new_override)
3237 {
3238 	struct task_pend_token pend_token = {};
3239 
3240 	spl_t s = splsched();
3241 	thread_lock(thread);
3242 
3243 	if (is_new_override) {
3244 		assert(!thread->requested_policy.thrp_qos_wlsvc_override);
3245 	} else {
3246 		assert(thread->requested_policy.thrp_qos_wlsvc_override);
3247 	}
3248 
3249 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3250 	    TASK_POLICY_QOS_SERVICER_OVERRIDE,
3251 	    qos_override, 0, &pend_token);
3252 
3253 	thread_unlock(thread);
3254 	splx(s);
3255 
3256 	assert(pend_token.tpt_update_sockets == 0);
3257 	thread_policy_update_complete_unlocked(thread, &pend_token);
3258 }
3259 
3260 void
thread_add_servicer_override(thread_t thread,uint32_t qos_override)3261 thread_add_servicer_override(thread_t thread, uint32_t qos_override)
3262 {
3263 	assert(qos_override > THREAD_QOS_UNSPECIFIED);
3264 	assert(qos_override < THREAD_QOS_LAST);
3265 
3266 	thread_servicer_override(thread, qos_override, TRUE);
3267 }
3268 
3269 void
thread_update_servicer_override(thread_t thread,uint32_t qos_override)3270 thread_update_servicer_override(thread_t thread, uint32_t qos_override)
3271 {
3272 	assert(qos_override > THREAD_QOS_UNSPECIFIED);
3273 	assert(qos_override < THREAD_QOS_LAST);
3274 
3275 	thread_servicer_override(thread, qos_override, FALSE);
3276 }
3277 
3278 void
thread_drop_servicer_override(thread_t thread)3279 thread_drop_servicer_override(thread_t thread)
3280 {
3281 	thread_servicer_override(thread, THREAD_QOS_UNSPECIFIED, FALSE);
3282 }
3283 
/*
 * Set (or clear) the kevent servicer's I/O tier override on a thread,
 * skipping the policy machinery when the requested tier is already current.
 */
void
thread_update_servicer_iotier_override(thread_t thread, uint8_t iotier_override)
{
	struct task_pend_token pend_token = {};
	uint8_t current_iotier;

	/* Check if the update is needed */
	/* NOTE(review): this fast-path read uses a *_spinlocked accessor
	 * without holding the thread lock, so it can race a concurrent
	 * update and skip (or redundantly take) the slow path below.
	 * Presumably an intentional benign optimization — confirm. */
	current_iotier = (uint8_t)thread_get_requested_policy_spinlocked(thread,
	    TASK_POLICY_ATTRIBUTE, TASK_POLICY_IOTIER_KEVENT_OVERRIDE, NULL);

	if (iotier_override == current_iotier) {
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_IOTIER_KEVENT_OVERRIDE,
	    iotier_override, 0, &pend_token);

	thread_unlock(thread);
	splx(s);

	/* This policy change must never queue socket updates. */
	assert(pend_token.tpt_update_sockets == 0);
	thread_policy_update_complete_unlocked(thread, &pend_token);
}
3311 
3312 /* Get current requested qos / relpri, may be called from spinlock context */
3313 thread_qos_t
thread_get_requested_qos(thread_t thread,int * relpri)3314 thread_get_requested_qos(thread_t thread, int *relpri)
3315 {
3316 	int relprio_value = 0;
3317 	thread_qos_t qos;
3318 
3319 	qos = (thread_qos_t)proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
3320 	    TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
3321 	if (relpri) {
3322 		*relpri = -relprio_value;
3323 	}
3324 	return qos;
3325 }
3326 
3327 /*
3328  * This function will promote the thread priority
3329  * since exec could block other threads calling
3330  * proc_find on the proc. This boost must be removed
3331  * via call to thread_clear_exec_promotion.
3332  *
3333  * This should be replaced with a generic 'priority inheriting gate' mechanism (24194397)
3334  */
3335 void
thread_set_exec_promotion(thread_t thread)3336 thread_set_exec_promotion(thread_t thread)
3337 {
3338 	spl_t s = splsched();
3339 	thread_lock(thread);
3340 
3341 	sched_thread_promote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3342 
3343 	thread_unlock(thread);
3344 	splx(s);
3345 }
3346 
3347 /*
3348  * This function will clear the exec thread
3349  * promotion set on the thread by thread_set_exec_promotion.
3350  */
3351 void
thread_clear_exec_promotion(thread_t thread)3352 thread_clear_exec_promotion(thread_t thread)
3353 {
3354 	spl_t s = splsched();
3355 	thread_lock(thread);
3356 
3357 	sched_thread_unpromote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3358 
3359 	thread_unlock(thread);
3360 	splx(s);
3361 }
3362 
#if CONFIG_SCHED_RT_ALLOW

/*
 * flag set by the "-rt-allow_policy-enable" boot-arg to restrict use of
 * THREAD_TIME_CONSTRAINT_POLICY and THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY
 * to threads that have joined a workinterval with WORK_INTERVAL_WORKLOAD_ID_RT_ALLOWED.
 */
static TUNABLE(
	bool,
	rt_allow_policy_enabled,
	"-rt-allow_policy-enable",
	false
	);

/*
 * When the RT allow policy is enabled and a thread allowed to become RT,
 * sometimes (if the processes RT allow policy is restricted) the thread will
 * have a CPU limit enforced. The following two tunables determine the
 * parameters for that CPU limit.
 */

/* % of the interval allowed to run. */
TUNABLE_DEV_WRITEABLE(uint8_t, rt_allow_limit_percent,
    "rt_allow_limit_percent", 70);

/* The length of the interval in milliseconds (boot-arg "rt_allow_limit_interval"). */
TUNABLE_DEV_WRITEABLE(uint16_t, rt_allow_limit_interval_ms,
    "rt_allow_limit_interval", 10);
3391 
3392 /*
3393  * Set a CPU limit on a thread based on the RT allow policy. This will be picked
3394  * up by the target thread via the ledger AST.
3395  */
3396 static void
thread_rt_set_cpulimit(thread_t thread)3397 thread_rt_set_cpulimit(thread_t thread)
3398 {
3399 	/* Force reasonable values for the cpu limit. */
3400 	const uint8_t percent = MAX(MIN(rt_allow_limit_percent, 99), 1);
3401 	const uint16_t interval_ms = MAX(rt_allow_limit_interval_ms, 1);
3402 
3403 	thread->t_ledger_req_percentage = percent;
3404 	thread->t_ledger_req_interval_ms = interval_ms;
3405 	thread->t_ledger_req_action = THREAD_CPULIMIT_BLOCK;
3406 }
3407 
3408 /* Similar to the above but removes any CPU limit. */
3409 static void
thread_rt_clear_cpulimit(thread_t thread)3410 thread_rt_clear_cpulimit(thread_t thread)
3411 {
3412 	thread->t_ledger_req_percentage = 0;
3413 	thread->t_ledger_req_interval_ms = 0;
3414 	thread->t_ledger_req_action = THREAD_CPULIMIT_DISABLE;
3415 }
3416 
3417 /*
3418  * Evaluate RT policy for a thread, demoting and undemoting as needed.
3419  */
3420 void
thread_rt_evaluate(thread_t thread)3421 thread_rt_evaluate(thread_t thread)
3422 {
3423 	task_t task = get_threadtask(thread);
3424 	bool platform_binary = false;
3425 
3426 	/* If the RT allow policy is not enabled - nothing to do. */
3427 	if (!rt_allow_policy_enabled) {
3428 		return;
3429 	}
3430 
3431 	/* User threads only. */
3432 	if (task == kernel_task) {
3433 		return;
3434 	}
3435 
3436 	/* Check for platform binary. */
3437 	platform_binary = (task_ro_flags_get(task) & TFRO_PLATFORM) != 0;
3438 
3439 	spl_t s = splsched();
3440 	thread_lock(thread);
3441 
3442 	const thread_work_interval_flags_t wi_flags =
3443 	    os_atomic_load(&thread->th_work_interval_flags, relaxed);
3444 
3445 	/*
3446 	 * RT threads which are not joined to a work interval which allows RT
3447 	 * threads are demoted. Once those conditions no longer hold, the thread
3448 	 * undemoted.
3449 	 */
3450 	if ((thread->sched_mode == TH_MODE_REALTIME || thread->saved_mode == TH_MODE_REALTIME) &&
3451 	    (wi_flags & TH_WORK_INTERVAL_FLAGS_RT_ALLOWED) == 0) {
3452 		if (!sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) {
3453 			KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RT_DISALLOWED_WORK_INTERVAL),
3454 			    thread_tid(thread));
3455 			sched_thread_mode_demote(thread, TH_SFLAG_RT_DISALLOWED);
3456 		}
3457 	} else {
3458 		if (sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) {
3459 			sched_thread_mode_undemote(thread, TH_SFLAG_RT_DISALLOWED);
3460 		}
3461 	}
3462 
3463 	/*
3464 	 * RT threads get a CPU limit unless they're part of a platform binary
3465 	 * task.
3466 	 */
3467 	if ((thread->sched_mode == TH_MODE_REALTIME || thread->saved_mode == TH_MODE_REALTIME) &&
3468 	    !platform_binary) {
3469 		thread_rt_set_cpulimit(thread);
3470 	} else {
3471 		thread_rt_clear_cpulimit(thread);
3472 	}
3473 
3474 	thread_unlock(thread);
3475 	splx(s);
3476 
3477 	/* Ensure the target thread picks up any CPU limit change. */
3478 	act_set_astledger(thread);
3479 }
3480 
#else

/* RT allow policy compiled out: evaluation is a no-op. */
void
thread_rt_evaluate(__unused thread_t thread)
{
}

#endif /*  CONFIG_SCHED_RT_ALLOW */
3489