xref: /xnu-12377.61.12/osfmk/kern/thread_policy.c (revision 4d495c6e23c53686cf65f45067f79024cf5dcee8)
1 /*
2  * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <libkern/OSAtomic.h>
30 #include <mach/mach_types.h>
31 #include <mach/thread_act_server.h>
32 
33 #include <kern/kern_types.h>
34 #include <kern/processor.h>
35 #include <kern/thread.h>
36 #include <kern/affinity.h>
37 #include <kern/work_interval.h>
38 #include <mach/task_policy.h>
39 #include <kern/sfi.h>
40 #include <kern/policy_internal.h>
41 #include <sys/errno.h>
42 #include <sys/ulock.h>
43 
44 #include <mach/machine/sdt.h>
45 
/* Typed zone backing per-thread QoS override tracking records. */
static KALLOC_TYPE_DEFINE(thread_qos_override_zone,
    struct thread_qos_override, KT_DEFAULT);

#ifdef MACH_BSD
/* BSD-layer helpers used below for tracing and IO throttle re-evaluation. */
extern int      proc_selfpid(void);
extern char *   proc_name_address(void *p);
extern void     rethrottle_thread(void * uthread);
#endif /* MACH_BSD */

/* Strip a QoS tier encoding down to its low byte. */
#define QOS_EXTRACT(q)        ((q) & 0xff)

/* Selectable strategies for combining per-thread QoS overrides. */
#define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
#define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 3

/* Boot-arg tunable selecting the override strategy above. */
TUNABLE(uint32_t, qos_override_mode, "qos_override_mode",
    QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE);

static void
proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset);

/* Threads start with no kevent IO tier override in effect. */
const int thread_default_iotier_override  = THROTTLE_LEVEL_END;

/* Template requested-policy for a freshly created thread. */
const struct thread_requested_policy default_thread_requested_policy = {
	.thrp_iotier_kevent_override = thread_default_iotier_override
};
73 
74 /*
75  * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
76  * to threads that don't have a QoS class set.
77  */
const qos_policy_params_t thread_qos_policy_params = {
	/*
	 * This table defines the starting base priority of the thread,
	 * which will be modified by the thread importance and the task max priority
	 * before being applied.
	 */
	.qos_pri[THREAD_QOS_UNSPECIFIED]                = 0, /* not consulted */
	.qos_pri[THREAD_QOS_USER_INTERACTIVE]           = BASEPRI_BACKGROUND, /* i.e. 46 */
	.qos_pri[THREAD_QOS_USER_INITIATED]             = BASEPRI_USER_INITIATED,
	.qos_pri[THREAD_QOS_LEGACY]                     = BASEPRI_DEFAULT,
	.qos_pri[THREAD_QOS_UTILITY]                    = BASEPRI_UTILITY,
	.qos_pri[THREAD_QOS_BACKGROUND]                 = MAXPRI_THROTTLE,
	.qos_pri[THREAD_QOS_MAINTENANCE]                = MAXPRI_THROTTLE,

	/*
	 * This table defines the highest IO priority that a thread marked with this
	 * QoS class can have.
	 */
	.qos_iotier[THREAD_QOS_UNSPECIFIED]             = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_USER_INTERACTIVE]        = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_USER_INITIATED]          = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_LEGACY]                  = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_UTILITY]                 = THROTTLE_LEVEL_TIER1,
	.qos_iotier[THREAD_QOS_BACKGROUND]              = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */
	.qos_iotier[THREAD_QOS_MAINTENANCE]             = THROTTLE_LEVEL_TIER3,

	/*
	 * This table defines the highest throughput QoS tier that
	 * a thread marked with this QoS class can have.
	 */

	.qos_through_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
	.qos_through_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
	.qos_through_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
	.qos_through_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
	.qos_through_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
	.qos_through_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
	.qos_through_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),

	/*
	 * This table defines the most aggressive latency QoS tier that
	 * a thread marked with this QoS class can have.
	 */
	.qos_latency_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
	.qos_latency_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(LATENCY_QOS_TIER_0),
	.qos_latency_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(LATENCY_QOS_TIER_1),
	.qos_latency_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(LATENCY_QOS_TIER_1),
	.qos_latency_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(LATENCY_QOS_TIER_3),
	.qos_latency_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(LATENCY_QOS_TIER_3),
	.qos_latency_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(LATENCY_QOS_TIER_3),
};
125 
/* Forward declarations for the file-local thread policy engine helpers. */
static void
thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);

static int
thread_qos_scaled_relative_priority(int qos, int qos_relprio);

static void
proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);

/* _locked: caller holds the thread mutex; _spinlocked: caller holds the thread (sched) lock. */
static void
proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);

static void
proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);

static void
thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);

static int
thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);

static int
proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);

static void
thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);

static void
thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
155 
156 boolean_t
thread_has_qos_policy(thread_t thread)157 thread_has_qos_policy(thread_t thread)
158 {
159 	return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
160 }
161 
162 
163 static void
thread_remove_qos_policy_locked(thread_t thread,task_pend_token_t pend_token)164 thread_remove_qos_policy_locked(thread_t thread,
165     task_pend_token_t pend_token)
166 {
167 	__unused int prev_qos = thread->requested_policy.thrp_qos;
168 
169 	DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);
170 
171 	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
172 	    THREAD_QOS_UNSPECIFIED, 0, pend_token);
173 }
174 
175 kern_return_t
thread_remove_qos_policy(thread_t thread)176 thread_remove_qos_policy(thread_t thread)
177 {
178 	struct task_pend_token pend_token = {};
179 
180 	thread_mtx_lock(thread);
181 	if (!thread->active) {
182 		thread_mtx_unlock(thread);
183 		return KERN_TERMINATED;
184 	}
185 
186 	thread_remove_qos_policy_locked(thread, &pend_token);
187 
188 	thread_mtx_unlock(thread);
189 
190 	thread_policy_update_complete_unlocked(thread, &pend_token);
191 
192 	return KERN_SUCCESS;
193 }
194 
195 
196 boolean_t
thread_is_static_param(thread_t thread)197 thread_is_static_param(thread_t thread)
198 {
199 	if (thread->static_param) {
200 		DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
201 		return TRUE;
202 	}
203 	return FALSE;
204 }
205 
206 /*
207  * Relative priorities can range between 0REL and -15REL. These
208  * map to QoS-specific ranges, to create non-overlapping priority
209  * ranges.
210  */
211 static int
thread_qos_scaled_relative_priority(int qos,int qos_relprio)212 thread_qos_scaled_relative_priority(int qos, int qos_relprio)
213 {
214 	int next_lower_qos;
215 
216 	/* Fast path, since no validation or scaling is needed */
217 	if (qos_relprio == 0) {
218 		return 0;
219 	}
220 
221 	switch (qos) {
222 	case THREAD_QOS_USER_INTERACTIVE:
223 		next_lower_qos = THREAD_QOS_USER_INITIATED;
224 		break;
225 	case THREAD_QOS_USER_INITIATED:
226 		next_lower_qos = THREAD_QOS_LEGACY;
227 		break;
228 	case THREAD_QOS_LEGACY:
229 		next_lower_qos = THREAD_QOS_UTILITY;
230 		break;
231 	case THREAD_QOS_UTILITY:
232 		next_lower_qos = THREAD_QOS_BACKGROUND;
233 		break;
234 	case THREAD_QOS_MAINTENANCE:
235 	case THREAD_QOS_BACKGROUND:
236 		next_lower_qos = 0;
237 		break;
238 	default:
239 		panic("Unrecognized QoS %d", qos);
240 		return 0;
241 	}
242 
243 	int prio_range_max = thread_qos_policy_params.qos_pri[qos];
244 	int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0;
245 
246 	/*
247 	 * We now have the valid range that the scaled relative priority can map to. Note
248 	 * that the lower bound is exclusive, but the upper bound is inclusive. If the
249 	 * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
250 	 * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
251 	 * remainder.
252 	 */
253 	int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4);
254 
255 	return scaled_relprio;
256 }
257 
258 /*
259  * flag set by -qos-policy-allow boot-arg to allow
260  * testing thread qos policy from userspace
261  */
262 static TUNABLE(bool, allow_qos_policy_set, "-qos-policy-allow", false);
263 
/*
 * MIG entry point for setting a thread scheduling policy.
 *
 * Unless the -qos-policy-allow boot-arg is set, this path refuses to
 * operate on static-param (workq) threads and rejects the QoS flavor
 * outright; QoS is normally managed via pthread/workq interfaces.
 *
 * Applying a non-QoS policy implicitly removes any existing QoS tier;
 * if the new policy fails to apply, the previous QoS is restored.
 */
kern_return_t
thread_policy_set(
	thread_t                                thread,
	thread_policy_flavor_t  flavor,
	thread_policy_t                 policy_info,
	mach_msg_type_number_t  count)
{
	thread_qos_policy_data_t req_qos;
	kern_return_t kr;

	req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;

	if (thread == THREAD_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (!allow_qos_policy_set) {
		if (thread_is_static_param(thread)) {
			return KERN_POLICY_STATIC;
		}

		if (flavor == THREAD_QOS_POLICY) {
			return KERN_INVALID_ARGUMENT;
		}

		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			if (count < THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT) {
				return KERN_INVALID_ARGUMENT;
			}
			/* Userspace may only request the base realtime priority. */
			thread_time_constraint_with_priority_policy_t info = (thread_time_constraint_with_priority_policy_t)policy_info;
			if (info->priority != BASEPRI_RTQUEUES) {
				return KERN_INVALID_ARGUMENT;
			}
		}
	}

	if (flavor == THREAD_TIME_CONSTRAINT_POLICY || flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
		thread_work_interval_flags_t th_wi_flags = os_atomic_load(
			&thread->th_work_interval_flags, relaxed);
		if ((th_wi_flags & TH_WORK_INTERVAL_FLAGS_HAS_WORKLOAD_ID) &&
		    !(th_wi_flags & TH_WORK_INTERVAL_FLAGS_RT_ALLOWED)) {
			/* Fail requests to become realtime for threads having joined workintervals
			 * with workload ID that don't have the rt-allowed flag. */
			return KERN_INVALID_POLICY;
		}
	}

	/* Threads without static_param set reset their QoS when other policies are applied. */
	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		/* Store the existing tier, if we fail this call it is used to reset back. */
		req_qos.qos_tier = thread->requested_policy.thrp_qos;
		req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;

		kr = thread_remove_qos_policy(thread);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
	}

	kr = thread_policy_set_internal(thread, flavor, policy_info, count);

	if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
		if (kr != KERN_SUCCESS) {
			/* Reset back to our original tier as the set failed. */
			(void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
		}
	}

	return kr;
}
334 
/*
 * The WITH_PRIORITY flavor must be layout-compatible with the plain
 * time-constraint flavor for the common leading fields, because
 * thread_policy_set_internal() handles both through one pointer type.
 */
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, period) == offsetof(thread_time_constraint_policy_data_t, period));
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, computation) == offsetof(thread_time_constraint_policy_data_t, computation));
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, constraint) == offsetof(thread_time_constraint_policy_data_t, constraint));
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, preemptible) == offsetof(thread_time_constraint_policy_data_t, preemptible));
339 
/*
 * Apply a thread policy flavor, bypassing the userspace restrictions
 * enforced by thread_policy_set(). Used by in-kernel callers as well.
 *
 * Takes the thread mutex; returns KERN_TERMINATED for inactive threads.
 * Deferred side effects are flushed via
 * thread_policy_update_complete_unlocked() after dropping the mutex.
 */
kern_return_t
thread_policy_set_internal(
	thread_t                     thread,
	thread_policy_flavor_t       flavor,
	thread_policy_t              policy_info,
	mach_msg_type_number_t       count)
{
	kern_return_t result = KERN_SUCCESS;
	struct task_pend_token pend_token = {};

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return KERN_TERMINATED;
	}

	switch (flavor) {
	case THREAD_EXTENDED_POLICY:
	{
		/* Absent explicit data, default to timeshare. */
		boolean_t timeshare = TRUE;

		if (count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t info;

			info = (thread_extended_policy_t)policy_info;
			timeshare = info->timeshare;
		}

		sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;

		spl_t s = splsched();
		thread_lock(thread);

		thread_set_user_sched_mode_and_recompute_pri(thread, mode);

		thread_unlock(thread);
		splx(s);

		/*
		 * The thread may be demoted with RT_DISALLOWED but has just
		 * changed its sched mode to TIMESHARE or FIXED. Make sure to
		 * undemote the thread so the new sched mode takes effect.
		 */
		thread_rt_evaluate(thread);

		pend_token.tpt_update_thread_sfi = 1;

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
	{
		/* Layout-compatible for common fields; see static_asserts above. */
		thread_time_constraint_with_priority_policy_t info;

		mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
		    THREAD_TIME_CONSTRAINT_POLICY_COUNT :
		    THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);

		if (count < min_count) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_with_priority_policy_t)policy_info;


		/* Computation must fit within the constraint and the RT quantum bounds. */
		if (info->constraint < info->computation ||
		    info->computation > max_rt_quantum ||
		    info->computation < min_rt_quantum) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Raise too-small computations to half the constraint, capped at max_rt_quantum. */
		if (info->computation < (info->constraint / 2)) {
			info->computation = (info->constraint / 2);
			if (info->computation > max_rt_quantum) {
				info->computation = max_rt_quantum;
			}
		}

		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			if ((info->priority < BASEPRI_RTQUEUES) || (info->priority > MAXPRI)) {
				result = KERN_INVALID_ARGUMENT;
				break;
			}
		}

		spl_t s = splsched();
		thread_lock(thread);

		thread->realtime.period          = info->period;
		thread->realtime.computation     = info->computation;
		thread->realtime.constraint      = info->constraint;
		thread->realtime.preemptible     = info->preemptible;

		/*
		 * If the thread has a work interval driven policy, the priority
		 * offset has been set by the work interval.
		 */
		if (!thread->requested_policy.thrp_wi_driven) {
			if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
				thread->realtime.priority_offset = (uint8_t)(info->priority - BASEPRI_RTQUEUES);
			} else {
				thread->realtime.priority_offset = 0;
			}
		}

		thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);

		thread_unlock(thread);
		splx(s);

		/* Check for (and possibly lift) an RT demotion now that mode changed. */
		thread_rt_evaluate(thread);

		pend_token.tpt_update_thread_sfi = 1;

		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t info;

		if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}
		info = (thread_precedence_policy_t)policy_info;

		spl_t s = splsched();
		thread_lock(thread);

		/* Importance feeds directly into base priority computation. */
		thread->importance = info->importance;

		thread_recompute_priority(thread);

		thread_unlock(thread);
		splx(s);

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		extern boolean_t affinity_sets_enabled;
		thread_affinity_policy_t info;

		if (!affinity_sets_enabled) {
			result = KERN_INVALID_POLICY;
			break;
		}

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t) policy_info;
		/*
		 * Unlock the thread mutex here and
		 * return directly after calling thread_affinity_set().
		 * This is necessary for correct lock ordering because
		 * thread_affinity_set() takes the task lock.
		 */
		thread_mtx_unlock(thread);
		return thread_affinity_set(thread, info->affinity_tag);
	}

#if !defined(XNU_TARGET_OS_OSX)
	case THREAD_BACKGROUND_POLICY:
	{
		thread_background_policy_t info;

		if (count < THREAD_BACKGROUND_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Only a task's own threads may be backgrounded this way. */
		if (get_threadtask(thread) != current_task()) {
			result = KERN_PROTECTION_FAILURE;
			break;
		}

		info = (thread_background_policy_t) policy_info;

		int enable;

		if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG) {
			enable = TASK_POLICY_ENABLE;
		} else {
			enable = TASK_POLICY_DISABLE;
		}

		/* Self-applied DARWIN_BG is "internal"; applied by others, "external". */
		int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;

		proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token);

		break;
	}
#endif /* !defined(XNU_TARGET_OS_OSX) */

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		thread_throughput_qos_t tqos;

		if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS) {
			break;
		}

		tqos = qos_extract(info->thread_throughput_qos_tier);

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);

		break;
	}

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		thread_latency_qos_t lqos;

		if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS) {
			break;
		}

		lqos = qos_extract(info->thread_latency_qos_tier);

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);

		break;
	}

	case THREAD_QOS_POLICY:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* tier_importance is a relative priority in [THREAD_QOS_MIN_TIER_IMPORTANCE, 0]. */
		if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Stored as a positive relpriority; negated back on the way out. */
		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
		    info->qos_tier, -info->tier_importance, &pend_token);

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	return result;
}
632 
633 /*
634  * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
635  * Both result in FIXED mode scheduling.
636  */
637 static sched_mode_t
convert_policy_to_sched_mode(integer_t policy)638 convert_policy_to_sched_mode(integer_t policy)
639 {
640 	switch (policy) {
641 	case POLICY_TIMESHARE:
642 		return TH_MODE_TIMESHARE;
643 	case POLICY_RR:
644 	case POLICY_FIFO:
645 		return TH_MODE_FIXED;
646 	default:
647 		panic("unexpected sched policy: %d", policy);
648 		return TH_MODE_NONE;
649 	}
650 }
651 
652 /*
653  * Called either with the thread mutex locked
654  * or from the pthread kext in a 'safe place'.
655  */
/*
 * Set the thread's user sched mode and an absolute priority, translating
 * the absolute value back into an importance relative to the task.
 *
 * Called either with the thread mutex locked
 * or from the pthread kext in a 'safe place'.
 *
 * Returns KERN_FAILURE if the thread is (or was, via saved_mode) realtime:
 * this path is not allowed to take a thread out of realtime.
 */
static kern_return_t
thread_set_mode_and_absolute_pri_internal(thread_t              thread,
    sched_mode_t          mode,
    integer_t             priority,
    task_pend_token_t     pend_token)
{
	kern_return_t kr = KERN_SUCCESS;

	spl_t s = splsched();
	thread_lock(thread);

	/* This path isn't allowed to change a thread out of realtime. */
	if ((thread->sched_mode == TH_MODE_REALTIME) ||
	    (thread->saved_mode == TH_MODE_REALTIME)) {
		kr = KERN_FAILURE;
		goto unlock;
	}

	if (thread->policy_reset) {
		/* Policy already reset (thread terminating); silently succeed. */
		kr = KERN_SUCCESS;
		goto unlock;
	}

	/* Snapshot old values to decide which deferred updates to request. */
	sched_mode_t old_mode = thread->sched_mode;
	integer_t old_base_pri = thread->base_pri;
	integer_t old_sched_pri = thread->sched_pri;

	/*
	 * Reverse engineer and apply the correct importance value
	 * from the requested absolute priority value.
	 *
	 * TODO: Store the absolute priority value instead
	 */

	if (priority >= thread->max_priority) {
		priority = thread->max_priority - thread->task_priority;
	} else if (priority >= MINPRI_KERNEL) {
		priority -=  MINPRI_KERNEL;
	} else if (priority >= MINPRI_RESERVED) {
		priority -=  MINPRI_RESERVED;
	} else {
		priority -= BASEPRI_DEFAULT;
	}

	priority += thread->task_priority;

	/* Clamp into [MINPRI, max_priority]. */
	if (priority > thread->max_priority) {
		priority = thread->max_priority;
	} else if (priority < MINPRI) {
		priority = MINPRI;
	}

	thread->importance = priority - thread->task_priority;

	thread_set_user_sched_mode_and_recompute_pri(thread, mode);

	if (mode != old_mode) {
		/* Mode change can change the SFI class; queue a re-evaluation. */
		pend_token->tpt_update_thread_sfi = 1;
	}

	if (thread->base_pri != old_base_pri ||
	    thread->sched_pri != old_sched_pri) {
		/* Priority moved; turnstile inheritance may need updating. */
		pend_token->tpt_update_turnstile = 1;
	}

unlock:
	thread_unlock(thread);
	splx(s);

	return kr;
}
727 
728 void
thread_freeze_base_pri(thread_t thread)729 thread_freeze_base_pri(thread_t thread)
730 {
731 	assert(thread == current_thread());
732 
733 	spl_t s = splsched();
734 	thread_lock(thread);
735 
736 	assert((thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN) == 0);
737 	thread->sched_flags |= TH_SFLAG_BASE_PRI_FROZEN;
738 
739 	thread_unlock(thread);
740 	splx(s);
741 }
742 
743 bool
thread_unfreeze_base_pri(thread_t thread)744 thread_unfreeze_base_pri(thread_t thread)
745 {
746 	assert(thread == current_thread());
747 	integer_t base_pri;
748 	ast_t ast = 0;
749 
750 	spl_t s = splsched();
751 	thread_lock(thread);
752 
753 	assert(thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN);
754 	thread->sched_flags &= ~TH_SFLAG_BASE_PRI_FROZEN;
755 
756 	base_pri = thread->req_base_pri;
757 	if (base_pri != thread->base_pri) {
758 		/*
759 		 * This function returns "true" if the base pri change
760 		 * is the most likely cause for the preemption.
761 		 */
762 		sched_set_thread_base_priority(thread, base_pri);
763 		ast = ast_peek(AST_PREEMPT);
764 	}
765 
766 	thread_unlock(thread);
767 	splx(s);
768 
769 	return ast != 0;
770 }
771 
772 uint8_t
thread_workq_pri_for_qos(thread_qos_t qos)773 thread_workq_pri_for_qos(thread_qos_t qos)
774 {
775 	assert(qos < THREAD_QOS_LAST);
776 	return (uint8_t)thread_qos_policy_params.qos_pri[qos];
777 }
778 
779 thread_qos_t
thread_workq_qos_for_pri(int priority)780 thread_workq_qos_for_pri(int priority)
781 {
782 	thread_qos_t qos;
783 	if (priority > thread_qos_policy_params.qos_pri[THREAD_QOS_USER_INTERACTIVE]) {
784 		// indicate that workq should map >UI threads to workq's
785 		// internal notation for above-UI work.
786 		return THREAD_QOS_UNSPECIFIED;
787 	}
788 	for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
789 		// map a given priority up to the next nearest qos band.
790 		if (thread_qos_policy_params.qos_pri[qos - 1] < priority) {
791 			return qos;
792 		}
793 	}
794 	return THREAD_QOS_MAINTENANCE;
795 }
796 
797 /*
798  * private interface for pthread workqueues
799  *
800  * Set scheduling policy & absolute priority for thread
801  * May be called with spinlocks held
802  * Thread mutex lock is not held
803  */
804 void
thread_reset_workq_qos(thread_t thread,uint32_t qos)805 thread_reset_workq_qos(thread_t thread, uint32_t qos)
806 {
807 	struct task_pend_token pend_token = {};
808 
809 	assert(qos < THREAD_QOS_LAST);
810 
811 	spl_t s = splsched();
812 	thread_lock(thread);
813 
814 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
815 	    TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
816 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
817 	    TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0,
818 	    &pend_token);
819 
820 	assert(pend_token.tpt_update_sockets == 0);
821 
822 	thread_unlock(thread);
823 	splx(s);
824 
825 	thread_policy_update_complete_unlocked(thread, &pend_token);
826 }
827 
828 /*
829  * private interface for pthread workqueues
830  *
831  * Set scheduling policy & absolute priority for thread
832  * May be called with spinlocks held
833  * Thread mutex lock is held
834  */
835 void
thread_set_workq_override(thread_t thread,uint32_t qos)836 thread_set_workq_override(thread_t thread, uint32_t qos)
837 {
838 	struct task_pend_token pend_token = {};
839 
840 	assert(qos < THREAD_QOS_LAST);
841 
842 	spl_t s = splsched();
843 	thread_lock(thread);
844 
845 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
846 	    TASK_POLICY_QOS_WORKQ_OVERRIDE, qos, 0, &pend_token);
847 
848 	assert(pend_token.tpt_update_sockets == 0);
849 
850 	thread_unlock(thread);
851 	splx(s);
852 
853 	thread_policy_update_complete_unlocked(thread, &pend_token);
854 }
855 
856 /*
857  * private interface for pthread workqueues
858  *
859  * Set scheduling policy & absolute priority for thread
860  * May be called with spinlocks held
861  * Thread mutex lock is not held
862  */
863 void
thread_set_workq_pri(thread_t thread,thread_qos_t qos,integer_t priority,integer_t policy)864 thread_set_workq_pri(thread_t  thread,
865     thread_qos_t qos,
866     integer_t priority,
867     integer_t policy)
868 {
869 	struct task_pend_token pend_token = {};
870 	sched_mode_t mode = convert_policy_to_sched_mode(policy);
871 
872 	assert(qos < THREAD_QOS_LAST);
873 	assert(thread->static_param);
874 
875 	if (!thread->static_param || !thread->active) {
876 		return;
877 	}
878 
879 	spl_t s = splsched();
880 	thread_lock(thread);
881 
882 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
883 	    TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
884 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
885 	    TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED,
886 	    0, &pend_token);
887 
888 	thread_unlock(thread);
889 	splx(s);
890 
891 	/* Concern: this doesn't hold the mutex... */
892 
893 	__assert_only kern_return_t kr;
894 	kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority,
895 	    &pend_token);
896 	assert(kr == KERN_SUCCESS);
897 
898 	assert(pend_token.tpt_update_sockets == 0);
899 
900 	thread_policy_update_complete_unlocked(thread, &pend_token);
901 }
902 
903 /*
904  * thread_set_mode_and_absolute_pri:
905  *
906  * Set scheduling policy & absolute priority for thread, for deprecated
907  * thread_set_policy and thread_policy interfaces.
908  *
909  * Called with nothing locked.
910  */
911 kern_return_t
thread_set_mode_and_absolute_pri(thread_t thread,integer_t policy,integer_t priority)912 thread_set_mode_and_absolute_pri(thread_t   thread,
913     integer_t  policy,
914     integer_t  priority)
915 {
916 	kern_return_t kr = KERN_SUCCESS;
917 	struct task_pend_token pend_token = {};
918 
919 	sched_mode_t mode = convert_policy_to_sched_mode(policy);
920 
921 	thread_mtx_lock(thread);
922 
923 	if (!thread->active) {
924 		kr = KERN_TERMINATED;
925 		goto unlock;
926 	}
927 
928 	if (thread_is_static_param(thread)) {
929 		kr = KERN_POLICY_STATIC;
930 		goto unlock;
931 	}
932 
933 	/* Setting legacy policies on threads kills the current QoS */
934 	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
935 		thread_remove_qos_policy_locked(thread, &pend_token);
936 	}
937 
938 	kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);
939 
940 unlock:
941 	thread_mtx_unlock(thread);
942 
943 	thread_policy_update_complete_unlocked(thread, &pend_token);
944 
945 	return kr;
946 }
947 
948 /*
949  * Set the thread's requested mode and recompute priority
950  * Called with thread mutex and thread locked
951  *
952  * TODO: Mitigate potential problems caused by moving thread to end of runq
953  * whenever its priority is recomputed
954  *      Only remove when it actually changes? Attempt to re-insert at appropriate location?
955  */
956 static void
thread_set_user_sched_mode_and_recompute_pri(thread_t thread,sched_mode_t mode)957 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
958 {
959 	if (thread->policy_reset) {
960 		return;
961 	}
962 
963 	boolean_t removed = thread_run_queue_remove(thread);
964 
965 	sched_set_thread_mode_user(thread, mode);
966 
967 	thread_recompute_priority(thread);
968 
969 	if (removed) {
970 		thread_run_queue_reinsert(thread, SCHED_TAILQ);
971 	}
972 }
973 
/*
 * Roll this thread's CPU time accumulated since the last harvest into the
 * owning task's per-QoS counters, once for the effective QoS bucket and once
 * for the requested QoS bucket.
 *
 * Called at splsched with thread lock locked.
 */
static void
thread_update_qos_cpu_time_locked(thread_t thread)
{
	task_t task = get_threadtask(thread);
	uint64_t timer_sum, timer_delta;

	/*
	 * This is only as accurate the thread's last context switch or user/kernel
	 * transition (unless precise user/kernel time is disabled).
	 *
	 * TODO: Consider running an update operation here to update it first.
	 *       Maybe doable with interrupts disabled from current thread.
	 *       If the thread is on a different core, may not be easy to get right.
	 */

	/* Delta since the last time this thread's time was rolled into the task */
	timer_sum = recount_thread_time_mach(thread);
	timer_delta = timer_sum - thread->vtimer_qos_save;

	/* Remember the high-water mark so the next harvest only adds new time */
	thread->vtimer_qos_save = timer_sum;

	uint64_t* task_counter = NULL;

	/* Update the task-level effective and requested qos stats atomically, because we don't have the task lock. */
	switch (thread->effective_policy.thep_qos) {
	case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default; break;
	case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance; break;
	case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background; break;
	case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility; break;
	case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy; break;
	case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated; break;
	case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break;
	default:
		panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
	}

	OSAddAtomic64(timer_delta, task_counter);

	/* Update the task-level qos stats atomically, because we don't have the task lock. */
	switch (thread->requested_policy.thrp_qos) {
	case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default; break;
	case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance; break;
	case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background; break;
	case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility; break;
	case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy; break;
	case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated; break;
	case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break;
	default:
		panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos);
	}

	OSAddAtomic64(timer_delta, task_counter);
}
1027 
1028 /*
1029  * called with no thread locks held
1030  * may hold task lock
1031  */
1032 void
thread_update_qos_cpu_time(thread_t thread)1033 thread_update_qos_cpu_time(thread_t thread)
1034 {
1035 	thread_mtx_lock(thread);
1036 
1037 	spl_t s = splsched();
1038 	thread_lock(thread);
1039 
1040 	thread_update_qos_cpu_time_locked(thread);
1041 
1042 	thread_unlock(thread);
1043 	splx(s);
1044 
1045 	thread_mtx_unlock(thread);
1046 }
1047 
1048 /*
1049  * Calculate base priority from thread attributes, and set it on the thread
1050  *
1051  * Called with thread_lock and thread mutex held.
1052  */
1053 void
thread_recompute_priority(thread_t thread)1054 thread_recompute_priority(
1055 	thread_t                thread)
1056 {
1057 	integer_t               priority;
1058 	integer_t               adj_priority;
1059 	bool                    wi_priority = false;
1060 
1061 	if (thread->policy_reset) {
1062 		return;
1063 	}
1064 
1065 	if (thread->sched_mode == TH_MODE_REALTIME) {
1066 		uint8_t i = thread->realtime.priority_offset;
1067 		assert((i >= 0) && (i < NRTQS));
1068 		priority = BASEPRI_RTQUEUES + i;
1069 
1070 		sched_set_thread_base_priority(thread, priority);
1071 		if (thread->realtime.deadline == RT_DEADLINE_NONE) {
1072 			/* Make sure the thread has a valid deadline */
1073 			uint64_t ctime = mach_absolute_time();
1074 			thread->realtime.deadline = thread->realtime.constraint + ctime;
1075 			KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SET_RT_DEADLINE) | DBG_FUNC_NONE,
1076 			    (uintptr_t)thread_tid(thread), thread->realtime.deadline, thread->realtime.computation, 1);
1077 		}
1078 		return;
1079 
1080 		/*
1081 		 * A thread may have joined a RT work interval but then never
1082 		 * changed its sched mode or have been demoted. RT work
1083 		 * intervals will have RT priorities - ignore the priority if
1084 		 * the thread isn't RT.
1085 		 */
1086 	} else if (thread->effective_policy.thep_wi_driven &&
1087 	    work_interval_get_priority(thread) < BASEPRI_RTQUEUES) {
1088 		priority = work_interval_get_priority(thread);
1089 		wi_priority = true;
1090 	} else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
1091 		int qos = thread->effective_policy.thep_qos;
1092 		int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
1093 		int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
1094 		int qos_scaled_relprio;
1095 
1096 		assert(qos >= 0 && qos < THREAD_QOS_LAST);
1097 		assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);
1098 
1099 		priority = thread_qos_policy_params.qos_pri[qos];
1100 		qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);
1101 
1102 		if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) {
1103 			/* Bump priority 46 to 47 when in a frontmost app */
1104 			qos_scaled_relprio += 1;
1105 		}
1106 
1107 		/* TODO: factor in renice priority here? */
1108 
1109 		priority += qos_scaled_relprio;
1110 	} else {
1111 		if (thread->importance > MAXPRI) {
1112 			priority = MAXPRI;
1113 		} else if (thread->importance < -MAXPRI) {
1114 			priority = -MAXPRI;
1115 		} else {
1116 			priority = thread->importance;
1117 		}
1118 
1119 		priority += thread->task_priority;
1120 	}
1121 
1122 	/* Boost the priority of threads which are RT demoted. */
1123 	if (sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) {
1124 		priority = MAX(priority, MAXPRI_USER);
1125 	}
1126 
1127 	priority = MAX(priority, thread->user_promotion_basepri);
1128 
1129 	/*
1130 	 * Clamp priority back into the allowed range for this task.
1131 	 *  The initial priority value could be out of this range due to:
1132 	 *      Task clamped to BG or Utility (max-pri is 4, or 20)
1133 	 *      Task is user task (max-pri is 63)
1134 	 *      Task is kernel task (max-pri is 95)
1135 	 * Note that thread->importance is user-settable to any integer
1136 	 * via THREAD_PRECEDENCE_POLICY.
1137 	 */
1138 	adj_priority = priority;
1139 	adj_priority = MIN(adj_priority, thread->max_priority);
1140 	adj_priority = MAX(adj_priority, MINPRI);
1141 
1142 	/* Allow workload driven priorities to exceed max_priority. */
1143 	if (wi_priority) {
1144 		adj_priority = MAX(adj_priority, priority);
1145 	}
1146 
1147 	/* Allow priority to exceed max_priority for promotions. */
1148 	if (thread->effective_policy.thep_promote_above_task) {
1149 		adj_priority = MAX(adj_priority, thread->user_promotion_basepri);
1150 	}
1151 	priority = adj_priority;
1152 	assert3u(priority, <=, MAXPRI);
1153 
1154 	if (thread->saved_mode == TH_MODE_REALTIME &&
1155 	    sched_thread_mode_has_demotion(thread, TH_SFLAG_FAILSAFE)) {
1156 		priority = DEPRESSPRI;
1157 	}
1158 
1159 	if (thread->effective_policy.thep_terminated == TRUE) {
1160 		/*
1161 		 * We temporarily want to override the expected priority to
1162 		 * ensure that the thread exits in a timely manner.
1163 		 * Note that this is allowed to exceed thread->max_priority
1164 		 * so that the thread is no longer clamped to background
1165 		 * during the final exit phase.
1166 		 */
1167 		if (priority < thread->task_priority) {
1168 			priority = thread->task_priority;
1169 		}
1170 		if (priority < BASEPRI_DEFAULT) {
1171 			priority = BASEPRI_DEFAULT;
1172 		}
1173 	}
1174 
1175 #if !defined(XNU_TARGET_OS_OSX)
1176 	/* No one can have a base priority less than MAXPRI_THROTTLE */
1177 	if (priority < MAXPRI_THROTTLE) {
1178 		priority = MAXPRI_THROTTLE;
1179 	}
1180 #endif /* !defined(XNU_TARGET_OS_OSX) */
1181 
1182 	sched_set_thread_base_priority(thread, priority);
1183 }
1184 
/*
 * Push new task-derived priority bounds (task_priority / max_priority)
 * onto a thread and re-run the thread policy engine.
 *
 * Called with the task lock held, but not the thread mutex or spinlock.
 */
void
thread_policy_update_tasklocked(
	thread_t           thread,
	integer_t          priority,
	integer_t          max_priority,
	task_pend_token_t  pend_token)
{
	thread_mtx_lock(thread);

	/* Nothing to do for terminating or reset threads */
	if (!thread->active || thread->policy_reset) {
		thread_mtx_unlock(thread);
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	/* Capture the old ceiling before overwriting, for the demotion check below */
	__unused
	integer_t old_max_priority = thread->max_priority;

	assert(priority >= INT16_MIN && priority <= INT16_MAX);
	thread->task_priority = (int16_t)priority;

	assert(max_priority >= INT16_MIN && max_priority <= INT16_MAX);
	thread->max_priority = (int16_t)max_priority;

	/*
	 * When backgrounding a thread, realtime and fixed priority threads
	 * should be demoted to timeshare background threads.
	 *
	 * TODO: Do this inside the thread policy update routine in order to avoid double
	 * remove/reinsert for a runnable thread
	 */
	if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
		sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED);
	} else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
		sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
	}

	thread_policy_update_spinlocked(thread, true, pend_token);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);
}
1232 
1233 /*
1234  * Reset thread to default state in preparation for termination
1235  * Called with thread mutex locked
1236  *
1237  * Always called on current thread, so we don't need a run queue remove
1238  */
1239 void
thread_policy_reset(thread_t thread)1240 thread_policy_reset(
1241 	thread_t                thread)
1242 {
1243 	spl_t           s;
1244 
1245 	assert(thread == current_thread());
1246 
1247 	s = splsched();
1248 	thread_lock(thread);
1249 
1250 	if (thread->sched_flags & TH_SFLAG_FAILSAFE) {
1251 		sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
1252 	}
1253 
1254 	if (thread->sched_flags & TH_SFLAG_THROTTLED) {
1255 		sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1256 	}
1257 
1258 	if (thread->sched_flags & TH_SFLAG_RT_DISALLOWED) {
1259 		sched_thread_mode_undemote(thread, TH_SFLAG_RT_DISALLOWED);
1260 	}
1261 
1262 	/* At this point, the various demotions should be inactive */
1263 	assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
1264 	assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK));
1265 
1266 	/* Reset thread back to task-default basepri and mode  */
1267 	sched_mode_t newmode = SCHED(initial_thread_sched_mode)(get_threadtask(thread));
1268 
1269 	sched_set_thread_mode(thread, newmode);
1270 
1271 	thread->importance = 0;
1272 
1273 	/* Prevent further changes to thread base priority or mode */
1274 	thread->policy_reset = 1;
1275 
1276 	sched_set_thread_base_priority(thread, thread->task_priority);
1277 
1278 	thread_unlock(thread);
1279 	splx(s);
1280 }
1281 
/*
 * Query a thread's scheduling policy for one of the supported flavors.
 *
 * When *get_default is TRUE (or becomes TRUE because the flavor does not
 * apply to this thread's current mode), the default values for the flavor
 * are returned instead of the thread's current settings.
 *
 * Called with nothing locked; takes the thread mutex, and the thread
 * spinlock at splsched where the flavor reads scheduler state.
 */
kern_return_t
thread_policy_get(
	thread_t                                thread,
	thread_policy_flavor_t  flavor,
	thread_policy_t                 policy_info,
	mach_msg_type_number_t  *count,
	boolean_t                               *get_default)
{
	kern_return_t                   result = KERN_SUCCESS;

	if (thread == THREAD_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return KERN_TERMINATED;
	}

	switch (flavor) {
	case THREAD_EXTENDED_POLICY:
	{
		/* Report whether the thread is timeshare or fixed priority */
		boolean_t               timeshare = TRUE;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			if ((thread->sched_mode != TH_MODE_REALTIME) &&
			    (thread->saved_mode != TH_MODE_REALTIME)) {
				/* Demoted threads report their saved (pre-demotion) mode */
				if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
					timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
				} else {
					timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
				}
			} else {
				/* Realtime threads have no extended-policy answer; fall back to default */
				*get_default = TRUE;
			}

			thread_unlock(thread);
			splx(s);
		}

		if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t        info;

			info = (thread_extended_policy_t)policy_info;
			info->timeshare = timeshare;
		}

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
	{
		/* Report realtime constraint parameters (optionally with priority) */
		thread_time_constraint_with_priority_policy_t         info;

		mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
		    THREAD_TIME_CONSTRAINT_POLICY_COUNT :
		    THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);

		if (*count < min_count) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_with_priority_policy_t)policy_info;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			if ((thread->sched_mode == TH_MODE_REALTIME) ||
			    (thread->saved_mode == TH_MODE_REALTIME)) {
				info->period = thread->realtime.period;
				info->computation = thread->realtime.computation;
				info->constraint = thread->realtime.constraint;
				info->preemptible = thread->realtime.preemptible;
				if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
					info->priority = thread->realtime.priority_offset + BASEPRI_RTQUEUES;
				}
			} else {
				/* Non-realtime threads report the defaults below */
				*get_default = TRUE;
			}

			thread_unlock(thread);
			splx(s);
		}

		if (*get_default) {
			info->period = 0;
			info->computation = default_timeshare_computation;
			info->constraint = default_timeshare_constraint;
			info->preemptible = TRUE;
			if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
				info->priority = BASEPRI_RTQUEUES;
			}
		}


		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		/* Report the thread's legacy importance value */
		thread_precedence_policy_t              info;

		if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_precedence_policy_t)policy_info;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			info->importance = thread->importance;

			thread_unlock(thread);
			splx(s);
		} else {
			info->importance = 0;
		}

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		/* Report the thread's affinity tag, where supported */
		thread_affinity_policy_t                info;

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (*count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t)policy_info;

		if (!(*get_default)) {
			info->affinity_tag = thread_affinity_get(thread);
		} else {
			info->affinity_tag = THREAD_AFFINITY_TAG_NULL;
		}

		break;
	}

	case THREAD_POLICY_STATE:
	{
		/* Debugging/introspection dump of the raw policy state */
		thread_policy_state_t           info;

		if (*count < THREAD_POLICY_STATE_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Only root can get this info */
		if (!task_is_privileged(current_task())) {
			result = KERN_PROTECTION_FAILURE;
			break;
		}

		info = (thread_policy_state_t)(void*)policy_info;

		if (!(*get_default)) {
			info->flags = 0;

			spl_t s = splsched();
			thread_lock(thread);

			info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0);

			/* Raw bit images of the requested/effective policy structs */
			info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy);
			info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy);

			info->thps_user_promotions          = 0;
			info->thps_user_promotion_basepri   = thread->user_promotion_basepri;
			info->thps_ipc_overrides            = thread->kevent_overrides;

			proc_get_thread_policy_bitfield(thread, info);

			thread_unlock(thread);
			splx(s);
		} else {
			info->requested = 0;
			info->effective = 0;
			info->pending = 0;
		}

		break;
	}

	case THREAD_REQUESTED_STATE_POLICY:
	{
		/* Report the individual requested-QoS fields */
		if (*count < THREAD_REQUESTED_STATE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		thread_requested_qos_policy_t info = (thread_requested_qos_policy_t) policy_info;
		struct thread_requested_policy *req_policy = &thread->requested_policy;

		info->thrq_base_qos = req_policy->thrp_qos;
		info->thrq_qos_relprio = req_policy->thrp_qos_relprio;
		info->thrq_qos_override = req_policy->thrp_qos_override;
		info->thrq_qos_promote = req_policy->thrp_qos_promote;
		info->thrq_qos_kevent_override = req_policy->thrp_qos_kevent_override;
		info->thrq_qos_workq_override = req_policy->thrp_qos_workq_override;
		info->thrq_qos_wlsvc_override = req_policy->thrp_qos_wlsvc_override;

		break;
	}

	case THREAD_LATENCY_QOS_POLICY:
	{
		/* Report the latency QoS tier */
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		thread_latency_qos_t plqos;

		if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			plqos = 0;
		} else {
			plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL);
		}

		info->thread_latency_qos_tier = qos_latency_policy_package(plqos);
	}
	break;

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		/* Report the throughput QoS tier */
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		thread_throughput_qos_t ptqos;

		if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			ptqos = 0;
		} else {
			ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL);
		}

		info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);
	}
	break;

	case THREAD_QOS_POLICY:
	{
		/* Report the QoS tier and its relative (tier importance) value */
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (*count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (!(*get_default)) {
			int relprio_value = 0;
			info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
			    TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);

			/* relprio is stored inverted; flip the sign for the caller */
			info->tier_importance = -relprio_value;
		} else {
			info->qos_tier = THREAD_QOS_UNSPECIFIED;
			info->tier_importance = 0;
		}

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	return result;
}
1576 
/*
 * Run the thread policy engine for the first time on a newly created
 * thread, bracketed by importance tracepoints. Any pend-token side
 * effects are intentionally discarded at creation time.
 */
void
thread_policy_create(thread_t thread)
{
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);

	/* We pass a pend token but ignore it */
	struct task_pend_token pend_token = {};

	thread_policy_update_internal_spinlocked(thread, true, &pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);
}
1595 
/*
 * Tracing wrapper around the thread policy engine: emits importance
 * start/end tracepoints around thread_policy_update_internal_spinlocked.
 * Same locking requirements as the internal routine.
 */
static void
thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token)
{
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);

	thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);
}
1611 
1612 
1613 
1614 /*
1615  * One thread state update function TO RULE THEM ALL
1616  *
1617  * This function updates the thread effective policy fields
1618  * and pushes the results to the relevant subsystems.
1619  *
1620  * Called with thread spinlock locked, task may be locked, thread mutex may be locked
1621  */
1622 static void
thread_policy_update_internal_spinlocked(thread_t thread,bool recompute_priority,task_pend_token_t pend_token)1623 thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority,
1624     task_pend_token_t pend_token)
1625 {
1626 	/*
1627 	 * Step 1:
1628 	 *  Gather requested policy and effective task state
1629 	 */
1630 
1631 	const struct thread_requested_policy requested = thread->requested_policy;
1632 	const struct task_effective_policy task_effective = get_threadtask(thread)->effective_policy;
1633 
1634 	/*
1635 	 * Step 2:
1636 	 *  Calculate new effective policies from requested policy, task and thread state
1637 	 *  Rules:
1638 	 *      Don't change requested, it won't take effect
1639 	 */
1640 
1641 	struct thread_effective_policy next = {};
1642 
1643 	next.thep_wi_driven = requested.thrp_wi_driven;
1644 
1645 	next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;
1646 
1647 	uint32_t next_qos = requested.thrp_qos;
1648 
1649 	if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
1650 		next_qos = MAX(requested.thrp_qos_override, next_qos);
1651 		next_qos = MAX(requested.thrp_qos_promote, next_qos);
1652 		next_qos = MAX(requested.thrp_qos_kevent_override, next_qos);
1653 		next_qos = MAX(requested.thrp_qos_wlsvc_override, next_qos);
1654 		next_qos = MAX(requested.thrp_qos_workq_override, next_qos);
1655 	}
1656 
1657 	if (task_effective.tep_darwinbg && task_effective.tep_promote_above_task &&
1658 	    requested.thrp_qos_promote > THREAD_QOS_BACKGROUND) {
1659 		/*
1660 		 * This thread is turnstile-boosted higher than the background clamp
1661 		 * by a synchronous waiter, and this clamp allows that to override the
1662 		 * clamp temporarily for this thread only.
1663 		 */
1664 		next.thep_promote_above_task = true;
1665 		next_qos = requested.thrp_qos_promote;
1666 	}
1667 
1668 	next.thep_qos = next_qos;
1669 
1670 	/* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
1671 	if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1672 		if (next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1673 			next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
1674 		} else {
1675 			next.thep_qos = task_effective.tep_qos_clamp;
1676 		}
1677 		next.thep_wi_driven = 0;
1678 	}
1679 
1680 	/*
1681 	 * Extract outbound-promotion QoS before applying task ceiling or BG clamp
1682 	 * This allows QoS promotions to work properly even after the process is unclamped.
1683 	 */
1684 	next.thep_qos_promote = next.thep_qos;
1685 
1686 	/* The ceiling only applies to threads that are in the QoS world */
1687 	/* TODO: is it appropriate for this to limit a turnstile-boosted thread's QoS? */
1688 	if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
1689 	    next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1690 		next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
1691 	}
1692 
1693 	/*
1694 	 * The QoS relative priority is only applicable when the original programmer's
1695 	 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
1696 	 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
1697 	 * since otherwise it would be lower than unclamped threads. Similarly, in the
1698 	 * presence of boosting, the programmer doesn't know what other actors
1699 	 * are boosting the thread.
1700 	 */
1701 	if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
1702 	    (requested.thrp_qos == next.thep_qos) &&
1703 	    (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
1704 		next.thep_qos_relprio = requested.thrp_qos_relprio;
1705 	} else {
1706 		next.thep_qos_relprio = 0;
1707 	}
1708 
1709 	/* Calculate DARWIN_BG */
1710 	bool wants_darwinbg        = false;
1711 	bool wants_all_sockets_bg  = false; /* Do I want my existing sockets to be bg */
1712 
1713 	if (task_effective.tep_darwinbg && !next.thep_promote_above_task) {
1714 		wants_darwinbg = true;
1715 	}
1716 
1717 	/*
1718 	 * If DARWIN_BG has been requested at either level, it's engaged.
1719 	 * darwinbg threads always create bg sockets,
1720 	 * but only some types of darwinbg change the sockets
1721 	 * after they're created
1722 	 */
1723 	if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg) {
1724 		wants_all_sockets_bg = wants_darwinbg = true;
1725 	}
1726 
1727 	if (requested.thrp_pidbind_bg) {
1728 		wants_all_sockets_bg = wants_darwinbg = true;
1729 	}
1730 
1731 	if (next.thep_qos == THREAD_QOS_BACKGROUND ||
1732 	    next.thep_qos == THREAD_QOS_MAINTENANCE) {
1733 		wants_darwinbg = true;
1734 	}
1735 
1736 	/* Calculate side effects of DARWIN_BG */
1737 
1738 	if (wants_darwinbg) {
1739 		next.thep_darwinbg = 1;
1740 		next.thep_wi_driven = 0;
1741 	}
1742 
1743 	if (next.thep_darwinbg || task_effective.tep_new_sockets_bg) {
1744 		next.thep_new_sockets_bg = 1;
1745 	}
1746 
1747 	/* Don't use task_effective.tep_all_sockets_bg here */
1748 	if (wants_all_sockets_bg) {
1749 		next.thep_all_sockets_bg = 1;
1750 	}
1751 
1752 	/* darwinbg implies background QOS (or lower) */
1753 	if (next.thep_darwinbg &&
1754 	    (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
1755 		next.thep_qos = THREAD_QOS_BACKGROUND;
1756 		next.thep_qos_relprio = 0;
1757 	}
1758 
1759 	/* Calculate IO policy */
1760 
1761 	int iopol = THROTTLE_LEVEL_TIER0;
1762 
1763 	/* Factor in the task's IO policy */
1764 	if (next.thep_darwinbg) {
1765 		iopol = MAX(iopol, task_effective.tep_bg_iotier);
1766 	}
1767 
1768 	if (!next.thep_promote_above_task) {
1769 		iopol = MAX(iopol, task_effective.tep_io_tier);
1770 	}
1771 
1772 	/* Look up the associated IO tier value for the QoS class */
1773 	iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);
1774 
1775 	iopol = MAX(iopol, requested.thrp_int_iotier);
1776 	iopol = MAX(iopol, requested.thrp_ext_iotier);
1777 
1778 	/* Apply the kevent iotier override */
1779 	iopol = MIN(iopol, requested.thrp_iotier_kevent_override);
1780 
1781 	next.thep_io_tier = iopol;
1782 
1783 	/*
1784 	 * If a QoS override is causing IO to go into a lower tier, we also set
1785 	 * the passive bit so that a thread doesn't end up stuck in its own throttle
1786 	 * window when the override goes away.
1787 	 */
1788 
1789 	int next_qos_iotier = thread_qos_policy_params.qos_iotier[next.thep_qos];
1790 	int req_qos_iotier = thread_qos_policy_params.qos_iotier[requested.thrp_qos];
1791 	bool qos_io_override_active = (next_qos_iotier < req_qos_iotier);
1792 
1793 	/* Calculate Passive IO policy */
1794 	if (requested.thrp_ext_iopassive ||
1795 	    requested.thrp_int_iopassive ||
1796 	    qos_io_override_active ||
1797 	    task_effective.tep_io_passive) {
1798 		next.thep_io_passive = 1;
1799 	}
1800 
1801 	/* Calculate timer QOS */
1802 	uint32_t latency_qos = requested.thrp_latency_qos;
1803 
1804 	if (!next.thep_promote_above_task) {
1805 		latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
1806 	}
1807 
1808 	latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);
1809 
1810 	next.thep_latency_qos = latency_qos;
1811 
1812 	/* Calculate throughput QOS */
1813 	uint32_t through_qos = requested.thrp_through_qos;
1814 
1815 	if (!next.thep_promote_above_task) {
1816 		through_qos = MAX(through_qos, task_effective.tep_through_qos);
1817 	}
1818 
1819 	through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);
1820 
1821 	next.thep_through_qos = through_qos;
1822 
1823 	if (task_effective.tep_terminated || requested.thrp_terminated) {
1824 		/* Shoot down the throttles that slow down exit or response to SIGTERM */
1825 		next.thep_terminated    = 1;
1826 		next.thep_darwinbg      = 0;
1827 		next.thep_io_tier       = THROTTLE_LEVEL_TIER0;
1828 		next.thep_qos           = THREAD_QOS_UNSPECIFIED;
1829 		next.thep_latency_qos   = LATENCY_QOS_TIER_UNSPECIFIED;
1830 		next.thep_through_qos   = THROUGHPUT_QOS_TIER_UNSPECIFIED;
1831 		next.thep_wi_driven     = 0;
1832 	}
1833 
1834 	/*
1835 	 * Step 3:
1836 	 *  Swap out old policy for new policy
1837 	 */
1838 
1839 	struct thread_effective_policy prev = thread->effective_policy;
1840 
1841 	thread_update_qos_cpu_time_locked(thread);
1842 
1843 	/* This is the point where the new values become visible to other threads */
1844 	thread->effective_policy = next;
1845 
1846 	/*
1847 	 * Step 4:
1848 	 *  Pend updates that can't be done while holding the thread lock
1849 	 */
1850 
1851 	if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg) {
1852 		pend_token->tpt_update_sockets = 1;
1853 	}
1854 
1855 	/* TODO: Doesn't this only need to be done if the throttle went up? */
1856 	if (prev.thep_io_tier != next.thep_io_tier) {
1857 		pend_token->tpt_update_throttle = 1;
1858 	}
1859 
1860 	/*
1861 	 * Check for the attributes that sfi_thread_classify() consults,
1862 	 *  and trigger SFI re-evaluation.
1863 	 */
1864 	if (prev.thep_qos != next.thep_qos ||
1865 	    prev.thep_darwinbg != next.thep_darwinbg) {
1866 		pend_token->tpt_update_thread_sfi = 1;
1867 	}
1868 
1869 	integer_t old_base_pri = thread->base_pri;
1870 
1871 	/* promote-above-task generates its own dedicated tracepoint */
1872 	if (prev.thep_promote_above_task != next.thep_promote_above_task) {
1873 		KDBG_RELEASE(IMPORTANCE_CODE(IMP_THREAD_PROMOTE_ABOVE_TASK, 0) |
1874 		    (next.thep_promote_above_task ? DBG_FUNC_START : DBG_FUNC_END),
1875 		    thread_tid(thread), next.thep_terminated);
1876 	}
1877 
1878 	/*
1879 	 * Step 5:
1880 	 *  Update other subsystems as necessary if something has changed
1881 	 */
1882 
1883 	/* Check for the attributes that thread_recompute_priority() consults */
1884 	if (prev.thep_qos != next.thep_qos ||
1885 	    prev.thep_qos_relprio != next.thep_qos_relprio ||
1886 	    prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent ||
1887 	    prev.thep_promote_above_task != next.thep_promote_above_task ||
1888 	    prev.thep_terminated != next.thep_terminated ||
1889 	    prev.thep_wi_driven != next.thep_wi_driven ||
1890 	    pend_token->tpt_force_recompute_pri == 1 ||
1891 	    recompute_priority) {
1892 		thread_recompute_priority(thread);
1893 	}
1894 
1895 	/*
1896 	 * Check if the thread is waiting on a turnstile and needs priority propagation.
1897 	 */
1898 	if (pend_token->tpt_update_turnstile &&
1899 	    ((old_base_pri == thread->base_pri) ||
1900 	    !thread_get_waiting_turnstile(thread))) {
1901 		/*
1902 		 * Reset update turnstile pend token since either
1903 		 * the thread priority did not change or thread is
1904 		 * not blocked on a turnstile.
1905 		 */
1906 		pend_token->tpt_update_turnstile = 0;
1907 	}
1908 }
1909 
1910 
1911 /*
1912  * Initiate a thread policy state transition on a thread with its TID
1913  * Useful if you cannot guarantee the thread won't get terminated
1914  * Precondition: No locks are held
1915  * Will take task lock - using the non-tid variant is faster
1916  * if you already have a thread ref.
1917  */
1918 void
proc_set_thread_policy_with_tid(task_t task,uint64_t tid,int category,int flavor,int value)1919 proc_set_thread_policy_with_tid(task_t     task,
1920     uint64_t   tid,
1921     int        category,
1922     int        flavor,
1923     int        value)
1924 {
1925 	/* takes task lock, returns ref'ed thread or NULL */
1926 	thread_t thread = task_findtid(task, tid);
1927 
1928 	if (thread == THREAD_NULL) {
1929 		return;
1930 	}
1931 
1932 	proc_set_thread_policy(thread, category, flavor, value);
1933 
1934 	thread_deallocate(thread);
1935 }
1936 
1937 /*
1938  * Initiate a thread policy transition on a thread
1939  * This path supports networking transitions (i.e. darwinbg transitions)
1940  * Precondition: No locks are held
1941  */
1942 void
proc_set_thread_policy(thread_t thread,int category,int flavor,int value)1943 proc_set_thread_policy(thread_t   thread,
1944     int        category,
1945     int        flavor,
1946     int        value)
1947 {
1948 	proc_set_thread_policy_ext(thread, category, flavor, value, 0);
1949 }
1950 
1951 void
proc_set_thread_policy_ext(thread_t thread,int category,int flavor,int value,int value2)1952 proc_set_thread_policy_ext(thread_t   thread,
1953     int        category,
1954     int        flavor,
1955     int        value,
1956     int        value2)
1957 {
1958 	struct task_pend_token pend_token = {};
1959 
1960 	thread_mtx_lock(thread);
1961 
1962 	proc_set_thread_policy_locked(thread, category, flavor, value, value2, &pend_token);
1963 
1964 	thread_mtx_unlock(thread);
1965 
1966 	thread_policy_update_complete_unlocked(thread, &pend_token);
1967 }
1968 
1969 /*
1970  * Do the things that can't be done while holding a thread mutex.
1971  * These are set up to call back into thread policy to get the latest value,
1972  * so they don't have to be synchronized with the update.
1973  * The only required semantic is 'call this sometime after updating effective policy'
1974  *
1975  * Precondition: Thread mutex is not held
1976  *
1977  * This may be called with the task lock held, but in that case it won't be
1978  * called with tpt_update_sockets set.
1979  */
void
thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
{
#ifdef MACH_BSD
	if (pend_token->tpt_update_sockets) {
		/* thep_all_sockets_bg flipped: re-mark this thread's sockets in BSD land */
		proc_apply_task_networkbg(task_pid(get_threadtask(thread)), thread);
	}
#endif /* MACH_BSD */

	if (pend_token->tpt_update_throttle) {
		/* I/O tier changed: re-evaluate throttling, possibly breaking an msleep */
		rethrottle_thread(get_bsdthread_info(thread));
	}

	if (pend_token->tpt_update_thread_sfi) {
		/* QoS or darwinbg changed: re-run SFI classification for this thread */
		sfi_reevaluate(thread);
	}

	if (pend_token->tpt_update_turnstile) {
		/* Base priority changed while blocked: propagate through the turnstile chain */
		turnstile_update_thread_priority_chain(thread);
	}
}
2001 
2002 /*
2003  * Set and update thread policy
2004  * Thread mutex might be held
2005  */
2006 static void
proc_set_thread_policy_locked(thread_t thread,int category,int flavor,int value,int value2,task_pend_token_t pend_token)2007 proc_set_thread_policy_locked(thread_t          thread,
2008     int               category,
2009     int               flavor,
2010     int               value,
2011     int               value2,
2012     task_pend_token_t pend_token)
2013 {
2014 	spl_t s = splsched();
2015 	thread_lock(thread);
2016 
2017 	proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
2018 
2019 	thread_unlock(thread);
2020 	splx(s);
2021 }
2022 
2023 /*
2024  * Set and update thread policy
2025  * Thread spinlock is held
2026  */
static void
proc_set_thread_policy_spinlocked(thread_t          thread,
    int               category,
    int               flavor,
    int               value,
    int               value2,
    task_pend_token_t pend_token)
{
	/* Trace the before-image of the requested policy words */
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
	    thread_tid(thread), threquested_0(thread),
	    threquested_1(thread), value, 0);

	/* Record the new requested value... */
	thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2, pend_token);

	/* ...and recompute the effective policy from it */
	thread_policy_update_spinlocked(thread, false, pend_token);

	/* Trace the after-image, plus which deferred updates got pended */
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
	    thread_tid(thread), threquested_0(thread),
	    threquested_1(thread), tpending(pend_token), 0);
}
2049 
2050 /*
2051  * Set the requested state for a specific flavor to a specific value.
2052  */
static void
thread_set_requested_policy_spinlocked(thread_t     thread,
    int               category,
    int               flavor,
    int               value,
    int               value2,
    task_pend_token_t pend_token)
{
	/* Filled in by proc_iopol_to_tier() for the TASK_POLICY_IOPOL flavor */
	int tier, passive;

	/* Mutate a local copy; the whole word is published as a unit at the end */
	struct thread_requested_policy requested = thread->requested_policy;

	switch (flavor) {
	/* Category: EXTERNAL and INTERNAL, thread and task */

	case TASK_POLICY_DARWIN_BG:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_darwinbg = value;
		} else {
			requested.thrp_int_darwinbg = value;
		}
		/* darwinbg can move base priority, so flag turnstile propagation */
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_IOPOL:
		/* Translate the IOPOL_* namespace into a throttle tier + passive bit */
		proc_iopol_to_tier(value, &tier, &passive);
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iotier  = tier;
			requested.thrp_ext_iopassive = passive;
		} else {
			requested.thrp_int_iotier  = tier;
			requested.thrp_int_iopassive = passive;
		}
		break;

	case TASK_POLICY_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iotier = value;
		} else {
			requested.thrp_int_iotier = value;
		}
		break;

	case TASK_POLICY_PASSIVE_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iopassive = value;
		} else {
			requested.thrp_int_iopassive = value;
		}
		break;

	/* Category: ATTRIBUTE, thread only */

	case TASK_POLICY_PIDBIND_BG:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_pidbind_bg = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_LATENCY_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_latency_qos = value;
		break;

	case TASK_POLICY_THROUGH_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_through_qos = value;
		break;

	case TASK_POLICY_QOS_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_AND_RELPRIO:
		assert(category == TASK_POLICY_ATTRIBUTE);
		/* value is the QoS class, value2 the relative priority within it */
		requested.thrp_qos = value;
		requested.thrp_qos_relprio = value2;
		pend_token->tpt_update_turnstile = 1;
		DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
		break;

	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_workq_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_PROMOTE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_promote = value;
		break;

	case TASK_POLICY_QOS_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_kevent_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_SERVICER_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_wlsvc_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_TERMINATED:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_terminated = value;
		break;

	case TASK_POLICY_IOTIER_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_iotier_kevent_override = value;
		break;

	case TASK_POLICY_WI_DRIVEN:
		assert(category == TASK_POLICY_ATTRIBUTE);
		assert(thread == current_thread());

		/* value selects set/clear; value2 carries the sched mode */
		const bool set_policy = value;
		const sched_mode_t mode = value2;

		requested.thrp_wi_driven = set_policy ? 1 : 0;

		/*
		 * No sched mode change for REALTIME (threads must explicitly
		 * opt-in), however the priority_offset needs to be updated.
		 */
		if (mode == TH_MODE_REALTIME) {
			const int pri = work_interval_get_priority(thread);
			assert3u(pri, >=, BASEPRI_RTQUEUES);
			thread->realtime.priority_offset = set_policy ?
			    (uint8_t)(pri - BASEPRI_RTQUEUES) : 0;
		} else {
			sched_set_thread_mode_user(thread, mode);
			if (set_policy) {
				thread->static_param = true;
			}
		}
		break;

	default:
		panic("unknown task policy: %d %d %d", category, flavor, value);
		break;
	}

	/* Publish the updated word while still holding the thread spinlock */
	thread->requested_policy = requested;
}
2202 
2203 /*
2204  * Gets what you set. Effective values may be different.
2205  * Precondition: No locks are held
2206  */
2207 int
proc_get_thread_policy(thread_t thread,int category,int flavor)2208 proc_get_thread_policy(thread_t   thread,
2209     int        category,
2210     int        flavor)
2211 {
2212 	int value = 0;
2213 	thread_mtx_lock(thread);
2214 	value = proc_get_thread_policy_locked(thread, category, flavor, NULL);
2215 	thread_mtx_unlock(thread);
2216 	return value;
2217 }
2218 
2219 static int
proc_get_thread_policy_locked(thread_t thread,int category,int flavor,int * value2)2220 proc_get_thread_policy_locked(thread_t   thread,
2221     int        category,
2222     int        flavor,
2223     int*       value2)
2224 {
2225 	int value = 0;
2226 
2227 	spl_t s = splsched();
2228 	thread_lock(thread);
2229 
2230 	value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2);
2231 
2232 	thread_unlock(thread);
2233 	splx(s);
2234 
2235 	return value;
2236 }
2237 
2238 /*
2239  * Gets what you set. Effective values may be different.
2240  */
static int
thread_get_requested_policy_spinlocked(thread_t thread,
    int      category,
    int      flavor,
    int*     value2)
{
	int value = 0;

	/* Snapshot of the requested (not effective) policy word */
	struct thread_requested_policy requested = thread->requested_policy;

	switch (flavor) {
	case TASK_POLICY_DARWIN_BG:
		if (category == TASK_POLICY_EXTERNAL) {
			value = requested.thrp_ext_darwinbg;
		} else {
			value = requested.thrp_int_darwinbg;
		}
		break;
	case TASK_POLICY_IOPOL:
		/* Convert tier + passive back into the IOPOL_* namespace */
		if (category == TASK_POLICY_EXTERNAL) {
			value = proc_tier_to_iopol(requested.thrp_ext_iotier,
			    requested.thrp_ext_iopassive);
		} else {
			value = proc_tier_to_iopol(requested.thrp_int_iotier,
			    requested.thrp_int_iopassive);
		}
		break;
	case TASK_POLICY_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			value = requested.thrp_ext_iotier;
		} else {
			value = requested.thrp_int_iotier;
		}
		break;
	case TASK_POLICY_PASSIVE_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			value = requested.thrp_ext_iopassive;
		} else {
			value = requested.thrp_int_iopassive;
		}
		break;
	case TASK_POLICY_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos;
		break;
	case TASK_POLICY_QOS_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_override;
		break;
	case TASK_POLICY_LATENCY_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_latency_qos;
		break;
	case TASK_POLICY_THROUGH_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_through_qos;
		break;
	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_workq_override;
		break;
	case TASK_POLICY_QOS_AND_RELPRIO:
		assert(category == TASK_POLICY_ATTRIBUTE);
		assert(value2 != NULL);
		/* relative priority is returned through the value2 out-param */
		value = requested.thrp_qos;
		*value2 = requested.thrp_qos_relprio;
		break;
	case TASK_POLICY_QOS_PROMOTE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_promote;
		break;
	case TASK_POLICY_QOS_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_kevent_override;
		break;
	case TASK_POLICY_QOS_SERVICER_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_wlsvc_override;
		break;
	case TASK_POLICY_TERMINATED:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_terminated;
		break;
	case TASK_POLICY_IOTIER_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_iotier_kevent_override;
		break;

	case TASK_POLICY_WI_DRIVEN:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_wi_driven;
		break;

	default:
		panic("unknown policy_flavor %d", flavor);
		break;
	}

	return value;
}
2341 
2342 /*
2343  * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
2344  *
2345  * NOTE: This accessor does not take the task or thread lock.
2346  * Notifications of state updates need to be externally synchronized with state queries.
2347  * This routine *MUST* remain interrupt safe, as it is potentially invoked
2348  * within the context of a timer interrupt.
2349  *
2350  * TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
2351  *      Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
2352  *      I don't think that cost is worth not having the right answer.
2353  */
int
proc_get_effective_thread_policy(thread_t thread,
    int      flavor)
{
	int value = 0;

	/* NOTE: lockless reads of effective_policy — must stay interrupt safe */
	switch (flavor) {
	case TASK_POLICY_DARWIN_BG:
		/*
		 * This call is used within the timer layer, as well as
		 * prioritizing requests to the graphics system.
		 * It also informs SFI and originator-bg-state.
		 * Returns 1 for background mode, 0 for normal mode
		 */

		value = thread->effective_policy.thep_darwinbg ? 1 : 0;
		break;
	case TASK_POLICY_IO:
		/*
		 * The I/O system calls here to find out what throttling tier to apply to an operation.
		 * Returns THROTTLE_LEVEL_* values
		 */
		value = thread->effective_policy.thep_io_tier;
		if (thread->iotier_override != THROTTLE_LEVEL_NONE) {
			/* MIN picks the numerically lower (less throttled) tier */
			value = MIN(value, thread->iotier_override);
		}
		break;
	case TASK_POLICY_PASSIVE_IO:
		/*
		 * The I/O system calls here to find out whether an operation should be passive.
		 * (i.e. not cause operations with lower throttle tiers to be throttled)
		 * Returns 1 for passive mode, 0 for normal mode
		 *
		 * If an override is causing IO to go into a lower tier, we also set
		 * the passive bit so that a thread doesn't end up stuck in its own throttle
		 * window when the override goes away.
		 */
		value = thread->effective_policy.thep_io_passive ? 1 : 0;
		if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
		    thread->iotier_override < thread->effective_policy.thep_io_tier) {
			value = 1;
		}
		break;
	case TASK_POLICY_ALL_SOCKETS_BG:
		/*
		 * do_background_socket() calls this to determine whether
		 * it should change the thread's sockets
		 * Returns 1 for background mode, 0 for normal mode
		 * This consults both thread and task so un-DBGing a thread while the task is BG
		 * doesn't get you out of the network throttle.
		 */
		value = (thread->effective_policy.thep_all_sockets_bg ||
		    get_threadtask(thread)->effective_policy.tep_all_sockets_bg) ? 1 : 0;
		break;
	case TASK_POLICY_NEW_SOCKETS_BG:
		/*
		 * socreate() calls this to determine if it should mark a new socket as background
		 * Returns 1 for background mode, 0 for normal mode
		 */
		value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
		break;
	case TASK_POLICY_LATENCY_QOS:
		/*
		 * timer arming calls into here to find out the timer coalescing level
		 * Returns a latency QoS tier (0-6)
		 */
		value = thread->effective_policy.thep_latency_qos;
		break;
	case TASK_POLICY_THROUGH_QOS:
		/*
		 * This value is passed into the urgency callout from the scheduler
		 * to the performance management subsystem.
		 *
		 * Returns a throughput QoS tier (0-6)
		 */
		value = thread->effective_policy.thep_through_qos;
		break;
	case TASK_POLICY_QOS:
		/*
		 * This is communicated to the performance management layer and SFI.
		 *
		 * Returns a QoS policy tier
		 */
		value = thread->effective_policy.thep_qos;
		break;
	default:
		panic("unknown thread policy flavor %d", flavor);
		break;
	}

	return value;
}
2446 
2447 
2448 /*
2449  * (integer_t) casts limit the number of bits we can fit here
2450  * this interface is deprecated and replaced by the _EXT struct ?
2451  */
static void
proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
{
	uint64_t bits = 0;
	struct thread_requested_policy requested = thread->requested_policy;

	/*
	 * Pack the requested-policy fields into the legacy bitfield layout.
	 * The ternaries are defensive only: a zero field contributes no bits
	 * whether or not it is shifted in.
	 */
	bits |= (requested.thrp_int_darwinbg    ? POLICY_REQ_INT_DARWIN_BG  : 0);
	bits |= (requested.thrp_ext_darwinbg    ? POLICY_REQ_EXT_DARWIN_BG  : 0);
	bits |= (requested.thrp_int_iotier      ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
	bits |= (requested.thrp_ext_iotier      ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
	bits |= (requested.thrp_int_iopassive   ? POLICY_REQ_INT_PASSIVE_IO : 0);
	bits |= (requested.thrp_ext_iopassive   ? POLICY_REQ_EXT_PASSIVE_IO : 0);

	bits |= (requested.thrp_qos             ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
	bits |= (requested.thrp_qos_override    ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT)   : 0);

	bits |= (requested.thrp_pidbind_bg      ? POLICY_REQ_PIDBIND_BG     : 0);

	bits |= (requested.thrp_latency_qos     ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
	bits |= (requested.thrp_through_qos     ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);

	/* (integer_t) truncation is inherent to this deprecated interface */
	info->requested = (integer_t) bits;
	bits = 0;

	/* Same packing for the effective-policy fields */
	struct thread_effective_policy effective = thread->effective_policy;

	bits |= (effective.thep_darwinbg        ? POLICY_EFF_DARWIN_BG      : 0);

	bits |= (effective.thep_io_tier         ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
	bits |= (effective.thep_io_passive      ? POLICY_EFF_IO_PASSIVE     : 0);
	bits |= (effective.thep_all_sockets_bg  ? POLICY_EFF_ALL_SOCKETS_BG : 0);
	bits |= (effective.thep_new_sockets_bg  ? POLICY_EFF_NEW_SOCKETS_BG : 0);

	bits |= (effective.thep_qos             ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);

	bits |= (effective.thep_latency_qos     ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
	bits |= (effective.thep_through_qos     ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);

	info->effective = (integer_t)bits;
	bits = 0;

	/* No pending state is reported through this interface */
	info->pending = 0;
}
2495 
2496 /*
2497  * Sneakily trace either the task and thread requested
2498  * or just the thread requested, depending on if we have enough room.
2499  * We do have room on LP64. On LP32, we have to split it between two uintptr_t's.
2500  *
2501  *                                LP32            LP64
 * threquested_0(thread)          thread[0]       thread[0]
 * threquested_1(thread)          thread[1]       task[0]
2504  *
2505  */
2506 
uintptr_t
threquested_0(thread_t thread)
{
	static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");

	/* Reinterpret the 64-bit policy struct as raw words for kdebug trace args */
	uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;

	return raw[0];
}
2516 
2517 uintptr_t
threquested_1(thread_t thread)2518 threquested_1(thread_t thread)
2519 {
2520 #if defined __LP64__
2521 	return *(uintptr_t*)&get_threadtask(thread)->requested_policy;
2522 #else
2523 	uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2524 	return raw[1];
2525 #endif
2526 }
2527 
uintptr_t
theffective_0(thread_t thread)
{
	static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");

	/* Reinterpret the 64-bit policy struct as raw words for kdebug trace args */
	uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
	return raw[0];
}
2536 
2537 uintptr_t
theffective_1(thread_t thread)2538 theffective_1(thread_t thread)
2539 {
2540 #if defined __LP64__
2541 	return *(uintptr_t*)&get_threadtask(thread)->effective_policy;
2542 #else
2543 	uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2544 	return raw[1];
2545 #endif
2546 }
2547 
2548 
2549 /*
2550  * Set an override on the thread which is consulted with a
2551  * higher priority than the task/thread policy. This should
2552  * only be set for temporary grants until the thread
2553  * returns to the userspace boundary
2554  *
2555  * We use atomic operations to swap in the override, with
2556  * the assumption that the thread itself can
2557  * read the override and clear it on return to userspace.
2558  *
2559  * No locking is performed, since it is acceptable to see
2560  * a stale override for one loop through throttle_lowpri_io().
2561  * However a thread reference must be held on the thread.
2562  */
2563 
void
set_thread_iotier_override(thread_t thread, int policy)
{
	int current_override;

	/* Let most aggressive I/O policy win until user boundary */
	do {
		current_override = thread->iotier_override;

		if (current_override != THROTTLE_LEVEL_NONE) {
			/* numerically lower tier is the more aggressive one; keep it */
			policy = MIN(current_override, policy);
		}

		if (current_override == policy) {
			/* no effective change */
			return;
		}
		/* retry if another thread raced us and changed the override */
	} while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));

	/*
	 * Since the thread may be currently throttled,
	 * re-evaluate tiers and potentially break out
	 * of an msleep
	 */
	rethrottle_thread(get_bsdthread_info(thread));
}
2590 
2591 /*
2592  * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
2593  * semaphores, dispatch_sync) may result in priority inversions where a higher priority
2594  * (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower
2595  * priority thread. In these cases, we attempt to propagate the priority token, as long
2596  * as the subsystem informs us of the relationships between the threads. The userspace
2597  * synchronization subsystem should maintain the information of owner->resource and
2598  * resource->waiters itself.
2599  */
2600 
2601 /*
2602  * This helper canonicalizes the resource/resource_type given the current qos_override_mode
2603  * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
2604  * to be handled specially in the future, but for now it's fine to slam
2605  * *resource to USER_ADDR_NULL even if it was previously a wildcard.
2606  */
2607 static void
canonicalize_resource_and_type(user_addr_t * resource,int * resource_type)2608 canonicalize_resource_and_type(user_addr_t *resource, int *resource_type)
2609 {
2610 	if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2611 		/* Map all input resource/type to a single one */
2612 		*resource = USER_ADDR_NULL;
2613 		*resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
2614 	} else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
2615 		/* no transform */
2616 	} else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
2617 		/* Map all mutex overrides to a single one, to avoid memory overhead */
2618 		if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
2619 			*resource = USER_ADDR_NULL;
2620 		}
2621 	}
2622 }
2623 
2624 /* This helper routine finds an existing override if known. Locking should be done by caller */
2625 static struct thread_qos_override *
find_qos_override(thread_t thread,user_addr_t resource,int resource_type)2626 find_qos_override(thread_t thread,
2627     user_addr_t resource,
2628     int resource_type)
2629 {
2630 	struct thread_qos_override *override;
2631 
2632 	override = thread->overrides;
2633 	while (override) {
2634 		if (override->override_resource == resource &&
2635 		    override->override_resource_type == resource_type) {
2636 			return override;
2637 		}
2638 
2639 		override = override->override_next;
2640 	}
2641 
2642 	return NULL;
2643 }
2644 
static void
find_and_decrement_qos_override(thread_t       thread,
    user_addr_t    resource,
    int            resource_type,
    boolean_t      reset,
    struct thread_qos_override **free_override_list)
{
	struct thread_qos_override *override, *override_prev;

	override_prev = NULL;
	override = thread->overrides;
	while (override) {
		/* Capture the successor before we potentially unlink this node */
		struct thread_qos_override *override_next = override->override_next;

		/* Wildcards match every resource and/or every resource type */
		if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
		    (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) {
			if (reset) {
				override->override_contended_resource_count = 0;
			} else {
				override->override_contended_resource_count--;
			}

			if (override->override_contended_resource_count == 0) {
				/* No more contenders: unlink the node from the list */
				if (override_prev == NULL) {
					thread->overrides = override_next;
				} else {
					override_prev->override_next = override_next;
				}

				/* Add to out-param for later zfree */
				override->override_next = *free_override_list;
				*free_override_list = override;
			} else {
				override_prev = override;
			}

			/* A specific resource matches at most one node — done */
			if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
				return;
			}
		} else {
			override_prev = override;
		}

		override = override_next;
	}
}
2691 
2692 /* This helper recalculates the current requested override using the policy selected at boot */
2693 static int
calculate_requested_qos_override(thread_t thread)2694 calculate_requested_qos_override(thread_t thread)
2695 {
2696 	if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2697 		return THREAD_QOS_UNSPECIFIED;
2698 	}
2699 
2700 	/* iterate over all overrides and calculate MAX */
2701 	struct thread_qos_override *override;
2702 	int qos_override = THREAD_QOS_UNSPECIFIED;
2703 
2704 	override = thread->overrides;
2705 	while (override) {
2706 		qos_override = MAX(qos_override, override->override_qos);
2707 		override = override->override_next;
2708 	}
2709 
2710 	return qos_override;
2711 }
2712 
/*
 * Apply a userspace QoS override to a thread on behalf of a contended
 * resource address.  If this is the first override for the resource, a new
 * tracking object is allocated and linked onto the thread's override chain.
 *
 * Returns:
 * - 0 on success
 * - EINVAL if some invalid input was passed
 *   NOTE(review): rc is never set to a non-zero value in this function as
 *   written; EINVAL presumably comes from validation elsewhere — confirm.
 */
static int
proc_thread_qos_add_override_internal(thread_t         thread,
    int              override_qos,
    boolean_t        first_override_for_resource,
    user_addr_t      resource,
    int              resource_type)
{
	struct task_pend_token pend_token = {};
	int rc = 0;

	/* The override chain is protected by the thread mutex. */
	thread_mtx_lock(thread);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
	    thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);

	DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
	    uint64_t, thread->requested_policy.thrp_qos,
	    uint64_t, thread->effective_policy.thep_qos,
	    int, override_qos, boolean_t, first_override_for_resource);

	struct thread_qos_override *override;
	struct thread_qos_override *override_new = NULL;
	int new_qos_override, prev_qos_override;
	int new_effective_qos;

	canonicalize_resource_and_type(&resource, &resource_type);

	override = find_qos_override(thread, resource, resource_type);
	if (first_override_for_resource && !override) {
		/* We need to allocate a new object. Drop the thread lock and
		 * recheck afterwards in case someone else added the override
		 */
		thread_mtx_unlock(thread);
		override_new = zalloc(thread_qos_override_zone);
		thread_mtx_lock(thread);
		override = find_qos_override(thread, resource, resource_type);
	}
	if (first_override_for_resource && override) {
		/* Someone else already allocated while the thread lock was dropped */
		override->override_contended_resource_count++;
	} else if (!override && override_new) {
		/* We won the race (or never dropped the lock): link the fresh
		 * object at the head of the thread's override chain. */
		override = override_new;
		override_new = NULL;
		override->override_next = thread->overrides;
		/* since first_override_for_resource was TRUE */
		override->override_contended_resource_count = 1;
		override->override_resource = resource;
		override->override_resource_type = (int16_t)resource_type;
		override->override_qos = THREAD_QOS_UNSPECIFIED;
		thread->overrides = override;
	}

	if (override) {
		/* Per-resource override values only saturate upward. */
		if (override->override_qos == THREAD_QOS_UNSPECIFIED) {
			override->override_qos = (int16_t)override_qos;
		} else {
			override->override_qos = MAX(override->override_qos, (int16_t)override_qos);
		}
	}

	/* Determine how to combine the various overrides into a single current
	 * requested override
	 */
	new_qos_override = calculate_requested_qos_override(thread);

	prev_qos_override = proc_get_thread_policy_locked(thread,
	    TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);

	if (new_qos_override != prev_qos_override) {
		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_QOS_OVERRIDE,
		    new_qos_override, 0, &pend_token);
	}

	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

	thread_mtx_unlock(thread);

	/* Finish any deferred policy propagation without holding the mutex. */
	thread_policy_update_complete_unlocked(thread, &pend_token);

	if (override_new) {
		/* Lost the allocation race above; return the unused object. */
		zfree(thread_qos_override_zone, override_new);
	}

	DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
	    int, new_qos_override, int, new_effective_qos, int, rc);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
	    new_qos_override, resource, resource_type, 0, 0);

	return rc;
}
2810 
2811 int
proc_thread_qos_add_override(task_t task,thread_t thread,uint64_t tid,int override_qos,boolean_t first_override_for_resource,user_addr_t resource,int resource_type)2812 proc_thread_qos_add_override(task_t           task,
2813     thread_t         thread,
2814     uint64_t         tid,
2815     int              override_qos,
2816     boolean_t        first_override_for_resource,
2817     user_addr_t      resource,
2818     int              resource_type)
2819 {
2820 	boolean_t has_thread_reference = FALSE;
2821 	int rc = 0;
2822 
2823 	if (thread == THREAD_NULL) {
2824 		thread = task_findtid(task, tid);
2825 		/* returns referenced thread */
2826 
2827 		if (thread == THREAD_NULL) {
2828 			KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
2829 			    tid, 0, 0xdead, 0, 0);
2830 			return ESRCH;
2831 		}
2832 		has_thread_reference = TRUE;
2833 	} else {
2834 		assert(get_threadtask(thread) == task);
2835 	}
2836 	rc = proc_thread_qos_add_override_internal(thread, override_qos,
2837 	    first_override_for_resource, resource, resource_type);
2838 	if (has_thread_reference) {
2839 		thread_deallocate(thread);
2840 	}
2841 
2842 	return rc;
2843 }
2844 
/*
 * Decrement (or, when 'reset' is TRUE, tear down) the override(s) this thread
 * holds for the given resource, then recompute and apply the resulting
 * requested override.  Override objects that become unreferenced are unlinked
 * under the mutex and freed only after all locks are dropped.
 */
static void
proc_thread_qos_remove_override_internal(thread_t       thread,
    user_addr_t    resource,
    int            resource_type,
    boolean_t      reset)
{
	struct task_pend_token pend_token = {};

	/* Objects unlinked under the mutex, freed after it is released. */
	struct thread_qos_override *deferred_free_override_list = NULL;
	int new_qos_override, prev_qos_override, new_effective_qos;

	thread_mtx_lock(thread);

	canonicalize_resource_and_type(&resource, &resource_type);

	find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
	    thread_tid(thread), resource, reset, 0, 0);

	DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
	    uint64_t, thread->requested_policy.thrp_qos,
	    uint64_t, thread->effective_policy.thep_qos);

	/* Determine how to combine the various overrides into a single current requested override */
	new_qos_override = calculate_requested_qos_override(thread);

	spl_t s = splsched();
	thread_lock(thread);

	/*
	 * The override chain and therefore the value of the current override is locked with thread mutex,
	 * so we can do a get/set without races.  However, the rest of thread policy is locked under the spinlock.
	 * This means you can't change the current override from a spinlock-only setter.
	 */
	prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);

	if (new_qos_override != prev_qos_override) {
		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token);
	}

	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);

	/* Finish deferred policy propagation with no locks held. */
	thread_policy_update_complete_unlocked(thread, &pend_token);

	/* Now it is safe to free the unlinked override objects. */
	while (deferred_free_override_list) {
		struct thread_qos_override *override_next = deferred_free_override_list->override_next;

		zfree(thread_qos_override_zone, deferred_free_override_list);
		deferred_free_override_list = override_next;
	}

	DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
	    int, new_qos_override, int, new_effective_qos);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
	    thread_tid(thread), 0, 0, 0, 0);
}
2908 
2909 int
proc_thread_qos_remove_override(task_t task,thread_t thread,uint64_t tid,user_addr_t resource,int resource_type)2910 proc_thread_qos_remove_override(task_t      task,
2911     thread_t    thread,
2912     uint64_t    tid,
2913     user_addr_t resource,
2914     int         resource_type)
2915 {
2916 	boolean_t has_thread_reference = FALSE;
2917 
2918 	if (thread == THREAD_NULL) {
2919 		thread = task_findtid(task, tid);
2920 		/* returns referenced thread */
2921 
2922 		if (thread == THREAD_NULL) {
2923 			KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2924 			    tid, 0, 0xdead, 0, 0);
2925 			return ESRCH;
2926 		}
2927 		has_thread_reference = TRUE;
2928 	} else {
2929 		assert(task == get_threadtask(thread));
2930 	}
2931 
2932 	proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE);
2933 
2934 	if (has_thread_reference) {
2935 		thread_deallocate(thread);
2936 	}
2937 
2938 	return 0;
2939 }
2940 
2941 /* Deallocate before thread termination */
2942 void
proc_thread_qos_deallocate(thread_t thread)2943 proc_thread_qos_deallocate(thread_t thread)
2944 {
2945 	/* This thread must have no more IPC overrides. */
2946 	assert(thread->kevent_overrides == 0);
2947 	assert(thread->requested_policy.thrp_qos_kevent_override == THREAD_QOS_UNSPECIFIED);
2948 	assert(thread->requested_policy.thrp_qos_wlsvc_override == THREAD_QOS_UNSPECIFIED);
2949 
2950 	/*
2951 	 * Clear out any lingering override objects.
2952 	 */
2953 	struct thread_qos_override *override;
2954 
2955 	thread_mtx_lock(thread);
2956 	override = thread->overrides;
2957 	thread->overrides = NULL;
2958 	thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2959 	/* We don't need to re-evaluate thread policy here because the thread has already exited */
2960 	thread_mtx_unlock(thread);
2961 
2962 	while (override) {
2963 		struct thread_qos_override *override_next = override->override_next;
2964 
2965 		zfree(thread_qos_override_zone, override);
2966 		override = override_next;
2967 	}
2968 }
2969 
/*
 * Set up the primordial thread's QoS
 *
 * Computes the main thread QoS from the task's policy and applies it
 * (with zero relative priority) under the thread mutex, then completes
 * any deferred policy propagation unlocked.
 */
void
task_set_main_thread_qos(task_t task, thread_t thread)
{
	struct task_pend_token pend_token = {};

	assert(get_threadtask(thread) == task);

	thread_mtx_lock(thread);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
	    thread_tid(thread), threquested_0(thread), threquested_1(thread),
	    thread->requested_policy.thrp_qos, 0);

	thread_qos_t primordial_qos = task_compute_main_thread_qos(task);

	/* relprio argument is 0: main thread starts with no relative priority */
	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
	    primordial_qos, 0, &pend_token);

	thread_mtx_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
	    thread_tid(thread), threquested_0(thread), threquested_1(thread),
	    primordial_qos, 0);
}
3001 
3002 /*
3003  * KPI for pthread kext
3004  *
3005  * Return a good guess at what the initial manager QoS will be
3006  * Dispatch can override this in userspace if it so chooses
3007  */
3008 thread_qos_t
task_get_default_manager_qos(task_t task)3009 task_get_default_manager_qos(task_t task)
3010 {
3011 	thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
3012 
3013 	if (primordial_qos == THREAD_QOS_LEGACY) {
3014 		primordial_qos = THREAD_QOS_USER_INITIATED;
3015 	}
3016 
3017 	return primordial_qos;
3018 }
3019 
/*
 * Check if the kernel promotion on thread has changed
 * and apply it.
 *
 * Returns TRUE if the promotion value changed (and the scheduler priority
 * was recomputed), FALSE otherwise.
 *
 * thread locked on entry and exit
 */
boolean_t
thread_recompute_kernel_promotion_locked(thread_t thread)
{
	boolean_t needs_update = FALSE;
	/* The turnstile inheritor chain is the source of truth for the promotion. */
	uint8_t kern_promotion_schedpri = (uint8_t)thread_get_inheritor_turnstile_sched_priority(thread);

	/*
	 * For now just assert that kern_promotion_schedpri <= MAXPRI_PROMOTE.
	 * TURNSTILE_KERNEL_PROMOTE adds threads on the waitq already capped to MAXPRI_PROMOTE
	 * and propagates the priority through the chain with the same cap, because as of now it does
	 * not differenciate on the kernel primitive.
	 *
	 * If this assumption will change with the adoption of a kernel primitive that does not
	 * cap the when adding/propagating,
	 * then here is the place to put the generic cap for all kernel primitives
	 * (converts the assert to kern_promotion_schedpri = MIN(priority, MAXPRI_PROMOTE))
	 */
	assert(kern_promotion_schedpri <= MAXPRI_PROMOTE);

	if (kern_promotion_schedpri != thread->kern_promotion_schedpri) {
		KDBG(MACHDBG_CODE(
			    DBG_MACH_SCHED, MACH_TURNSTILE_KERNEL_CHANGE) | DBG_FUNC_NONE,
		    thread_tid(thread),
		    kern_promotion_schedpri,
		    thread->kern_promotion_schedpri);

		needs_update = TRUE;
		thread->kern_promotion_schedpri = kern_promotion_schedpri;
		thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
	}

	return needs_update;
}
3059 
/*
 * Check if the user promotion on thread has changed
 * and apply it.
 *
 * Returns TRUE if the thread is waiting on a turnstile and its base
 * priority changed (i.e. the waiting turnstile needs a priority update).
 *
 * thread locked on entry, might drop the thread lock
 * and reacquire it.
 */
boolean_t
thread_recompute_user_promotion_locked(thread_t thread)
{
	boolean_t needs_update = FALSE;
	struct task_pend_token pend_token = {};
	/* User promotions are capped at MAXPRI_USER. */
	uint8_t user_promotion_basepri = MIN((uint8_t)thread_get_inheritor_turnstile_base_priority(thread), MAXPRI_USER);
	int old_base_pri = thread->base_pri;
	thread_qos_t qos_promotion;

	/* Check if user promotion has changed */
	if (thread->user_promotion_basepri == user_promotion_basepri) {
		return needs_update;
	} else {
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    (TURNSTILE_CODE(TURNSTILE_PRIORITY_OPERATIONS, (THREAD_USER_PROMOTION_CHANGE))) | DBG_FUNC_NONE,
		    thread_tid(thread),
		    user_promotion_basepri,
		    thread->user_promotion_basepri,
		    0, 0);
		KDBG(MACHDBG_CODE(
			    DBG_MACH_SCHED, MACH_TURNSTILE_USER_CHANGE) | DBG_FUNC_NONE,
		    thread_tid(thread),
		    user_promotion_basepri,
		    thread->user_promotion_basepri);
	}

	/* Update the user promotion base pri */
	thread->user_promotion_basepri = user_promotion_basepri;
	pend_token.tpt_force_recompute_pri = 1;

	/* Throttled-or-below promotions carry no QoS promotion. */
	if (user_promotion_basepri <= MAXPRI_THROTTLE) {
		qos_promotion = THREAD_QOS_UNSPECIFIED;
	} else {
		qos_promotion = thread_user_promotion_qos_for_pri(user_promotion_basepri);
	}

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_PROMOTE, qos_promotion, 0, &pend_token);

	if (thread_get_waiting_turnstile(thread) &&
	    thread->base_pri != old_base_pri) {
		needs_update = TRUE;
	}

	/*
	 * Drop the thread lock to complete the policy update (which may block),
	 * then reacquire it before returning, per the function contract.
	 */
	thread_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	thread_lock(thread);

	return needs_update;
}
3119 
3120 /*
3121  * Convert the thread user promotion base pri to qos for threads in qos world.
3122  * For priority above UI qos, the qos would be set to UI.
3123  */
3124 thread_qos_t
thread_user_promotion_qos_for_pri(int priority)3125 thread_user_promotion_qos_for_pri(int priority)
3126 {
3127 	thread_qos_t qos;
3128 	for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
3129 		if (thread_qos_policy_params.qos_pri[qos] <= priority) {
3130 			return qos;
3131 		}
3132 	}
3133 	return THREAD_QOS_MAINTENANCE;
3134 }
3135 
/*
 * Set the thread's QoS Kevent override
 * Owned by the Kevent subsystem
 *
 * May be called with spinlocks held, but not spinlocks
 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
 *
 * One 'add' must be balanced by one 'drop'.
 * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
 * Before the thread is deallocated, there must be 0 remaining overrides.
 *
 * 'is_new_override' is TRUE for an 'add' (increments the override count)
 * and FALSE for an 'update' (count unchanged).
 */
static void
thread_kevent_override(thread_t    thread,
    uint32_t    qos_override,
    boolean_t   is_new_override)
{
	struct task_pend_token pend_token = {};
	boolean_t needs_update;

	spl_t s = splsched();
	thread_lock(thread);

	uint32_t old_override = thread->requested_policy.thrp_qos_kevent_override;

	/* Callers must pass a real QoS value, never UNSPECIFIED. */
	assert(qos_override > THREAD_QOS_UNSPECIFIED);
	assert(qos_override < THREAD_QOS_LAST);

	if (is_new_override) {
		if (thread->kevent_overrides++ == 0) {
			/* This add is the first override for this thread */
			assert(old_override == THREAD_QOS_UNSPECIFIED);
		} else {
			/* There are already other overrides in effect for this thread */
			assert(old_override > THREAD_QOS_UNSPECIFIED);
		}
	} else {
		/* There must be at least one override (the previous add call) in effect */
		assert(thread->kevent_overrides > 0);
		assert(old_override > THREAD_QOS_UNSPECIFIED);
	}

	/*
	 * We can't allow lowering if there are several IPC overrides because
	 * the caller can't possibly know the whole truth
	 */
	if (thread->kevent_overrides == 1) {
		needs_update = qos_override != old_override;
	} else {
		needs_update = qos_override > old_override;
	}

	if (needs_update) {
		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_QOS_KEVENT_OVERRIDE,
		    qos_override, 0, &pend_token);
		assert(pend_token.tpt_update_sockets == 0);
	}

	thread_unlock(thread);
	splx(s);

	/* Finish any deferred policy propagation with no locks held. */
	thread_policy_update_complete_unlocked(thread, &pend_token);
}
3199 
3200 void
thread_add_kevent_override(thread_t thread,uint32_t qos_override)3201 thread_add_kevent_override(thread_t thread, uint32_t qos_override)
3202 {
3203 	thread_kevent_override(thread, qos_override, TRUE);
3204 }
3205 
3206 void
thread_update_kevent_override(thread_t thread,uint32_t qos_override)3207 thread_update_kevent_override(thread_t thread, uint32_t qos_override)
3208 {
3209 	thread_kevent_override(thread, qos_override, FALSE);
3210 }
3211 
3212 void
thread_drop_kevent_override(thread_t thread)3213 thread_drop_kevent_override(thread_t thread)
3214 {
3215 	struct task_pend_token pend_token = {};
3216 
3217 	spl_t s = splsched();
3218 	thread_lock(thread);
3219 
3220 	assert(thread->kevent_overrides > 0);
3221 
3222 	if (--thread->kevent_overrides == 0) {
3223 		/*
3224 		 * There are no more overrides for this thread, so we should
3225 		 * clear out the saturated override value
3226 		 */
3227 
3228 		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3229 		    TASK_POLICY_QOS_KEVENT_OVERRIDE, THREAD_QOS_UNSPECIFIED,
3230 		    0, &pend_token);
3231 	}
3232 
3233 	thread_unlock(thread);
3234 	splx(s);
3235 
3236 	thread_policy_update_complete_unlocked(thread, &pend_token);
3237 }
3238 
3239 /*
3240  * Set the thread's QoS Workloop Servicer override
3241  * Owned by the Kevent subsystem
3242  *
3243  * May be called with spinlocks held, but not spinlocks
3244  * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
3245  *
3246  * One 'add' must be balanced by one 'drop'.
3247  * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
3248  * Before the thread is deallocated, there must be 0 remaining overrides.
3249  */
3250 static void
thread_servicer_override(thread_t thread,uint32_t qos_override,boolean_t is_new_override)3251 thread_servicer_override(thread_t    thread,
3252     uint32_t    qos_override,
3253     boolean_t   is_new_override)
3254 {
3255 	struct task_pend_token pend_token = {};
3256 
3257 	spl_t s = splsched();
3258 	thread_lock(thread);
3259 
3260 	if (is_new_override) {
3261 		assert(!thread->requested_policy.thrp_qos_wlsvc_override);
3262 	} else {
3263 		assert(thread->requested_policy.thrp_qos_wlsvc_override);
3264 	}
3265 
3266 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3267 	    TASK_POLICY_QOS_SERVICER_OVERRIDE,
3268 	    qos_override, 0, &pend_token);
3269 
3270 	thread_unlock(thread);
3271 	splx(s);
3272 
3273 	assert(pend_token.tpt_update_sockets == 0);
3274 	thread_policy_update_complete_unlocked(thread, &pend_token);
3275 }
3276 
3277 void
thread_add_servicer_override(thread_t thread,uint32_t qos_override)3278 thread_add_servicer_override(thread_t thread, uint32_t qos_override)
3279 {
3280 	assert(qos_override > THREAD_QOS_UNSPECIFIED);
3281 	assert(qos_override < THREAD_QOS_LAST);
3282 
3283 	thread_servicer_override(thread, qos_override, TRUE);
3284 }
3285 
3286 void
thread_update_servicer_override(thread_t thread,uint32_t qos_override)3287 thread_update_servicer_override(thread_t thread, uint32_t qos_override)
3288 {
3289 	assert(qos_override > THREAD_QOS_UNSPECIFIED);
3290 	assert(qos_override < THREAD_QOS_LAST);
3291 
3292 	thread_servicer_override(thread, qos_override, FALSE);
3293 }
3294 
3295 void
thread_drop_servicer_override(thread_t thread)3296 thread_drop_servicer_override(thread_t thread)
3297 {
3298 	thread_servicer_override(thread, THREAD_QOS_UNSPECIFIED, FALSE);
3299 }
3300 
/*
 * Update the kevent-owned I/O tier override on 'thread', skipping the
 * policy machinery entirely when the requested value is already in place.
 */
void
thread_update_servicer_iotier_override(thread_t thread, uint8_t iotier_override)
{
	struct task_pend_token pend_token = {};
	uint8_t current_iotier;

	/* Check if the update is needed */
	/*
	 * NOTE(review): this read happens before the thread lock is taken, so
	 * a concurrent writer could race the early return — presumably benign
	 * because the kevent subsystem serializes these updates; confirm.
	 */
	current_iotier = (uint8_t)thread_get_requested_policy_spinlocked(thread,
	    TASK_POLICY_ATTRIBUTE, TASK_POLICY_IOTIER_KEVENT_OVERRIDE, NULL);

	if (iotier_override == current_iotier) {
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_IOTIER_KEVENT_OVERRIDE,
	    iotier_override, 0, &pend_token);

	thread_unlock(thread);
	splx(s);

	assert(pend_token.tpt_update_sockets == 0);
	/* Finish any deferred policy propagation with no locks held. */
	thread_policy_update_complete_unlocked(thread, &pend_token);
}
3328 
3329 /* Get current requested qos / relpri, may be called from spinlock context */
3330 thread_qos_t
thread_get_requested_qos(thread_t thread,int * relpri)3331 thread_get_requested_qos(thread_t thread, int *relpri)
3332 {
3333 	int relprio_value = 0;
3334 	thread_qos_t qos;
3335 
3336 	qos = (thread_qos_t)proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
3337 	    TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
3338 	if (relpri) {
3339 		*relpri = -relprio_value;
3340 	}
3341 	return qos;
3342 }
3343 
3344 /*
3345  * This function will promote the thread priority
3346  * since exec could block other threads calling
3347  * proc_find on the proc. This boost must be removed
3348  * via call to thread_clear_exec_promotion.
3349  *
3350  * This should be replaced with a generic 'priority inheriting gate' mechanism (24194397)
3351  */
3352 void
thread_set_exec_promotion(thread_t thread)3353 thread_set_exec_promotion(thread_t thread)
3354 {
3355 	spl_t s = splsched();
3356 	thread_lock(thread);
3357 
3358 	sched_thread_promote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3359 
3360 	thread_unlock(thread);
3361 	splx(s);
3362 }
3363 
3364 /*
3365  * This function will clear the exec thread
3366  * promotion set on the thread by thread_set_exec_promotion.
3367  */
3368 void
thread_clear_exec_promotion(thread_t thread)3369 thread_clear_exec_promotion(thread_t thread)
3370 {
3371 	spl_t s = splsched();
3372 	thread_lock(thread);
3373 
3374 	sched_thread_unpromote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3375 
3376 	thread_unlock(thread);
3377 	splx(s);
3378 }
3379 
#if CONFIG_SCHED_RT_ALLOW

/*
 * Flag set by the "-rt-allow_policy-enable" boot-arg (note the mixed '-'/'_'
 * spelling, which matches the string registered below) to restrict use of
 * THREAD_TIME_CONSTRAINT_POLICY and THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY
 * to threads that have joined a workinterval with WORK_INTERVAL_WORKLOAD_ID_RT_ALLOWED.
 */
static TUNABLE(
	bool,
	rt_allow_policy_enabled,
	"-rt-allow_policy-enable",
#if XNU_TARGET_OS_XR
	true
#else
	false
#endif /* XNU_TARGET_OS_XR */
	);

/*
 * When the RT allow policy is enabled and a thread allowed to become RT,
 * sometimes (if the processes RT allow policy is restricted) the thread will
 * have a CPU limit enforced. The following two tunables determine the
 * parameters for that CPU limit.
 */

/* % of the interval allowed to run. */
TUNABLE_DEV_WRITEABLE(uint8_t, rt_allow_limit_percent,
    "rt_allow_limit_percent", 70);

/* The length of the interval in milliseconds (consumed as t_ledger_req_interval_ms). */
TUNABLE_DEV_WRITEABLE(uint16_t, rt_allow_limit_interval_ms,
    "rt_allow_limit_interval", 10);
3412 
3413 static bool
thread_has_rt(thread_t thread)3414 thread_has_rt(thread_t thread)
3415 {
3416 	return
3417 	        thread->sched_mode == TH_MODE_REALTIME ||
3418 	        thread->saved_mode == TH_MODE_REALTIME;
3419 }
3420 
3421 /*
3422  * Set a CPU limit on a thread based on the RT allow policy. This will be picked
3423  * up by the target thread via the ledger AST.
3424  */
3425 static void
thread_rt_set_cpulimit(thread_t thread)3426 thread_rt_set_cpulimit(thread_t thread)
3427 {
3428 	/* Force reasonable values for the cpu limit. */
3429 	const uint8_t percent = MAX(MIN(rt_allow_limit_percent, 99), 1);
3430 	const uint16_t interval_ms = MAX(rt_allow_limit_interval_ms, 1);
3431 
3432 	thread->t_ledger_req_percentage = percent;
3433 	thread->t_ledger_req_interval_ms = interval_ms;
3434 	thread->t_ledger_req_action = THREAD_CPULIMIT_BLOCK;
3435 
3436 	thread->sched_flags |= TH_SFLAG_RT_CPULIMIT;
3437 }
3438 
3439 /* Similar to the above but removes any CPU limit. */
3440 static void
thread_rt_clear_cpulimit(thread_t thread)3441 thread_rt_clear_cpulimit(thread_t thread)
3442 {
3443 	thread->sched_flags &= ~TH_SFLAG_RT_CPULIMIT;
3444 
3445 	thread->t_ledger_req_percentage = 0;
3446 	thread->t_ledger_req_interval_ms = 0;
3447 	thread->t_ledger_req_action = THREAD_CPULIMIT_DISABLE;
3448 }
3449 
/*
 * Evaluate RT policy for a thread, demoting and undemoting as needed.
 *
 * Also applies/removes the RT CPU limit for non-platform-binary tasks and
 * pokes the ledger AST so the target thread notices the limit change.
 */
void
thread_rt_evaluate(thread_t thread)
{
	task_t task = get_threadtask(thread);
	bool platform_binary = false;

	/* If the RT allow policy is not enabled - nothing to do. */
	if (!rt_allow_policy_enabled) {
		return;
	}

	/* User threads only. */
	if (task == kernel_task) {
		return;
	}

	/* Check for platform binary. */
	platform_binary = (task_ro_flags_get(task) & TFRO_PLATFORM) != 0;

	spl_t s = splsched();
	thread_lock(thread);

	const thread_work_interval_flags_t wi_flags =
	    os_atomic_load(&thread->th_work_interval_flags, relaxed);

	/*
	 * RT threads which are not joined to a work interval which allows RT
	 * threads are demoted. Once those conditions no longer hold, the thread
	 * undemoted.
	 */
	if (thread_has_rt(thread) && (wi_flags & TH_WORK_INTERVAL_FLAGS_RT_ALLOWED) == 0) {
		if (!sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) {
			KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RT_DISALLOWED_WORK_INTERVAL),
			    thread_tid(thread));
			sched_thread_mode_demote(thread, TH_SFLAG_RT_DISALLOWED);
		}
	} else {
		if (sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) {
			sched_thread_mode_undemote(thread, TH_SFLAG_RT_DISALLOWED);
		}
	}

	/*
	 * RT threads get a CPU limit unless they're part of a platform binary
	 * task. If the thread is no longer RT, any existing CPU limit should be
	 * removed.
	 */
	bool set_ast = false;
	if (!platform_binary &&
	    thread_has_rt(thread) &&
	    (thread->sched_flags & TH_SFLAG_RT_CPULIMIT) == 0) {
		thread_rt_set_cpulimit(thread);
		set_ast = true;
	}

	if (!platform_binary &&
	    !thread_has_rt(thread) &&
	    (thread->sched_flags & TH_SFLAG_RT_CPULIMIT) != 0) {
		thread_rt_clear_cpulimit(thread);
		set_ast = true;
	}

	thread_unlock(thread);
	splx(s);

	if (set_ast) {
		/* Ensure the target thread picks up any CPU limit change. */
		act_set_astledger(thread);
	}
}
3523 
3524 #else
3525 
/* Stub: with CONFIG_SCHED_RT_ALLOW disabled there is no RT allow policy to evaluate. */
void
thread_rt_evaluate(__unused thread_t thread)
{
}
3530 
3531 #endif /*  CONFIG_SCHED_RT_ALLOW */
3532