xref: /xnu-10002.61.3/osfmk/kern/thread_policy.c (revision 0f4c859e951fba394238ab619495c4e1d54d0f34)
1 /*
2  * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <mach/mach_types.h>
30 #include <mach/thread_act_server.h>
31 
32 #include <kern/kern_types.h>
33 #include <kern/processor.h>
34 #include <kern/thread.h>
35 #include <kern/affinity.h>
36 #include <kern/work_interval.h>
37 #include <mach/task_policy.h>
38 #include <kern/sfi.h>
39 #include <kern/policy_internal.h>
40 #include <sys/errno.h>
41 #include <sys/ulock.h>
42 
43 #include <mach/machine/sdt.h>
44 
45 static KALLOC_TYPE_DEFINE(thread_qos_override_zone,
46     struct thread_qos_override, KT_DEFAULT);
47 
48 #ifdef MACH_BSD
49 extern int      proc_selfpid(void);
50 extern char *   proc_name_address(void *p);
51 extern void     rethrottle_thread(void * uthread);
52 #endif /* MACH_BSD */
53 
54 #define QOS_EXTRACT(q)        ((q) & 0xff)
55 
56 #define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
57 #define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
58 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
59 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 3
60 
61 TUNABLE(uint32_t, qos_override_mode, "qos_override_mode",
62     QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE);
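/*
 * Editorial note (not in the original source): TUNABLE binds the variable to
 * a boot-arg of the same name, so the override mode can be selected at boot,
 * e.g. boot-args "qos_override_mode=2" for the fine-grained mode; the default
 * is the single-mutex-override variant above.
 */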
63 
64 static void
65 proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset);
66 
67 const int thread_default_iotier_override  = THROTTLE_LEVEL_END;
68 
69 const struct thread_requested_policy default_thread_requested_policy = {
70 	.thrp_iotier_kevent_override = thread_default_iotier_override
71 };
72 
73 /*
74  * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
75  * to threads that don't have a QoS class set.
76  */
77 const qos_policy_params_t thread_qos_policy_params = {
78 	/*
79 	 * This table defines the starting base priority of the thread,
80 	 * which will be modified by the thread importance and the task max priority
81 	 * before being applied.
82 	 */
83 	.qos_pri[THREAD_QOS_UNSPECIFIED]                = 0, /* not consulted */
84 	.qos_pri[THREAD_QOS_USER_INTERACTIVE]           = BASEPRI_BACKGROUND, /* i.e. 46 */
85 	.qos_pri[THREAD_QOS_USER_INITIATED]             = BASEPRI_USER_INITIATED,
86 	.qos_pri[THREAD_QOS_LEGACY]                     = BASEPRI_DEFAULT,
87 	.qos_pri[THREAD_QOS_UTILITY]                    = BASEPRI_UTILITY,
88 	.qos_pri[THREAD_QOS_BACKGROUND]                 = MAXPRI_THROTTLE,
89 	.qos_pri[THREAD_QOS_MAINTENANCE]                = MAXPRI_THROTTLE,
90 
91 	/*
92 	 * This table defines the highest IO priority that a thread marked with this
93 	 * QoS class can have.
94 	 */
95 	.qos_iotier[THREAD_QOS_UNSPECIFIED]             = THROTTLE_LEVEL_TIER0,
96 	.qos_iotier[THREAD_QOS_USER_INTERACTIVE]        = THROTTLE_LEVEL_TIER0,
97 	.qos_iotier[THREAD_QOS_USER_INITIATED]          = THROTTLE_LEVEL_TIER0,
98 	.qos_iotier[THREAD_QOS_LEGACY]                  = THROTTLE_LEVEL_TIER0,
99 	.qos_iotier[THREAD_QOS_UTILITY]                 = THROTTLE_LEVEL_TIER1,
100 	.qos_iotier[THREAD_QOS_BACKGROUND]              = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */
101 	.qos_iotier[THREAD_QOS_MAINTENANCE]             = THROTTLE_LEVEL_TIER3,
102 
103 	/*
104 	 * This table defines the highest throughput QoS tier that a thread
105 	 * marked with this QoS class can have; the latency QoS table below is analogous.
106 	 */
107 
108 	.qos_through_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
109 	.qos_through_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
110 	.qos_through_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
111 	.qos_through_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
112 	.qos_through_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
113 	.qos_through_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
114 	.qos_through_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
115 
116 	.qos_latency_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
117 	.qos_latency_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(LATENCY_QOS_TIER_0),
118 	.qos_latency_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(LATENCY_QOS_TIER_1),
119 	.qos_latency_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(LATENCY_QOS_TIER_1),
120 	.qos_latency_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(LATENCY_QOS_TIER_3),
121 	.qos_latency_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(LATENCY_QOS_TIER_3),
122 	.qos_latency_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(LATENCY_QOS_TIER_3),
123 };
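/*
 * Worked example (editorial, not in the original source): with the usual
 * sched.h values, a THREAD_QOS_LEGACY thread starts from BASEPRI_DEFAULT (31),
 * may issue IO at up to THROTTLE_LEVEL_TIER0, and runs at throughput/latency
 * tier 1, while a THREAD_QOS_UTILITY thread starts from BASEPRI_UTILITY (20),
 * is limited to IO tier 1, and drops to throughput tier 2 / latency tier 3.
 */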
124 
125 static void
126 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);
127 
128 static int
129 thread_qos_scaled_relative_priority(int qos, int qos_relprio);
130 
131 static void
132 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);
133 
134 static void
135 proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
136 
137 static void
138 proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
139 
140 static void
141 thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
142 
143 static int
144 thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);
145 
146 static int
147 proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);
148 
149 static void
150 thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
151 
152 static void
153 thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
154 
155 boolean_t
156 thread_has_qos_policy(thread_t thread)
157 {
158 	return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
159 }
160 
161 
162 static void
163 thread_remove_qos_policy_locked(thread_t thread,
164     task_pend_token_t pend_token)
165 {
166 	__unused int prev_qos = thread->requested_policy.thrp_qos;
167 
168 	DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);
169 
170 	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
171 	    THREAD_QOS_UNSPECIFIED, 0, pend_token);
172 }
173 
174 kern_return_t
175 thread_remove_qos_policy(thread_t thread)
176 {
177 	struct task_pend_token pend_token = {};
178 
179 	thread_mtx_lock(thread);
180 	if (!thread->active) {
181 		thread_mtx_unlock(thread);
182 		return KERN_TERMINATED;
183 	}
184 
185 	thread_remove_qos_policy_locked(thread, &pend_token);
186 
187 	thread_mtx_unlock(thread);
188 
189 	thread_policy_update_complete_unlocked(thread, &pend_token);
190 
191 	return KERN_SUCCESS;
192 }
193 
194 
195 boolean_t
196 thread_is_static_param(thread_t thread)
197 {
198 	if (thread->static_param) {
199 		DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
200 		return TRUE;
201 	}
202 	return FALSE;
203 }
204 
205 /*
206  * Relative priorities can range between 0REL and -15REL. These
207  * map to QoS-specific ranges, to create non-overlapping priority
208  * ranges.
209  */
210 static int
211 thread_qos_scaled_relative_priority(int qos, int qos_relprio)
212 {
213 	int next_lower_qos;
214 
215 	/* Fast path, since no validation or scaling is needed */
216 	if (qos_relprio == 0) {
217 		return 0;
218 	}
219 
220 	switch (qos) {
221 	case THREAD_QOS_USER_INTERACTIVE:
222 		next_lower_qos = THREAD_QOS_USER_INITIATED;
223 		break;
224 	case THREAD_QOS_USER_INITIATED:
225 		next_lower_qos = THREAD_QOS_LEGACY;
226 		break;
227 	case THREAD_QOS_LEGACY:
228 		next_lower_qos = THREAD_QOS_UTILITY;
229 		break;
230 	case THREAD_QOS_UTILITY:
231 		next_lower_qos = THREAD_QOS_BACKGROUND;
232 		break;
233 	case THREAD_QOS_MAINTENANCE:
234 	case THREAD_QOS_BACKGROUND:
235 		next_lower_qos = 0;
236 		break;
237 	default:
238 		panic("Unrecognized QoS %d", qos);
239 		return 0;
240 	}
241 
242 	int prio_range_max = thread_qos_policy_params.qos_pri[qos];
243 	int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0;
244 
245 	/*
246 	 * We now have the valid range that the scaled relative priority can map to. Note
247 	 * that the lower bound is exclusive, but the upper bound is inclusive. If the
248 	 * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
249 	 * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
250 	 * remainder.
251 	 */
252 	int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4);
253 
254 	return scaled_relprio;
255 }
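/*
 * Worked example (editorial, not in the original source): for
 * THREAD_QOS_LEGACY at -8REL the range is (qos_pri[UTILITY], qos_pri[LEGACY]]
 * == (20, 31], so the scaled value is -(((31 - 20) * 8) >> 4) == -5 and the
 * resulting base priority is 31 - 5 == 26, still above the UTILITY range.
 */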
256 
257 /*
258  * flag set by -qos-policy-allow boot-arg to allow
259  * testing thread qos policy from userspace
260  */
261 static TUNABLE(bool, allow_qos_policy_set, "-qos-policy-allow", false);
262 
263 kern_return_t
264 thread_policy_set(
265 	thread_t                                thread,
266 	thread_policy_flavor_t  flavor,
267 	thread_policy_t                 policy_info,
268 	mach_msg_type_number_t  count)
269 {
270 	thread_qos_policy_data_t req_qos;
271 	kern_return_t kr;
272 
273 	req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;
274 
275 	if (thread == THREAD_NULL) {
276 		return KERN_INVALID_ARGUMENT;
277 	}
278 
279 	if (!allow_qos_policy_set) {
280 		if (thread_is_static_param(thread)) {
281 			return KERN_POLICY_STATIC;
282 		}
283 
284 		if (flavor == THREAD_QOS_POLICY) {
285 			return KERN_INVALID_ARGUMENT;
286 		}
287 
288 		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
289 			if (count < THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT) {
290 				return KERN_INVALID_ARGUMENT;
291 			}
292 			thread_time_constraint_with_priority_policy_t info = (thread_time_constraint_with_priority_policy_t)policy_info;
293 			if (info->priority != BASEPRI_RTQUEUES) {
294 				return KERN_INVALID_ARGUMENT;
295 			}
296 		}
297 	}
298 
299 	if (flavor == THREAD_TIME_CONSTRAINT_POLICY || flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
300 		thread_work_interval_flags_t th_wi_flags = os_atomic_load(
301 			&thread->th_work_interval_flags, relaxed);
302 		if ((th_wi_flags & TH_WORK_INTERVAL_FLAGS_HAS_WORKLOAD_ID) &&
303 		    !(th_wi_flags & TH_WORK_INTERVAL_FLAGS_RT_ALLOWED)) {
304 			/* Fail requests to become realtime for threads that have joined
305 			 * work intervals whose workload ID lacks the rt-allowed flag. */
306 			return KERN_INVALID_POLICY;
307 		}
308 	}
309 
310 	/* Threads without static_param set reset their QoS when other policies are applied. */
311 	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
312 		/* Store the existing tier, if we fail this call it is used to reset back. */
313 		req_qos.qos_tier = thread->requested_policy.thrp_qos;
314 		req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;
315 
316 		kr = thread_remove_qos_policy(thread);
317 		if (kr != KERN_SUCCESS) {
318 			return kr;
319 		}
320 	}
321 
322 	kr = thread_policy_set_internal(thread, flavor, policy_info, count);
323 
324 	if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
325 		if (kr != KERN_SUCCESS) {
326 			/* Reset back to our original tier as the set failed. */
327 			(void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
328 		}
329 	}
330 
331 	return kr;
332 }
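/*
 * Usage sketch (editorial, not in the original source; fenced in "#if 0"
 * because it is userspace code): a client requests the realtime policy
 * validated above via the Mach thread_policy_set() call, with times in
 * mach_absolute_time units. The helper name and the 10ms/5ms figures are
 * illustrative only.
 */
#if 0
#include <mach/mach.h>
#include <mach/mach_time.h>
#include <mach/thread_policy.h>

static kern_return_t
make_self_realtime(void)
{
	mach_timebase_info_data_t tb;
	mach_timebase_info(&tb);

	/* Convert one millisecond to mach_absolute_time units. */
	uint64_t one_ms = (1000000ULL * tb.denom) / tb.numer;

	thread_time_constraint_policy_data_t policy = {
		.period      = (uint32_t)(10 * one_ms), /* wake roughly every 10ms */
		.computation = (uint32_t)(5 * one_ms),  /* need ~5ms of CPU per period */
		.constraint  = (uint32_t)(10 * one_ms), /* ...finished within 10ms */
		.preemptible = TRUE,
	};

	return thread_policy_set(mach_thread_self(),
	    THREAD_TIME_CONSTRAINT_POLICY, (thread_policy_t)&policy,
	    THREAD_TIME_CONSTRAINT_POLICY_COUNT);
}
#endif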
333 
334 static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, period) == offsetof(thread_time_constraint_policy_data_t, period));
335 static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, computation) == offsetof(thread_time_constraint_policy_data_t, computation));
336 static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, constraint) == offsetof(thread_time_constraint_policy_data_t, constraint));
337 static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, preemptible) == offsetof(thread_time_constraint_policy_data_t, preemptible));
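/*
 * Editorial note (not in the original source): these asserts pin the
 * "with priority" struct as a prefix-compatible extension of the base
 * time-constraint struct, which is what lets the THREAD_TIME_CONSTRAINT_*
 * cases below share a single cast and code path.
 */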
338 
339 kern_return_t
340 thread_policy_set_internal(
341 	thread_t                     thread,
342 	thread_policy_flavor_t       flavor,
343 	thread_policy_t              policy_info,
344 	mach_msg_type_number_t       count)
345 {
346 	kern_return_t result = KERN_SUCCESS;
347 	struct task_pend_token pend_token = {};
348 
349 	thread_mtx_lock(thread);
350 	if (!thread->active) {
351 		thread_mtx_unlock(thread);
352 
353 		return KERN_TERMINATED;
354 	}
355 
356 	switch (flavor) {
357 	case THREAD_EXTENDED_POLICY:
358 	{
359 		boolean_t timeshare = TRUE;
360 
361 		if (count >= THREAD_EXTENDED_POLICY_COUNT) {
362 			thread_extended_policy_t info;
363 
364 			info = (thread_extended_policy_t)policy_info;
365 			timeshare = info->timeshare;
366 		}
367 
368 		sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;
369 
370 		spl_t s = splsched();
371 		thread_lock(thread);
372 
373 		thread_set_user_sched_mode_and_recompute_pri(thread, mode);
374 
375 		thread_unlock(thread);
376 		splx(s);
377 
378 		/*
379 		 * The thread may be demoted with RT_DISALLOWED but has just
380 		 * changed its sched mode to TIMESHARE or FIXED. Make sure to
381 		 * undemote the thread so the new sched mode takes effect.
382 		 */
383 		thread_rt_evaluate(thread);
384 
385 		pend_token.tpt_update_thread_sfi = 1;
386 
387 		break;
388 	}
389 
390 	case THREAD_TIME_CONSTRAINT_POLICY:
391 	case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
392 	{
393 		thread_time_constraint_with_priority_policy_t info;
394 
395 		mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
396 		    THREAD_TIME_CONSTRAINT_POLICY_COUNT :
397 		    THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);
398 
399 		if (count < min_count) {
400 			result = KERN_INVALID_ARGUMENT;
401 			break;
402 		}
403 
404 		info = (thread_time_constraint_with_priority_policy_t)policy_info;
405 
406 
407 		if (info->constraint < info->computation ||
408 		    info->computation > max_rt_quantum ||
409 		    info->computation < min_rt_quantum) {
410 			result = KERN_INVALID_ARGUMENT;
411 			break;
412 		}
413 
414 		if (info->computation < (info->constraint / 2)) {
415 			info->computation = (info->constraint / 2);
416 			if (info->computation > max_rt_quantum) {
417 				info->computation = max_rt_quantum;
418 			}
419 		}
420 
421 		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
422 			if ((info->priority < BASEPRI_RTQUEUES) || (info->priority > MAXPRI)) {
423 				result = KERN_INVALID_ARGUMENT;
424 				break;
425 			}
426 		}
427 
428 		spl_t s = splsched();
429 		thread_lock(thread);
430 
431 		thread->realtime.period          = info->period;
432 		thread->realtime.computation     = info->computation;
433 		thread->realtime.constraint      = info->constraint;
434 		thread->realtime.preemptible     = info->preemptible;
435 
436 		/*
437 		 * If the thread has a work interval driven policy, the priority
438 		 * offset has been set by the work interval.
439 		 */
440 		if (!thread->requested_policy.thrp_wi_driven) {
441 			if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
442 				thread->realtime.priority_offset = (uint8_t)(info->priority - BASEPRI_RTQUEUES);
443 			} else {
444 				thread->realtime.priority_offset = 0;
445 			}
446 		}
447 
448 		thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);
449 
450 		thread_unlock(thread);
451 		splx(s);
452 
453 		thread_rt_evaluate(thread);
454 
455 		pend_token.tpt_update_thread_sfi = 1;
456 
457 		break;
458 	}
459 
460 	case THREAD_PRECEDENCE_POLICY:
461 	{
462 		thread_precedence_policy_t info;
463 
464 		if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
465 			result = KERN_INVALID_ARGUMENT;
466 			break;
467 		}
468 		info = (thread_precedence_policy_t)policy_info;
469 
470 		spl_t s = splsched();
471 		thread_lock(thread);
472 
473 		thread->importance = info->importance;
474 
475 		thread_recompute_priority(thread);
476 
477 		thread_unlock(thread);
478 		splx(s);
479 
480 		break;
481 	}
482 
483 	case THREAD_AFFINITY_POLICY:
484 	{
485 		extern boolean_t affinity_sets_enabled;
486 		thread_affinity_policy_t info;
487 
488 		if (!affinity_sets_enabled) {
489 			result = KERN_INVALID_POLICY;
490 			break;
491 		}
492 
493 		if (!thread_affinity_is_supported()) {
494 			result = KERN_NOT_SUPPORTED;
495 			break;
496 		}
497 		if (count < THREAD_AFFINITY_POLICY_COUNT) {
498 			result = KERN_INVALID_ARGUMENT;
499 			break;
500 		}
501 
502 		info = (thread_affinity_policy_t) policy_info;
503 		/*
504 		 * Unlock the thread mutex here and
505 		 * return directly after calling thread_affinity_set().
506 		 * This is necessary for correct lock ordering because
507 		 * thread_affinity_set() takes the task lock.
508 		 */
509 		thread_mtx_unlock(thread);
510 		return thread_affinity_set(thread, info->affinity_tag);
511 	}
512 
513 #if !defined(XNU_TARGET_OS_OSX)
514 	case THREAD_BACKGROUND_POLICY:
515 	{
516 		thread_background_policy_t info;
517 
518 		if (count < THREAD_BACKGROUND_POLICY_COUNT) {
519 			result = KERN_INVALID_ARGUMENT;
520 			break;
521 		}
522 
523 		if (get_threadtask(thread) != current_task()) {
524 			result = KERN_PROTECTION_FAILURE;
525 			break;
526 		}
527 
528 		info = (thread_background_policy_t) policy_info;
529 
530 		int enable;
531 
532 		if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG) {
533 			enable = TASK_POLICY_ENABLE;
534 		} else {
535 			enable = TASK_POLICY_DISABLE;
536 		}
537 
538 		int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;
539 
540 		proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token);
541 
542 		break;
543 	}
544 #endif /* !defined(XNU_TARGET_OS_OSX) */
545 
546 	case THREAD_THROUGHPUT_QOS_POLICY:
547 	{
548 		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
549 		thread_throughput_qos_t tqos;
550 
551 		if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
552 			result = KERN_INVALID_ARGUMENT;
553 			break;
554 		}
555 
556 		if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS) {
557 			break;
558 		}
559 
560 		tqos = qos_extract(info->thread_throughput_qos_tier);
561 
562 		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
563 		    TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);
564 
565 		break;
566 	}
567 
568 	case THREAD_LATENCY_QOS_POLICY:
569 	{
570 		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
571 		thread_latency_qos_t lqos;
572 
573 		if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
574 			result = KERN_INVALID_ARGUMENT;
575 			break;
576 		}
577 
578 		if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS) {
579 			break;
580 		}
581 
582 		lqos = qos_extract(info->thread_latency_qos_tier);
583 
584 		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
585 		    TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);
586 
587 		break;
588 	}
589 
590 	case THREAD_QOS_POLICY:
591 	{
592 		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;
593 
594 		if (count < THREAD_QOS_POLICY_COUNT) {
595 			result = KERN_INVALID_ARGUMENT;
596 			break;
597 		}
598 
599 		if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
600 			result = KERN_INVALID_ARGUMENT;
601 			break;
602 		}
603 
604 		if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
605 			result = KERN_INVALID_ARGUMENT;
606 			break;
607 		}
608 
609 		if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
610 			result = KERN_INVALID_ARGUMENT;
611 			break;
612 		}
613 
614 		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
615 		    info->qos_tier, -info->tier_importance, &pend_token);
616 
617 		break;
618 	}
619 
620 	default:
621 		result = KERN_INVALID_ARGUMENT;
622 		break;
623 	}
624 
625 	thread_mtx_unlock(thread);
626 
627 	thread_policy_update_complete_unlocked(thread, &pend_token);
628 
629 	return result;
630 }
631 
632 /*
633  * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
634  * Both result in FIXED mode scheduling.
635  */
636 static sched_mode_t
637 convert_policy_to_sched_mode(integer_t policy)
638 {
639 	switch (policy) {
640 	case POLICY_TIMESHARE:
641 		return TH_MODE_TIMESHARE;
642 	case POLICY_RR:
643 	case POLICY_FIFO:
644 		return TH_MODE_FIXED;
645 	default:
646 		panic("unexpected sched policy: %d", policy);
647 		return TH_MODE_NONE;
648 	}
649 }
650 
651 /*
652  * Called either with the thread mutex locked
653  * or from the pthread kext in a 'safe place'.
654  */
655 static kern_return_t
656 thread_set_mode_and_absolute_pri_internal(thread_t              thread,
657     sched_mode_t          mode,
658     integer_t             priority,
659     task_pend_token_t     pend_token)
660 {
661 	kern_return_t kr = KERN_SUCCESS;
662 
663 	spl_t s = splsched();
664 	thread_lock(thread);
665 
666 	/* This path isn't allowed to change a thread out of realtime. */
667 	if ((thread->sched_mode == TH_MODE_REALTIME) ||
668 	    (thread->saved_mode == TH_MODE_REALTIME)) {
669 		kr = KERN_FAILURE;
670 		goto unlock;
671 	}
672 
673 	if (thread->policy_reset) {
674 		kr = KERN_SUCCESS;
675 		goto unlock;
676 	}
677 
678 	sched_mode_t old_mode = thread->sched_mode;
679 	integer_t old_base_pri = thread->base_pri;
680 	integer_t old_sched_pri = thread->sched_pri;
681 
682 	/*
683 	 * Reverse engineer and apply the correct importance value
684 	 * from the requested absolute priority value.
685 	 *
686 	 * TODO: Store the absolute priority value instead
687 	 */
688 
689 	if (priority >= thread->max_priority) {
690 		priority = thread->max_priority - thread->task_priority;
691 	} else if (priority >= MINPRI_KERNEL) {
692 		priority -=  MINPRI_KERNEL;
693 	} else if (priority >= MINPRI_RESERVED) {
694 		priority -=  MINPRI_RESERVED;
695 	} else {
696 		priority -= BASEPRI_DEFAULT;
697 	}
698 
699 	priority += thread->task_priority;
700 
701 	if (priority > thread->max_priority) {
702 		priority = thread->max_priority;
703 	} else if (priority < MINPRI) {
704 		priority = MINPRI;
705 	}
706 
707 	thread->importance = priority - thread->task_priority;
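	/*
	 * Worked example (editorial, not in the original source): for a user
	 * thread with task_priority 31 and max_priority 63, a requested
	 * absolute priority of 37 is below MINPRI_RESERVED, so it is rebased
	 * to 37 - BASEPRI_DEFAULT == 6, re-anchored to 6 + 31 == 37, and the
	 * stored importance becomes 6.
	 */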
708 
709 	thread_set_user_sched_mode_and_recompute_pri(thread, mode);
710 
711 	if (mode != old_mode) {
712 		pend_token->tpt_update_thread_sfi = 1;
713 	}
714 
715 	if (thread->base_pri != old_base_pri ||
716 	    thread->sched_pri != old_sched_pri) {
717 		pend_token->tpt_update_turnstile = 1;
718 	}
719 
720 unlock:
721 	thread_unlock(thread);
722 	splx(s);
723 
724 	return kr;
725 }
726 
727 void
728 thread_freeze_base_pri(thread_t thread)
729 {
730 	assert(thread == current_thread());
731 
732 	spl_t s = splsched();
733 	thread_lock(thread);
734 
735 	assert((thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN) == 0);
736 	thread->sched_flags |= TH_SFLAG_BASE_PRI_FROZEN;
737 
738 	thread_unlock(thread);
739 	splx(s);
740 }
741 
742 bool
743 thread_unfreeze_base_pri(thread_t thread)
744 {
745 	assert(thread == current_thread());
746 	integer_t base_pri;
747 	ast_t ast = 0;
748 
749 	spl_t s = splsched();
750 	thread_lock(thread);
751 
752 	assert(thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN);
753 	thread->sched_flags &= ~TH_SFLAG_BASE_PRI_FROZEN;
754 
755 	base_pri = thread->req_base_pri;
756 	if (base_pri != thread->base_pri) {
757 		/*
758 		 * This function returns "true" if the base pri change
759 		 * is the most likely cause for the preemption.
760 		 */
761 		sched_set_thread_base_priority(thread, base_pri);
762 		ast = ast_peek(AST_PREEMPT);
763 	}
764 
765 	thread_unlock(thread);
766 	splx(s);
767 
768 	return ast != 0;
769 }
770 
771 uint8_t
772 thread_workq_pri_for_qos(thread_qos_t qos)
773 {
774 	assert(qos < THREAD_QOS_LAST);
775 	return (uint8_t)thread_qos_policy_params.qos_pri[qos];
776 }
777 
778 thread_qos_t
779 thread_workq_qos_for_pri(int priority)
780 {
781 	thread_qos_t qos;
782 	if (priority > thread_qos_policy_params.qos_pri[THREAD_QOS_USER_INTERACTIVE]) {
783 		// indicate that workq should map >UI threads to workq's
784 		// internal notation for above-UI work.
785 		return THREAD_QOS_UNSPECIFIED;
786 	}
787 	for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
788 		// map a given priority up to the next nearest qos band.
789 		if (thread_qos_policy_params.qos_pri[qos - 1] < priority) {
790 			return qos;
791 		}
792 	}
793 	return THREAD_QOS_MAINTENANCE;
794 }
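/*
 * Worked example (editorial, not in the original source): priority 25 is not
 * above qos_pri[LEGACY] (31) but is above qos_pri[UTILITY] (20), so the loop
 * returns THREAD_QOS_LEGACY; priority 50 exceeds qos_pri[USER_INTERACTIVE]
 * (46) and maps to THREAD_QOS_UNSPECIFIED, workq's marker for above-UI work.
 */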
795 
796 /*
797  * private interface for pthread workqueues
798  *
799  * Set scheduling policy & absolute priority for thread
800  * May be called with spinlocks held
801  * Thread mutex lock is not held
802  */
803 void
804 thread_reset_workq_qos(thread_t thread, uint32_t qos)
805 {
806 	struct task_pend_token pend_token = {};
807 
808 	assert(qos < THREAD_QOS_LAST);
809 
810 	spl_t s = splsched();
811 	thread_lock(thread);
812 
813 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
814 	    TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
815 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
816 	    TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0,
817 	    &pend_token);
818 
819 	assert(pend_token.tpt_update_sockets == 0);
820 
821 	thread_unlock(thread);
822 	splx(s);
823 
824 	thread_policy_update_complete_unlocked(thread, &pend_token);
825 }
826 
827 /*
828  * private interface for pthread workqueues
829  *
830  * Set scheduling policy & absolute priority for thread
831  * May be called with spinlocks held
832  * Thread mutex lock is held
833  */
834 void
835 thread_set_workq_override(thread_t thread, uint32_t qos)
836 {
837 	struct task_pend_token pend_token = {};
838 
839 	assert(qos < THREAD_QOS_LAST);
840 
841 	spl_t s = splsched();
842 	thread_lock(thread);
843 
844 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
845 	    TASK_POLICY_QOS_WORKQ_OVERRIDE, qos, 0, &pend_token);
846 
847 	assert(pend_token.tpt_update_sockets == 0);
848 
849 	thread_unlock(thread);
850 	splx(s);
851 
852 	thread_policy_update_complete_unlocked(thread, &pend_token);
853 }
854 
855 /*
856  * private interface for pthread workqueues
857  *
858  * Set scheduling policy & absolute priority for thread
859  * May be called with spinlocks held
860  * Thread mutex lock is not held
861  */
862 void
863 thread_set_workq_pri(thread_t  thread,
864     thread_qos_t qos,
865     integer_t priority,
866     integer_t policy)
867 {
868 	struct task_pend_token pend_token = {};
869 	sched_mode_t mode = convert_policy_to_sched_mode(policy);
870 
871 	assert(qos < THREAD_QOS_LAST);
872 	assert(thread->static_param);
873 
874 	if (!thread->static_param || !thread->active) {
875 		return;
876 	}
877 
878 	spl_t s = splsched();
879 	thread_lock(thread);
880 
881 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
882 	    TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
883 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
884 	    TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED,
885 	    0, &pend_token);
886 
887 	thread_unlock(thread);
888 	splx(s);
889 
890 	/* Concern: this doesn't hold the mutex... */
891 
892 	__assert_only kern_return_t kr;
893 	kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority,
894 	    &pend_token);
895 	assert(kr == KERN_SUCCESS);
896 
897 	assert(pend_token.tpt_update_sockets == 0);
898 
899 	thread_policy_update_complete_unlocked(thread, &pend_token);
900 }
901 
902 /*
903  * thread_set_mode_and_absolute_pri:
904  *
905  * Set scheduling policy & absolute priority for thread, for deprecated
906  * thread_set_policy and thread_policy interfaces.
907  *
908  * Called with nothing locked.
909  */
910 kern_return_t
911 thread_set_mode_and_absolute_pri(thread_t   thread,
912     integer_t  policy,
913     integer_t  priority)
914 {
915 	kern_return_t kr = KERN_SUCCESS;
916 	struct task_pend_token pend_token = {};
917 
918 	sched_mode_t mode = convert_policy_to_sched_mode(policy);
919 
920 	thread_mtx_lock(thread);
921 
922 	if (!thread->active) {
923 		kr = KERN_TERMINATED;
924 		goto unlock;
925 	}
926 
927 	if (thread_is_static_param(thread)) {
928 		kr = KERN_POLICY_STATIC;
929 		goto unlock;
930 	}
931 
932 	/* Setting legacy policies on threads kills the current QoS */
933 	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
934 		thread_remove_qos_policy_locked(thread, &pend_token);
935 	}
936 
937 	kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);
938 
939 unlock:
940 	thread_mtx_unlock(thread);
941 
942 	thread_policy_update_complete_unlocked(thread, &pend_token);
943 
944 	return kr;
945 }
946 
947 /*
948  * Set the thread's requested mode and recompute priority
949  * Called with thread mutex and thread locked
950  *
951  * TODO: Mitigate potential problems caused by moving thread to end of runq
952  * whenever its priority is recomputed
953  *      Only remove when it actually changes? Attempt to re-insert at appropriate location?
954  */
955 static void
956 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
957 {
958 	if (thread->policy_reset) {
959 		return;
960 	}
961 
962 	boolean_t removed = thread_run_queue_remove(thread);
963 
964 	sched_set_thread_mode_user(thread, mode);
965 
966 	thread_recompute_priority(thread);
967 
968 	if (removed) {
969 		thread_run_queue_reinsert(thread, SCHED_TAILQ);
970 	}
971 }
972 
973 /* called at splsched with thread lock locked */
974 static void
975 thread_update_qos_cpu_time_locked(thread_t thread)
976 {
977 	task_t task = get_threadtask(thread);
978 	uint64_t timer_sum, timer_delta;
979 
980 	/*
981 	 * This is only as accurate as the thread's last context switch or user/kernel
982 	 * transition (unless precise user/kernel time is disabled).
983 	 *
984 	 * TODO: Consider running an update operation here to update it first.
985 	 *       Maybe doable with interrupts disabled from current thread.
986 	 *       If the thread is on a different core, may not be easy to get right.
987 	 */
988 
989 	timer_sum = recount_thread_time_mach(thread);
990 	timer_delta = timer_sum - thread->vtimer_qos_save;
991 
992 	thread->vtimer_qos_save = timer_sum;
993 
994 	uint64_t* task_counter = NULL;
995 
996 	/* Update the task-level effective and requested qos stats atomically, because we don't have the task lock. */
997 	switch (thread->effective_policy.thep_qos) {
998 	case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default; break;
999 	case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance; break;
1000 	case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background; break;
1001 	case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility; break;
1002 	case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy; break;
1003 	case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated; break;
1004 	case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break;
1005 	default:
1006 		panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
1007 	}
1008 
1009 	OSAddAtomic64(timer_delta, task_counter);
1010 
1011 	/* Update the task-level qos stats atomically, because we don't have the task lock. */
1012 	switch (thread->requested_policy.thrp_qos) {
1013 	case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default; break;
1014 	case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance; break;
1015 	case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background; break;
1016 	case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility; break;
1017 	case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy; break;
1018 	case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated; break;
1019 	case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break;
1020 	default:
1021 		panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos);
1022 	}
1023 
1024 	OSAddAtomic64(timer_delta, task_counter);
1025 }
1026 
1027 /*
1028  * called with no thread locks held
1029  * may hold task lock
1030  */
1031 void
1032 thread_update_qos_cpu_time(thread_t thread)
1033 {
1034 	thread_mtx_lock(thread);
1035 
1036 	spl_t s = splsched();
1037 	thread_lock(thread);
1038 
1039 	thread_update_qos_cpu_time_locked(thread);
1040 
1041 	thread_unlock(thread);
1042 	splx(s);
1043 
1044 	thread_mtx_unlock(thread);
1045 }
1046 
1047 /*
1048  * Calculate base priority from thread attributes, and set it on the thread
1049  *
1050  * Called with thread_lock and thread mutex held.
1051  */
1052 void
1053 thread_recompute_priority(
1054 	thread_t                thread)
1055 {
1056 	integer_t               priority;
1057 	integer_t               adj_priority;
1058 	bool                    wi_priority = false;
1059 
1060 	if (thread->policy_reset) {
1061 		return;
1062 	}
1063 
1064 	if (thread->sched_mode == TH_MODE_REALTIME) {
1065 		uint8_t i = thread->realtime.priority_offset;
1066 		assert((i >= 0) && (i < NRTQS));
1067 		priority = BASEPRI_RTQUEUES + i;
1068 
1069 		sched_set_thread_base_priority(thread, priority);
1070 		if (thread->realtime.deadline == RT_DEADLINE_NONE) {
1071 			/* Make sure the thread has a valid deadline */
1072 			uint64_t ctime = mach_absolute_time();
1073 			thread->realtime.deadline = thread->realtime.constraint + ctime;
1074 			KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SET_RT_DEADLINE) | DBG_FUNC_NONE,
1075 			    (uintptr_t)thread_tid(thread), thread->realtime.deadline, thread->realtime.computation, 1);
1076 		}
1077 		return;
1078 
1079 		/*
1080 		 * A thread may have joined an RT work interval but then never
1081 		 * changed its sched mode, or it may have been demoted. RT work
1082 		 * intervals will have RT priorities - ignore the priority if
1083 		 * the thread isn't RT.
1084 		 */
1085 	} else if (thread->effective_policy.thep_wi_driven &&
1086 	    work_interval_get_priority(thread) < BASEPRI_RTQUEUES) {
1087 		priority = work_interval_get_priority(thread);
1088 		wi_priority = true;
1089 	} else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
1090 		int qos = thread->effective_policy.thep_qos;
1091 		int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
1092 		int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
1093 		int qos_scaled_relprio;
1094 
1095 		assert(qos >= 0 && qos < THREAD_QOS_LAST);
1096 		assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);
1097 
1098 		priority = thread_qos_policy_params.qos_pri[qos];
1099 		qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);
1100 
1101 		if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) {
1102 			/* Bump priority 46 to 47 when in a frontmost app */
1103 			qos_scaled_relprio += 1;
1104 		}
1105 
1106 		/* TODO: factor in renice priority here? */
1107 
1108 		priority += qos_scaled_relprio;
1109 	} else {
1110 		if (thread->importance > MAXPRI) {
1111 			priority = MAXPRI;
1112 		} else if (thread->importance < -MAXPRI) {
1113 			priority = -MAXPRI;
1114 		} else {
1115 			priority = thread->importance;
1116 		}
1117 
1118 		priority += thread->task_priority;
1119 	}
1120 
1121 	/* Boost the priority of threads which are RT demoted. */
1122 	if (sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) {
1123 		priority = MAX(priority, MAXPRI_USER);
1124 	}
1125 
1126 	priority = MAX(priority, thread->user_promotion_basepri);
1127 
1128 	/*
1129 	 * Clamp priority back into the allowed range for this task.
1130 	 *  The initial priority value could be out of this range due to:
1131 	 *      Task clamped to BG or Utility (max-pri is 4, or 20)
1132 	 *      Task is user task (max-pri is 63)
1133 	 *      Task is kernel task (max-pri is 95)
1134 	 * Note that thread->importance is user-settable to any integer
1135 	 * via THREAD_PRECEDENCE_POLICY.
1136 	 */
1137 	adj_priority = priority;
1138 	adj_priority = MIN(adj_priority, thread->max_priority);
1139 	adj_priority = MAX(adj_priority, MINPRI);
1140 
1141 	/* Allow workload driven priorities to exceed max_priority. */
1142 	if (wi_priority) {
1143 		adj_priority = MAX(adj_priority, priority);
1144 	}
1145 
1146 	/* Allow priority to exceed max_priority for promotions. */
1147 	if (thread->effective_policy.thep_promote_above_task) {
1148 		adj_priority = MAX(adj_priority, thread->user_promotion_basepri);
1149 	}
1150 	priority = adj_priority;
1151 	assert3u(priority, <=, MAXPRI);
1152 
1153 	if (thread->saved_mode == TH_MODE_REALTIME &&
1154 	    sched_thread_mode_has_demotion(thread, TH_SFLAG_FAILSAFE)) {
1155 		priority = DEPRESSPRI;
1156 	}
1157 
1158 	if (thread->effective_policy.thep_terminated == TRUE) {
1159 		/*
1160 		 * We temporarily want to override the expected priority to
1161 		 * ensure that the thread exits in a timely manner.
1162 		 * Note that this is allowed to exceed thread->max_priority
1163 		 * so that the thread is no longer clamped to background
1164 		 * during the final exit phase.
1165 		 */
1166 		if (priority < thread->task_priority) {
1167 			priority = thread->task_priority;
1168 		}
1169 		if (priority < BASEPRI_DEFAULT) {
1170 			priority = BASEPRI_DEFAULT;
1171 		}
1172 	}
1173 
1174 #if !defined(XNU_TARGET_OS_OSX)
1175 	/* No one can have a base priority less than MAXPRI_THROTTLE */
1176 	if (priority < MAXPRI_THROTTLE) {
1177 		priority = MAXPRI_THROTTLE;
1178 	}
1179 #endif /* !defined(XNU_TARGET_OS_OSX) */
1180 
1181 	sched_set_thread_base_priority(thread, priority);
1182 }
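/*
 * Worked example (editorial, not in the original source): an unclamped user
 * thread with effective QoS UTILITY and -4REL starts from qos_pri[UTILITY]
 * == 20; the relative priority scales over (qos_pri[BACKGROUND],
 * qos_pri[UTILITY]] == (4, 20] as -(((20 - 4) * 4) >> 4) == -4, giving a base
 * priority of 16 before the max_priority/MINPRI clamps and promotion floors.
 */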
1183 
1184 /* Called with the task lock held, but not the thread mutex or spinlock */
1185 void
1186 thread_policy_update_tasklocked(
1187 	thread_t           thread,
1188 	integer_t          priority,
1189 	integer_t          max_priority,
1190 	task_pend_token_t  pend_token)
1191 {
1192 	thread_mtx_lock(thread);
1193 
1194 	if (!thread->active || thread->policy_reset) {
1195 		thread_mtx_unlock(thread);
1196 		return;
1197 	}
1198 
1199 	spl_t s = splsched();
1200 	thread_lock(thread);
1201 
1202 	__unused
1203 	integer_t old_max_priority = thread->max_priority;
1204 
1205 	assert(priority >= INT16_MIN && priority <= INT16_MAX);
1206 	thread->task_priority = (int16_t)priority;
1207 
1208 	assert(max_priority >= INT16_MIN && max_priority <= INT16_MAX);
1209 	thread->max_priority = (int16_t)max_priority;
1210 
1211 	/*
1212 	 * When backgrounding a thread, realtime and fixed priority threads
1213 	 * should be demoted to timeshare background threads.
1214 	 *
1215 	 * TODO: Do this inside the thread policy update routine in order to avoid double
1216 	 * remove/reinsert for a runnable thread
1217 	 */
1218 	if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
1219 		sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED);
1220 	} else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
1221 		sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1222 	}
1223 
1224 	thread_policy_update_spinlocked(thread, true, pend_token);
1225 
1226 	thread_unlock(thread);
1227 	splx(s);
1228 
1229 	thread_mtx_unlock(thread);
1230 }
1231 
1232 /*
1233  * Reset thread to default state in preparation for termination
1234  * Called with thread mutex locked
1235  *
1236  * Always called on current thread, so we don't need a run queue remove
1237  */
1238 void
1239 thread_policy_reset(
1240 	thread_t                thread)
1241 {
1242 	spl_t           s;
1243 
1244 	assert(thread == current_thread());
1245 
1246 	s = splsched();
1247 	thread_lock(thread);
1248 
1249 	if (thread->sched_flags & TH_SFLAG_FAILSAFE) {
1250 		sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
1251 	}
1252 
1253 	if (thread->sched_flags & TH_SFLAG_THROTTLED) {
1254 		sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1255 	}
1256 
1257 	if (thread->sched_flags & TH_SFLAG_RT_DISALLOWED) {
1258 		sched_thread_mode_undemote(thread, TH_SFLAG_RT_DISALLOWED);
1259 	}
1260 
1261 	/* At this point, the various demotions should be inactive */
1262 	assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
1263 	assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK));
1264 
1265 	/* Reset thread back to task-default basepri and mode  */
1266 	sched_mode_t newmode = SCHED(initial_thread_sched_mode)(get_threadtask(thread));
1267 
1268 	sched_set_thread_mode(thread, newmode);
1269 
1270 	thread->importance = 0;
1271 
1272 	/* Prevent further changes to thread base priority or mode */
1273 	thread->policy_reset = 1;
1274 
1275 	sched_set_thread_base_priority(thread, thread->task_priority);
1276 
1277 	thread_unlock(thread);
1278 	splx(s);
1279 }
1280 
1281 kern_return_t
1282 thread_policy_get(
1283 	thread_t                                thread,
1284 	thread_policy_flavor_t  flavor,
1285 	thread_policy_t                 policy_info,
1286 	mach_msg_type_number_t  *count,
1287 	boolean_t                               *get_default)
1288 {
1289 	kern_return_t                   result = KERN_SUCCESS;
1290 
1291 	if (thread == THREAD_NULL) {
1292 		return KERN_INVALID_ARGUMENT;
1293 	}
1294 
1295 	thread_mtx_lock(thread);
1296 	if (!thread->active) {
1297 		thread_mtx_unlock(thread);
1298 
1299 		return KERN_TERMINATED;
1300 	}
1301 
1302 	switch (flavor) {
1303 	case THREAD_EXTENDED_POLICY:
1304 	{
1305 		boolean_t               timeshare = TRUE;
1306 
1307 		if (!(*get_default)) {
1308 			spl_t s = splsched();
1309 			thread_lock(thread);
1310 
1311 			if ((thread->sched_mode != TH_MODE_REALTIME) &&
1312 			    (thread->saved_mode != TH_MODE_REALTIME)) {
1313 				if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
1314 					timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
1315 				} else {
1316 					timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
1317 				}
1318 			} else {
1319 				*get_default = TRUE;
1320 			}
1321 
1322 			thread_unlock(thread);
1323 			splx(s);
1324 		}
1325 
1326 		if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
1327 			thread_extended_policy_t        info;
1328 
1329 			info = (thread_extended_policy_t)policy_info;
1330 			info->timeshare = timeshare;
1331 		}
1332 
1333 		break;
1334 	}
1335 
1336 	case THREAD_TIME_CONSTRAINT_POLICY:
1337 	case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
1338 	{
1339 		thread_time_constraint_with_priority_policy_t         info;
1340 
1341 		mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
1342 		    THREAD_TIME_CONSTRAINT_POLICY_COUNT :
1343 		    THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);
1344 
1345 		if (*count < min_count) {
1346 			result = KERN_INVALID_ARGUMENT;
1347 			break;
1348 		}
1349 
1350 		info = (thread_time_constraint_with_priority_policy_t)policy_info;
1351 
1352 		if (!(*get_default)) {
1353 			spl_t s = splsched();
1354 			thread_lock(thread);
1355 
1356 			if ((thread->sched_mode == TH_MODE_REALTIME) ||
1357 			    (thread->saved_mode == TH_MODE_REALTIME)) {
1358 				info->period = thread->realtime.period;
1359 				info->computation = thread->realtime.computation;
1360 				info->constraint = thread->realtime.constraint;
1361 				info->preemptible = thread->realtime.preemptible;
1362 				if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
1363 					info->priority = thread->realtime.priority_offset + BASEPRI_RTQUEUES;
1364 				}
1365 			} else {
1366 				*get_default = TRUE;
1367 			}
1368 
1369 			thread_unlock(thread);
1370 			splx(s);
1371 		}
1372 
1373 		if (*get_default) {
1374 			info->period = 0;
1375 			info->computation = default_timeshare_computation;
1376 			info->constraint = default_timeshare_constraint;
1377 			info->preemptible = TRUE;
1378 			if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
1379 				info->priority = BASEPRI_RTQUEUES;
1380 			}
1381 		}
1382 
1383 
1384 		break;
1385 	}
1386 
1387 	case THREAD_PRECEDENCE_POLICY:
1388 	{
1389 		thread_precedence_policy_t              info;
1390 
1391 		if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
1392 			result = KERN_INVALID_ARGUMENT;
1393 			break;
1394 		}
1395 
1396 		info = (thread_precedence_policy_t)policy_info;
1397 
1398 		if (!(*get_default)) {
1399 			spl_t s = splsched();
1400 			thread_lock(thread);
1401 
1402 			info->importance = thread->importance;
1403 
1404 			thread_unlock(thread);
1405 			splx(s);
1406 		} else {
1407 			info->importance = 0;
1408 		}
1409 
1410 		break;
1411 	}
1412 
1413 	case THREAD_AFFINITY_POLICY:
1414 	{
1415 		thread_affinity_policy_t                info;
1416 
1417 		if (!thread_affinity_is_supported()) {
1418 			result = KERN_NOT_SUPPORTED;
1419 			break;
1420 		}
1421 		if (*count < THREAD_AFFINITY_POLICY_COUNT) {
1422 			result = KERN_INVALID_ARGUMENT;
1423 			break;
1424 		}
1425 
1426 		info = (thread_affinity_policy_t)policy_info;
1427 
1428 		if (!(*get_default)) {
1429 			info->affinity_tag = thread_affinity_get(thread);
1430 		} else {
1431 			info->affinity_tag = THREAD_AFFINITY_TAG_NULL;
1432 		}
1433 
1434 		break;
1435 	}
1436 
1437 	case THREAD_POLICY_STATE:
1438 	{
1439 		thread_policy_state_t           info;
1440 
1441 		if (*count < THREAD_POLICY_STATE_COUNT) {
1442 			result = KERN_INVALID_ARGUMENT;
1443 			break;
1444 		}
1445 
1446 		/* Only root can get this info */
1447 		if (!task_is_privileged(current_task())) {
1448 			result = KERN_PROTECTION_FAILURE;
1449 			break;
1450 		}
1451 
1452 		info = (thread_policy_state_t)(void*)policy_info;
1453 
1454 		if (!(*get_default)) {
1455 			info->flags = 0;
1456 
1457 			spl_t s = splsched();
1458 			thread_lock(thread);
1459 
1460 			info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0);
1461 
1462 			info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy);
1463 			info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy);
1464 
1465 			info->thps_user_promotions          = 0;
1466 			info->thps_user_promotion_basepri   = thread->user_promotion_basepri;
1467 			info->thps_ipc_overrides            = thread->kevent_overrides;
1468 
1469 			proc_get_thread_policy_bitfield(thread, info);
1470 
1471 			thread_unlock(thread);
1472 			splx(s);
1473 		} else {
1474 			info->requested = 0;
1475 			info->effective = 0;
1476 			info->pending = 0;
1477 		}
1478 
1479 		break;
1480 	}
1481 
1482 	case THREAD_REQUESTED_STATE_POLICY:
1483 	{
1484 		if (*count < THREAD_REQUESTED_STATE_POLICY_COUNT) {
1485 			result = KERN_INVALID_ARGUMENT;
1486 			break;
1487 		}
1488 
1489 		thread_requested_qos_policy_t info = (thread_requested_qos_policy_t) policy_info;
1490 		struct thread_requested_policy *req_policy = &thread->requested_policy;
1491 
1492 		info->thrq_base_qos = req_policy->thrp_qos;
1493 		info->thrq_qos_relprio = req_policy->thrp_qos_relprio;
1494 		info->thrq_qos_override = req_policy->thrp_qos_override;
1495 		info->thrq_qos_promote = req_policy->thrp_qos_promote;
1496 		info->thrq_qos_kevent_override = req_policy->thrp_qos_kevent_override;
1497 		info->thrq_qos_workq_override = req_policy->thrp_qos_workq_override;
1498 		info->thrq_qos_wlsvc_override = req_policy->thrp_qos_wlsvc_override;
1499 
1500 		break;
1501 	}
1502 
1503 	case THREAD_LATENCY_QOS_POLICY:
1504 	{
1505 		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
1506 		thread_latency_qos_t plqos;
1507 
1508 		if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
1509 			result = KERN_INVALID_ARGUMENT;
1510 			break;
1511 		}
1512 
1513 		if (*get_default) {
1514 			plqos = 0;
1515 		} else {
1516 			plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL);
1517 		}
1518 
1519 		info->thread_latency_qos_tier = qos_latency_policy_package(plqos);
1520 	}
1521 	break;
1522 
1523 	case THREAD_THROUGHPUT_QOS_POLICY:
1524 	{
1525 		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
1526 		thread_throughput_qos_t ptqos;
1527 
1528 		if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
1529 			result = KERN_INVALID_ARGUMENT;
1530 			break;
1531 		}
1532 
1533 		if (*get_default) {
1534 			ptqos = 0;
1535 		} else {
1536 			ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL);
1537 		}
1538 
1539 		info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);
1540 	}
1541 	break;
1542 
1543 	case THREAD_QOS_POLICY:
1544 	{
1545 		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;
1546 
1547 		if (*count < THREAD_QOS_POLICY_COUNT) {
1548 			result = KERN_INVALID_ARGUMENT;
1549 			break;
1550 		}
1551 
1552 		if (!(*get_default)) {
1553 			int relprio_value = 0;
1554 			info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
1555 			    TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
1556 
1557 			info->tier_importance = -relprio_value;
1558 		} else {
1559 			info->qos_tier = THREAD_QOS_UNSPECIFIED;
1560 			info->tier_importance = 0;
1561 		}
1562 
1563 		break;
1564 	}
1565 
1566 	default:
1567 		result = KERN_INVALID_ARGUMENT;
1568 		break;
1569 	}
1570 
1571 	thread_mtx_unlock(thread);
1572 
1573 	return result;
1574 }
1575 
1576 void
1577 thread_policy_create(thread_t thread)
1578 {
1579 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1580 	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1581 	    thread_tid(thread), theffective_0(thread),
1582 	    theffective_1(thread), thread->base_pri, 0);
1583 
1584 	/* We pass a pend token but ignore it */
1585 	struct task_pend_token pend_token = {};
1586 
1587 	thread_policy_update_internal_spinlocked(thread, true, &pend_token);
1588 
1589 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1590 	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1591 	    thread_tid(thread), theffective_0(thread),
1592 	    theffective_1(thread), thread->base_pri, 0);
1593 }
1594 
1595 static void
1596 thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token)
1597 {
1598 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1599 	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
1600 	    thread_tid(thread), theffective_0(thread),
1601 	    theffective_1(thread), thread->base_pri, 0);
1602 
1603 	thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);
1604 
1605 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1606 	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
1607 	    thread_tid(thread), theffective_0(thread),
1608 	    theffective_1(thread), thread->base_pri, 0);
1609 }
1610 
1611 
1612 
1613 /*
1614  * One thread state update function TO RULE THEM ALL
1615  *
1616  * This function updates the thread effective policy fields
1617  * and pushes the results to the relevant subsystems.
1618  *
1619  * Called with thread spinlock locked, task may be locked, thread mutex may be locked
1620  */
1621 static void
1622 thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority,
1623     task_pend_token_t pend_token)
1624 {
1625 	/*
1626 	 * Step 1:
1627 	 *  Gather requested policy and effective task state
1628 	 */
1629 
1630 	const struct thread_requested_policy requested = thread->requested_policy;
1631 	const struct task_effective_policy task_effective = get_threadtask(thread)->effective_policy;
1632 
1633 	/*
1634 	 * Step 2:
1635 	 *  Calculate new effective policies from requested policy, task and thread state
1636 	 *  Rules:
1637 	 *      Don't change requested, it won't take effect
1638 	 */
1639 
1640 	struct thread_effective_policy next = {};
1641 
1642 	next.thep_wi_driven = requested.thrp_wi_driven;
1643 
1644 	next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;
1645 
1646 	uint32_t next_qos = requested.thrp_qos;
1647 
1648 	if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
1649 		next_qos = MAX(requested.thrp_qos_override, next_qos);
1650 		next_qos = MAX(requested.thrp_qos_promote, next_qos);
1651 		next_qos = MAX(requested.thrp_qos_kevent_override, next_qos);
1652 		next_qos = MAX(requested.thrp_qos_wlsvc_override, next_qos);
1653 		next_qos = MAX(requested.thrp_qos_workq_override, next_qos);
1654 	}
1655 
1656 	if (task_effective.tep_darwinbg && task_effective.tep_adaptive_bg &&
1657 	    requested.thrp_qos_promote > THREAD_QOS_BACKGROUND) {
1658 		/*
1659 		 * This thread is turnstile-boosted higher than the adaptive clamp
1660 		 * by a synchronous waiter. Allow that to override the adaptive
1661 		 * clamp temporarily for this thread only.
1662 		 */
1663 		next.thep_promote_above_task = true;
1664 		next_qos = requested.thrp_qos_promote;
1665 	}
1666 
1667 	next.thep_qos = next_qos;
1668 
1669 	/* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
1670 	if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1671 		if (next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1672 			next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
1673 		} else {
1674 			next.thep_qos = task_effective.tep_qos_clamp;
1675 		}
1676 		next.thep_wi_driven = 0;
1677 	}
1678 
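	/*
	 * Worked example (editor's sketch): with tep_qos_clamp == THREAD_QOS_UTILITY,
	 * a thread requesting THREAD_QOS_USER_INITIATED is limited to an effective
	 * QoS of UTILITY, and a thread with no requested QoS also ends up at
	 * UTILITY, since the clamp applies even when requested is UNSPECIFIED.
	 */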
1679 	/*
1680 	 * Extract outbound-promotion QoS before applying task ceiling or BG clamp
1681 	 * This allows QoS promotions to work properly even after the process is unclamped.
1682 	 */
1683 	next.thep_qos_promote = next.thep_qos;
1684 
1685 	/* The ceiling only applies to threads that are in the QoS world */
1686 	/* TODO: is it appropriate for this to limit a turnstile-boosted thread's QoS? */
1687 	if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
1688 	    next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1689 		next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
1690 	}
1691 
1692 	/*
1693 	 * The QoS relative priority is only applicable when the original programmer's
1694 	 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
1695 	 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
1696 	 * since otherwise it would be lower than unclamped threads. Similarly, in the
1697 	 * presence of boosting, the programmer doesn't know what other actors
1698 	 * are boosting the thread.
1699 	 */
1700 	if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
1701 	    (requested.thrp_qos == next.thep_qos) &&
1702 	    (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
1703 		next.thep_qos_relprio = requested.thrp_qos_relprio;
1704 	} else {
1705 		next.thep_qos_relprio = 0;
1706 	}
1707 
1708 	/* Calculate DARWIN_BG */
1709 	bool wants_darwinbg        = false;
1710 	bool wants_all_sockets_bg  = false; /* Do I want my existing sockets to be bg */
1711 
1712 	if (task_effective.tep_darwinbg && !next.thep_promote_above_task) {
1713 		wants_darwinbg = true;
1714 	}
1715 
1716 	/*
1717 	 * If DARWIN_BG has been requested at either level, it's engaged.
1718 	 * darwinbg threads always create bg sockets,
1719 	 * but only some types of darwinbg change the sockets
1720 	 * after they're created
1721 	 */
1722 	if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg) {
1723 		wants_all_sockets_bg = wants_darwinbg = true;
1724 	}
1725 
1726 	if (requested.thrp_pidbind_bg) {
1727 		wants_all_sockets_bg = wants_darwinbg = true;
1728 	}
1729 
1730 	if (next.thep_qos == THREAD_QOS_BACKGROUND ||
1731 	    next.thep_qos == THREAD_QOS_MAINTENANCE) {
1732 		wants_darwinbg = true;
1733 	}
1734 
1735 	/* Calculate side effects of DARWIN_BG */
1736 
1737 	if (wants_darwinbg) {
1738 		next.thep_darwinbg = 1;
1739 		next.thep_wi_driven = 0;
1740 	}
1741 
1742 	if (next.thep_darwinbg || task_effective.tep_new_sockets_bg) {
1743 		next.thep_new_sockets_bg = 1;
1744 	}
1745 
1746 	/* Don't use task_effective.tep_all_sockets_bg here */
1747 	if (wants_all_sockets_bg) {
1748 		next.thep_all_sockets_bg = 1;
1749 	}
1750 
1751 	/* darwinbg implies background QOS (or lower) */
1752 	if (next.thep_darwinbg &&
1753 	    (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
1754 		next.thep_qos = THREAD_QOS_BACKGROUND;
1755 		next.thep_qos_relprio = 0;
1756 	}
1757 
1758 	/* Calculate IO policy */
1759 
1760 	int iopol = THROTTLE_LEVEL_TIER0;
1761 
1762 	/* Factor in the task's IO policy */
1763 	if (next.thep_darwinbg) {
1764 		iopol = MAX(iopol, task_effective.tep_bg_iotier);
1765 	}
1766 
1767 	if (!next.thep_promote_above_task) {
1768 		iopol = MAX(iopol, task_effective.tep_io_tier);
1769 	}
1770 
1771 	/* Look up the associated IO tier value for the QoS class */
1772 	iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);
1773 
1774 	iopol = MAX(iopol, requested.thrp_int_iotier);
1775 	iopol = MAX(iopol, requested.thrp_ext_iotier);
1776 
1777 	/* Apply the kevent iotier override */
1778 	iopol = MIN(iopol, requested.thrp_iotier_kevent_override);
1779 
1780 	next.thep_io_tier = iopol;
1781 
1782 	/*
1783 	 * If a QoS override is causing IO to go into a lower tier, we also set
1784 	 * the passive bit so that a thread doesn't end up stuck in its own throttle
1785 	 * window when the override goes away.
1786 	 */
1787 
1788 	int next_qos_iotier = thread_qos_policy_params.qos_iotier[next.thep_qos];
1789 	int req_qos_iotier = thread_qos_policy_params.qos_iotier[requested.thrp_qos];
1790 	bool qos_io_override_active = (next_qos_iotier < req_qos_iotier);
1791 
1792 	/* Calculate Passive IO policy */
1793 	if (requested.thrp_ext_iopassive ||
1794 	    requested.thrp_int_iopassive ||
1795 	    qos_io_override_active ||
1796 	    task_effective.tep_io_passive) {
1797 		next.thep_io_passive = 1;
1798 	}
1799 
1800 	/* Calculate timer QOS */
1801 	uint32_t latency_qos = requested.thrp_latency_qos;
1802 
1803 	if (!next.thep_promote_above_task) {
1804 		latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
1805 	}
1806 
1807 	latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);
1808 
1809 	next.thep_latency_qos = latency_qos;
1810 
1811 	/* Calculate throughput QOS */
1812 	uint32_t through_qos = requested.thrp_through_qos;
1813 
1814 	if (!next.thep_promote_above_task) {
1815 		through_qos = MAX(through_qos, task_effective.tep_through_qos);
1816 	}
1817 
1818 	through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);
1819 
1820 	next.thep_through_qos = through_qos;
1821 
1822 	if (task_effective.tep_terminated || requested.thrp_terminated) {
1823 		/* Shoot down the throttles that slow down exit or response to SIGTERM */
1824 		next.thep_terminated    = 1;
1825 		next.thep_darwinbg      = 0;
1826 		next.thep_io_tier       = THROTTLE_LEVEL_TIER0;
1827 		next.thep_qos           = THREAD_QOS_UNSPECIFIED;
1828 		next.thep_latency_qos   = LATENCY_QOS_TIER_UNSPECIFIED;
1829 		next.thep_through_qos   = THROUGHPUT_QOS_TIER_UNSPECIFIED;
1830 		next.thep_wi_driven     = 0;
1831 	}
1832 
1833 	/*
1834 	 * Step 3:
1835 	 *  Swap out old policy for new policy
1836 	 */
1837 
1838 	struct thread_effective_policy prev = thread->effective_policy;
1839 
1840 	thread_update_qos_cpu_time_locked(thread);
1841 
1842 	/* This is the point where the new values become visible to other threads */
1843 	thread->effective_policy = next;
1844 
1845 	/*
1846 	 * Step 4:
1847 	 *  Pend updates that can't be done while holding the thread lock
1848 	 */
1849 
1850 	if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg) {
1851 		pend_token->tpt_update_sockets = 1;
1852 	}
1853 
1854 	/* TODO: Doesn't this only need to be done if the throttle went up? */
1855 	if (prev.thep_io_tier != next.thep_io_tier) {
1856 		pend_token->tpt_update_throttle = 1;
1857 	}
1858 
1859 	/*
1860 	 * Check for the attributes that sfi_thread_classify() consults,
1861 	 *  and trigger SFI re-evaluation.
1862 	 */
1863 	if (prev.thep_qos != next.thep_qos ||
1864 	    prev.thep_darwinbg != next.thep_darwinbg) {
1865 		pend_token->tpt_update_thread_sfi = 1;
1866 	}
1867 
1868 	integer_t old_base_pri = thread->base_pri;
1869 
1870 	/*
1871 	 * Step 5:
1872 	 *  Update other subsystems as necessary if something has changed
1873 	 */
1874 
1875 	/* Check for the attributes that thread_recompute_priority() consults */
1876 	if (prev.thep_qos != next.thep_qos ||
1877 	    prev.thep_qos_relprio != next.thep_qos_relprio ||
1878 	    prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent ||
1879 	    prev.thep_promote_above_task != next.thep_promote_above_task ||
1880 	    prev.thep_terminated != next.thep_terminated ||
1881 	    prev.thep_wi_driven != next.thep_wi_driven ||
1882 	    pend_token->tpt_force_recompute_pri == 1 ||
1883 	    recompute_priority) {
1884 		thread_recompute_priority(thread);
1885 	}
1886 
1887 	/*
1888 	 * Check if the thread is waiting on a turnstile and needs priority propagation.
1889 	 */
1890 	if (pend_token->tpt_update_turnstile &&
1891 	    ((old_base_pri == thread->base_pri) ||
1892 	    !thread_get_waiting_turnstile(thread))) {
1893 		/*
1894 		 * Reset update turnstile pend token since either
1895 		 * the thread priority did not change or thread is
1896 		 * not blocked on a turnstile.
1897 		 */
1898 		pend_token->tpt_update_turnstile = 0;
1899 	}
1900 }
1901 
1902 
1903 /*
1904  * Initiate a thread policy state transition on a thread with its TID
1905  * Useful if you cannot guarantee the thread won't get terminated
1906  * Precondition: No locks are held
1907  * Will take task lock - using the non-tid variant is faster
1908  * if you already have a thread ref.
1909  */
1910 void
1911 proc_set_thread_policy_with_tid(task_t     task,
1912     uint64_t   tid,
1913     int        category,
1914     int        flavor,
1915     int        value)
1916 {
1917 	/* takes task lock, returns ref'ed thread or NULL */
1918 	thread_t thread = task_findtid(task, tid);
1919 
1920 	if (thread == THREAD_NULL) {
1921 		return;
1922 	}
1923 
1924 	proc_set_thread_policy(thread, category, flavor, value);
1925 
1926 	thread_deallocate(thread);
1927 }
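/*
 * Illustrative caller (hypothetical, for exposition only; 'task' and 'tid'
 * are assumed to be in hand): put a thread into DARWIN_BG by TID while
 * holding only a task reference:
 *
 *	proc_set_thread_policy_with_tid(task, tid, TASK_POLICY_INTERNAL,
 *	    TASK_POLICY_DARWIN_BG, 1);
 */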
1928 
1929 /*
1930  * Initiate a thread policy transition on a thread
1931  * This path supports networking transitions (i.e. darwinbg transitions)
1932  * Precondition: No locks are held
1933  */
1934 void
1935 proc_set_thread_policy(thread_t   thread,
1936     int        category,
1937     int        flavor,
1938     int        value)
1939 {
1940 	proc_set_thread_policy_ext(thread, category, flavor, value, 0);
1941 }
1942 
1943 void
1944 proc_set_thread_policy_ext(thread_t   thread,
1945     int        category,
1946     int        flavor,
1947     int        value,
1948     int        value2)
1949 {
1950 	struct task_pend_token pend_token = {};
1951 
1952 	thread_mtx_lock(thread);
1953 
1954 	proc_set_thread_policy_locked(thread, category, flavor, value, value2, &pend_token);
1955 
1956 	thread_mtx_unlock(thread);
1957 
1958 	thread_policy_update_complete_unlocked(thread, &pend_token);
1959 }
1960 
1961 /*
1962  * Do the things that can't be done while holding a thread mutex.
1963  * These are set up to call back into thread policy to get the latest value,
1964  * so they don't have to be synchronized with the update.
1965  * The only required semantic is 'call this sometime after updating effective policy'
1966  *
1967  * Precondition: Thread mutex is not held
1968  *
1969  * This may be called with the task lock held, but in that case it won't be
1970  * called with tpt_update_sockets set.
1971  */
1972 void
1973 thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
1974 {
1975 #ifdef MACH_BSD
1976 	if (pend_token->tpt_update_sockets) {
1977 		proc_apply_task_networkbg(task_pid(get_threadtask(thread)), thread);
1978 	}
1979 #endif /* MACH_BSD */
1980 
1981 	if (pend_token->tpt_update_throttle) {
1982 		rethrottle_thread(get_bsdthread_info(thread));
1983 	}
1984 
1985 	if (pend_token->tpt_update_thread_sfi) {
1986 		sfi_reevaluate(thread);
1987 	}
1988 
1989 	if (pend_token->tpt_update_turnstile) {
1990 		turnstile_update_thread_priority_chain(thread);
1991 	}
1992 }
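/*
 * Canonical pend-token sequence (illustrative; this mirrors
 * proc_set_thread_policy_ext() above rather than adding new behavior):
 *
 *	struct task_pend_token pend_token = {};
 *	thread_mtx_lock(thread);
 *	proc_set_thread_policy_locked(thread, category, flavor, value, value2, &pend_token);
 *	thread_mtx_unlock(thread);
 *	thread_policy_update_complete_unlocked(thread, &pend_token);
 */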
1993 
1994 /*
1995  * Set and update thread policy
1996  * Thread mutex might be held
1997  */
1998 static void
1999 proc_set_thread_policy_locked(thread_t          thread,
2000     int               category,
2001     int               flavor,
2002     int               value,
2003     int               value2,
2004     task_pend_token_t pend_token)
2005 {
2006 	spl_t s = splsched();
2007 	thread_lock(thread);
2008 
2009 	proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
2010 
2011 	thread_unlock(thread);
2012 	splx(s);
2013 }
2014 
2015 /*
2016  * Set and update thread policy
2017  * Thread spinlock is held
2018  */
2019 static void
2020 proc_set_thread_policy_spinlocked(thread_t          thread,
2021     int               category,
2022     int               flavor,
2023     int               value,
2024     int               value2,
2025     task_pend_token_t pend_token)
2026 {
2027 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2028 	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
2029 	    thread_tid(thread), threquested_0(thread),
2030 	    threquested_1(thread), value, 0);
2031 
2032 	thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
2033 
2034 	thread_policy_update_spinlocked(thread, false, pend_token);
2035 
2036 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2037 	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
2038 	    thread_tid(thread), threquested_0(thread),
2039 	    threquested_1(thread), tpending(pend_token), 0);
2040 }
2041 
2042 /*
2043  * Set the requested state for a specific flavor to a specific value.
2044  */
2045 static void
2046 thread_set_requested_policy_spinlocked(thread_t     thread,
2047     int               category,
2048     int               flavor,
2049     int               value,
2050     int               value2,
2051     task_pend_token_t pend_token)
2052 {
2053 	int tier, passive;
2054 
2055 	struct thread_requested_policy requested = thread->requested_policy;
2056 
2057 	switch (flavor) {
2058 	/* Category: EXTERNAL and INTERNAL, thread and task */
2059 
2060 	case TASK_POLICY_DARWIN_BG:
2061 		if (category == TASK_POLICY_EXTERNAL) {
2062 			requested.thrp_ext_darwinbg = value;
2063 		} else {
2064 			requested.thrp_int_darwinbg = value;
2065 		}
2066 		pend_token->tpt_update_turnstile = 1;
2067 		break;
2068 
2069 	case TASK_POLICY_IOPOL:
2070 		proc_iopol_to_tier(value, &tier, &passive);
2071 		if (category == TASK_POLICY_EXTERNAL) {
2072 			requested.thrp_ext_iotier  = tier;
2073 			requested.thrp_ext_iopassive = passive;
2074 		} else {
2075 			requested.thrp_int_iotier  = tier;
2076 			requested.thrp_int_iopassive = passive;
2077 		}
2078 		break;
2079 
2080 	case TASK_POLICY_IO:
2081 		if (category == TASK_POLICY_EXTERNAL) {
2082 			requested.thrp_ext_iotier = value;
2083 		} else {
2084 			requested.thrp_int_iotier = value;
2085 		}
2086 		break;
2087 
2088 	case TASK_POLICY_PASSIVE_IO:
2089 		if (category == TASK_POLICY_EXTERNAL) {
2090 			requested.thrp_ext_iopassive = value;
2091 		} else {
2092 			requested.thrp_int_iopassive = value;
2093 		}
2094 		break;
2095 
2096 	/* Category: ATTRIBUTE, thread only */
2097 
2098 	case TASK_POLICY_PIDBIND_BG:
2099 		assert(category == TASK_POLICY_ATTRIBUTE);
2100 		requested.thrp_pidbind_bg = value;
2101 		pend_token->tpt_update_turnstile = 1;
2102 		break;
2103 
2104 	case TASK_POLICY_LATENCY_QOS:
2105 		assert(category == TASK_POLICY_ATTRIBUTE);
2106 		requested.thrp_latency_qos = value;
2107 		break;
2108 
2109 	case TASK_POLICY_THROUGH_QOS:
2110 		assert(category == TASK_POLICY_ATTRIBUTE);
2111 		requested.thrp_through_qos = value;
2112 		break;
2113 
2114 	case TASK_POLICY_QOS_OVERRIDE:
2115 		assert(category == TASK_POLICY_ATTRIBUTE);
2116 		requested.thrp_qos_override = value;
2117 		pend_token->tpt_update_turnstile = 1;
2118 		break;
2119 
2120 	case TASK_POLICY_QOS_AND_RELPRIO:
2121 		assert(category == TASK_POLICY_ATTRIBUTE);
2122 		requested.thrp_qos = value;
2123 		requested.thrp_qos_relprio = value2;
2124 		pend_token->tpt_update_turnstile = 1;
2125 		DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
2126 		break;
2127 
2128 	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
2129 		assert(category == TASK_POLICY_ATTRIBUTE);
2130 		requested.thrp_qos_workq_override = value;
2131 		pend_token->tpt_update_turnstile = 1;
2132 		break;
2133 
2134 	case TASK_POLICY_QOS_PROMOTE:
2135 		assert(category == TASK_POLICY_ATTRIBUTE);
2136 		requested.thrp_qos_promote = value;
2137 		break;
2138 
2139 	case TASK_POLICY_QOS_KEVENT_OVERRIDE:
2140 		assert(category == TASK_POLICY_ATTRIBUTE);
2141 		requested.thrp_qos_kevent_override = value;
2142 		pend_token->tpt_update_turnstile = 1;
2143 		break;
2144 
2145 	case TASK_POLICY_QOS_SERVICER_OVERRIDE:
2146 		assert(category == TASK_POLICY_ATTRIBUTE);
2147 		requested.thrp_qos_wlsvc_override = value;
2148 		pend_token->tpt_update_turnstile = 1;
2149 		break;
2150 
2151 	case TASK_POLICY_TERMINATED:
2152 		assert(category == TASK_POLICY_ATTRIBUTE);
2153 		requested.thrp_terminated = value;
2154 		break;
2155 
2156 	case TASK_POLICY_IOTIER_KEVENT_OVERRIDE:
2157 		assert(category == TASK_POLICY_ATTRIBUTE);
2158 		requested.thrp_iotier_kevent_override = value;
2159 		break;
2160 
2161 	case TASK_POLICY_WI_DRIVEN:
2162 		assert(category == TASK_POLICY_ATTRIBUTE);
2163 		assert(thread == current_thread());
2164 
2165 		const bool set_policy = value;
2166 		const sched_mode_t mode = value2;
2167 
2168 		requested.thrp_wi_driven = set_policy ? 1 : 0;
2169 
2170 		/*
2171 		 * No sched mode change for REALTIME (threads must explicitly
2172 		 * opt-in), however the priority_offset needs to be updated.
2173 		 */
2174 		if (mode == TH_MODE_REALTIME) {
2175 			const int pri = work_interval_get_priority(thread);
2176 			assert3u(pri, >=, BASEPRI_RTQUEUES);
2177 			thread->realtime.priority_offset = set_policy ?
2178 			    (uint8_t)(pri - BASEPRI_RTQUEUES) : 0;
2179 		} else {
2180 			sched_set_thread_mode_user(thread, mode);
2181 			if (set_policy) {
2182 				thread->static_param = true;
2183 			}
2184 		}
2185 		break;
2186 
2187 	default:
2188 		panic("unknown task policy: %d %d %d", category, flavor, value);
2189 		break;
2190 	}
2191 
2192 	thread->requested_policy = requested;
2193 }
2194 
2195 /*
2196  * Gets what you set. Effective values may be different.
2197  * Precondition: No locks are held
2198  */
2199 int
2200 proc_get_thread_policy(thread_t   thread,
2201     int        category,
2202     int        flavor)
2203 {
2204 	int value = 0;
2205 	thread_mtx_lock(thread);
2206 	value = proc_get_thread_policy_locked(thread, category, flavor, NULL);
2207 	thread_mtx_unlock(thread);
2208 	return value;
2209 }
2210 
2211 static int
2212 proc_get_thread_policy_locked(thread_t   thread,
2213     int        category,
2214     int        flavor,
2215     int*       value2)
2216 {
2217 	int value = 0;
2218 
2219 	spl_t s = splsched();
2220 	thread_lock(thread);
2221 
2222 	value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2);
2223 
2224 	thread_unlock(thread);
2225 	splx(s);
2226 
2227 	return value;
2228 }
2229 
2230 /*
2231  * Gets what you set. Effective values may be different.
2232  */
2233 static int
2234 thread_get_requested_policy_spinlocked(thread_t thread,
2235     int      category,
2236     int      flavor,
2237     int*     value2)
2238 {
2239 	int value = 0;
2240 
2241 	struct thread_requested_policy requested = thread->requested_policy;
2242 
2243 	switch (flavor) {
2244 	case TASK_POLICY_DARWIN_BG:
2245 		if (category == TASK_POLICY_EXTERNAL) {
2246 			value = requested.thrp_ext_darwinbg;
2247 		} else {
2248 			value = requested.thrp_int_darwinbg;
2249 		}
2250 		break;
2251 	case TASK_POLICY_IOPOL:
2252 		if (category == TASK_POLICY_EXTERNAL) {
2253 			value = proc_tier_to_iopol(requested.thrp_ext_iotier,
2254 			    requested.thrp_ext_iopassive);
2255 		} else {
2256 			value = proc_tier_to_iopol(requested.thrp_int_iotier,
2257 			    requested.thrp_int_iopassive);
2258 		}
2259 		break;
2260 	case TASK_POLICY_IO:
2261 		if (category == TASK_POLICY_EXTERNAL) {
2262 			value = requested.thrp_ext_iotier;
2263 		} else {
2264 			value = requested.thrp_int_iotier;
2265 		}
2266 		break;
2267 	case TASK_POLICY_PASSIVE_IO:
2268 		if (category == TASK_POLICY_EXTERNAL) {
2269 			value = requested.thrp_ext_iopassive;
2270 		} else {
2271 			value = requested.thrp_int_iopassive;
2272 		}
2273 		break;
2274 	case TASK_POLICY_QOS:
2275 		assert(category == TASK_POLICY_ATTRIBUTE);
2276 		value = requested.thrp_qos;
2277 		break;
2278 	case TASK_POLICY_QOS_OVERRIDE:
2279 		assert(category == TASK_POLICY_ATTRIBUTE);
2280 		value = requested.thrp_qos_override;
2281 		break;
2282 	case TASK_POLICY_LATENCY_QOS:
2283 		assert(category == TASK_POLICY_ATTRIBUTE);
2284 		value = requested.thrp_latency_qos;
2285 		break;
2286 	case TASK_POLICY_THROUGH_QOS:
2287 		assert(category == TASK_POLICY_ATTRIBUTE);
2288 		value = requested.thrp_through_qos;
2289 		break;
2290 	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
2291 		assert(category == TASK_POLICY_ATTRIBUTE);
2292 		value = requested.thrp_qos_workq_override;
2293 		break;
2294 	case TASK_POLICY_QOS_AND_RELPRIO:
2295 		assert(category == TASK_POLICY_ATTRIBUTE);
2296 		assert(value2 != NULL);
2297 		value = requested.thrp_qos;
2298 		*value2 = requested.thrp_qos_relprio;
2299 		break;
2300 	case TASK_POLICY_QOS_PROMOTE:
2301 		assert(category == TASK_POLICY_ATTRIBUTE);
2302 		value = requested.thrp_qos_promote;
2303 		break;
2304 	case TASK_POLICY_QOS_KEVENT_OVERRIDE:
2305 		assert(category == TASK_POLICY_ATTRIBUTE);
2306 		value = requested.thrp_qos_kevent_override;
2307 		break;
2308 	case TASK_POLICY_QOS_SERVICER_OVERRIDE:
2309 		assert(category == TASK_POLICY_ATTRIBUTE);
2310 		value = requested.thrp_qos_wlsvc_override;
2311 		break;
2312 	case TASK_POLICY_TERMINATED:
2313 		assert(category == TASK_POLICY_ATTRIBUTE);
2314 		value = requested.thrp_terminated;
2315 		break;
2316 	case TASK_POLICY_IOTIER_KEVENT_OVERRIDE:
2317 		assert(category == TASK_POLICY_ATTRIBUTE);
2318 		value = requested.thrp_iotier_kevent_override;
2319 		break;
2320 
2321 	case TASK_POLICY_WI_DRIVEN:
2322 		assert(category == TASK_POLICY_ATTRIBUTE);
2323 		value = requested.thrp_wi_driven;
2324 		break;
2325 
2326 	default:
2327 		panic("unknown policy_flavor %d", flavor);
2328 		break;
2329 	}
2330 
2331 	return value;
2332 }
2333 
2334 /*
2335  * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
2336  *
2337  * NOTE: This accessor does not take the task or thread lock.
2338  * Notifications of state updates need to be externally synchronized with state queries.
2339  * This routine *MUST* remain interrupt safe, as it is potentially invoked
2340  * within the context of a timer interrupt.
2341  *
2342  * TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
2343  *      Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
2344  *      I don't think that cost is worth not having the right answer.
2345  */
2346 int
2347 proc_get_effective_thread_policy(thread_t thread,
2348     int      flavor)
2349 {
2350 	int value = 0;
2351 
2352 	switch (flavor) {
2353 	case TASK_POLICY_DARWIN_BG:
2354 		/*
2355 		 * This call is used within the timer layer, as well as
2356 		 * prioritizing requests to the graphics system.
2357 		 * It also informs SFI and originator-bg-state.
2358 		 * Returns 1 for background mode, 0 for normal mode
2359 		 */
2360 
2361 		value = thread->effective_policy.thep_darwinbg ? 1 : 0;
2362 		break;
2363 	case TASK_POLICY_IO:
2364 		/*
2365 		 * The I/O system calls here to find out what throttling tier to apply to an operation.
2366 		 * Returns THROTTLE_LEVEL_* values
2367 		 */
2368 		value = thread->effective_policy.thep_io_tier;
2369 		if (thread->iotier_override != THROTTLE_LEVEL_NONE) {
2370 			value = MIN(value, thread->iotier_override);
2371 		}
2372 		break;
2373 	case TASK_POLICY_PASSIVE_IO:
2374 		/*
2375 		 * The I/O system calls here to find out whether an operation should be passive.
2376 		 * (i.e. not cause operations with lower throttle tiers to be throttled)
2377 		 * Returns 1 for passive mode, 0 for normal mode
2378 		 *
2379 		 * If an override is causing IO to go into a lower tier, we also set
2380 		 * the passive bit so that a thread doesn't end up stuck in its own throttle
2381 		 * window when the override goes away.
2382 		 */
2383 		value = thread->effective_policy.thep_io_passive ? 1 : 0;
2384 		if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
2385 		    thread->iotier_override < thread->effective_policy.thep_io_tier) {
2386 			value = 1;
2387 		}
2388 		break;
2389 	case TASK_POLICY_ALL_SOCKETS_BG:
2390 		/*
2391 		 * do_background_socket() calls this to determine whether
2392 		 * it should change the thread's sockets
2393 		 * Returns 1 for background mode, 0 for normal mode
2394 		 * This consults both thread and task so un-DBGing a thread while the task is BG
2395 		 * doesn't get you out of the network throttle.
2396 		 */
2397 		value = (thread->effective_policy.thep_all_sockets_bg ||
2398 		    get_threadtask(thread)->effective_policy.tep_all_sockets_bg) ? 1 : 0;
2399 		break;
2400 	case TASK_POLICY_NEW_SOCKETS_BG:
2401 		/*
2402 		 * socreate() calls this to determine if it should mark a new socket as background
2403 		 * Returns 1 for background mode, 0 for normal mode
2404 		 */
2405 		value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
2406 		break;
2407 	case TASK_POLICY_LATENCY_QOS:
2408 		/*
2409 		 * timer arming calls into here to find out the timer coalescing level
2410 		 * Returns a latency QoS tier (0-6)
2411 		 */
2412 		value = thread->effective_policy.thep_latency_qos;
2413 		break;
2414 	case TASK_POLICY_THROUGH_QOS:
2415 		/*
2416 		 * This value is passed into the urgency callout from the scheduler
2417 		 * to the performance management subsystem.
2418 		 *
2419 		 * Returns a throughput QoS tier (0-6)
2420 		 */
2421 		value = thread->effective_policy.thep_through_qos;
2422 		break;
2423 	case TASK_POLICY_QOS:
2424 		/*
2425 		 * This is communicated to the performance management layer and SFI.
2426 		 *
2427 		 * Returns a QoS policy tier
2428 		 */
2429 		value = thread->effective_policy.thep_qos;
2430 		break;
2431 	default:
2432 		panic("unknown thread policy flavor %d", flavor);
2433 		break;
2434 	}
2435 
2436 	return value;
2437 }
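/*
 * Illustrative pull-model caller (hypothetical): the I/O layer queries the
 * effective policy at issue time rather than subscribing to updates:
 *
 *	int tier    = proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
 *	int passive = proc_get_effective_thread_policy(thread, TASK_POLICY_PASSIVE_IO);
 */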
2438 
2439 
2440 /*
2441  * (integer_t) casts limit the number of bits we can fit here.
2442  * This interface is deprecated, presumably replaced by the _EXT struct.
2443  */
2444 static void
2445 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
2446 {
2447 	uint64_t bits = 0;
2448 	struct thread_requested_policy requested = thread->requested_policy;
2449 
2450 	bits |= (requested.thrp_int_darwinbg    ? POLICY_REQ_INT_DARWIN_BG  : 0);
2451 	bits |= (requested.thrp_ext_darwinbg    ? POLICY_REQ_EXT_DARWIN_BG  : 0);
2452 	bits |= (requested.thrp_int_iotier      ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
2453 	bits |= (requested.thrp_ext_iotier      ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
2454 	bits |= (requested.thrp_int_iopassive   ? POLICY_REQ_INT_PASSIVE_IO : 0);
2455 	bits |= (requested.thrp_ext_iopassive   ? POLICY_REQ_EXT_PASSIVE_IO : 0);
2456 
2457 	bits |= (requested.thrp_qos             ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
2458 	bits |= (requested.thrp_qos_override    ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT)   : 0);
2459 
2460 	bits |= (requested.thrp_pidbind_bg      ? POLICY_REQ_PIDBIND_BG     : 0);
2461 
2462 	bits |= (requested.thrp_latency_qos     ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
2463 	bits |= (requested.thrp_through_qos     ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);
2464 
2465 	info->requested = (integer_t) bits;
2466 	bits = 0;
2467 
2468 	struct thread_effective_policy effective = thread->effective_policy;
2469 
2470 	bits |= (effective.thep_darwinbg        ? POLICY_EFF_DARWIN_BG      : 0);
2471 
2472 	bits |= (effective.thep_io_tier         ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
2473 	bits |= (effective.thep_io_passive      ? POLICY_EFF_IO_PASSIVE     : 0);
2474 	bits |= (effective.thep_all_sockets_bg  ? POLICY_EFF_ALL_SOCKETS_BG : 0);
2475 	bits |= (effective.thep_new_sockets_bg  ? POLICY_EFF_NEW_SOCKETS_BG : 0);
2476 
2477 	bits |= (effective.thep_qos             ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);
2478 
2479 	bits |= (effective.thep_latency_qos     ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
2480 	bits |= (effective.thep_through_qos     ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);
2481 
2482 	info->effective = (integer_t)bits;
2483 	bits = 0;
2484 
2485 	info->pending = 0;
2486 }
2487 
2488 /*
2489  * Sneakily trace either the task and thread requested
2490  * or just the thread requested, depending on if we have enough room.
2491  * or just the thread requested, depending on whether we have enough room.
2492  *
2493  *                                LP32            LP64
2494  * threquested_0(thread)          thread[0]       task[0]
2495  * threquested_1(thread)          thread[1]       thread[0]
2496  *
2497  */
2498 
2499 uintptr_t
2500 threquested_0(thread_t thread)
2501 {
2502 	static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");
2503 
2504 	uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2505 
2506 	return raw[0];
2507 }
2508 
2509 uintptr_t
2510 threquested_1(thread_t thread)
2511 {
2512 #if defined __LP64__
2513 	return *(uintptr_t*)&get_threadtask(thread)->requested_policy;
2514 #else
2515 	uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2516 	return raw[1];
2517 #endif
2518 }
2519 
2520 uintptr_t
2521 theffective_0(thread_t thread)
2522 {
2523 	static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");
2524 
2525 	uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2526 	return raw[0];
2527 }
2528 
2529 uintptr_t
2530 theffective_1(thread_t thread)
2531 {
2532 #if defined __LP64__
2533 	return *(uintptr_t*)&get_threadtask(thread)->effective_policy;
2534 #else
2535 	uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2536 	return raw[1];
2537 #endif
2538 }
2539 
2540 
2541 /*
2542  * Set an override on the thread which is consulted with a
2543  * higher priority than the task/thread policy. This should
2544  * only be set for temporary grants until the thread
2545  * returns to the userspace boundary
2546  *
2547  * We use atomic operations to swap in the override, with
2548  * the assumption that the thread itself can
2549  * read the override and clear it on return to userspace.
2550  *
2551  * No locking is performed, since it is acceptable to see
2552  * a stale override for one loop through throttle_lowpri_io().
2553  * However a thread reference must be held on the thread.
2554  */
2555 
2556 void
2557 set_thread_iotier_override(thread_t thread, int policy)
2558 {
2559 	int current_override;
2560 
2561 	/* Let most aggressive I/O policy win until user boundary */
2562 	do {
2563 		current_override = thread->iotier_override;
2564 
2565 		if (current_override != THROTTLE_LEVEL_NONE) {
2566 			policy = MIN(current_override, policy);
2567 		}
2568 
2569 		if (current_override == policy) {
2570 			/* no effective change */
2571 			return;
2572 		}
2573 	} while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));
2574 
2575 	/*
2576 	 * Since the thread may be currently throttled,
2577 	 * re-evaluate tiers and potentially break out
2578 	 * of an msleep
2579 	 */
2580 	rethrottle_thread(get_bsdthread_info(thread));
2581 }
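/*
 * Illustrative use (hypothetical caller; 'owner_thread' is assumed): grant
 * the owner of a contended resource a temporary tier-0 override, letting the
 * CAS loop above keep whichever override is most aggressive:
 *
 *	set_thread_iotier_override(owner_thread, THROTTLE_LEVEL_TIER0);
 */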
2582 
2583 /*
2584  * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
2585  * semaphores, dispatch_sync) may result in priority inversions where a higher priority
2586  * (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower
2587  * priority thread. In these cases, we attempt to propagate the priority token, as long
2588  * as the subsystem informs us of the relationships between the threads. The userspace
2589  * synchronization subsystem should maintain the information of owner->resource and
2590  * resource->waiters itself.
2591  */
2592 
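/*
 * Illustrative pairing (hypothetical pthread caller; 'owner_tid' and
 * 'mutex_uaddr' are assumptions): boost a low-QoS mutex owner on behalf of a
 * high-QoS waiter, then drop the boost once the mutex is released:
 *
 *	proc_thread_qos_add_override(task, THREAD_NULL, owner_tid,
 *	    THREAD_QOS_USER_INITIATED, TRUE, mutex_uaddr,
 *	    THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
 *	...
 *	proc_thread_qos_remove_override(task, THREAD_NULL, owner_tid,
 *	    mutex_uaddr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
 */
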
2593 /*
2594  * This helper canonicalizes the resource/resource_type given the current qos_override_mode
2595  * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
2596  * to be handled specially in the future, but for now it's fine to slam
2597  * *resource to USER_ADDR_NULL even if it was previously a wildcard.
2598  */
2599 static void
2600 canonicalize_resource_and_type(user_addr_t *resource, int *resource_type)
2601 {
2602 	if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2603 		/* Map all input resource/type to a single one */
2604 		*resource = USER_ADDR_NULL;
2605 		*resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
2606 	} else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
2607 		/* no transform */
2608 	} else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
2609 		/* Map all mutex overrides to a single one, to avoid memory overhead */
2610 		if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
2611 			*resource = USER_ADDR_NULL;
2612 		}
2613 	}
2614 }
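/*
 * Example (descriptive): in the default
 * QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE mode,
 * two distinct pthread mutex addresses both canonicalize to
 * (USER_ADDR_NULL, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) and so share a
 * single override object; other resource types keep their addresses.
 */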
2615 
2616 /* This helper routine finds an existing override if known. Locking should be done by caller */
2617 static struct thread_qos_override *
2618 find_qos_override(thread_t thread,
2619     user_addr_t resource,
2620     int resource_type)
2621 {
2622 	struct thread_qos_override *override;
2623 
2624 	override = thread->overrides;
2625 	while (override) {
2626 		if (override->override_resource == resource &&
2627 		    override->override_resource_type == resource_type) {
2628 			return override;
2629 		}
2630 
2631 		override = override->override_next;
2632 	}
2633 
2634 	return NULL;
2635 }
2636 
2637 static void
2638 find_and_decrement_qos_override(thread_t       thread,
2639     user_addr_t    resource,
2640     int            resource_type,
2641     boolean_t      reset,
2642     struct thread_qos_override **free_override_list)
2643 {
2644 	struct thread_qos_override *override, *override_prev;
2645 
2646 	override_prev = NULL;
2647 	override = thread->overrides;
2648 	while (override) {
2649 		struct thread_qos_override *override_next = override->override_next;
2650 
2651 		if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
2652 		    (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) {
2653 			if (reset) {
2654 				override->override_contended_resource_count = 0;
2655 			} else {
2656 				override->override_contended_resource_count--;
2657 			}
2658 
2659 			if (override->override_contended_resource_count == 0) {
2660 				if (override_prev == NULL) {
2661 					thread->overrides = override_next;
2662 				} else {
2663 					override_prev->override_next = override_next;
2664 				}
2665 
2666 				/* Add to out-param for later zfree */
2667 				override->override_next = *free_override_list;
2668 				*free_override_list = override;
2669 			} else {
2670 				override_prev = override;
2671 			}
2672 
2673 			if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
2674 				return;
2675 			}
2676 		} else {
2677 			override_prev = override;
2678 		}
2679 
2680 		override = override_next;
2681 	}
2682 }
2683 
2684 /* This helper recalculates the current requested override using the policy selected at boot */
2685 static int
2686 calculate_requested_qos_override(thread_t thread)
2687 {
2688 	if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2689 		return THREAD_QOS_UNSPECIFIED;
2690 	}
2691 
2692 	/* iterate over all overrides and calculate MAX */
2693 	struct thread_qos_override *override;
2694 	int qos_override = THREAD_QOS_UNSPECIFIED;
2695 
2696 	override = thread->overrides;
2697 	while (override) {
2698 		qos_override = MAX(qos_override, override->override_qos);
2699 		override = override->override_next;
2700 	}
2701 
2702 	return qos_override;
2703 }
2704 
2705 /*
2706  * Returns:
2707  * - 0 on success
2708  * - EINVAL if some invalid input was passed
2709  */
2710 static int
2711 proc_thread_qos_add_override_internal(thread_t         thread,
2712     int              override_qos,
2713     boolean_t        first_override_for_resource,
2714     user_addr_t      resource,
2715     int              resource_type)
2716 {
2717 	struct task_pend_token pend_token = {};
2718 	int rc = 0;
2719 
2720 	thread_mtx_lock(thread);
2721 
2722 	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
2723 	    thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);
2724 
2725 	DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
2726 	    uint64_t, thread->requested_policy.thrp_qos,
2727 	    uint64_t, thread->effective_policy.thep_qos,
2728 	    int, override_qos, boolean_t, first_override_for_resource);
2729 
2730 	struct thread_qos_override *override;
2731 	struct thread_qos_override *override_new = NULL;
2732 	int new_qos_override, prev_qos_override;
2733 	int new_effective_qos;
2734 
2735 	canonicalize_resource_and_type(&resource, &resource_type);
2736 
2737 	override = find_qos_override(thread, resource, resource_type);
2738 	if (first_override_for_resource && !override) {
2739 		/* We need to allocate a new object. Drop the thread mutex and
2740 		 * recheck afterwards in case someone else added the override.
2741 		 */
2742 		thread_mtx_unlock(thread);
2743 		override_new = zalloc(thread_qos_override_zone);
2744 		thread_mtx_lock(thread);
2745 		override = find_qos_override(thread, resource, resource_type);
2746 	}
2747 	if (first_override_for_resource && override) {
2748 		/* Someone else already allocated while the thread mutex was dropped */
2749 		override->override_contended_resource_count++;
2750 	} else if (!override && override_new) {
2751 		override = override_new;
2752 		override_new = NULL;
2753 		override->override_next = thread->overrides;
2754 		/* since first_override_for_resource was TRUE */
2755 		override->override_contended_resource_count = 1;
2756 		override->override_resource = resource;
2757 		override->override_resource_type = (int16_t)resource_type;
2758 		override->override_qos = THREAD_QOS_UNSPECIFIED;
2759 		thread->overrides = override;
2760 	}
2761 
2762 	if (override) {
2763 		if (override->override_qos == THREAD_QOS_UNSPECIFIED) {
2764 			override->override_qos = (int16_t)override_qos;
2765 		} else {
2766 			override->override_qos = MAX(override->override_qos, (int16_t)override_qos);
2767 		}
2768 	}
2769 
2770 	/* Determine how to combine the various overrides into a single current
2771 	 * requested override
2772 	 */
2773 	new_qos_override = calculate_requested_qos_override(thread);
2774 
2775 	prev_qos_override = proc_get_thread_policy_locked(thread,
2776 	    TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
2777 
2778 	if (new_qos_override != prev_qos_override) {
2779 		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
2780 		    TASK_POLICY_QOS_OVERRIDE,
2781 		    new_qos_override, 0, &pend_token);
2782 	}
2783 
2784 	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
2785 
2786 	thread_mtx_unlock(thread);
2787 
2788 	thread_policy_update_complete_unlocked(thread, &pend_token);
2789 
2790 	if (override_new) {
2791 		zfree(thread_qos_override_zone, override_new);
2792 	}
2793 
2794 	DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
2795 	    int, new_qos_override, int, new_effective_qos, int, rc);
2796 
2797 	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
2798 	    new_qos_override, resource, resource_type, 0, 0);
2799 
2800 	return rc;
2801 }
2802 
2803 int
2804 proc_thread_qos_add_override(task_t           task,
2805     thread_t         thread,
2806     uint64_t         tid,
2807     int              override_qos,
2808     boolean_t        first_override_for_resource,
2809     user_addr_t      resource,
2810     int              resource_type)
2811 {
2812 	boolean_t has_thread_reference = FALSE;
2813 	int rc = 0;
2814 
2815 	if (thread == THREAD_NULL) {
2816 		thread = task_findtid(task, tid);
2817 		/* returns referenced thread */
2818 
2819 		if (thread == THREAD_NULL) {
2820 			KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
2821 			    tid, 0, 0xdead, 0, 0);
2822 			return ESRCH;
2823 		}
2824 		has_thread_reference = TRUE;
2825 	} else {
2826 		assert(get_threadtask(thread) == task);
2827 	}
2828 	rc = proc_thread_qos_add_override_internal(thread, override_qos,
2829 	    first_override_for_resource, resource, resource_type);
2830 	if (has_thread_reference) {
2831 		thread_deallocate(thread);
2832 	}
2833 
2834 	return rc;
2835 }
2836 
2837 static void
2838 proc_thread_qos_remove_override_internal(thread_t       thread,
2839     user_addr_t    resource,
2840     int            resource_type,
2841     boolean_t      reset)
2842 {
2843 	struct task_pend_token pend_token = {};
2844 
2845 	struct thread_qos_override *deferred_free_override_list = NULL;
2846 	int new_qos_override, prev_qos_override, new_effective_qos;
2847 
2848 	thread_mtx_lock(thread);
2849 
2850 	canonicalize_resource_and_type(&resource, &resource_type);
2851 
2852 	find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);
2853 
2854 	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
2855 	    thread_tid(thread), resource, reset, 0, 0);
2856 
2857 	DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
2858 	    uint64_t, thread->requested_policy.thrp_qos,
2859 	    uint64_t, thread->effective_policy.thep_qos);
2860 
2861 	/* Determine how to combine the various overrides into a single current requested override */
2862 	new_qos_override = calculate_requested_qos_override(thread);
2863 
2864 	spl_t s = splsched();
2865 	thread_lock(thread);
2866 
2867 	/*
2868 	 * The override chain and therefore the value of the current override is locked with thread mutex,
2869 	 * so we can do a get/set without races.  However, the rest of thread policy is locked under the spinlock.
2870 	 * This means you can't change the current override from a spinlock-only setter.
2871 	 */
2872 	prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
2873 
2874 	if (new_qos_override != prev_qos_override) {
2875 		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token);
2876 	}
2877 
2878 	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
2879 
2880 	thread_unlock(thread);
2881 	splx(s);
2882 
2883 	thread_mtx_unlock(thread);
2884 
2885 	thread_policy_update_complete_unlocked(thread, &pend_token);
2886 
2887 	while (deferred_free_override_list) {
2888 		struct thread_qos_override *override_next = deferred_free_override_list->override_next;
2889 
2890 		zfree(thread_qos_override_zone, deferred_free_override_list);
2891 		deferred_free_override_list = override_next;
2892 	}
2893 
2894 	DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
2895 	    int, new_qos_override, int, new_effective_qos);
2896 
2897 	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
2898 	    thread_tid(thread), 0, 0, 0, 0);
2899 }
2900 
2901 int
2902 proc_thread_qos_remove_override(task_t      task,
2903     thread_t    thread,
2904     uint64_t    tid,
2905     user_addr_t resource,
2906     int         resource_type)
2907 {
2908 	boolean_t has_thread_reference = FALSE;
2909 
2910 	if (thread == THREAD_NULL) {
2911 		thread = task_findtid(task, tid);
2912 		/* returns referenced thread */
2913 
2914 		if (thread == THREAD_NULL) {
2915 			KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2916 			    tid, 0, 0xdead, 0, 0);
2917 			return ESRCH;
2918 		}
2919 		has_thread_reference = TRUE;
2920 	} else {
2921 		assert(task == get_threadtask(thread));
2922 	}
2923 
2924 	proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE);
2925 
2926 	if (has_thread_reference) {
2927 		thread_deallocate(thread);
2928 	}
2929 
2930 	return 0;
2931 }
2932 
2933 /* Deallocate before thread termination */
2934 void
2935 proc_thread_qos_deallocate(thread_t thread)
2936 {
2937 	/* This thread must have no more IPC overrides. */
2938 	assert(thread->kevent_overrides == 0);
2939 	assert(thread->requested_policy.thrp_qos_kevent_override == THREAD_QOS_UNSPECIFIED);
2940 	assert(thread->requested_policy.thrp_qos_wlsvc_override == THREAD_QOS_UNSPECIFIED);
2941 
2942 	/*
2943 	 * Clear out any lingering override objects.
2944 	 */
2945 	struct thread_qos_override *override;
2946 
2947 	thread_mtx_lock(thread);
2948 	override = thread->overrides;
2949 	thread->overrides = NULL;
2950 	thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2951 	/* We don't need to re-evaluate thread policy here because the thread has already exited */
2952 	thread_mtx_unlock(thread);
2953 
2954 	while (override) {
2955 		struct thread_qos_override *override_next = override->override_next;
2956 
2957 		zfree(thread_qos_override_zone, override);
2958 		override = override_next;
2959 	}
2960 }
2961 
2962 /*
2963  * Set up the primordial thread's QoS
2964  */
2965 void
2966 task_set_main_thread_qos(task_t task, thread_t thread)
2967 {
2968 	struct task_pend_token pend_token = {};
2969 
2970 	assert(get_threadtask(thread) == task);
2971 
2972 	thread_mtx_lock(thread);
2973 
2974 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2975 	    (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
2976 	    thread_tid(thread), threquested_0(thread), threquested_1(thread),
2977 	    thread->requested_policy.thrp_qos, 0);
2978 
2979 	thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
2980 
2981 	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
2982 	    primordial_qos, 0, &pend_token);
2983 
2984 	thread_mtx_unlock(thread);
2985 
2986 	thread_policy_update_complete_unlocked(thread, &pend_token);
2987 
2988 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2989 	    (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
2990 	    thread_tid(thread), threquested_0(thread), threquested_1(thread),
2991 	    primordial_qos, 0);
2992 }
2993 
2994 /*
2995  * KPI for pthread kext
2996  *
2997  * Return a good guess at what the initial manager QoS will be
2998  * Dispatch can override this in userspace if it so chooses
2999  */
3000 thread_qos_t
3001 task_get_default_manager_qos(task_t task)
3002 {
3003 	thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
3004 
3005 	if (primordial_qos == THREAD_QOS_LEGACY) {
3006 		primordial_qos = THREAD_QOS_USER_INITIATED;
3007 	}
3008 
3009 	return primordial_qos;
3010 }
3011 
3012 /*
3013  * Check if the kernel promotion on thread has changed
3014  * and apply it.
3015  *
3016  * thread locked on entry and exit
3017  */
3018 boolean_t
3019 thread_recompute_kernel_promotion_locked(thread_t thread)
3020 {
3021 	boolean_t needs_update = FALSE;
3022 	uint8_t kern_promotion_schedpri = (uint8_t)thread_get_inheritor_turnstile_sched_priority(thread);
3023 
3024 	/*
3025 	 * For now just assert that kern_promotion_schedpri <= MAXPRI_PROMOTE.
3026 	 * TURNSTILE_KERNEL_PROMOTE adds threads on the waitq already capped to MAXPRI_PROMOTE
3027 	 * and propagates the priority through the chain with the same cap, because as of now it does
3028 	 * not differentiate on the kernel primitive.
3029 	 *
3030 	 * If this assumption changes with the adoption of a kernel primitive that does not
3031 	 * apply the cap when adding/propagating,
3032 	 * then here is the place to put the generic cap for all kernel primitives
3033 	 * (convert the assert to kern_promotion_schedpri = MIN(priority, MAXPRI_PROMOTE)).
3034 	 */
3035 	assert(kern_promotion_schedpri <= MAXPRI_PROMOTE);
3036 
3037 	if (kern_promotion_schedpri != thread->kern_promotion_schedpri) {
3038 		KDBG(MACHDBG_CODE(
3039 			    DBG_MACH_SCHED, MACH_TURNSTILE_KERNEL_CHANGE) | DBG_FUNC_NONE,
3040 		    thread_tid(thread),
3041 		    kern_promotion_schedpri,
3042 		    thread->kern_promotion_schedpri);
3043 
3044 		needs_update = TRUE;
3045 		thread->kern_promotion_schedpri = kern_promotion_schedpri;
3046 		thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
3047 	}
3048 
3049 	return needs_update;
3050 }
3051 
3052 /*
3053  * Check if the user promotion on thread has changed
3054  * and apply it.
3055  *
3056  * thread locked on entry, might drop the thread lock
3057  * and reacquire it.
3058  */
3059 boolean_t
3060 thread_recompute_user_promotion_locked(thread_t thread)
3061 {
3062 	boolean_t needs_update = FALSE;
3063 	struct task_pend_token pend_token = {};
3064 	uint8_t user_promotion_basepri = MIN((uint8_t)thread_get_inheritor_turnstile_base_priority(thread), MAXPRI_USER);
3065 	int old_base_pri = thread->base_pri;
3066 	thread_qos_t qos_promotion;
3067 
3068 	/* Check if user promotion has changed */
3069 	if (thread->user_promotion_basepri == user_promotion_basepri) {
3070 		return needs_update;
3071 	} else {
3072 		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3073 		    (TURNSTILE_CODE(TURNSTILE_PRIORITY_OPERATIONS, (THREAD_USER_PROMOTION_CHANGE))) | DBG_FUNC_NONE,
3074 		    thread_tid(thread),
3075 		    user_promotion_basepri,
3076 		    thread->user_promotion_basepri,
3077 		    0, 0);
3078 		KDBG(MACHDBG_CODE(
3079 			    DBG_MACH_SCHED, MACH_TURNSTILE_USER_CHANGE) | DBG_FUNC_NONE,
3080 		    thread_tid(thread),
3081 		    user_promotion_basepri,
3082 		    thread->user_promotion_basepri);
3083 	}
3084 
3085 	/* Update the user promotion base pri */
3086 	thread->user_promotion_basepri = user_promotion_basepri;
3087 	pend_token.tpt_force_recompute_pri = 1;
3088 
3089 	if (user_promotion_basepri <= MAXPRI_THROTTLE) {
3090 		qos_promotion = THREAD_QOS_UNSPECIFIED;
3091 	} else {
3092 		qos_promotion = thread_user_promotion_qos_for_pri(user_promotion_basepri);
3093 	}
3094 
3095 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3096 	    TASK_POLICY_QOS_PROMOTE, qos_promotion, 0, &pend_token);
3097 
3098 	if (thread_get_waiting_turnstile(thread) &&
3099 	    thread->base_pri != old_base_pri) {
3100 		needs_update = TRUE;
3101 	}
3102 
3103 	thread_unlock(thread);
3104 
3105 	thread_policy_update_complete_unlocked(thread, &pend_token);
3106 
3107 	thread_lock(thread);
3108 
3109 	return needs_update;
3110 }
3111 
3112 /*
3113  * Convert the thread's user promotion base priority to a QoS for threads in the QoS world.
3114  * Priorities above the UI QoS's base priority are clamped to UI.
3115  */
3116 thread_qos_t
3117 thread_user_promotion_qos_for_pri(int priority)
3118 {
3119 	thread_qos_t qos;
3120 	for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
3121 		if (thread_qos_policy_params.qos_pri[qos] <= priority) {
3122 			return qos;
3123 		}
3124 	}
3125 	return THREAD_QOS_MAINTENANCE;
3126 }
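/*
 * Worked example (descriptive): the loop walks the qos_pri table downward,
 * so a promotion base priority equal to qos_pri[THREAD_QOS_USER_INITIATED]
 * maps to USER_INITIATED, while a priority below every table entry falls
 * through to THREAD_QOS_MAINTENANCE.
 */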
3127 
3128 /*
3129  * Set the thread's QoS Kevent override
3130  * Owned by the Kevent subsystem
3131  *
3132  * May be called with spinlocks held, but not spinlocks
3133  * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
3134  *
3135  * One 'add' must be balanced by one 'drop'.
3136  * Between 'add' and 'drop', the override QoS value may be updated with an 'update'.
3137  * Before the thread is deallocated, there must be 0 remaining overrides.
3138  */
3139 static void
3140 thread_kevent_override(thread_t    thread,
3141     uint32_t    qos_override,
3142     boolean_t   is_new_override)
3143 {
3144 	struct task_pend_token pend_token = {};
3145 	boolean_t needs_update;
3146 
3147 	spl_t s = splsched();
3148 	thread_lock(thread);
3149 
3150 	uint32_t old_override = thread->requested_policy.thrp_qos_kevent_override;
3151 
3152 	assert(qos_override > THREAD_QOS_UNSPECIFIED);
3153 	assert(qos_override < THREAD_QOS_LAST);
3154 
3155 	if (is_new_override) {
3156 		if (thread->kevent_overrides++ == 0) {
3157 			/* This add is the first override for this thread */
3158 			assert(old_override == THREAD_QOS_UNSPECIFIED);
3159 		} else {
3160 			/* There are already other overrides in effect for this thread */
3161 			assert(old_override > THREAD_QOS_UNSPECIFIED);
3162 		}
3163 	} else {
3164 		/* There must be at least one override (the previous add call) in effect */
3165 		assert(thread->kevent_overrides > 0);
3166 		assert(old_override > THREAD_QOS_UNSPECIFIED);
3167 	}
3168 
3169 	/*
3170 	 * We can't allow lowering if there are several IPC overrides because
3171 	 * the caller can't possibly know the whole truth
3172 	 */
3173 	if (thread->kevent_overrides == 1) {
3174 		needs_update = qos_override != old_override;
3175 	} else {
3176 		needs_update = qos_override > old_override;
3177 	}
3178 
3179 	if (needs_update) {
3180 		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3181 		    TASK_POLICY_QOS_KEVENT_OVERRIDE,
3182 		    qos_override, 0, &pend_token);
3183 		assert(pend_token.tpt_update_sockets == 0);
3184 	}
3185 
3186 	thread_unlock(thread);
3187 	splx(s);
3188 
3189 	thread_policy_update_complete_unlocked(thread, &pend_token);
3190 }
3191 
3192 void
3193 thread_add_kevent_override(thread_t thread, uint32_t qos_override)
3194 {
3195 	thread_kevent_override(thread, qos_override, TRUE);
3196 }
3197 
3198 void
3199 thread_update_kevent_override(thread_t thread, uint32_t qos_override)
3200 {
3201 	thread_kevent_override(thread, qos_override, FALSE);
3202 }
3203 
3204 void
3205 thread_drop_kevent_override(thread_t thread)
3206 {
3207 	struct task_pend_token pend_token = {};
3208 
3209 	spl_t s = splsched();
3210 	thread_lock(thread);
3211 
3212 	assert(thread->kevent_overrides > 0);
3213 
3214 	if (--thread->kevent_overrides == 0) {
3215 		/*
3216 		 * There are no more overrides for this thread, so we should
3217 		 * clear out the saturated override value
3218 		 */
3219 
3220 		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3221 		    TASK_POLICY_QOS_KEVENT_OVERRIDE, THREAD_QOS_UNSPECIFIED,
3222 		    0, &pend_token);
3223 	}
3224 
3225 	thread_unlock(thread);
3226 	splx(s);
3227 
3228 	thread_policy_update_complete_unlocked(thread, &pend_token);
3229 }
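
/*
 * Illustrative sketch (hypothetical, not part of this file): the balanced
 * add/update/drop lifecycle that thread_kevent_override() expects, where
 * `servicer` stands in for a thread reference the kevent subsystem already
 * holds.
 */
#if 0 /* example only */
static void
example_kevent_override_lifecycle(thread_t servicer)
{
	/* The first override on a thread must be an 'add'. */
	thread_add_kevent_override(servicer, THREAD_QOS_USER_INITIATED);

	/* While outstanding, the override may be moved with an 'update'. */
	thread_update_kevent_override(servicer, THREAD_QOS_USER_INTERACTIVE);

	/* Every 'add' must be balanced by exactly one 'drop'. */
	thread_drop_kevent_override(servicer);
}
#endif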
3230 
3231 /*
3232  * Set the thread's QoS Workloop Servicer override
3233  * Owned by the Kevent subsystem
3234  *
3235  * May be called with spinlocks held, but not spinlocks
3236  * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
3237  *
3238  * One 'add' must be balanced by one 'drop'.
3239  * Between 'add' and 'drop', the override QoS value may be updated with an 'update'.
3240  * Before the thread is deallocated, there must be 0 remaining overrides.
3241  */
3242 static void
3243 thread_servicer_override(thread_t    thread,
3244     uint32_t    qos_override,
3245     boolean_t   is_new_override)
3246 {
3247 	struct task_pend_token pend_token = {};
3248 
3249 	spl_t s = splsched();
3250 	thread_lock(thread);
3251 
3252 	if (is_new_override) {
3253 		assert(!thread->requested_policy.thrp_qos_wlsvc_override);
3254 	} else {
3255 		assert(thread->requested_policy.thrp_qos_wlsvc_override);
3256 	}
3257 
3258 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3259 	    TASK_POLICY_QOS_SERVICER_OVERRIDE,
3260 	    qos_override, 0, &pend_token);
3261 
3262 	thread_unlock(thread);
3263 	splx(s);
3264 
3265 	assert(pend_token.tpt_update_sockets == 0);
3266 	thread_policy_update_complete_unlocked(thread, &pend_token);
3267 }
3268 
3269 void
3270 thread_add_servicer_override(thread_t thread, uint32_t qos_override)
3271 {
3272 	assert(qos_override > THREAD_QOS_UNSPECIFIED);
3273 	assert(qos_override < THREAD_QOS_LAST);
3274 
3275 	thread_servicer_override(thread, qos_override, TRUE);
3276 }
3277 
3278 void
3279 thread_update_servicer_override(thread_t thread, uint32_t qos_override)
3280 {
3281 	assert(qos_override > THREAD_QOS_UNSPECIFIED);
3282 	assert(qos_override < THREAD_QOS_LAST);
3283 
3284 	thread_servicer_override(thread, qos_override, FALSE);
3285 }
3286 
3287 void
3288 thread_drop_servicer_override(thread_t thread)
3289 {
3290 	thread_servicer_override(thread, THREAD_QOS_UNSPECIFIED, FALSE);
3291 }
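
/*
 * Illustrative sketch (hypothetical, not part of this file): unlike the
 * counted kevent override, the workloop servicer override is a single slot,
 * so 'drop' is just an 'update' back to THREAD_QOS_UNSPECIFIED.
 */
#if 0 /* example only */
static void
example_servicer_override_lifecycle(thread_t servicer)
{
	thread_add_servicer_override(servicer, THREAD_QOS_LEGACY);
	thread_update_servicer_override(servicer, THREAD_QOS_USER_INITIATED);
	thread_drop_servicer_override(servicer);
}
#endif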
3292 
3293 void
3294 thread_update_servicer_iotier_override(thread_t thread, uint8_t iotier_override)
3295 {
3296 	struct task_pend_token pend_token = {};
3297 	uint8_t current_iotier;
3298 
3299 	/* Check if the update is needed */
3300 	current_iotier = (uint8_t)thread_get_requested_policy_spinlocked(thread,
3301 	    TASK_POLICY_ATTRIBUTE, TASK_POLICY_IOTIER_KEVENT_OVERRIDE, NULL);
3302 
3303 	if (iotier_override == current_iotier) {
3304 		return;
3305 	}
3306 
3307 	spl_t s = splsched();
3308 	thread_lock(thread);
3309 
3310 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3311 	    TASK_POLICY_IOTIER_KEVENT_OVERRIDE,
3312 	    iotier_override, 0, &pend_token);
3313 
3314 	thread_unlock(thread);
3315 	splx(s);
3316 
3317 	assert(pend_token.tpt_update_sockets == 0);
3318 	thread_policy_update_complete_unlocked(thread, &pend_token);
3319 }
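
/*
 * Illustrative sketch (hypothetical, not part of this file): because
 * thread_default_iotier_override is THROTTLE_LEVEL_END, writing the default
 * back is how a kevent IO tier override is cleared.
 */
#if 0 /* example only */
static void
example_iotier_override(thread_t servicer)
{
	/* Demote the servicer's IO to tier 2 for a low-priority workloop. */
	thread_update_servicer_iotier_override(servicer, THROTTLE_LEVEL_TIER2);

	/* Clear the override by restoring the default. */
	thread_update_servicer_iotier_override(servicer,
	    (uint8_t)thread_default_iotier_override);
}
#endif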
3320 
3321 /* Get current requested qos / relpri, may be called from spinlock context */
3322 thread_qos_t
3323 thread_get_requested_qos(thread_t thread, int *relpri)
3324 {
3325 	int relprio_value = 0;
3326 	thread_qos_t qos;
3327 
3328 	qos = (thread_qos_t)proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
3329 	    TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
3330 	if (relpri) {
3331 		*relpri = -relprio_value;
3332 	}
3333 	return qos;
3334 }
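
/*
 * Illustrative sketch (hypothetical, not part of this file): the relative
 * priority appears to be stored with its sign flipped, so the negation above
 * hands callers back the user-facing value (0 through -15).
 */
#if 0 /* example only */
static void
example_requested_qos(thread_t thread)
{
	int relpri;
	thread_qos_t qos = thread_get_requested_qos(thread, &relpri);

	/* A thread at UT with relative priority -2 reports (THREAD_QOS_UTILITY, -2). */
	(void)qos;
	(void)relpri;
}
#endif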
3335 
3336 /*
3337  * This function promotes the thread's priority,
3338  * since exec could block other threads calling
3339  * proc_find on the proc. This boost must be removed
3340  * via a call to thread_clear_exec_promotion.
3341  *
3342  * This should be replaced with a generic 'priority inheriting gate' mechanism (24194397)
3343  */
3344 void
3345 thread_set_exec_promotion(thread_t thread)
3346 {
3347 	spl_t s = splsched();
3348 	thread_lock(thread);
3349 
3350 	sched_thread_promote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3351 
3352 	thread_unlock(thread);
3353 	splx(s);
3354 }
3355 
3356 /*
3357  * This function will clear the exec thread
3358  * promotion set on the thread by thread_set_exec_promotion.
3359  */
3360 void
3361 thread_clear_exec_promotion(thread_t thread)
3362 {
3363 	spl_t s = splsched();
3364 	thread_lock(thread);
3365 
3366 	sched_thread_unpromote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3367 
3368 	thread_unlock(thread);
3369 	splx(s);
3370 }
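
/*
 * Illustrative sketch (hypothetical, not part of this file): the promotion
 * is scoped like a lock, bracketing the window in which exec can hold up
 * proc_find() callers.
 */
#if 0 /* example only */
static void
example_exec_promotion_scope(thread_t self)
{
	thread_set_exec_promotion(self);
	/* ... the exec work that other threads may block on ... */
	thread_clear_exec_promotion(self);
}
#endif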
3371 
3372 #if CONFIG_SCHED_RT_ALLOW
3373 
3374 /*
3375  * Flag set by the -rt-allow_policy-enable boot-arg to restrict use of
3376  * THREAD_TIME_CONSTRAINT_POLICY and THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY
3377  * to threads that have joined a work interval with WORK_INTERVAL_WORKLOAD_ID_RT_ALLOWED.
3378  */
3379 static TUNABLE(
3380 	bool,
3381 	rt_allow_policy_enabled,
3382 	"-rt-allow_policy-enable",
3383 	false
3384 	);
3385 
3386 /*
3387  * When the RT allow policy is enabled and a thread is allowed to become RT,
3388  * sometimes (if the process's RT allow policy is restricted) the thread will
3389  * have a CPU limit enforced. The following two tunables determine the
3390  * parameters for that CPU limit.
3391  */
3392 
3393 /* % of the interval allowed to run. */
3394 TUNABLE_DEV_WRITEABLE(uint8_t, rt_allow_limit_percent,
3395     "rt_allow_limit_percent", 70);
3396 
3397 /* The length of the interval in milliseconds. */
3398 TUNABLE_DEV_WRITEABLE(uint16_t, rt_allow_limit_interval_ms,
3399     "rt_allow_limit_interval", 10);
3400 
3401 static bool
3402 thread_has_rt(thread_t thread)
3403 {
3404 	return
3405 	        thread->sched_mode == TH_MODE_REALTIME ||
3406 	        thread->saved_mode == TH_MODE_REALTIME;
3407 }
3408 
3409 /*
3410  * Set a CPU limit on a thread based on the RT allow policy. This will be picked
3411  * up by the target thread via the ledger AST.
3412  */
3413 static void
3414 thread_rt_set_cpulimit(thread_t thread)
3415 {
3416 	/* Force reasonable values for the cpu limit. */
3417 	const uint8_t percent = MAX(MIN(rt_allow_limit_percent, 99), 1);
3418 	const uint16_t interval_ms = MAX(rt_allow_limit_interval_ms, 1);
3419 
3420 	thread->t_ledger_req_percentage = percent;
3421 	thread->t_ledger_req_interval_ms = interval_ms;
3422 	thread->t_ledger_req_action = THREAD_CPULIMIT_BLOCK;
3423 
3424 	thread->sched_flags |= TH_SFLAG_RT_CPULIMIT;
3425 }
3426 
3427 /* Similar to the above but removes any CPU limit. */
3428 static void
3429 thread_rt_clear_cpulimit(thread_t thread)
3430 {
3431 	thread->sched_flags &= ~TH_SFLAG_RT_CPULIMIT;
3432 
3433 	thread->t_ledger_req_percentage = 0;
3434 	thread->t_ledger_req_interval_ms = 0;
3435 	thread->t_ledger_req_action = THREAD_CPULIMIT_DISABLE;
3436 }
3437 
3438 /*
3439  * Evaluate RT policy for a thread, demoting and undemoting as needed.
3440  */
3441 void
3442 thread_rt_evaluate(thread_t thread)
3443 {
3444 	task_t task = get_threadtask(thread);
3445 	bool platform_binary = false;
3446 
3447 	/* If the RT allow policy is not enabled - nothing to do. */
3448 	if (!rt_allow_policy_enabled) {
3449 		return;
3450 	}
3451 
3452 	/* User threads only. */
3453 	if (task == kernel_task) {
3454 		return;
3455 	}
3456 
3457 	/* Check for platform binary. */
3458 	platform_binary = (task_ro_flags_get(task) & TFRO_PLATFORM) != 0;
3459 
3460 	spl_t s = splsched();
3461 	thread_lock(thread);
3462 
3463 	const thread_work_interval_flags_t wi_flags =
3464 	    os_atomic_load(&thread->th_work_interval_flags, relaxed);
3465 
3466 	/*
3467 	 * RT threads that are not joined to a work interval allowing RT
3468 	 * threads are demoted. Once those conditions no longer hold, the
3469 	 * thread is undemoted.
3470 	 */
3471 	if (thread_has_rt(thread) && (wi_flags & TH_WORK_INTERVAL_FLAGS_RT_ALLOWED) == 0) {
3472 		if (!sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) {
3473 			KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RT_DISALLOWED_WORK_INTERVAL),
3474 			    thread_tid(thread));
3475 			sched_thread_mode_demote(thread, TH_SFLAG_RT_DISALLOWED);
3476 		}
3477 	} else {
3478 		if (sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) {
3479 			sched_thread_mode_undemote(thread, TH_SFLAG_RT_DISALLOWED);
3480 		}
3481 	}
3482 
3483 	/*
3484 	 * RT threads get a CPU limit unless they're part of a platform binary
3485 	 * task. If the thread is no longer RT, any existing CPU limit should be
3486 	 * removed.
3487 	 */
3488 	bool set_ast = false;
3489 	if (!platform_binary &&
3490 	    thread_has_rt(thread) &&
3491 	    (thread->sched_flags & TH_SFLAG_RT_CPULIMIT) == 0) {
3492 		thread_rt_set_cpulimit(thread);
3493 		set_ast = true;
3494 	}
3495 
3496 	if (!platform_binary &&
3497 	    !thread_has_rt(thread) &&
3498 	    (thread->sched_flags & TH_SFLAG_RT_CPULIMIT) != 0) {
3499 		thread_rt_clear_cpulimit(thread);
3500 		set_ast = true;
3501 	}
3502 
3503 	thread_unlock(thread);
3504 	splx(s);
3505 
3506 	if (set_ast) {
3507 		/* Ensure the target thread picks up any CPU limit change. */
3508 		act_set_astledger(thread);
3509 	}
3510 }
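
/*
 * Illustrative sketch (hypothetical, not part of this file): re-evaluating
 * the policy after a thread's work interval membership changes, so demotion
 * and the CPU limit track the thread's current
 * TH_WORK_INTERVAL_FLAGS_RT_ALLOWED state.
 */
#if 0 /* example only */
static void
example_work_interval_change(thread_t thread)
{
	/* ... join or leave a work interval here ... */
	thread_rt_evaluate(thread);
}
#endif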
3511 
3512 #else
3513 
3514 void
3515 thread_rt_evaluate(__unused thread_t thread)
3516 {
3517 }
3518 
3519 #endif /*  CONFIG_SCHED_RT_ALLOW */
3520