xref: /xnu-10002.1.13/osfmk/kern/thread_policy.c (revision 1031c584a5e37aff177559b9f69dbd3c8c3fd30a)
1 /*
2  * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <mach/mach_types.h>
30 #include <mach/thread_act_server.h>
31 
32 #include <kern/kern_types.h>
33 #include <kern/processor.h>
34 #include <kern/thread.h>
35 #include <kern/affinity.h>
36 #include <kern/work_interval.h>
37 #include <mach/task_policy.h>
38 #include <kern/sfi.h>
39 #include <kern/policy_internal.h>
40 #include <sys/errno.h>
41 #include <sys/ulock.h>
42 
43 #include <mach/machine/sdt.h>
44 
45 static KALLOC_TYPE_DEFINE(thread_qos_override_zone,
46     struct thread_qos_override, KT_DEFAULT);
47 
48 #ifdef MACH_BSD
49 extern int      proc_selfpid(void);
50 extern char *   proc_name_address(void *p);
51 extern void     rethrottle_thread(void * uthread);
52 #endif /* MACH_BSD */
53 
54 #define QOS_EXTRACT(q)        ((q) & 0xff)
55 
56 #define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
57 #define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
58 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
59 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 3
60 
61 TUNABLE(uint32_t, qos_override_mode, "qos_override_mode",
62     QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE);
63 
64 static void
65 proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset);
66 
67 const int thread_default_iotier_override  = THROTTLE_LEVEL_END;
68 
69 const struct thread_requested_policy default_thread_requested_policy = {
70 	.thrp_iotier_kevent_override = thread_default_iotier_override
71 };
72 
73 /*
74  * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
75  * to threads that don't have a QoS class set.
76  */
77 const qos_policy_params_t thread_qos_policy_params = {
78 	/*
79 	 * This table defines the starting base priority of the thread,
80 	 * which will be modified by the thread importance and the task max priority
81 	 * before being applied.
82 	 */
83 	.qos_pri[THREAD_QOS_UNSPECIFIED]                = 0, /* not consulted */
84 	.qos_pri[THREAD_QOS_USER_INTERACTIVE]           = BASEPRI_BACKGROUND, /* i.e. 46 */
85 	.qos_pri[THREAD_QOS_USER_INITIATED]             = BASEPRI_USER_INITIATED,
86 	.qos_pri[THREAD_QOS_LEGACY]                     = BASEPRI_DEFAULT,
87 	.qos_pri[THREAD_QOS_UTILITY]                    = BASEPRI_UTILITY,
88 	.qos_pri[THREAD_QOS_BACKGROUND]                 = MAXPRI_THROTTLE,
89 	.qos_pri[THREAD_QOS_MAINTENANCE]                = MAXPRI_THROTTLE,
90 
91 	/*
92 	 * This table defines the highest IO priority that a thread marked with this
93 	 * QoS class can have.
94 	 */
95 	.qos_iotier[THREAD_QOS_UNSPECIFIED]             = THROTTLE_LEVEL_TIER0,
96 	.qos_iotier[THREAD_QOS_USER_INTERACTIVE]        = THROTTLE_LEVEL_TIER0,
97 	.qos_iotier[THREAD_QOS_USER_INITIATED]          = THROTTLE_LEVEL_TIER0,
98 	.qos_iotier[THREAD_QOS_LEGACY]                  = THROTTLE_LEVEL_TIER0,
99 	.qos_iotier[THREAD_QOS_UTILITY]                 = THROTTLE_LEVEL_TIER1,
100 	.qos_iotier[THREAD_QOS_BACKGROUND]              = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */
101 	.qos_iotier[THREAD_QOS_MAINTENANCE]             = THROTTLE_LEVEL_TIER3,
102 
103 	/*
104 	 * This table defines the highest QoS level that
105 	 * a thread marked with this QoS class can have.
106 	 */
107 
108 	.qos_through_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
109 	.qos_through_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
110 	.qos_through_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
111 	.qos_through_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
112 	.qos_through_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
113 	.qos_through_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
114 	.qos_through_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
115 
116 	.qos_latency_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
117 	.qos_latency_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(LATENCY_QOS_TIER_0),
118 	.qos_latency_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(LATENCY_QOS_TIER_1),
119 	.qos_latency_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(LATENCY_QOS_TIER_1),
120 	.qos_latency_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(LATENCY_QOS_TIER_3),
121 	.qos_latency_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(LATENCY_QOS_TIER_3),
122 	.qos_latency_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(LATENCY_QOS_TIER_3),
123 };
124 
125 static void
126 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);
127 
128 static int
129 thread_qos_scaled_relative_priority(int qos, int qos_relprio);
130 
131 static void
132 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);
133 
134 static void
135 proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
136 
137 static void
138 proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
139 
140 static void
141 thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
142 
143 static int
144 thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);
145 
146 static int
147 proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);
148 
149 static void
150 thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
151 
152 static void
153 thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
154 
155 boolean_t
thread_has_qos_policy(thread_t thread)156 thread_has_qos_policy(thread_t thread)
157 {
158 	return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
159 }
160 
161 
162 static void
thread_remove_qos_policy_locked(thread_t thread,task_pend_token_t pend_token)163 thread_remove_qos_policy_locked(thread_t thread,
164     task_pend_token_t pend_token)
165 {
166 	__unused int prev_qos = thread->requested_policy.thrp_qos;
167 
168 	DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);
169 
170 	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
171 	    THREAD_QOS_UNSPECIFIED, 0, pend_token);
172 }
173 
174 kern_return_t
thread_remove_qos_policy(thread_t thread)175 thread_remove_qos_policy(thread_t thread)
176 {
177 	struct task_pend_token pend_token = {};
178 
179 	thread_mtx_lock(thread);
180 	if (!thread->active) {
181 		thread_mtx_unlock(thread);
182 		return KERN_TERMINATED;
183 	}
184 
185 	thread_remove_qos_policy_locked(thread, &pend_token);
186 
187 	thread_mtx_unlock(thread);
188 
189 	thread_policy_update_complete_unlocked(thread, &pend_token);
190 
191 	return KERN_SUCCESS;
192 }
193 
194 
195 boolean_t
thread_is_static_param(thread_t thread)196 thread_is_static_param(thread_t thread)
197 {
198 	if (thread->static_param) {
199 		DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
200 		return TRUE;
201 	}
202 	return FALSE;
203 }
204 
205 /*
206  * Relative priorities can range between 0REL and -15REL. These
207  * map to QoS-specific ranges, to create non-overlapping priority
208  * ranges.
209  */
210 static int
thread_qos_scaled_relative_priority(int qos,int qos_relprio)211 thread_qos_scaled_relative_priority(int qos, int qos_relprio)
212 {
213 	int next_lower_qos;
214 
215 	/* Fast path, since no validation or scaling is needed */
216 	if (qos_relprio == 0) {
217 		return 0;
218 	}
219 
220 	switch (qos) {
221 	case THREAD_QOS_USER_INTERACTIVE:
222 		next_lower_qos = THREAD_QOS_USER_INITIATED;
223 		break;
224 	case THREAD_QOS_USER_INITIATED:
225 		next_lower_qos = THREAD_QOS_LEGACY;
226 		break;
227 	case THREAD_QOS_LEGACY:
228 		next_lower_qos = THREAD_QOS_UTILITY;
229 		break;
230 	case THREAD_QOS_UTILITY:
231 		next_lower_qos = THREAD_QOS_BACKGROUND;
232 		break;
233 	case THREAD_QOS_MAINTENANCE:
234 	case THREAD_QOS_BACKGROUND:
235 		next_lower_qos = 0;
236 		break;
237 	default:
238 		panic("Unrecognized QoS %d", qos);
239 		return 0;
240 	}
241 
242 	int prio_range_max = thread_qos_policy_params.qos_pri[qos];
243 	int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0;
244 
245 	/*
246 	 * We now have the valid range that the scaled relative priority can map to. Note
247 	 * that the lower bound is exclusive, but the upper bound is inclusive. If the
248 	 * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
249 	 * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
250 	 * remainder.
251 	 */
252 	int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4);
253 
254 	return scaled_relprio;
255 }
256 
257 /*
258  * flag set by -qos-policy-allow boot-arg to allow
259  * testing thread qos policy from userspace
260  */
261 static TUNABLE(bool, allow_qos_policy_set, "-qos-policy-allow", false);
262 
/*
 * thread_policy_set:
 *
 * Mach interface: validate and apply a scheduling policy flavor to a thread.
 * Unless the -qos-policy-allow boot-arg is set, static-param threads are
 * rejected and THREAD_QOS_POLICY cannot be set directly.  Any existing QoS
 * is removed before the new policy is applied, and restored if that fails.
 */
kern_return_t
thread_policy_set(
	thread_t                                thread,
	thread_policy_flavor_t  flavor,
	thread_policy_t                 policy_info,
	mach_msg_type_number_t  count)
{
	thread_qos_policy_data_t req_qos;
	kern_return_t kr;

	req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;

	if (thread == THREAD_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (!allow_qos_policy_set) {
		if (thread_is_static_param(thread)) {
			return KERN_POLICY_STATIC;
		}

		if (flavor == THREAD_QOS_POLICY) {
			return KERN_INVALID_ARGUMENT;
		}

		/* RT-with-priority requests are restricted to BASEPRI_RTQUEUES here. */
		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			if (count < THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT) {
				return KERN_INVALID_ARGUMENT;
			}
			thread_time_constraint_with_priority_policy_t info = (thread_time_constraint_with_priority_policy_t)policy_info;
			if (info->priority != BASEPRI_RTQUEUES) {
				return KERN_INVALID_ARGUMENT;
			}
		}
	}

	if (flavor == THREAD_TIME_CONSTRAINT_POLICY || flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
		thread_work_interval_flags_t th_wi_flags = os_atomic_load(
			&thread->th_work_interval_flags, relaxed);
		if ((th_wi_flags & TH_WORK_INTERVAL_FLAGS_HAS_WORKLOAD_ID) &&
		    !(th_wi_flags & TH_WORK_INTERVAL_FLAGS_RT_ALLOWED)) {
			/* Fail requests to become realtime for threads having joined workintervals
			 * with workload ID that don't have the rt-allowed flag. */
			return KERN_INVALID_POLICY;
		}
	}

	/* Threads without static_param set reset their QoS when other policies are applied. */
	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		/* Store the existing tier, if we fail this call it is used to reset back. */
		req_qos.qos_tier = thread->requested_policy.thrp_qos;
		req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;

		kr = thread_remove_qos_policy(thread);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
	}

	kr = thread_policy_set_internal(thread, flavor, policy_info, count);

	if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
		if (kr != KERN_SUCCESS) {
			/* Reset back to our original tier as the set failed. */
			(void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
		}
	}

	return kr;
}
333 
334 static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, period) == offsetof(thread_time_constraint_policy_data_t, period));
335 static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, computation) == offsetof(thread_time_constraint_policy_data_t, computation));
336 static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, constraint) == offsetof(thread_time_constraint_policy_data_t, constraint));
337 static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, preemptible) == offsetof(thread_time_constraint_policy_data_t, preemptible));
338 
/*
 * thread_policy_set_internal:
 *
 * Apply a thread policy flavor without the QoS remove/restore dance done by
 * thread_policy_set().  Called with nothing locked; takes the thread mutex,
 * accumulates deferred side effects in a pend_token, and publishes them
 * after dropping the mutex.
 */
kern_return_t
thread_policy_set_internal(
	thread_t                     thread,
	thread_policy_flavor_t       flavor,
	thread_policy_t              policy_info,
	mach_msg_type_number_t       count)
{
	kern_return_t result = KERN_SUCCESS;
	struct task_pend_token pend_token = {};

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return KERN_TERMINATED;
	}

	switch (flavor) {
	case THREAD_EXTENDED_POLICY:
	{
		/* With no (or short) policy_info, the default is timeshare. */
		boolean_t timeshare = TRUE;

		if (count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t info;

			info = (thread_extended_policy_t)policy_info;
			timeshare = info->timeshare;
		}

		sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;

		spl_t s = splsched();
		thread_lock(thread);

		thread_set_user_sched_mode_and_recompute_pri(thread, mode);

		thread_unlock(thread);
		splx(s);

		/*
		 * The thread may be demoted with RT_DISALLOWED but has just
		 * changed its sched mode to TIMESHARE or FIXED. Make sure to
		 * undemote the thread so the new sched mode takes effect.
		 */
		thread_rt_evaluate(thread);

		pend_token.tpt_update_thread_sfi = 1;

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
	{
		thread_time_constraint_with_priority_policy_t info;

		/* Both flavors share a layout prefix; see the static_asserts above. */
		mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
		    THREAD_TIME_CONSTRAINT_POLICY_COUNT :
		    THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);

		if (count < min_count) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_with_priority_policy_t)policy_info;


		/* Computation must fit within the constraint and the RT quantum bounds. */
		if (info->constraint < info->computation ||
		    info->computation > max_rt_quantum ||
		    info->computation < min_rt_quantum) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Bump a too-small computation up to half the constraint (capped at max_rt_quantum). */
		if (info->computation < (info->constraint / 2)) {
			info->computation = (info->constraint / 2);
			if (info->computation > max_rt_quantum) {
				info->computation = max_rt_quantum;
			}
		}

		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			if ((info->priority < BASEPRI_RTQUEUES) || (info->priority > MAXPRI)) {
				result = KERN_INVALID_ARGUMENT;
				break;
			}
		}

		spl_t s = splsched();
		thread_lock(thread);

		thread->realtime.period          = info->period;
		thread->realtime.computation     = info->computation;
		thread->realtime.constraint      = info->constraint;
		thread->realtime.preemptible     = info->preemptible;

		/*
		 * If the thread has a work interval driven policy, the priority
		 * offset has been set by the work interval.
		 */
		if (!thread->requested_policy.thrp_wi_driven) {
			if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
				thread->realtime.priority_offset = (uint8_t)(info->priority - BASEPRI_RTQUEUES);
			} else {
				thread->realtime.priority_offset = 0;
			}
		}

		thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);

		thread_unlock(thread);
		splx(s);

		thread_rt_evaluate(thread);

		pend_token.tpt_update_thread_sfi = 1;

		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t info;

		if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}
		info = (thread_precedence_policy_t)policy_info;

		spl_t s = splsched();
		thread_lock(thread);

		thread->importance = info->importance;

		thread_recompute_priority(thread);

		thread_unlock(thread);
		splx(s);

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		extern boolean_t affinity_sets_enabled;
		thread_affinity_policy_t info;

		if (!affinity_sets_enabled) {
			result = KERN_INVALID_POLICY;
			break;
		}

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t) policy_info;
		/*
		 * Unlock the thread mutex here and
		 * return directly after calling thread_affinity_set().
		 * This is necessary for correct lock ordering because
		 * thread_affinity_set() takes the task lock.
		 */
		thread_mtx_unlock(thread);
		return thread_affinity_set(thread, info->affinity_tag);
	}

#if !defined(XNU_TARGET_OS_OSX)
	case THREAD_BACKGROUND_POLICY:
	{
		thread_background_policy_t info;

		if (count < THREAD_BACKGROUND_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Only a task's own threads may be backgrounded this way. */
		if (get_threadtask(thread) != current_task()) {
			result = KERN_PROTECTION_FAILURE;
			break;
		}

		info = (thread_background_policy_t) policy_info;

		int enable;

		if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG) {
			enable = TASK_POLICY_ENABLE;
		} else {
			enable = TASK_POLICY_DISABLE;
		}

		int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;

		proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token);

		break;
	}
#endif /* !defined(XNU_TARGET_OS_OSX) */

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		thread_throughput_qos_t tqos;

		if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS) {
			break;
		}

		tqos = qos_extract(info->thread_throughput_qos_tier);

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);

		break;
	}

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		thread_latency_qos_t lqos;

		if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS) {
			break;
		}

		lqos = qos_extract(info->thread_latency_qos_tier);

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);

		break;
	}

	case THREAD_QOS_POLICY:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* tier_importance must lie in [THREAD_QOS_MIN_TIER_IMPORTANCE, 0]. */
		if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
		    info->qos_tier, -info->tier_importance, &pend_token);

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	return result;
}
631 
632 /*
633  * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
634  * Both result in FIXED mode scheduling.
635  */
636 static sched_mode_t
convert_policy_to_sched_mode(integer_t policy)637 convert_policy_to_sched_mode(integer_t policy)
638 {
639 	switch (policy) {
640 	case POLICY_TIMESHARE:
641 		return TH_MODE_TIMESHARE;
642 	case POLICY_RR:
643 	case POLICY_FIFO:
644 		return TH_MODE_FIXED;
645 	default:
646 		panic("unexpected sched policy: %d", policy);
647 		return TH_MODE_NONE;
648 	}
649 }
650 
651 /*
652  * Called either with the thread mutex locked
653  * or from the pthread kext in a 'safe place'.
654  */
655 static kern_return_t
thread_set_mode_and_absolute_pri_internal(thread_t thread,sched_mode_t mode,integer_t priority,task_pend_token_t pend_token)656 thread_set_mode_and_absolute_pri_internal(thread_t              thread,
657     sched_mode_t          mode,
658     integer_t             priority,
659     task_pend_token_t     pend_token)
660 {
661 	kern_return_t kr = KERN_SUCCESS;
662 
663 	spl_t s = splsched();
664 	thread_lock(thread);
665 
666 	/* This path isn't allowed to change a thread out of realtime. */
667 	if ((thread->sched_mode == TH_MODE_REALTIME) ||
668 	    (thread->saved_mode == TH_MODE_REALTIME)) {
669 		kr = KERN_FAILURE;
670 		goto unlock;
671 	}
672 
673 	if (thread->policy_reset) {
674 		kr = KERN_SUCCESS;
675 		goto unlock;
676 	}
677 
678 	sched_mode_t old_mode = thread->sched_mode;
679 
680 	/*
681 	 * Reverse engineer and apply the correct importance value
682 	 * from the requested absolute priority value.
683 	 *
684 	 * TODO: Store the absolute priority value instead
685 	 */
686 
687 	if (priority >= thread->max_priority) {
688 		priority = thread->max_priority - thread->task_priority;
689 	} else if (priority >= MINPRI_KERNEL) {
690 		priority -=  MINPRI_KERNEL;
691 	} else if (priority >= MINPRI_RESERVED) {
692 		priority -=  MINPRI_RESERVED;
693 	} else {
694 		priority -= BASEPRI_DEFAULT;
695 	}
696 
697 	priority += thread->task_priority;
698 
699 	if (priority > thread->max_priority) {
700 		priority = thread->max_priority;
701 	} else if (priority < MINPRI) {
702 		priority = MINPRI;
703 	}
704 
705 	thread->importance = priority - thread->task_priority;
706 
707 	thread_set_user_sched_mode_and_recompute_pri(thread, mode);
708 
709 	if (mode != old_mode) {
710 		pend_token->tpt_update_thread_sfi = 1;
711 	}
712 
713 unlock:
714 	thread_unlock(thread);
715 	splx(s);
716 
717 	return kr;
718 }
719 
720 void
thread_freeze_base_pri(thread_t thread)721 thread_freeze_base_pri(thread_t thread)
722 {
723 	assert(thread == current_thread());
724 
725 	spl_t s = splsched();
726 	thread_lock(thread);
727 
728 	assert((thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN) == 0);
729 	thread->sched_flags |= TH_SFLAG_BASE_PRI_FROZEN;
730 
731 	thread_unlock(thread);
732 	splx(s);
733 }
734 
735 bool
thread_unfreeze_base_pri(thread_t thread)736 thread_unfreeze_base_pri(thread_t thread)
737 {
738 	assert(thread == current_thread());
739 	integer_t base_pri;
740 	ast_t ast = 0;
741 
742 	spl_t s = splsched();
743 	thread_lock(thread);
744 
745 	assert(thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN);
746 	thread->sched_flags &= ~TH_SFLAG_BASE_PRI_FROZEN;
747 
748 	base_pri = thread->req_base_pri;
749 	if (base_pri != thread->base_pri) {
750 		/*
751 		 * This function returns "true" if the base pri change
752 		 * is the most likely cause for the preemption.
753 		 */
754 		sched_set_thread_base_priority(thread, base_pri);
755 		ast = ast_peek(AST_PREEMPT);
756 	}
757 
758 	thread_unlock(thread);
759 	splx(s);
760 
761 	return ast != 0;
762 }
763 
764 uint8_t
thread_workq_pri_for_qos(thread_qos_t qos)765 thread_workq_pri_for_qos(thread_qos_t qos)
766 {
767 	assert(qos < THREAD_QOS_LAST);
768 	return (uint8_t)thread_qos_policy_params.qos_pri[qos];
769 }
770 
771 thread_qos_t
thread_workq_qos_for_pri(int priority)772 thread_workq_qos_for_pri(int priority)
773 {
774 	thread_qos_t qos;
775 	if (priority > thread_qos_policy_params.qos_pri[THREAD_QOS_USER_INTERACTIVE]) {
776 		// indicate that workq should map >UI threads to workq's
777 		// internal notation for above-UI work.
778 		return THREAD_QOS_UNSPECIFIED;
779 	}
780 	for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
781 		// map a given priority up to the next nearest qos band.
782 		if (thread_qos_policy_params.qos_pri[qos - 1] < priority) {
783 			return qos;
784 		}
785 	}
786 	return THREAD_QOS_MAINTENANCE;
787 }
788 
789 /*
790  * private interface for pthread workqueues
791  *
792  * Set scheduling policy & absolute priority for thread
793  * May be called with spinlocks held
794  * Thread mutex lock is not held
795  */
796 void
thread_reset_workq_qos(thread_t thread,uint32_t qos)797 thread_reset_workq_qos(thread_t thread, uint32_t qos)
798 {
799 	struct task_pend_token pend_token = {};
800 
801 	assert(qos < THREAD_QOS_LAST);
802 
803 	spl_t s = splsched();
804 	thread_lock(thread);
805 
806 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
807 	    TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
808 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
809 	    TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0,
810 	    &pend_token);
811 
812 	assert(pend_token.tpt_update_sockets == 0);
813 
814 	thread_unlock(thread);
815 	splx(s);
816 
817 	thread_policy_update_complete_unlocked(thread, &pend_token);
818 }
819 
820 /*
821  * private interface for pthread workqueues
822  *
823  * Set scheduling policy & absolute priority for thread
824  * May be called with spinlocks held
825  * Thread mutex lock is held
826  */
827 void
thread_set_workq_override(thread_t thread,uint32_t qos)828 thread_set_workq_override(thread_t thread, uint32_t qos)
829 {
830 	struct task_pend_token pend_token = {};
831 
832 	assert(qos < THREAD_QOS_LAST);
833 
834 	spl_t s = splsched();
835 	thread_lock(thread);
836 
837 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
838 	    TASK_POLICY_QOS_WORKQ_OVERRIDE, qos, 0, &pend_token);
839 
840 	assert(pend_token.tpt_update_sockets == 0);
841 
842 	thread_unlock(thread);
843 	splx(s);
844 
845 	thread_policy_update_complete_unlocked(thread, &pend_token);
846 }
847 
848 /*
849  * private interface for pthread workqueues
850  *
851  * Set scheduling policy & absolute priority for thread
852  * May be called with spinlocks held
853  * Thread mutex lock is not held
854  */
855 void
thread_set_workq_pri(thread_t thread,thread_qos_t qos,integer_t priority,integer_t policy)856 thread_set_workq_pri(thread_t  thread,
857     thread_qos_t qos,
858     integer_t priority,
859     integer_t policy)
860 {
861 	struct task_pend_token pend_token = {};
862 	sched_mode_t mode = convert_policy_to_sched_mode(policy);
863 
864 	assert(qos < THREAD_QOS_LAST);
865 	assert(thread->static_param);
866 
867 	if (!thread->static_param || !thread->active) {
868 		return;
869 	}
870 
871 	spl_t s = splsched();
872 	thread_lock(thread);
873 
874 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
875 	    TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
876 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
877 	    TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED,
878 	    0, &pend_token);
879 
880 	thread_unlock(thread);
881 	splx(s);
882 
883 	/* Concern: this doesn't hold the mutex... */
884 
885 	__assert_only kern_return_t kr;
886 	kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority,
887 	    &pend_token);
888 	assert(kr == KERN_SUCCESS);
889 
890 	if (pend_token.tpt_update_thread_sfi) {
891 		sfi_reevaluate(thread);
892 	}
893 }
894 
895 /*
896  * thread_set_mode_and_absolute_pri:
897  *
898  * Set scheduling policy & absolute priority for thread, for deprecated
899  * thread_set_policy and thread_policy interfaces.
900  *
901  * Called with nothing locked.
902  */
903 kern_return_t
thread_set_mode_and_absolute_pri(thread_t thread,integer_t policy,integer_t priority)904 thread_set_mode_and_absolute_pri(thread_t   thread,
905     integer_t  policy,
906     integer_t  priority)
907 {
908 	kern_return_t kr = KERN_SUCCESS;
909 	struct task_pend_token pend_token = {};
910 
911 	sched_mode_t mode = convert_policy_to_sched_mode(policy);
912 
913 	thread_mtx_lock(thread);
914 
915 	if (!thread->active) {
916 		kr = KERN_TERMINATED;
917 		goto unlock;
918 	}
919 
920 	if (thread_is_static_param(thread)) {
921 		kr = KERN_POLICY_STATIC;
922 		goto unlock;
923 	}
924 
925 	/* Setting legacy policies on threads kills the current QoS */
926 	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
927 		thread_remove_qos_policy_locked(thread, &pend_token);
928 	}
929 
930 	kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);
931 
932 unlock:
933 	thread_mtx_unlock(thread);
934 
935 	thread_policy_update_complete_unlocked(thread, &pend_token);
936 
937 	return kr;
938 }
939 
940 /*
941  * Set the thread's requested mode and recompute priority
942  * Called with thread mutex and thread locked
943  *
944  * TODO: Mitigate potential problems caused by moving thread to end of runq
945  * whenever its priority is recomputed
946  *      Only remove when it actually changes? Attempt to re-insert at appropriate location?
947  */
948 static void
thread_set_user_sched_mode_and_recompute_pri(thread_t thread,sched_mode_t mode)949 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
950 {
951 	if (thread->policy_reset) {
952 		return;
953 	}
954 
955 	boolean_t removed = thread_run_queue_remove(thread);
956 
957 	sched_set_thread_mode_user(thread, mode);
958 
959 	thread_recompute_priority(thread);
960 
961 	if (removed) {
962 		thread_run_queue_reinsert(thread, SCHED_TAILQ);
963 	}
964 }
965 
966 /* called at splsched with thread lock locked */
967 static void
thread_update_qos_cpu_time_locked(thread_t thread)968 thread_update_qos_cpu_time_locked(thread_t thread)
969 {
970 	task_t task = get_threadtask(thread);
971 	uint64_t timer_sum, timer_delta;
972 
973 	/*
974 	 * This is only as accurate the thread's last context switch or user/kernel
975 	 * transition (unless precise user/kernel time is disabled).
976 	 *
977 	 * TODO: Consider running an update operation here to update it first.
978 	 *       Maybe doable with interrupts disabled from current thread.
979 	 *       If the thread is on a different core, may not be easy to get right.
980 	 */
981 
982 	timer_sum = recount_thread_time_mach(thread);
983 	timer_delta = timer_sum - thread->vtimer_qos_save;
984 
985 	thread->vtimer_qos_save = timer_sum;
986 
987 	uint64_t* task_counter = NULL;
988 
989 	/* Update the task-level effective and requested qos stats atomically, because we don't have the task lock. */
990 	switch (thread->effective_policy.thep_qos) {
991 	case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default; break;
992 	case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance; break;
993 	case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background; break;
994 	case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility; break;
995 	case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy; break;
996 	case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated; break;
997 	case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break;
998 	default:
999 		panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
1000 	}
1001 
1002 	OSAddAtomic64(timer_delta, task_counter);
1003 
1004 	/* Update the task-level qos stats atomically, because we don't have the task lock. */
1005 	switch (thread->requested_policy.thrp_qos) {
1006 	case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default; break;
1007 	case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance; break;
1008 	case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background; break;
1009 	case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility; break;
1010 	case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy; break;
1011 	case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated; break;
1012 	case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break;
1013 	default:
1014 		panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos);
1015 	}
1016 
1017 	OSAddAtomic64(timer_delta, task_counter);
1018 }
1019 
1020 /*
1021  * called with no thread locks held
1022  * may hold task lock
1023  */
1024 void
thread_update_qos_cpu_time(thread_t thread)1025 thread_update_qos_cpu_time(thread_t thread)
1026 {
1027 	thread_mtx_lock(thread);
1028 
1029 	spl_t s = splsched();
1030 	thread_lock(thread);
1031 
1032 	thread_update_qos_cpu_time_locked(thread);
1033 
1034 	thread_unlock(thread);
1035 	splx(s);
1036 
1037 	thread_mtx_unlock(thread);
1038 }
1039 
1040 /*
1041  * Calculate base priority from thread attributes, and set it on the thread
1042  *
1043  * Called with thread_lock and thread mutex held.
1044  */
1045 void
thread_recompute_priority(thread_t thread)1046 thread_recompute_priority(
1047 	thread_t                thread)
1048 {
1049 	integer_t               priority;
1050 	integer_t               adj_priority;
1051 
1052 	if (thread->policy_reset) {
1053 		return;
1054 	}
1055 
1056 	if (thread->sched_mode == TH_MODE_REALTIME) {
1057 		uint8_t i = thread->realtime.priority_offset;
1058 		assert((i >= 0) && (i < NRTQS));
1059 		priority = BASEPRI_RTQUEUES + i;
1060 
1061 		sched_set_thread_base_priority(thread, priority);
1062 		if (thread->realtime.deadline == RT_DEADLINE_NONE) {
1063 			/* Make sure the thread has a valid deadline */
1064 			uint64_t ctime = mach_absolute_time();
1065 			thread->realtime.deadline = thread->realtime.constraint + ctime;
1066 			KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SET_RT_DEADLINE) | DBG_FUNC_NONE,
1067 			    (uintptr_t)thread_tid(thread), thread->realtime.deadline, thread->realtime.computation, 1);
1068 		}
1069 		return;
1070 
1071 		/*
1072 		 * A thread may have joined a RT work interval but then never
1073 		 * changed its sched mode or have been demoted. RT work
1074 		 * intervals will have RT priorities - ignore the priority if
1075 		 * the thread isn't RT.
1076 		 */
1077 	} else if (thread->effective_policy.thep_wi_driven &&
1078 	    work_interval_get_priority(thread) < BASEPRI_RTQUEUES) {
1079 		priority = work_interval_get_priority(thread);
1080 	} else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
1081 		int qos = thread->effective_policy.thep_qos;
1082 		int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
1083 		int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
1084 		int qos_scaled_relprio;
1085 
1086 		assert(qos >= 0 && qos < THREAD_QOS_LAST);
1087 		assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);
1088 
1089 		priority = thread_qos_policy_params.qos_pri[qos];
1090 		qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);
1091 
1092 		if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) {
1093 			/* Bump priority 46 to 47 when in a frontmost app */
1094 			qos_scaled_relprio += 1;
1095 		}
1096 
1097 		/* TODO: factor in renice priority here? */
1098 
1099 		priority += qos_scaled_relprio;
1100 	} else {
1101 		if (thread->importance > MAXPRI) {
1102 			priority = MAXPRI;
1103 		} else if (thread->importance < -MAXPRI) {
1104 			priority = -MAXPRI;
1105 		} else {
1106 			priority = thread->importance;
1107 		}
1108 
1109 		priority += thread->task_priority;
1110 	}
1111 
1112 	/* Boost the priority of threads which are RT demoted. */
1113 	if (sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) {
1114 		priority = MAX(priority, MAXPRI_USER);
1115 	}
1116 
1117 	priority = MAX(priority, thread->user_promotion_basepri);
1118 
1119 	/*
1120 	 * Clamp priority back into the allowed range for this task.
1121 	 *  The initial priority value could be out of this range due to:
1122 	 *      Task clamped to BG or Utility (max-pri is 4, or 20)
1123 	 *      Task is user task (max-pri is 63)
1124 	 *      Task is kernel task (max-pri is 95)
1125 	 * Note that thread->importance is user-settable to any integer
1126 	 * via THREAD_PRECEDENCE_POLICY.
1127 	 */
1128 	adj_priority = priority;
1129 	adj_priority = MIN(adj_priority, thread->max_priority);
1130 	adj_priority = MAX(adj_priority, MINPRI);
1131 
1132 	/* Allow workload driven priorities to exceed max_priority. */
1133 	if (thread->effective_policy.thep_wi_driven) {
1134 		adj_priority = MAX(adj_priority, priority);
1135 	}
1136 
1137 	/* Allow priority to exceed max_priority for promotions. */
1138 	if (thread->effective_policy.thep_promote_above_task) {
1139 		adj_priority = MAX(adj_priority, thread->user_promotion_basepri);
1140 	}
1141 	priority = adj_priority;
1142 	assert3u(priority, <=, MAXPRI);
1143 
1144 	if (thread->saved_mode == TH_MODE_REALTIME &&
1145 	    sched_thread_mode_has_demotion(thread, TH_SFLAG_FAILSAFE)) {
1146 		priority = DEPRESSPRI;
1147 	}
1148 
1149 	if (thread->effective_policy.thep_terminated == TRUE) {
1150 		/*
1151 		 * We temporarily want to override the expected priority to
1152 		 * ensure that the thread exits in a timely manner.
1153 		 * Note that this is allowed to exceed thread->max_priority
1154 		 * so that the thread is no longer clamped to background
1155 		 * during the final exit phase.
1156 		 */
1157 		if (priority < thread->task_priority) {
1158 			priority = thread->task_priority;
1159 		}
1160 		if (priority < BASEPRI_DEFAULT) {
1161 			priority = BASEPRI_DEFAULT;
1162 		}
1163 	}
1164 
1165 #if !defined(XNU_TARGET_OS_OSX)
1166 	/* No one can have a base priority less than MAXPRI_THROTTLE */
1167 	if (priority < MAXPRI_THROTTLE) {
1168 		priority = MAXPRI_THROTTLE;
1169 	}
1170 #endif /* !defined(XNU_TARGET_OS_OSX) */
1171 
1172 	sched_set_thread_base_priority(thread, priority);
1173 }
1174 
/* Called with the task lock held, but not the thread mutex or spinlock */
void
thread_policy_update_tasklocked(
	thread_t           thread,
	integer_t          priority,
	integer_t          max_priority,
	task_pend_token_t  pend_token)
{
	thread_mtx_lock(thread);

	/* Nothing to do for dying threads or threads past policy reset */
	if (!thread->active || thread->policy_reset) {
		thread_mtx_unlock(thread);
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	/*
	 * NOTE(review): old_max_priority is marked __unused yet is read by the
	 * demote/undemote checks below; presumably the attribute exists for
	 * configurations where those checks compile out -- verify.
	 */
	__unused
	integer_t old_max_priority = thread->max_priority;

	/* task_priority/max_priority are int16_t fields; range-check the wider args */
	assert(priority >= INT16_MIN && priority <= INT16_MAX);
	thread->task_priority = (int16_t)priority;

	assert(max_priority >= INT16_MIN && max_priority <= INT16_MAX);
	thread->max_priority = (int16_t)max_priority;

	/*
	 * When backgrounding a thread, realtime and fixed priority threads
	 * should be demoted to timeshare background threads.
	 *
	 * TODO: Do this inside the thread policy update routine in order to avoid double
	 * remove/reinsert for a runnable thread
	 */
	if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
		sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED);
	} else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
		sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
	}

	/* Recompute effective policy and base priority under the new task limits */
	thread_policy_update_spinlocked(thread, true, pend_token);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);
}
1222 
1223 /*
1224  * Reset thread to default state in preparation for termination
1225  * Called with thread mutex locked
1226  *
1227  * Always called on current thread, so we don't need a run queue remove
1228  */
1229 void
thread_policy_reset(thread_t thread)1230 thread_policy_reset(
1231 	thread_t                thread)
1232 {
1233 	spl_t           s;
1234 
1235 	assert(thread == current_thread());
1236 
1237 	s = splsched();
1238 	thread_lock(thread);
1239 
1240 	if (thread->sched_flags & TH_SFLAG_FAILSAFE) {
1241 		sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
1242 	}
1243 
1244 	if (thread->sched_flags & TH_SFLAG_THROTTLED) {
1245 		sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1246 	}
1247 
1248 	if (thread->sched_flags & TH_SFLAG_RT_DISALLOWED) {
1249 		sched_thread_mode_undemote(thread, TH_SFLAG_RT_DISALLOWED);
1250 	}
1251 
1252 	/* At this point, the various demotions should be inactive */
1253 	assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
1254 	assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK));
1255 
1256 	/* Reset thread back to task-default basepri and mode  */
1257 	sched_mode_t newmode = SCHED(initial_thread_sched_mode)(get_threadtask(thread));
1258 
1259 	sched_set_thread_mode(thread, newmode);
1260 
1261 	thread->importance = 0;
1262 
1263 	/* Prevent further changes to thread base priority or mode */
1264 	thread->policy_reset = 1;
1265 
1266 	sched_set_thread_base_priority(thread, thread->task_priority);
1267 
1268 	thread_unlock(thread);
1269 	splx(s);
1270 }
1271 
/*
 * Return the requested policy flavor for a thread, or the system default
 * when *get_default is set on entry (some flavors also set it on the way
 * out when the thread has no policy of that flavor in effect).
 */
kern_return_t
thread_policy_get(
	thread_t                                thread,
	thread_policy_flavor_t  flavor,
	thread_policy_t                 policy_info,
	mach_msg_type_number_t  *count,
	boolean_t                               *get_default)
{
	kern_return_t                   result = KERN_SUCCESS;

	if (thread == THREAD_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return KERN_TERMINATED;
	}

	switch (flavor) {
	case THREAD_EXTENDED_POLICY:
	{
		boolean_t               timeshare = TRUE;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			/* For a demoted thread, report the saved (pre-demotion) mode */
			if ((thread->sched_mode != TH_MODE_REALTIME) &&
			    (thread->saved_mode != TH_MODE_REALTIME)) {
				if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
					timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
				} else {
					timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
				}
			} else {
				*get_default = TRUE;
			}

			thread_unlock(thread);
			splx(s);
		}

		if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t        info;

			info = (thread_extended_policy_t)policy_info;
			info->timeshare = timeshare;
		}

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
	{
		thread_time_constraint_with_priority_policy_t         info;

		/* The WITH_PRIORITY flavor carries one extra field */
		mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
		    THREAD_TIME_CONSTRAINT_POLICY_COUNT :
		    THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);

		if (*count < min_count) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_with_priority_policy_t)policy_info;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			if ((thread->sched_mode == TH_MODE_REALTIME) ||
			    (thread->saved_mode == TH_MODE_REALTIME)) {
				info->period = thread->realtime.period;
				info->computation = thread->realtime.computation;
				info->constraint = thread->realtime.constraint;
				info->preemptible = thread->realtime.preemptible;
				if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
					info->priority = thread->realtime.priority_offset + BASEPRI_RTQUEUES;
				}
			} else {
				/* Not a realtime thread: fall through to the defaults below */
				*get_default = TRUE;
			}

			thread_unlock(thread);
			splx(s);
		}

		if (*get_default) {
			info->period = 0;
			info->computation = default_timeshare_computation;
			info->constraint = default_timeshare_constraint;
			info->preemptible = TRUE;
			if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
				info->priority = BASEPRI_RTQUEUES;
			}
		}


		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t              info;

		if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_precedence_policy_t)policy_info;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			info->importance = thread->importance;

			thread_unlock(thread);
			splx(s);
		} else {
			info->importance = 0;
		}

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		thread_affinity_policy_t                info;

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (*count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t)policy_info;

		if (!(*get_default)) {
			info->affinity_tag = thread_affinity_get(thread);
		} else {
			info->affinity_tag = THREAD_AFFINITY_TAG_NULL;
		}

		break;
	}

	case THREAD_POLICY_STATE:
	{
		thread_policy_state_t           info;

		if (*count < THREAD_POLICY_STATE_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Only root can get this info */
		if (!task_is_privileged(current_task())) {
			result = KERN_PROTECTION_FAILURE;
			break;
		}

		info = (thread_policy_state_t)(void*)policy_info;

		if (!(*get_default)) {
			info->flags = 0;

			spl_t s = splsched();
			thread_lock(thread);

			info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0);

			/* Raw bit-for-bit snapshots of the policy structs for debugging */
			info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy);
			info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy);

			info->thps_user_promotions          = 0;
			info->thps_user_promotion_basepri   = thread->user_promotion_basepri;
			info->thps_ipc_overrides            = thread->kevent_overrides;

			proc_get_thread_policy_bitfield(thread, info);

			thread_unlock(thread);
			splx(s);
		} else {
			info->requested = 0;
			info->effective = 0;
			info->pending = 0;
		}

		break;
	}

	case THREAD_REQUESTED_STATE_POLICY:
	{
		if (*count < THREAD_REQUESTED_STATE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/*
		 * NOTE(review): requested_policy is read here under the thread
		 * mutex only (no spl/thread lock), unlike the snapshot in
		 * THREAD_POLICY_STATE -- confirm that is sufficient.
		 */
		thread_requested_qos_policy_t info = (thread_requested_qos_policy_t) policy_info;
		struct thread_requested_policy *req_policy = &thread->requested_policy;

		info->thrq_base_qos = req_policy->thrp_qos;
		info->thrq_qos_relprio = req_policy->thrp_qos_relprio;
		info->thrq_qos_override = req_policy->thrp_qos_override;
		info->thrq_qos_promote = req_policy->thrp_qos_promote;
		info->thrq_qos_kevent_override = req_policy->thrp_qos_kevent_override;
		info->thrq_qos_workq_override = req_policy->thrp_qos_workq_override;
		info->thrq_qos_wlsvc_override = req_policy->thrp_qos_wlsvc_override;

		break;
	}

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		thread_latency_qos_t plqos;

		if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			plqos = 0;
		} else {
			plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL);
		}

		info->thread_latency_qos_tier = qos_latency_policy_package(plqos);
	}
	break;

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		thread_throughput_qos_t ptqos;

		if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			ptqos = 0;
		} else {
			ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL);
		}

		info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);
	}
	break;

	case THREAD_QOS_POLICY:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (*count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (!(*get_default)) {
			int relprio_value = 0;
			info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
			    TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);

			/* relprio is stored inverted; flip it back for the caller */
			info->tier_importance = -relprio_value;
		} else {
			info->qos_tier = THREAD_QOS_UNSPECIFIED;
			info->tier_importance = 0;
		}

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	return result;
}
1566 
1567 void
thread_policy_create(thread_t thread)1568 thread_policy_create(thread_t thread)
1569 {
1570 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1571 	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1572 	    thread_tid(thread), theffective_0(thread),
1573 	    theffective_1(thread), thread->base_pri, 0);
1574 
1575 	/* We pass a pend token but ignore it */
1576 	struct task_pend_token pend_token = {};
1577 
1578 	thread_policy_update_internal_spinlocked(thread, true, &pend_token);
1579 
1580 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1581 	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1582 	    thread_tid(thread), theffective_0(thread),
1583 	    theffective_1(thread), thread->base_pri, 0);
1584 }
1585 
1586 static void
thread_policy_update_spinlocked(thread_t thread,bool recompute_priority,task_pend_token_t pend_token)1587 thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token)
1588 {
1589 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1590 	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
1591 	    thread_tid(thread), theffective_0(thread),
1592 	    theffective_1(thread), thread->base_pri, 0);
1593 
1594 	thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);
1595 
1596 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1597 	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
1598 	    thread_tid(thread), theffective_0(thread),
1599 	    theffective_1(thread), thread->base_pri, 0);
1600 }
1601 
1602 
1603 
1604 /*
1605  * One thread state update function TO RULE THEM ALL
1606  *
1607  * This function updates the thread effective policy fields
1608  * and pushes the results to the relevant subsystems.
1609  *
1610  * Called with thread spinlock locked, task may be locked, thread mutex may be locked
1611  */
1612 static void
thread_policy_update_internal_spinlocked(thread_t thread,bool recompute_priority,task_pend_token_t pend_token)1613 thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority,
1614     task_pend_token_t pend_token)
1615 {
1616 	/*
1617 	 * Step 1:
1618 	 *  Gather requested policy and effective task state
1619 	 */
1620 
1621 	const struct thread_requested_policy requested = thread->requested_policy;
1622 	const struct task_effective_policy task_effective = get_threadtask(thread)->effective_policy;
1623 
1624 	/*
1625 	 * Step 2:
1626 	 *  Calculate new effective policies from requested policy, task and thread state
1627 	 *  Rules:
1628 	 *      Don't change requested, it won't take effect
1629 	 */
1630 
1631 	struct thread_effective_policy next = {};
1632 
1633 	next.thep_wi_driven = requested.thrp_wi_driven;
1634 
1635 	next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;
1636 
1637 	uint32_t next_qos = requested.thrp_qos;
1638 
1639 	if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
1640 		next_qos = MAX(requested.thrp_qos_override, next_qos);
1641 		next_qos = MAX(requested.thrp_qos_promote, next_qos);
1642 		next_qos = MAX(requested.thrp_qos_kevent_override, next_qos);
1643 		next_qos = MAX(requested.thrp_qos_wlsvc_override, next_qos);
1644 		next_qos = MAX(requested.thrp_qos_workq_override, next_qos);
1645 	}
1646 
1647 	if (task_effective.tep_darwinbg && task_effective.tep_adaptive_bg &&
1648 	    requested.thrp_qos_promote > THREAD_QOS_BACKGROUND) {
1649 		/*
1650 		 * This thread is turnstile-boosted higher than the adaptive clamp
1651 		 * by a synchronous waiter. Allow that to override the adaptive
1652 		 * clamp temporarily for this thread only.
1653 		 */
1654 		next.thep_promote_above_task = true;
1655 		next_qos = requested.thrp_qos_promote;
1656 	}
1657 
1658 	next.thep_qos = next_qos;
1659 
1660 	/* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
1661 	if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1662 		if (next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1663 			next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
1664 		} else {
1665 			next.thep_qos = task_effective.tep_qos_clamp;
1666 		}
1667 		next.thep_wi_driven = 0;
1668 	}
1669 
1670 	/*
1671 	 * Extract outbound-promotion QoS before applying task ceiling or BG clamp
1672 	 * This allows QoS promotions to work properly even after the process is unclamped.
1673 	 */
1674 	next.thep_qos_promote = next.thep_qos;
1675 
1676 	/* The ceiling only applies to threads that are in the QoS world */
1677 	/* TODO: is it appropriate for this to limit a turnstile-boosted thread's QoS? */
1678 	if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
1679 	    next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1680 		next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
1681 	}
1682 
1683 	/*
1684 	 * The QoS relative priority is only applicable when the original programmer's
1685 	 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
1686 	 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
1687 	 * since otherwise it would be lower than unclamped threads. Similarly, in the
1688 	 * presence of boosting, the programmer doesn't know what other actors
1689 	 * are boosting the thread.
1690 	 */
1691 	if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
1692 	    (requested.thrp_qos == next.thep_qos) &&
1693 	    (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
1694 		next.thep_qos_relprio = requested.thrp_qos_relprio;
1695 	} else {
1696 		next.thep_qos_relprio = 0;
1697 	}
1698 
1699 	/* Calculate DARWIN_BG */
1700 	bool wants_darwinbg        = false;
1701 	bool wants_all_sockets_bg  = false; /* Do I want my existing sockets to be bg */
1702 
1703 	if (task_effective.tep_darwinbg && !next.thep_promote_above_task) {
1704 		wants_darwinbg = true;
1705 	}
1706 
1707 	/*
1708 	 * If DARWIN_BG has been requested at either level, it's engaged.
1709 	 * darwinbg threads always create bg sockets,
1710 	 * but only some types of darwinbg change the sockets
1711 	 * after they're created
1712 	 */
1713 	if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg) {
1714 		wants_all_sockets_bg = wants_darwinbg = true;
1715 	}
1716 
1717 	if (requested.thrp_pidbind_bg) {
1718 		wants_all_sockets_bg = wants_darwinbg = true;
1719 	}
1720 
1721 	if (next.thep_qos == THREAD_QOS_BACKGROUND ||
1722 	    next.thep_qos == THREAD_QOS_MAINTENANCE) {
1723 		wants_darwinbg = true;
1724 	}
1725 
1726 	/* Calculate side effects of DARWIN_BG */
1727 
1728 	if (wants_darwinbg) {
1729 		next.thep_darwinbg = 1;
1730 		next.thep_wi_driven = 0;
1731 	}
1732 
1733 	if (next.thep_darwinbg || task_effective.tep_new_sockets_bg) {
1734 		next.thep_new_sockets_bg = 1;
1735 	}
1736 
1737 	/* Don't use task_effective.tep_all_sockets_bg here */
1738 	if (wants_all_sockets_bg) {
1739 		next.thep_all_sockets_bg = 1;
1740 	}
1741 
1742 	/* darwinbg implies background QOS (or lower) */
1743 	if (next.thep_darwinbg &&
1744 	    (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
1745 		next.thep_qos = THREAD_QOS_BACKGROUND;
1746 		next.thep_qos_relprio = 0;
1747 	}
1748 
1749 	/* Calculate IO policy */
1750 
1751 	int iopol = THROTTLE_LEVEL_TIER0;
1752 
1753 	/* Factor in the task's IO policy */
1754 	if (next.thep_darwinbg) {
1755 		iopol = MAX(iopol, task_effective.tep_bg_iotier);
1756 	}
1757 
1758 	if (!next.thep_promote_above_task) {
1759 		iopol = MAX(iopol, task_effective.tep_io_tier);
1760 	}
1761 
1762 	/* Look up the associated IO tier value for the QoS class */
1763 	iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);
1764 
1765 	iopol = MAX(iopol, requested.thrp_int_iotier);
1766 	iopol = MAX(iopol, requested.thrp_ext_iotier);
1767 
1768 	/* Apply the kevent iotier override */
1769 	iopol = MIN(iopol, requested.thrp_iotier_kevent_override);
1770 
1771 	next.thep_io_tier = iopol;
1772 
1773 	/*
1774 	 * If a QoS override is causing IO to go into a lower tier, we also set
1775 	 * the passive bit so that a thread doesn't end up stuck in its own throttle
1776 	 * window when the override goes away.
1777 	 */
1778 
1779 	int next_qos_iotier = thread_qos_policy_params.qos_iotier[next.thep_qos];
1780 	int req_qos_iotier = thread_qos_policy_params.qos_iotier[requested.thrp_qos];
1781 	bool qos_io_override_active = (next_qos_iotier < req_qos_iotier);
1782 
1783 	/* Calculate Passive IO policy */
1784 	if (requested.thrp_ext_iopassive ||
1785 	    requested.thrp_int_iopassive ||
1786 	    qos_io_override_active ||
1787 	    task_effective.tep_io_passive) {
1788 		next.thep_io_passive = 1;
1789 	}
1790 
1791 	/* Calculate timer QOS */
1792 	uint32_t latency_qos = requested.thrp_latency_qos;
1793 
1794 	if (!next.thep_promote_above_task) {
1795 		latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
1796 	}
1797 
1798 	latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);
1799 
1800 	next.thep_latency_qos = latency_qos;
1801 
1802 	/* Calculate throughput QOS */
1803 	uint32_t through_qos = requested.thrp_through_qos;
1804 
1805 	if (!next.thep_promote_above_task) {
1806 		through_qos = MAX(through_qos, task_effective.tep_through_qos);
1807 	}
1808 
1809 	through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);
1810 
1811 	next.thep_through_qos = through_qos;
1812 
1813 	if (task_effective.tep_terminated || requested.thrp_terminated) {
1814 		/* Shoot down the throttles that slow down exit or response to SIGTERM */
1815 		next.thep_terminated    = 1;
1816 		next.thep_darwinbg      = 0;
1817 		next.thep_io_tier       = THROTTLE_LEVEL_TIER0;
1818 		next.thep_qos           = THREAD_QOS_UNSPECIFIED;
1819 		next.thep_latency_qos   = LATENCY_QOS_TIER_UNSPECIFIED;
1820 		next.thep_through_qos   = THROUGHPUT_QOS_TIER_UNSPECIFIED;
1821 		next.thep_wi_driven     = 0;
1822 	}
1823 
1824 	/*
1825 	 * Step 3:
1826 	 *  Swap out old policy for new policy
1827 	 */
1828 
1829 	struct thread_effective_policy prev = thread->effective_policy;
1830 
1831 	thread_update_qos_cpu_time_locked(thread);
1832 
1833 	/* This is the point where the new values become visible to other threads */
1834 	thread->effective_policy = next;
1835 
1836 	/*
1837 	 * Step 4:
1838 	 *  Pend updates that can't be done while holding the thread lock
1839 	 */
1840 
1841 	if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg) {
1842 		pend_token->tpt_update_sockets = 1;
1843 	}
1844 
1845 	/* TODO: Doesn't this only need to be done if the throttle went up? */
1846 	if (prev.thep_io_tier != next.thep_io_tier) {
1847 		pend_token->tpt_update_throttle = 1;
1848 	}
1849 
1850 	/*
1851 	 * Check for the attributes that sfi_thread_classify() consults,
1852 	 *  and trigger SFI re-evaluation.
1853 	 */
1854 	if (prev.thep_qos != next.thep_qos ||
1855 	    prev.thep_darwinbg != next.thep_darwinbg) {
1856 		pend_token->tpt_update_thread_sfi = 1;
1857 	}
1858 
1859 	integer_t old_base_pri = thread->base_pri;
1860 
1861 	/*
1862 	 * Step 5:
1863 	 *  Update other subsystems as necessary if something has changed
1864 	 */
1865 
1866 	/* Check for the attributes that thread_recompute_priority() consults */
1867 	if (prev.thep_qos != next.thep_qos ||
1868 	    prev.thep_qos_relprio != next.thep_qos_relprio ||
1869 	    prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent ||
1870 	    prev.thep_promote_above_task != next.thep_promote_above_task ||
1871 	    prev.thep_terminated != next.thep_terminated ||
1872 	    prev.thep_wi_driven != next.thep_wi_driven ||
1873 	    pend_token->tpt_force_recompute_pri == 1 ||
1874 	    recompute_priority) {
1875 		thread_recompute_priority(thread);
1876 	}
1877 
1878 	/*
1879 	 * Check if the thread is waiting on a turnstile and needs priority propagation.
1880 	 */
1881 	if (pend_token->tpt_update_turnstile &&
1882 	    ((old_base_pri == thread->base_pri) ||
1883 	    !thread_get_waiting_turnstile(thread))) {
1884 		/*
1885 		 * Reset update turnstile pend token since either
1886 		 * the thread priority did not change or thread is
1887 		 * not blocked on a turnstile.
1888 		 */
1889 		pend_token->tpt_update_turnstile = 0;
1890 	}
1891 }
1892 
1893 
1894 /*
1895  * Initiate a thread policy state transition on a thread with its TID
1896  * Useful if you cannot guarantee the thread won't get terminated
1897  * Precondition: No locks are held
1898  * Will take task lock - using the non-tid variant is faster
1899  * if you already have a thread ref.
1900  */
1901 void
proc_set_thread_policy_with_tid(task_t task,uint64_t tid,int category,int flavor,int value)1902 proc_set_thread_policy_with_tid(task_t     task,
1903     uint64_t   tid,
1904     int        category,
1905     int        flavor,
1906     int        value)
1907 {
1908 	/* takes task lock, returns ref'ed thread or NULL */
1909 	thread_t thread = task_findtid(task, tid);
1910 
1911 	if (thread == THREAD_NULL) {
1912 		return;
1913 	}
1914 
1915 	proc_set_thread_policy(thread, category, flavor, value);
1916 
1917 	thread_deallocate(thread);
1918 }
1919 
1920 /*
1921  * Initiate a thread policy transition on a thread
1922  * This path supports networking transitions (i.e. darwinbg transitions)
1923  * Precondition: No locks are held
1924  */
void
proc_set_thread_policy(thread_t   thread,
    int        category,
    int        flavor,
    int        value)
{
	/* Single-value convenience wrapper: value2 is unused (0) for these flavors */
	proc_set_thread_policy_ext(thread, category, flavor, value, 0);
}
1933 
1934 void
proc_set_thread_policy_ext(thread_t thread,int category,int flavor,int value,int value2)1935 proc_set_thread_policy_ext(thread_t   thread,
1936     int        category,
1937     int        flavor,
1938     int        value,
1939     int        value2)
1940 {
1941 	struct task_pend_token pend_token = {};
1942 
1943 	thread_mtx_lock(thread);
1944 
1945 	proc_set_thread_policy_locked(thread, category, flavor, value, value2, &pend_token);
1946 
1947 	thread_mtx_unlock(thread);
1948 
1949 	thread_policy_update_complete_unlocked(thread, &pend_token);
1950 }
1951 
1952 /*
1953  * Do the things that can't be done while holding a thread mutex.
1954  * These are set up to call back into thread policy to get the latest value,
1955  * so they don't have to be synchronized with the update.
1956  * The only required semantic is 'call this sometime after updating effective policy'
1957  *
1958  * Precondition: Thread mutex is not held
1959  *
1960  * This may be called with the task lock held, but in that case it won't be
1961  * called with tpt_update_sockets set.
1962  */
void
thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
{
#ifdef MACH_BSD
	/* Propagate background state to the thread's sockets via the BSD layer */
	if (pend_token->tpt_update_sockets) {
		proc_apply_task_networkbg(task_pid(get_threadtask(thread)), thread);
	}
#endif /* MACH_BSD */

	/* Re-evaluate the I/O throttle tier (may wake the thread out of an msleep) */
	if (pend_token->tpt_update_throttle) {
		rethrottle_thread(get_bsdthread_info(thread));
	}

	/* Re-classify the thread for Selective Forced Idle */
	if (pend_token->tpt_update_thread_sfi) {
		sfi_reevaluate(thread);
	}

	/* Propagate a base priority change through the thread's turnstile chain */
	if (pend_token->tpt_update_turnstile) {
		turnstile_update_thread_priority_chain(thread);
	}
}
1984 
1985 /*
1986  * Set and update thread policy
1987  * Thread mutex might be held
1988  */
1989 static void
proc_set_thread_policy_locked(thread_t thread,int category,int flavor,int value,int value2,task_pend_token_t pend_token)1990 proc_set_thread_policy_locked(thread_t          thread,
1991     int               category,
1992     int               flavor,
1993     int               value,
1994     int               value2,
1995     task_pend_token_t pend_token)
1996 {
1997 	spl_t s = splsched();
1998 	thread_lock(thread);
1999 
2000 	proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
2001 
2002 	thread_unlock(thread);
2003 	splx(s);
2004 }
2005 
2006 /*
2007  * Set and update thread policy
2008  * Thread spinlock is held
2009  */
static void
proc_set_thread_policy_spinlocked(thread_t          thread,
    int               category,
    int               flavor,
    int               value,
    int               value2,
    task_pend_token_t pend_token)
{
	/* Trace the requested-policy words before the update */
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
	    thread_tid(thread), threquested_0(thread),
	    threquested_1(thread), value, 0);

	/* Record the new requested value ... */
	thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2, pend_token);

	/* ... then recompute the effective policy from all requested inputs */
	thread_policy_update_spinlocked(thread, false, pend_token);

	/* Trace the post-update state, including which updates were pended */
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
	    thread_tid(thread), threquested_0(thread),
	    threquested_1(thread), tpending(pend_token), 0);
}
2032 
2033 /*
2034  * Set the requested state for a specific flavor to a specific value.
2035  */
static void
thread_set_requested_policy_spinlocked(thread_t     thread,
    int               category,
    int               flavor,
    int               value,
    int               value2,
    task_pend_token_t pend_token)
{
	int tier, passive;

	/* Work on a local copy; publish it with a single store at the end */
	struct thread_requested_policy requested = thread->requested_policy;

	switch (flavor) {
	/* Category: EXTERNAL and INTERNAL, thread and task */

	case TASK_POLICY_DARWIN_BG:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_darwinbg = value;
		} else {
			requested.thrp_int_darwinbg = value;
		}
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_IOPOL:
		/* Translate the userspace IOPOL_* value into a tier + passive bit */
		proc_iopol_to_tier(value, &tier, &passive);
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iotier  = tier;
			requested.thrp_ext_iopassive = passive;
		} else {
			requested.thrp_int_iotier  = tier;
			requested.thrp_int_iopassive = passive;
		}
		break;

	case TASK_POLICY_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iotier = value;
		} else {
			requested.thrp_int_iotier = value;
		}
		break;

	case TASK_POLICY_PASSIVE_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iopassive = value;
		} else {
			requested.thrp_int_iopassive = value;
		}
		break;

	/* Category: ATTRIBUTE, thread only */

	case TASK_POLICY_PIDBIND_BG:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_pidbind_bg = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_LATENCY_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_latency_qos = value;
		break;

	case TASK_POLICY_THROUGH_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_through_qos = value;
		break;

	case TASK_POLICY_QOS_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_AND_RELPRIO:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos = value;
		requested.thrp_qos_relprio = value2;
		pend_token->tpt_update_turnstile = 1;
		DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
		break;

	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_workq_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_PROMOTE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_promote = value;
		break;

	case TASK_POLICY_QOS_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_kevent_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_SERVICER_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_wlsvc_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_TERMINATED:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_terminated = value;
		break;

	case TASK_POLICY_IOTIER_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_iotier_kevent_override = value;
		break;

	case TASK_POLICY_WI_DRIVEN:
		/* value: whether work-interval-driven policy is enabled;
		 * value2: the scheduler mode to apply */
		assert(category == TASK_POLICY_ATTRIBUTE);
		assert(thread == current_thread());

		const bool set_policy = value;
		const sched_mode_t mode = value2;

		requested.thrp_wi_driven = set_policy ? 1 : 0;

		/*
		 * No sched mode change for REALTIME (threads must explicitly
		 * opt-in), however the priority_offset needs to be updated.
		 */
		if (mode == TH_MODE_REALTIME) {
			const int pri = work_interval_get_priority(thread);
			assert3u(pri, >=, BASEPRI_RTQUEUES);
			thread->realtime.priority_offset = set_policy ?
			    (uint8_t)(pri - BASEPRI_RTQUEUES) : 0;
		} else {
			sched_set_thread_mode_user(thread, mode);
			if (set_policy) {
				thread->static_param = true;
			}
		}
		break;

	default:
		panic("unknown task policy: %d %d %d", category, flavor, value);
		break;
	}

	/* Publish the updated requested policy in one store */
	thread->requested_policy = requested;
}
2185 
2186 /*
2187  * Gets what you set. Effective values may be different.
2188  * Precondition: No locks are held
2189  */
2190 int
proc_get_thread_policy(thread_t thread,int category,int flavor)2191 proc_get_thread_policy(thread_t   thread,
2192     int        category,
2193     int        flavor)
2194 {
2195 	int value = 0;
2196 	thread_mtx_lock(thread);
2197 	value = proc_get_thread_policy_locked(thread, category, flavor, NULL);
2198 	thread_mtx_unlock(thread);
2199 	return value;
2200 }
2201 
2202 static int
proc_get_thread_policy_locked(thread_t thread,int category,int flavor,int * value2)2203 proc_get_thread_policy_locked(thread_t   thread,
2204     int        category,
2205     int        flavor,
2206     int*       value2)
2207 {
2208 	int value = 0;
2209 
2210 	spl_t s = splsched();
2211 	thread_lock(thread);
2212 
2213 	value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2);
2214 
2215 	thread_unlock(thread);
2216 	splx(s);
2217 
2218 	return value;
2219 }
2220 
2221 /*
2222  * Gets what you set. Effective values may be different.
2223  */
static int
thread_get_requested_policy_spinlocked(thread_t thread,
    int      category,
    int      flavor,
    int*     value2)
{
	int value = 0;

	/* Snapshot the requested policy; caller holds the thread spinlock */
	struct thread_requested_policy requested = thread->requested_policy;

	switch (flavor) {
	case TASK_POLICY_DARWIN_BG:
		if (category == TASK_POLICY_EXTERNAL) {
			value = requested.thrp_ext_darwinbg;
		} else {
			value = requested.thrp_int_darwinbg;
		}
		break;
	case TASK_POLICY_IOPOL:
		/* Convert tier + passive bit back to the userspace IOPOL_* value */
		if (category == TASK_POLICY_EXTERNAL) {
			value = proc_tier_to_iopol(requested.thrp_ext_iotier,
			    requested.thrp_ext_iopassive);
		} else {
			value = proc_tier_to_iopol(requested.thrp_int_iotier,
			    requested.thrp_int_iopassive);
		}
		break;
	case TASK_POLICY_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			value = requested.thrp_ext_iotier;
		} else {
			value = requested.thrp_int_iotier;
		}
		break;
	case TASK_POLICY_PASSIVE_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			value = requested.thrp_ext_iopassive;
		} else {
			value = requested.thrp_int_iopassive;
		}
		break;
	case TASK_POLICY_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos;
		break;
	case TASK_POLICY_QOS_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_override;
		break;
	case TASK_POLICY_LATENCY_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_latency_qos;
		break;
	case TASK_POLICY_THROUGH_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_through_qos;
		break;
	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_workq_override;
		break;
	case TASK_POLICY_QOS_AND_RELPRIO:
		/* Two-value flavor: QoS in the return, relative priority via *value2 */
		assert(category == TASK_POLICY_ATTRIBUTE);
		assert(value2 != NULL);
		value = requested.thrp_qos;
		*value2 = requested.thrp_qos_relprio;
		break;
	case TASK_POLICY_QOS_PROMOTE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_promote;
		break;
	case TASK_POLICY_QOS_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_kevent_override;
		break;
	case TASK_POLICY_QOS_SERVICER_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_wlsvc_override;
		break;
	case TASK_POLICY_TERMINATED:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_terminated;
		break;
	case TASK_POLICY_IOTIER_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_iotier_kevent_override;
		break;

	case TASK_POLICY_WI_DRIVEN:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_wi_driven;
		break;

	default:
		panic("unknown policy_flavor %d", flavor);
		break;
	}

	return value;
}
2324 
2325 /*
2326  * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
2327  *
2328  * NOTE: This accessor does not take the task or thread lock.
2329  * Notifications of state updates need to be externally synchronized with state queries.
2330  * This routine *MUST* remain interrupt safe, as it is potentially invoked
2331  * within the context of a timer interrupt.
2332  *
2333  * TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
2334  *      Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
2335  *      I don't think that cost is worth not having the right answer.
2336  */
int
proc_get_effective_thread_policy(thread_t thread,
    int      flavor)
{
	int value = 0;

	/*
	 * NOTE: lock-free reads of effective policy; must remain interrupt
	 * safe (see the block comment above this function).
	 */
	switch (flavor) {
	case TASK_POLICY_DARWIN_BG:
		/*
		 * This call is used within the timer layer, as well as
		 * prioritizing requests to the graphics system.
		 * It also informs SFI and originator-bg-state.
		 * Returns 1 for background mode, 0 for normal mode
		 */

		value = thread->effective_policy.thep_darwinbg ? 1 : 0;
		break;
	case TASK_POLICY_IO:
		/*
		 * The I/O system calls here to find out what throttling tier to apply to an operation.
		 * Returns THROTTLE_LEVEL_* values
		 */
		value = thread->effective_policy.thep_io_tier;
		/* A temporary override can only make the tier more aggressive (lower) */
		if (thread->iotier_override != THROTTLE_LEVEL_NONE) {
			value = MIN(value, thread->iotier_override);
		}
		break;
	case TASK_POLICY_PASSIVE_IO:
		/*
		 * The I/O system calls here to find out whether an operation should be passive.
		 * (i.e. not cause operations with lower throttle tiers to be throttled)
		 * Returns 1 for passive mode, 0 for normal mode
		 *
		 * If an override is causing IO to go into a lower tier, we also set
		 * the passive bit so that a thread doesn't end up stuck in its own throttle
		 * window when the override goes away.
		 */
		value = thread->effective_policy.thep_io_passive ? 1 : 0;
		if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
		    thread->iotier_override < thread->effective_policy.thep_io_tier) {
			value = 1;
		}
		break;
	case TASK_POLICY_ALL_SOCKETS_BG:
		/*
		 * do_background_socket() calls this to determine whether
		 * it should change the thread's sockets
		 * Returns 1 for background mode, 0 for normal mode
		 * This consults both thread and task so un-DBGing a thread while the task is BG
		 * doesn't get you out of the network throttle.
		 */
		value = (thread->effective_policy.thep_all_sockets_bg ||
		    get_threadtask(thread)->effective_policy.tep_all_sockets_bg) ? 1 : 0;
		break;
	case TASK_POLICY_NEW_SOCKETS_BG:
		/*
		 * socreate() calls this to determine if it should mark a new socket as background
		 * Returns 1 for background mode, 0 for normal mode
		 */
		value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
		break;
	case TASK_POLICY_LATENCY_QOS:
		/*
		 * timer arming calls into here to find out the timer coalescing level
		 * Returns a latency QoS tier (0-6)
		 */
		value = thread->effective_policy.thep_latency_qos;
		break;
	case TASK_POLICY_THROUGH_QOS:
		/*
		 * This value is passed into the urgency callout from the scheduler
		 * to the performance management subsystem.
		 *
		 * Returns a throughput QoS tier (0-6)
		 */
		value = thread->effective_policy.thep_through_qos;
		break;
	case TASK_POLICY_QOS:
		/*
		 * This is communicated to the performance management layer and SFI.
		 *
		 * Returns a QoS policy tier
		 */
		value = thread->effective_policy.thep_qos;
		break;
	default:
		panic("unknown thread policy flavor %d", flavor);
		break;
	}

	return value;
}
2429 
2430 
2431 /*
2432  * (integer_t) casts limit the number of bits we can fit here
2433  * this interface is deprecated and replaced by the _EXT struct ?
2434  */
static void
proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
{
	uint64_t bits = 0;
	struct thread_requested_policy requested = thread->requested_policy;

	/* Pack the requested-policy fields into the legacy bitfield layout */
	bits |= (requested.thrp_int_darwinbg    ? POLICY_REQ_INT_DARWIN_BG  : 0);
	bits |= (requested.thrp_ext_darwinbg    ? POLICY_REQ_EXT_DARWIN_BG  : 0);
	bits |= (requested.thrp_int_iotier      ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
	bits |= (requested.thrp_ext_iotier      ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
	bits |= (requested.thrp_int_iopassive   ? POLICY_REQ_INT_PASSIVE_IO : 0);
	bits |= (requested.thrp_ext_iopassive   ? POLICY_REQ_EXT_PASSIVE_IO : 0);

	bits |= (requested.thrp_qos             ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
	bits |= (requested.thrp_qos_override    ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT)   : 0);

	bits |= (requested.thrp_pidbind_bg      ? POLICY_REQ_PIDBIND_BG     : 0);

	bits |= (requested.thrp_latency_qos     ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
	bits |= (requested.thrp_through_qos     ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);

	/* (integer_t) narrowing discards high bits, per this legacy interface */
	info->requested = (integer_t) bits;
	bits = 0;

	/* Same packing, now for the effective policy */
	struct thread_effective_policy effective = thread->effective_policy;

	bits |= (effective.thep_darwinbg        ? POLICY_EFF_DARWIN_BG      : 0);

	bits |= (effective.thep_io_tier         ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
	bits |= (effective.thep_io_passive      ? POLICY_EFF_IO_PASSIVE     : 0);
	bits |= (effective.thep_all_sockets_bg  ? POLICY_EFF_ALL_SOCKETS_BG : 0);
	bits |= (effective.thep_new_sockets_bg  ? POLICY_EFF_NEW_SOCKETS_BG : 0);

	bits |= (effective.thep_qos             ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);

	bits |= (effective.thep_latency_qos     ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
	bits |= (effective.thep_through_qos     ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);

	info->effective = (integer_t)bits;
	bits = 0;

	/* No pending state is reported through this interface */
	info->pending = 0;
}
2478 
2479 /*
2480  * Sneakily trace either the task and thread requested
2481  * or just the thread requested, depending on if we have enough room.
2482  * We do have room on LP64. On LP32, we have to split it between two uintptr_t's.
2483  *
2484  *                                LP32            LP64
2485  * threquested_0(thread)          thread[0]       task[0]
2486  * threquested_1(thread)          thread[1]       thread[0]
2487  *
2488  */
2489 
uintptr_t
threquested_0(thread_t thread)
{
	static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");

	/* Reinterpret the 64-bit policy struct as raw words for kdebug tracing */
	uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;

	return raw[0];
}
2499 
uintptr_t
threquested_1(thread_t thread)
{
#if defined __LP64__
	/* LP64: one word already holds the whole thread struct (see threquested_0),
	 * so this slot traces the task's requested policy instead */
	return *(uintptr_t*)&get_threadtask(thread)->requested_policy;
#else
	/* LP32: second half of the thread's 64-bit requested policy */
	uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
	return raw[1];
#endif
}
2510 
uintptr_t
theffective_0(thread_t thread)
{
	static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");

	/* Reinterpret the 64-bit policy struct as raw words for kdebug tracing */
	uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
	return raw[0];
}
2519 
uintptr_t
theffective_1(thread_t thread)
{
#if defined __LP64__
	/* LP64: one word already holds the whole thread struct (see theffective_0),
	 * so this slot traces the task's effective policy instead */
	return *(uintptr_t*)&get_threadtask(thread)->effective_policy;
#else
	/* LP32: second half of the thread's 64-bit effective policy */
	uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
	return raw[1];
#endif
}
2530 
2531 
2532 /*
2533  * Set an override on the thread which is consulted with a
2534  * higher priority than the task/thread policy. This should
2535  * only be set for temporary grants until the thread
2536  * returns to the userspace boundary
2537  *
2538  * We use atomic operations to swap in the override, with
2539  * the assumption that the thread itself can
2540  * read the override and clear it on return to userspace.
2541  *
2542  * No locking is performed, since it is acceptable to see
2543  * a stale override for one loop through throttle_lowpri_io().
2544  * However a thread reference must be held on the thread.
2545  */
2546 
void
set_thread_iotier_override(thread_t thread, int policy)
{
	int current_override;

	/* Let most aggressive I/O policy win until user boundary */
	do {
		current_override = thread->iotier_override;

		if (current_override != THROTTLE_LEVEL_NONE) {
			/* Lower numeric tier is more aggressive; keep the strongest */
			policy = MIN(current_override, policy);
		}

		if (current_override == policy) {
			/* no effective change */
			return;
		}
		/* CAS retries if another CPU raced an override in under us */
	} while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));

	/*
	 * Since the thread may be currently throttled,
	 * re-evaluate tiers and potentially break out
	 * of an msleep
	 */
	rethrottle_thread(get_bsdthread_info(thread));
}
2573 
2574 /*
2575  * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
2576  * semaphores, dispatch_sync) may result in priority inversions where a higher priority
2577  * (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower
2578  * priority thread. In these cases, we attempt to propagate the priority token, as long
2579  * as the subsystem informs us of the relationships between the threads. The userspace
2580  * synchronization subsystem should maintain the information of owner->resource and
2581  * resource->waiters itself.
2582  */
2583 
2584 /*
2585  * This helper canonicalizes the resource/resource_type given the current qos_override_mode
2586  * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
2587  * to be handled specially in the future, but for now it's fine to slam
2588  * *resource to USER_ADDR_NULL even if it was previously a wildcard.
2589  */
2590 static void
canonicalize_resource_and_type(user_addr_t * resource,int * resource_type)2591 canonicalize_resource_and_type(user_addr_t *resource, int *resource_type)
2592 {
2593 	if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2594 		/* Map all input resource/type to a single one */
2595 		*resource = USER_ADDR_NULL;
2596 		*resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
2597 	} else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
2598 		/* no transform */
2599 	} else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
2600 		/* Map all mutex overrides to a single one, to avoid memory overhead */
2601 		if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
2602 			*resource = USER_ADDR_NULL;
2603 		}
2604 	}
2605 }
2606 
2607 /* This helper routine finds an existing override if known. Locking should be done by caller */
2608 static struct thread_qos_override *
find_qos_override(thread_t thread,user_addr_t resource,int resource_type)2609 find_qos_override(thread_t thread,
2610     user_addr_t resource,
2611     int resource_type)
2612 {
2613 	struct thread_qos_override *override;
2614 
2615 	override = thread->overrides;
2616 	while (override) {
2617 		if (override->override_resource == resource &&
2618 		    override->override_resource_type == resource_type) {
2619 			return override;
2620 		}
2621 
2622 		override = override->override_next;
2623 	}
2624 
2625 	return NULL;
2626 }
2627 
static void
find_and_decrement_qos_override(thread_t       thread,
    user_addr_t    resource,
    int            resource_type,
    boolean_t      reset,
    struct thread_qos_override **free_override_list)
{
	struct thread_qos_override *override, *override_prev;

	override_prev = NULL;
	override = thread->overrides;
	while (override) {
		struct thread_qos_override *override_next = override->override_next;

		/* Wildcard resource and/or type matches every entry */
		if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
		    (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) {
			if (reset) {
				/* Drop all outstanding contentions at once */
				override->override_contended_resource_count = 0;
			} else {
				override->override_contended_resource_count--;
			}

			if (override->override_contended_resource_count == 0) {
				/* No contenders left: unlink from the thread's list */
				if (override_prev == NULL) {
					thread->overrides = override_next;
				} else {
					override_prev->override_next = override_next;
				}

				/* Add to out-param for later zfree */
				override->override_next = *free_override_list;
				*free_override_list = override;
			} else {
				override_prev = override;
			}

			/* A non-wildcard resource matches at most one entry; stop early */
			if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
				return;
			}
		} else {
			override_prev = override;
		}

		override = override_next;
	}
}
2674 
2675 /* This helper recalculates the current requested override using the policy selected at boot */
2676 static int
calculate_requested_qos_override(thread_t thread)2677 calculate_requested_qos_override(thread_t thread)
2678 {
2679 	if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2680 		return THREAD_QOS_UNSPECIFIED;
2681 	}
2682 
2683 	/* iterate over all overrides and calculate MAX */
2684 	struct thread_qos_override *override;
2685 	int qos_override = THREAD_QOS_UNSPECIFIED;
2686 
2687 	override = thread->overrides;
2688 	while (override) {
2689 		qos_override = MAX(qos_override, override->override_qos);
2690 		override = override->override_next;
2691 	}
2692 
2693 	return qos_override;
2694 }
2695 
2696 /*
2697  * Returns:
2698  * - 0 on success
2699  * - EINVAL if some invalid input was passed
2700  */
2701 static int
proc_thread_qos_add_override_internal(thread_t thread,int override_qos,boolean_t first_override_for_resource,user_addr_t resource,int resource_type)2702 proc_thread_qos_add_override_internal(thread_t         thread,
2703     int              override_qos,
2704     boolean_t        first_override_for_resource,
2705     user_addr_t      resource,
2706     int              resource_type)
2707 {
2708 	struct task_pend_token pend_token = {};
2709 	int rc = 0;
2710 
2711 	thread_mtx_lock(thread);
2712 
2713 	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
2714 	    thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);
2715 
2716 	DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
2717 	    uint64_t, thread->requested_policy.thrp_qos,
2718 	    uint64_t, thread->effective_policy.thep_qos,
2719 	    int, override_qos, boolean_t, first_override_for_resource);
2720 
2721 	struct thread_qos_override *override;
2722 	struct thread_qos_override *override_new = NULL;
2723 	int new_qos_override, prev_qos_override;
2724 	int new_effective_qos;
2725 
2726 	canonicalize_resource_and_type(&resource, &resource_type);
2727 
2728 	override = find_qos_override(thread, resource, resource_type);
2729 	if (first_override_for_resource && !override) {
2730 		/* We need to allocate a new object. Drop the thread lock and
2731 		 * recheck afterwards in case someone else added the override
2732 		 */
2733 		thread_mtx_unlock(thread);
2734 		override_new = zalloc(thread_qos_override_zone);
2735 		thread_mtx_lock(thread);
2736 		override = find_qos_override(thread, resource, resource_type);
2737 	}
2738 	if (first_override_for_resource && override) {
2739 		/* Someone else already allocated while the thread lock was dropped */
2740 		override->override_contended_resource_count++;
2741 	} else if (!override && override_new) {
2742 		override = override_new;
2743 		override_new = NULL;
2744 		override->override_next = thread->overrides;
2745 		/* since first_override_for_resource was TRUE */
2746 		override->override_contended_resource_count = 1;
2747 		override->override_resource = resource;
2748 		override->override_resource_type = (int16_t)resource_type;
2749 		override->override_qos = THREAD_QOS_UNSPECIFIED;
2750 		thread->overrides = override;
2751 	}
2752 
2753 	if (override) {
2754 		if (override->override_qos == THREAD_QOS_UNSPECIFIED) {
2755 			override->override_qos = (int16_t)override_qos;
2756 		} else {
2757 			override->override_qos = MAX(override->override_qos, (int16_t)override_qos);
2758 		}
2759 	}
2760 
2761 	/* Determine how to combine the various overrides into a single current
2762 	 * requested override
2763 	 */
2764 	new_qos_override = calculate_requested_qos_override(thread);
2765 
2766 	prev_qos_override = proc_get_thread_policy_locked(thread,
2767 	    TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
2768 
2769 	if (new_qos_override != prev_qos_override) {
2770 		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
2771 		    TASK_POLICY_QOS_OVERRIDE,
2772 		    new_qos_override, 0, &pend_token);
2773 	}
2774 
2775 	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
2776 
2777 	thread_mtx_unlock(thread);
2778 
2779 	thread_policy_update_complete_unlocked(thread, &pend_token);
2780 
2781 	if (override_new) {
2782 		zfree(thread_qos_override_zone, override_new);
2783 	}
2784 
2785 	DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
2786 	    int, new_qos_override, int, new_effective_qos, int, rc);
2787 
2788 	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
2789 	    new_qos_override, resource, resource_type, 0, 0);
2790 
2791 	return rc;
2792 }
2793 
2794 int
proc_thread_qos_add_override(task_t task,thread_t thread,uint64_t tid,int override_qos,boolean_t first_override_for_resource,user_addr_t resource,int resource_type)2795 proc_thread_qos_add_override(task_t           task,
2796     thread_t         thread,
2797     uint64_t         tid,
2798     int              override_qos,
2799     boolean_t        first_override_for_resource,
2800     user_addr_t      resource,
2801     int              resource_type)
2802 {
2803 	boolean_t has_thread_reference = FALSE;
2804 	int rc = 0;
2805 
2806 	if (thread == THREAD_NULL) {
2807 		thread = task_findtid(task, tid);
2808 		/* returns referenced thread */
2809 
2810 		if (thread == THREAD_NULL) {
2811 			KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
2812 			    tid, 0, 0xdead, 0, 0);
2813 			return ESRCH;
2814 		}
2815 		has_thread_reference = TRUE;
2816 	} else {
2817 		assert(get_threadtask(thread) == task);
2818 	}
2819 	rc = proc_thread_qos_add_override_internal(thread, override_qos,
2820 	    first_override_for_resource, resource, resource_type);
2821 	if (has_thread_reference) {
2822 		thread_deallocate(thread);
2823 	}
2824 
2825 	return rc;
2826 }
2827 
/*
 * Remove (or, when reset is TRUE, entirely clear) a thread's QoS override
 * for the given resource, then recompute and apply the combined override.
 *
 * Entries emptied by the decrement are unlinked into
 * deferred_free_override_list under the mutex and freed here only after
 * every lock has been dropped.
 */
static void
proc_thread_qos_remove_override_internal(thread_t       thread,
    user_addr_t    resource,
    int            resource_type,
    boolean_t      reset)
{
	struct task_pend_token pend_token = {};

	struct thread_qos_override *deferred_free_override_list = NULL;
	int new_qos_override, prev_qos_override, new_effective_qos;

	thread_mtx_lock(thread);

	canonicalize_resource_and_type(&resource, &resource_type);

	find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
	    thread_tid(thread), resource, reset, 0, 0);

	DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
	    uint64_t, thread->requested_policy.thrp_qos,
	    uint64_t, thread->effective_policy.thep_qos);

	/* Determine how to combine the various overrides into a single current requested override */
	new_qos_override = calculate_requested_qos_override(thread);

	spl_t s = splsched();
	thread_lock(thread);

	/*
	 * The override chain and therefore the value of the current override is locked with thread mutex,
	 * so we can do a get/set without races.  However, the rest of thread policy is locked under the spinlock.
	 * This means you can't change the current override from a spinlock-only setter.
	 */
	prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);

	if (new_qos_override != prev_qos_override) {
		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token);
	}

	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);

	/* Pended work (e.g. priority recomputation) runs with no locks held. */
	thread_policy_update_complete_unlocked(thread, &pend_token);

	/* Free the unlinked entries now that all locks are dropped. */
	while (deferred_free_override_list) {
		struct thread_qos_override *override_next = deferred_free_override_list->override_next;

		zfree(thread_qos_override_zone, deferred_free_override_list);
		deferred_free_override_list = override_next;
	}

	DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
	    int, new_qos_override, int, new_effective_qos);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
	    thread_tid(thread), 0, 0, 0, 0);
}
2891 
2892 int
proc_thread_qos_remove_override(task_t task,thread_t thread,uint64_t tid,user_addr_t resource,int resource_type)2893 proc_thread_qos_remove_override(task_t      task,
2894     thread_t    thread,
2895     uint64_t    tid,
2896     user_addr_t resource,
2897     int         resource_type)
2898 {
2899 	boolean_t has_thread_reference = FALSE;
2900 
2901 	if (thread == THREAD_NULL) {
2902 		thread = task_findtid(task, tid);
2903 		/* returns referenced thread */
2904 
2905 		if (thread == THREAD_NULL) {
2906 			KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2907 			    tid, 0, 0xdead, 0, 0);
2908 			return ESRCH;
2909 		}
2910 		has_thread_reference = TRUE;
2911 	} else {
2912 		assert(task == get_threadtask(thread));
2913 	}
2914 
2915 	proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE);
2916 
2917 	if (has_thread_reference) {
2918 		thread_deallocate(thread);
2919 	}
2920 
2921 	return 0;
2922 }
2923 
2924 /* Deallocate before thread termination */
2925 void
proc_thread_qos_deallocate(thread_t thread)2926 proc_thread_qos_deallocate(thread_t thread)
2927 {
2928 	/* This thread must have no more IPC overrides. */
2929 	assert(thread->kevent_overrides == 0);
2930 	assert(thread->requested_policy.thrp_qos_kevent_override == THREAD_QOS_UNSPECIFIED);
2931 	assert(thread->requested_policy.thrp_qos_wlsvc_override == THREAD_QOS_UNSPECIFIED);
2932 
2933 	/*
2934 	 * Clear out any lingering override objects.
2935 	 */
2936 	struct thread_qos_override *override;
2937 
2938 	thread_mtx_lock(thread);
2939 	override = thread->overrides;
2940 	thread->overrides = NULL;
2941 	thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2942 	/* We don't need to re-evaluate thread policy here because the thread has already exited */
2943 	thread_mtx_unlock(thread);
2944 
2945 	while (override) {
2946 		struct thread_qos_override *override_next = override->override_next;
2947 
2948 		zfree(thread_qos_override_zone, override);
2949 		override = override_next;
2950 	}
2951 }
2952 
2953 /*
2954  * Set up the primordial thread's QoS
2955  */
2956 void
task_set_main_thread_qos(task_t task,thread_t thread)2957 task_set_main_thread_qos(task_t task, thread_t thread)
2958 {
2959 	struct task_pend_token pend_token = {};
2960 
2961 	assert(get_threadtask(thread) == task);
2962 
2963 	thread_mtx_lock(thread);
2964 
2965 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2966 	    (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
2967 	    thread_tid(thread), threquested_0(thread), threquested_1(thread),
2968 	    thread->requested_policy.thrp_qos, 0);
2969 
2970 	thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
2971 
2972 	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
2973 	    primordial_qos, 0, &pend_token);
2974 
2975 	thread_mtx_unlock(thread);
2976 
2977 	thread_policy_update_complete_unlocked(thread, &pend_token);
2978 
2979 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2980 	    (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
2981 	    thread_tid(thread), threquested_0(thread), threquested_1(thread),
2982 	    primordial_qos, 0);
2983 }
2984 
2985 /*
2986  * KPI for pthread kext
2987  *
2988  * Return a good guess at what the initial manager QoS will be
2989  * Dispatch can override this in userspace if it so chooses
2990  */
2991 thread_qos_t
task_get_default_manager_qos(task_t task)2992 task_get_default_manager_qos(task_t task)
2993 {
2994 	thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
2995 
2996 	if (primordial_qos == THREAD_QOS_LEGACY) {
2997 		primordial_qos = THREAD_QOS_USER_INITIATED;
2998 	}
2999 
3000 	return primordial_qos;
3001 }
3002 
3003 /*
3004  * Check if the kernel promotion on thread has changed
3005  * and apply it.
3006  *
3007  * thread locked on entry and exit
3008  */
3009 boolean_t
thread_recompute_kernel_promotion_locked(thread_t thread)3010 thread_recompute_kernel_promotion_locked(thread_t thread)
3011 {
3012 	boolean_t needs_update = FALSE;
3013 	uint8_t kern_promotion_schedpri = (uint8_t)thread_get_inheritor_turnstile_sched_priority(thread);
3014 
3015 	/*
3016 	 * For now just assert that kern_promotion_schedpri <= MAXPRI_PROMOTE.
3017 	 * TURNSTILE_KERNEL_PROMOTE adds threads on the waitq already capped to MAXPRI_PROMOTE
3018 	 * and propagates the priority through the chain with the same cap, because as of now it does
3019 	 * not differenciate on the kernel primitive.
3020 	 *
3021 	 * If this assumption will change with the adoption of a kernel primitive that does not
3022 	 * cap the when adding/propagating,
3023 	 * then here is the place to put the generic cap for all kernel primitives
3024 	 * (converts the assert to kern_promotion_schedpri = MIN(priority, MAXPRI_PROMOTE))
3025 	 */
3026 	assert(kern_promotion_schedpri <= MAXPRI_PROMOTE);
3027 
3028 	if (kern_promotion_schedpri != thread->kern_promotion_schedpri) {
3029 		KDBG(MACHDBG_CODE(
3030 			    DBG_MACH_SCHED, MACH_TURNSTILE_KERNEL_CHANGE) | DBG_FUNC_NONE,
3031 		    thread_tid(thread),
3032 		    kern_promotion_schedpri,
3033 		    thread->kern_promotion_schedpri);
3034 
3035 		needs_update = TRUE;
3036 		thread->kern_promotion_schedpri = kern_promotion_schedpri;
3037 		thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
3038 	}
3039 
3040 	return needs_update;
3041 }
3042 
3043 /*
3044  * Check if the user promotion on thread has changed
3045  * and apply it.
3046  *
3047  * thread locked on entry, might drop the thread lock
3048  * and reacquire it.
3049  */
3050 boolean_t
thread_recompute_user_promotion_locked(thread_t thread)3051 thread_recompute_user_promotion_locked(thread_t thread)
3052 {
3053 	boolean_t needs_update = FALSE;
3054 	struct task_pend_token pend_token = {};
3055 	uint8_t user_promotion_basepri = MIN((uint8_t)thread_get_inheritor_turnstile_base_priority(thread), MAXPRI_USER);
3056 	int old_base_pri = thread->base_pri;
3057 	thread_qos_t qos_promotion;
3058 
3059 	/* Check if user promotion has changed */
3060 	if (thread->user_promotion_basepri == user_promotion_basepri) {
3061 		return needs_update;
3062 	} else {
3063 		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3064 		    (TURNSTILE_CODE(TURNSTILE_PRIORITY_OPERATIONS, (THREAD_USER_PROMOTION_CHANGE))) | DBG_FUNC_NONE,
3065 		    thread_tid(thread),
3066 		    user_promotion_basepri,
3067 		    thread->user_promotion_basepri,
3068 		    0, 0);
3069 		KDBG(MACHDBG_CODE(
3070 			    DBG_MACH_SCHED, MACH_TURNSTILE_USER_CHANGE) | DBG_FUNC_NONE,
3071 		    thread_tid(thread),
3072 		    user_promotion_basepri,
3073 		    thread->user_promotion_basepri);
3074 	}
3075 
3076 	/* Update the user promotion base pri */
3077 	thread->user_promotion_basepri = user_promotion_basepri;
3078 	pend_token.tpt_force_recompute_pri = 1;
3079 
3080 	if (user_promotion_basepri <= MAXPRI_THROTTLE) {
3081 		qos_promotion = THREAD_QOS_UNSPECIFIED;
3082 	} else {
3083 		qos_promotion = thread_user_promotion_qos_for_pri(user_promotion_basepri);
3084 	}
3085 
3086 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3087 	    TASK_POLICY_QOS_PROMOTE, qos_promotion, 0, &pend_token);
3088 
3089 	if (thread_get_waiting_turnstile(thread) &&
3090 	    thread->base_pri != old_base_pri) {
3091 		needs_update = TRUE;
3092 	}
3093 
3094 	thread_unlock(thread);
3095 
3096 	thread_policy_update_complete_unlocked(thread, &pend_token);
3097 
3098 	thread_lock(thread);
3099 
3100 	return needs_update;
3101 }
3102 
3103 /*
3104  * Convert the thread user promotion base pri to qos for threads in qos world.
3105  * For priority above UI qos, the qos would be set to UI.
3106  */
3107 thread_qos_t
thread_user_promotion_qos_for_pri(int priority)3108 thread_user_promotion_qos_for_pri(int priority)
3109 {
3110 	thread_qos_t qos;
3111 	for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
3112 		if (thread_qos_policy_params.qos_pri[qos] <= priority) {
3113 			return qos;
3114 		}
3115 	}
3116 	return THREAD_QOS_MAINTENANCE;
3117 }
3118 
3119 /*
3120  * Set the thread's QoS Kevent override
3121  * Owned by the Kevent subsystem
3122  *
3123  * May be called with spinlocks held, but not spinlocks
3124  * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
3125  *
3126  * One 'add' must be balanced by one 'drop'.
3127  * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
3128  * Before the thread is deallocated, there must be 0 remaining overrides.
3129  */
3130 static void
thread_kevent_override(thread_t thread,uint32_t qos_override,boolean_t is_new_override)3131 thread_kevent_override(thread_t    thread,
3132     uint32_t    qos_override,
3133     boolean_t   is_new_override)
3134 {
3135 	struct task_pend_token pend_token = {};
3136 	boolean_t needs_update;
3137 
3138 	spl_t s = splsched();
3139 	thread_lock(thread);
3140 
3141 	uint32_t old_override = thread->requested_policy.thrp_qos_kevent_override;
3142 
3143 	assert(qos_override > THREAD_QOS_UNSPECIFIED);
3144 	assert(qos_override < THREAD_QOS_LAST);
3145 
3146 	if (is_new_override) {
3147 		if (thread->kevent_overrides++ == 0) {
3148 			/* This add is the first override for this thread */
3149 			assert(old_override == THREAD_QOS_UNSPECIFIED);
3150 		} else {
3151 			/* There are already other overrides in effect for this thread */
3152 			assert(old_override > THREAD_QOS_UNSPECIFIED);
3153 		}
3154 	} else {
3155 		/* There must be at least one override (the previous add call) in effect */
3156 		assert(thread->kevent_overrides > 0);
3157 		assert(old_override > THREAD_QOS_UNSPECIFIED);
3158 	}
3159 
3160 	/*
3161 	 * We can't allow lowering if there are several IPC overrides because
3162 	 * the caller can't possibly know the whole truth
3163 	 */
3164 	if (thread->kevent_overrides == 1) {
3165 		needs_update = qos_override != old_override;
3166 	} else {
3167 		needs_update = qos_override > old_override;
3168 	}
3169 
3170 	if (needs_update) {
3171 		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3172 		    TASK_POLICY_QOS_KEVENT_OVERRIDE,
3173 		    qos_override, 0, &pend_token);
3174 		assert(pend_token.tpt_update_sockets == 0);
3175 	}
3176 
3177 	thread_unlock(thread);
3178 	splx(s);
3179 
3180 	thread_policy_update_complete_unlocked(thread, &pend_token);
3181 }
3182 
/* Register a new kevent QoS override on the thread; pairs with a later drop. */
void
thread_add_kevent_override(thread_t thread, uint32_t qos_override)
{
	thread_kevent_override(thread, qos_override, TRUE);
}
3188 
/* Change the QoS of an already-registered kevent override (between add and drop). */
void
thread_update_kevent_override(thread_t thread, uint32_t qos_override)
{
	thread_kevent_override(thread, qos_override, FALSE);
}
3194 
3195 void
thread_drop_kevent_override(thread_t thread)3196 thread_drop_kevent_override(thread_t thread)
3197 {
3198 	struct task_pend_token pend_token = {};
3199 
3200 	spl_t s = splsched();
3201 	thread_lock(thread);
3202 
3203 	assert(thread->kevent_overrides > 0);
3204 
3205 	if (--thread->kevent_overrides == 0) {
3206 		/*
3207 		 * There are no more overrides for this thread, so we should
3208 		 * clear out the saturated override value
3209 		 */
3210 
3211 		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3212 		    TASK_POLICY_QOS_KEVENT_OVERRIDE, THREAD_QOS_UNSPECIFIED,
3213 		    0, &pend_token);
3214 	}
3215 
3216 	thread_unlock(thread);
3217 	splx(s);
3218 
3219 	thread_policy_update_complete_unlocked(thread, &pend_token);
3220 }
3221 
3222 /*
3223  * Set the thread's QoS Workloop Servicer override
3224  * Owned by the Kevent subsystem
3225  *
3226  * May be called with spinlocks held, but not spinlocks
3227  * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
3228  *
3229  * One 'add' must be balanced by one 'drop'.
3230  * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
3231  * Before the thread is deallocated, there must be 0 remaining overrides.
3232  */
3233 static void
thread_servicer_override(thread_t thread,uint32_t qos_override,boolean_t is_new_override)3234 thread_servicer_override(thread_t    thread,
3235     uint32_t    qos_override,
3236     boolean_t   is_new_override)
3237 {
3238 	struct task_pend_token pend_token = {};
3239 
3240 	spl_t s = splsched();
3241 	thread_lock(thread);
3242 
3243 	if (is_new_override) {
3244 		assert(!thread->requested_policy.thrp_qos_wlsvc_override);
3245 	} else {
3246 		assert(thread->requested_policy.thrp_qos_wlsvc_override);
3247 	}
3248 
3249 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3250 	    TASK_POLICY_QOS_SERVICER_OVERRIDE,
3251 	    qos_override, 0, &pend_token);
3252 
3253 	thread_unlock(thread);
3254 	splx(s);
3255 
3256 	assert(pend_token.tpt_update_sockets == 0);
3257 	thread_policy_update_complete_unlocked(thread, &pend_token);
3258 }
3259 
/* Install a workloop servicer QoS override; pairs with thread_drop_servicer_override(). */
void
thread_add_servicer_override(thread_t thread, uint32_t qos_override)
{
	assert(qos_override > THREAD_QOS_UNSPECIFIED);
	assert(qos_override < THREAD_QOS_LAST);

	thread_servicer_override(thread, qos_override, TRUE);
}
3268 
/* Change the QoS of an already-installed servicer override. */
void
thread_update_servicer_override(thread_t thread, uint32_t qos_override)
{
	assert(qos_override > THREAD_QOS_UNSPECIFIED);
	assert(qos_override < THREAD_QOS_LAST);

	thread_servicer_override(thread, qos_override, FALSE);
}
3277 
/* Remove the servicer override by setting it back to unspecified. */
void
thread_drop_servicer_override(thread_t thread)
{
	thread_servicer_override(thread, THREAD_QOS_UNSPECIFIED, FALSE);
}
3283 
/*
 * Update the thread's kevent IO tier override; no-op when the requested
 * tier already matches.
 */
void
thread_update_servicer_iotier_override(thread_t thread, uint8_t iotier_override)
{
	struct task_pend_token pend_token = {};
	uint8_t current_iotier;

	/* Check if the update is needed */
	/*
	 * NOTE(review): this read happens before the thread lock is taken —
	 * presumably an intentional best-effort early-out; confirm a racing
	 * update here is benign.
	 */
	current_iotier = (uint8_t)thread_get_requested_policy_spinlocked(thread,
	    TASK_POLICY_ATTRIBUTE, TASK_POLICY_IOTIER_KEVENT_OVERRIDE, NULL);

	if (iotier_override == current_iotier) {
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_IOTIER_KEVENT_OVERRIDE,
	    iotier_override, 0, &pend_token);

	thread_unlock(thread);
	splx(s);

	assert(pend_token.tpt_update_sockets == 0);
	thread_policy_update_complete_unlocked(thread, &pend_token);
}
3311 
3312 /* Get current requested qos / relpri, may be called from spinlock context */
3313 thread_qos_t
thread_get_requested_qos(thread_t thread,int * relpri)3314 thread_get_requested_qos(thread_t thread, int *relpri)
3315 {
3316 	int relprio_value = 0;
3317 	thread_qos_t qos;
3318 
3319 	qos = (thread_qos_t)proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
3320 	    TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
3321 	if (relpri) {
3322 		*relpri = -relprio_value;
3323 	}
3324 	return qos;
3325 }
3326 
3327 /*
3328  * This function will promote the thread priority
3329  * since exec could block other threads calling
3330  * proc_find on the proc. This boost must be removed
3331  * via call to thread_clear_exec_promotion.
3332  *
3333  * This should be replaced with a generic 'priority inheriting gate' mechanism (24194397)
3334  */
3335 void
thread_set_exec_promotion(thread_t thread)3336 thread_set_exec_promotion(thread_t thread)
3337 {
3338 	spl_t s = splsched();
3339 	thread_lock(thread);
3340 
3341 	sched_thread_promote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3342 
3343 	thread_unlock(thread);
3344 	splx(s);
3345 }
3346 
3347 /*
3348  * This function will clear the exec thread
3349  * promotion set on the thread by thread_set_exec_promotion.
3350  */
3351 void
thread_clear_exec_promotion(thread_t thread)3352 thread_clear_exec_promotion(thread_t thread)
3353 {
3354 	spl_t s = splsched();
3355 	thread_lock(thread);
3356 
3357 	sched_thread_unpromote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3358 
3359 	thread_unlock(thread);
3360 	splx(s);
3361 }
3362 
3363 #if CONFIG_SCHED_RT_ALLOW
3364 
3365 /*
3366  * flag set by -rt-allow-policy-enable boot-arg to restrict use of
3367  * THREAD_TIME_CONSTRAINT_POLICY and THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY
3368  * to threads that have joined a workinterval with WORK_INTERVAL_WORKLOAD_ID_RT_ALLOWED.
3369  */
3370 static TUNABLE(
3371 	bool,
3372 	rt_allow_policy_enabled,
3373 	"-rt-allow_policy-enable",
3374 	false
3375 	);
3376 
3377 /*
3378  * When the RT allow policy is enabled and a thread allowed to become RT,
3379  * sometimes (if the processes RT allow policy is restricted) the thread will
3380  * have a CPU limit enforced. The following two tunables determine the
3381  * parameters for that CPU limit.
3382  */
3383 
3384 /* % of the interval allowed to run. */
3385 TUNABLE_DEV_WRITEABLE(uint8_t, rt_allow_limit_percent,
3386     "rt_allow_limit_percent", 70);
3387 
3388 /* The length of interval in nanoseconds. */
3389 TUNABLE_DEV_WRITEABLE(uint16_t, rt_allow_limit_interval_ms,
3390     "rt_allow_limit_interval", 10);
3391 
3392 static bool
thread_has_rt(thread_t thread)3393 thread_has_rt(thread_t thread)
3394 {
3395 	return
3396 	        thread->sched_mode == TH_MODE_REALTIME ||
3397 	        thread->saved_mode == TH_MODE_REALTIME;
3398 }
3399 
3400 /*
3401  * Set a CPU limit on a thread based on the RT allow policy. This will be picked
3402  * up by the target thread via the ledger AST.
3403  */
3404 static void
thread_rt_set_cpulimit(thread_t thread)3405 thread_rt_set_cpulimit(thread_t thread)
3406 {
3407 	/* Force reasonable values for the cpu limit. */
3408 	const uint8_t percent = MAX(MIN(rt_allow_limit_percent, 99), 1);
3409 	const uint16_t interval_ms = MAX(rt_allow_limit_interval_ms, 1);
3410 
3411 	thread->t_ledger_req_percentage = percent;
3412 	thread->t_ledger_req_interval_ms = interval_ms;
3413 	thread->t_ledger_req_action = THREAD_CPULIMIT_BLOCK;
3414 
3415 	thread->sched_flags |= TH_SFLAG_RT_CPULIMIT;
3416 }
3417 
3418 /* Similar to the above but removes any CPU limit. */
3419 static void
thread_rt_clear_cpulimit(thread_t thread)3420 thread_rt_clear_cpulimit(thread_t thread)
3421 {
3422 	thread->sched_flags &= ~TH_SFLAG_RT_CPULIMIT;
3423 
3424 	thread->t_ledger_req_percentage = 0;
3425 	thread->t_ledger_req_interval_ms = 0;
3426 	thread->t_ledger_req_action = THREAD_CPULIMIT_DISABLE;
3427 }
3428 
3429 /*
3430  * Evaluate RT policy for a thread, demoting and undemoting as needed.
3431  */
3432 void
thread_rt_evaluate(thread_t thread)3433 thread_rt_evaluate(thread_t thread)
3434 {
3435 	task_t task = get_threadtask(thread);
3436 	bool platform_binary = false;
3437 
3438 	/* If the RT allow policy is not enabled - nothing to do. */
3439 	if (!rt_allow_policy_enabled) {
3440 		return;
3441 	}
3442 
3443 	/* User threads only. */
3444 	if (task == kernel_task) {
3445 		return;
3446 	}
3447 
3448 	/* Check for platform binary. */
3449 	platform_binary = (task_ro_flags_get(task) & TFRO_PLATFORM) != 0;
3450 
3451 	spl_t s = splsched();
3452 	thread_lock(thread);
3453 
3454 	const thread_work_interval_flags_t wi_flags =
3455 	    os_atomic_load(&thread->th_work_interval_flags, relaxed);
3456 
3457 	/*
3458 	 * RT threads which are not joined to a work interval which allows RT
3459 	 * threads are demoted. Once those conditions no longer hold, the thread
3460 	 * undemoted.
3461 	 */
3462 	if (thread_has_rt(thread) && (wi_flags & TH_WORK_INTERVAL_FLAGS_RT_ALLOWED) == 0) {
3463 		if (!sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) {
3464 			KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RT_DISALLOWED_WORK_INTERVAL),
3465 			    thread_tid(thread));
3466 			sched_thread_mode_demote(thread, TH_SFLAG_RT_DISALLOWED);
3467 		}
3468 	} else {
3469 		if (sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) {
3470 			sched_thread_mode_undemote(thread, TH_SFLAG_RT_DISALLOWED);
3471 		}
3472 	}
3473 
3474 	/*
3475 	 * RT threads get a CPU limit unless they're part of a platform binary
3476 	 * task. If the thread is no longer RT, any existing CPU limit should be
3477 	 * removed.
3478 	 */
3479 	bool set_ast = false;
3480 	if (!platform_binary &&
3481 	    thread_has_rt(thread) &&
3482 	    (thread->sched_flags & TH_SFLAG_RT_CPULIMIT) == 0) {
3483 		thread_rt_set_cpulimit(thread);
3484 		set_ast = true;
3485 	}
3486 
3487 	if (!platform_binary &&
3488 	    !thread_has_rt(thread) &&
3489 	    (thread->sched_flags & TH_SFLAG_RT_CPULIMIT) != 0) {
3490 		thread_rt_clear_cpulimit(thread);
3491 		set_ast = true;
3492 	}
3493 
3494 	thread_unlock(thread);
3495 	splx(s);
3496 
3497 	if (set_ast) {
3498 		/* Ensure the target thread picks up any CPU limit change. */
3499 		act_set_astledger(thread);
3500 	}
3501 }
3502 
3503 #else
3504 
/* CONFIG_SCHED_RT_ALLOW compiled out: RT allow policy evaluation is a no-op. */
void
thread_rt_evaluate(__unused thread_t thread)
{
}
3509 
3510 #endif /*  CONFIG_SCHED_RT_ALLOW */
3511