xref: /xnu-10002.41.9/osfmk/kern/thread_policy.c (revision 699cd48037512bf4380799317ca44ca453c82f57)
1 /*
2  * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <mach/mach_types.h>
30 #include <mach/thread_act_server.h>
31 
32 #include <kern/kern_types.h>
33 #include <kern/processor.h>
34 #include <kern/thread.h>
35 #include <kern/affinity.h>
36 #include <kern/work_interval.h>
37 #include <mach/task_policy.h>
38 #include <kern/sfi.h>
39 #include <kern/policy_internal.h>
40 #include <sys/errno.h>
41 #include <sys/ulock.h>
42 
43 #include <mach/machine/sdt.h>
44 
/* Typed zone backing struct thread_qos_override allocations. */
static KALLOC_TYPE_DEFINE(thread_qos_override_zone,
    struct thread_qos_override, KT_DEFAULT);

#ifdef MACH_BSD
extern int      proc_selfpid(void);
extern char *   proc_name_address(void *p);
extern void     rethrottle_thread(void * uthread);
#endif /* MACH_BSD */

/* Strip a *_QOS_TIER_* constant down to its low-byte tier value. */
#define QOS_EXTRACT(q)        ((q) & 0xff)

/* Legal values for the qos_override_mode tunable below. */
#define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
#define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 3

/* Boot-arg selectable strategy for tracking/applying QoS overrides. */
TUNABLE(uint32_t, qos_override_mode, "qos_override_mode",
    QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE);
63 
/* Forward declaration: QoS-override removal helper defined later in this file. */
static void
proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset);

/*
 * Default value for the kevent-driven IO tier override.
 * NOTE(review): THROTTLE_LEVEL_END presumably means "no override in
 * effect" — confirm against the THROTTLE_LEVEL definitions.
 */
const int thread_default_iotier_override  = THROTTLE_LEVEL_END;

/*
 * Template requested-policy for threads: all fields zero except the
 * kevent IO tier override, which starts at its "default" value above.
 */
const struct thread_requested_policy default_thread_requested_policy = {
	.thrp_iotier_kevent_override = thread_default_iotier_override
};
72 
/*
 * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
 * to threads that don't have a QoS class set.
 */
const qos_policy_params_t thread_qos_policy_params = {
	/*
	 * This table defines the starting base priority of the thread,
	 * which will be modified by the thread importance and the task max priority
	 * before being applied.
	 */
	.qos_pri[THREAD_QOS_UNSPECIFIED]                = 0, /* not consulted */
	.qos_pri[THREAD_QOS_USER_INTERACTIVE]           = BASEPRI_BACKGROUND, /* i.e. 46 */
	.qos_pri[THREAD_QOS_USER_INITIATED]             = BASEPRI_USER_INITIATED,
	.qos_pri[THREAD_QOS_LEGACY]                     = BASEPRI_DEFAULT,
	.qos_pri[THREAD_QOS_UTILITY]                    = BASEPRI_UTILITY,
	.qos_pri[THREAD_QOS_BACKGROUND]                 = MAXPRI_THROTTLE,
	.qos_pri[THREAD_QOS_MAINTENANCE]                = MAXPRI_THROTTLE,

	/*
	 * This table defines the highest IO priority that a thread marked with this
	 * QoS class can have.
	 */
	.qos_iotier[THREAD_QOS_UNSPECIFIED]             = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_USER_INTERACTIVE]        = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_USER_INITIATED]          = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_LEGACY]                  = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_UTILITY]                 = THROTTLE_LEVEL_TIER1,
	.qos_iotier[THREAD_QOS_BACKGROUND]              = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */
	.qos_iotier[THREAD_QOS_MAINTENANCE]             = THROTTLE_LEVEL_TIER3,

	/*
	 * This table defines the highest throughput QoS tier that
	 * a thread marked with this QoS class can have.
	 */

	.qos_through_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
	.qos_through_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
	.qos_through_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
	.qos_through_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
	.qos_through_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
	.qos_through_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
	.qos_through_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),

	/* Likewise, the latency QoS tier applied for each QoS class. */
	.qos_latency_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
	.qos_latency_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(LATENCY_QOS_TIER_0),
	.qos_latency_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(LATENCY_QOS_TIER_1),
	.qos_latency_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(LATENCY_QOS_TIER_1),
	.qos_latency_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(LATENCY_QOS_TIER_3),
	.qos_latency_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(LATENCY_QOS_TIER_3),
	.qos_latency_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(LATENCY_QOS_TIER_3),
};
124 
/* Forward declarations for the file-local policy helpers defined below. */
static void
thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);

static int
thread_qos_scaled_relative_priority(int qos, int qos_relprio);

static void
proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);

static void
proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);

static void
proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);

static void
thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);

static int
thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);

static int
proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);

static void
thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);

static void
thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
154 
155 boolean_t
thread_has_qos_policy(thread_t thread)156 thread_has_qos_policy(thread_t thread)
157 {
158 	return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
159 }
160 
161 
/*
 * thread_remove_qos_policy_locked:
 *
 * Reset the thread's requested QoS tier and relative priority back to
 * unspecified.  Caller holds the thread mutex; deferred side effects
 * accumulate in pend_token.
 */
static void
thread_remove_qos_policy_locked(thread_t thread,
    task_pend_token_t pend_token)
{
	/* prev_qos is consumed only by the DTrace probe below. */
	__unused int prev_qos = thread->requested_policy.thrp_qos;

	DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);

	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
	    THREAD_QOS_UNSPECIFIED, 0, pend_token);
}
173 
/*
 * thread_remove_qos_policy:
 *
 * Unlocked wrapper for thread_remove_qos_policy_locked(): takes the
 * thread mutex, returns KERN_TERMINATED for inactive threads, and
 * flushes any pending policy side effects after dropping the mutex.
 */
kern_return_t
thread_remove_qos_policy(thread_t thread)
{
	struct task_pend_token pend_token = {};

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);
		return KERN_TERMINATED;
	}

	thread_remove_qos_policy_locked(thread, &pend_token);

	thread_mtx_unlock(thread);

	/* Apply deferred effects only after the mutex is released. */
	thread_policy_update_complete_unlocked(thread, &pend_token);

	return KERN_SUCCESS;
}
193 
194 
195 boolean_t
thread_is_static_param(thread_t thread)196 thread_is_static_param(thread_t thread)
197 {
198 	if (thread->static_param) {
199 		DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
200 		return TRUE;
201 	}
202 	return FALSE;
203 }
204 
205 /*
206  * Relative priorities can range between 0REL and -15REL. These
207  * map to QoS-specific ranges, to create non-overlapping priority
208  * ranges.
209  */
210 static int
thread_qos_scaled_relative_priority(int qos,int qos_relprio)211 thread_qos_scaled_relative_priority(int qos, int qos_relprio)
212 {
213 	int next_lower_qos;
214 
215 	/* Fast path, since no validation or scaling is needed */
216 	if (qos_relprio == 0) {
217 		return 0;
218 	}
219 
220 	switch (qos) {
221 	case THREAD_QOS_USER_INTERACTIVE:
222 		next_lower_qos = THREAD_QOS_USER_INITIATED;
223 		break;
224 	case THREAD_QOS_USER_INITIATED:
225 		next_lower_qos = THREAD_QOS_LEGACY;
226 		break;
227 	case THREAD_QOS_LEGACY:
228 		next_lower_qos = THREAD_QOS_UTILITY;
229 		break;
230 	case THREAD_QOS_UTILITY:
231 		next_lower_qos = THREAD_QOS_BACKGROUND;
232 		break;
233 	case THREAD_QOS_MAINTENANCE:
234 	case THREAD_QOS_BACKGROUND:
235 		next_lower_qos = 0;
236 		break;
237 	default:
238 		panic("Unrecognized QoS %d", qos);
239 		return 0;
240 	}
241 
242 	int prio_range_max = thread_qos_policy_params.qos_pri[qos];
243 	int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0;
244 
245 	/*
246 	 * We now have the valid range that the scaled relative priority can map to. Note
247 	 * that the lower bound is exclusive, but the upper bound is inclusive. If the
248 	 * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
249 	 * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
250 	 * remainder.
251 	 */
252 	int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4);
253 
254 	return scaled_relprio;
255 }
256 
257 /*
258  * flag set by -qos-policy-allow boot-arg to allow
259  * testing thread qos policy from userspace
260  */
261 static TUNABLE(bool, allow_qos_policy_set, "-qos-policy-allow", false);
262 
/*
 * thread_policy_set:
 *
 * Mach interface for applying a scheduling policy flavor to a thread.
 * Unless the -qos-policy-allow boot-arg is set, rejects static-param
 * threads, the raw THREAD_QOS_POLICY flavor, and non-default RT
 * priorities.  Any existing QoS is removed before the new policy is
 * applied, and restored if the set fails.
 */
kern_return_t
thread_policy_set(
	thread_t                                thread,
	thread_policy_flavor_t  flavor,
	thread_policy_t                 policy_info,
	mach_msg_type_number_t  count)
{
	thread_qos_policy_data_t req_qos;
	kern_return_t kr;

	req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;

	if (thread == THREAD_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (!allow_qos_policy_set) {
		/* Static scheduling parameters may not be changed. */
		if (thread_is_static_param(thread)) {
			return KERN_POLICY_STATIC;
		}

		/* THREAD_QOS_POLICY is not settable through this path without the boot-arg. */
		if (flavor == THREAD_QOS_POLICY) {
			return KERN_INVALID_ARGUMENT;
		}

		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			if (count < THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT) {
				return KERN_INVALID_ARGUMENT;
			}
			/* Without the boot-arg, only the default RT priority is accepted. */
			thread_time_constraint_with_priority_policy_t info = (thread_time_constraint_with_priority_policy_t)policy_info;
			if (info->priority != BASEPRI_RTQUEUES) {
				return KERN_INVALID_ARGUMENT;
			}
		}
	}

	if (flavor == THREAD_TIME_CONSTRAINT_POLICY || flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
		thread_work_interval_flags_t th_wi_flags = os_atomic_load(
			&thread->th_work_interval_flags, relaxed);
		if ((th_wi_flags & TH_WORK_INTERVAL_FLAGS_HAS_WORKLOAD_ID) &&
		    !(th_wi_flags & TH_WORK_INTERVAL_FLAGS_RT_ALLOWED)) {
			/* Fail requests to become realtime for threads having joined workintervals
			 * with workload ID that don't have the rt-allowed flag. */
			return KERN_INVALID_POLICY;
		}
	}

	/* Threads without static_param set reset their QoS when other policies are applied. */
	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		/* Store the existing tier, if we fail this call it is used to reset back. */
		req_qos.qos_tier = thread->requested_policy.thrp_qos;
		req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;

		kr = thread_remove_qos_policy(thread);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
	}

	kr = thread_policy_set_internal(thread, flavor, policy_info, count);

	if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
		if (kr != KERN_SUCCESS) {
			/* Reset back to our original tier as the set failed. */
			(void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
		}
	}

	return kr;
}
333 
/*
 * The "with priority" time-constraint payload must be layout-compatible
 * with the plain one: thread_policy_set_internal handles both flavors
 * through a single thread_time_constraint_with_priority_policy_t pointer.
 */
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, period) == offsetof(thread_time_constraint_policy_data_t, period));
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, computation) == offsetof(thread_time_constraint_policy_data_t, computation));
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, constraint) == offsetof(thread_time_constraint_policy_data_t, constraint));
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, preemptible) == offsetof(thread_time_constraint_policy_data_t, preemptible));
338 
/*
 * thread_policy_set_internal:
 *
 * Apply one policy flavor to the thread under the thread mutex.
 * Deferred side effects (e.g. SFI re-evaluation) accumulate in a
 * pend_token and are flushed by thread_policy_update_complete_unlocked()
 * after the mutex is dropped.  Returns KERN_TERMINATED for inactive
 * threads and KERN_INVALID_ARGUMENT for short counts / bad payloads.
 */
kern_return_t
thread_policy_set_internal(
	thread_t                     thread,
	thread_policy_flavor_t       flavor,
	thread_policy_t              policy_info,
	mach_msg_type_number_t       count)
{
	kern_return_t result = KERN_SUCCESS;
	struct task_pend_token pend_token = {};

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return KERN_TERMINATED;
	}

	switch (flavor) {
	case THREAD_EXTENDED_POLICY:
	{
		/* The payload is optional; default to timeshare when absent. */
		boolean_t timeshare = TRUE;

		if (count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t info;

			info = (thread_extended_policy_t)policy_info;
			timeshare = info->timeshare;
		}

		sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;

		spl_t s = splsched();
		thread_lock(thread);

		thread_set_user_sched_mode_and_recompute_pri(thread, mode);

		thread_unlock(thread);
		splx(s);

		/*
		 * The thread may be demoted with RT_DISALLOWED but has just
		 * changed its sched mode to TIMESHARE or FIXED. Make sure to
		 * undemote the thread so the new sched mode takes effect.
		 */
		thread_rt_evaluate(thread);

		pend_token.tpt_update_thread_sfi = 1;

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
	{
		thread_time_constraint_with_priority_policy_t info;

		/* The two flavors share a leading layout; only the minimum count differs. */
		mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
		    THREAD_TIME_CONSTRAINT_POLICY_COUNT :
		    THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);

		if (count < min_count) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_with_priority_policy_t)policy_info;

		/* The computation must fit in the constraint and within RT quantum bounds. */
		if (info->constraint < info->computation ||
		    info->computation > max_rt_quantum ||
		    info->computation < min_rt_quantum) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Round a small computation up to half the constraint, capped at max_rt_quantum. */
		if (info->computation < (info->constraint / 2)) {
			info->computation = (info->constraint / 2);
			if (info->computation > max_rt_quantum) {
				info->computation = max_rt_quantum;
			}
		}

		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			/* The explicit RT priority must lie in [BASEPRI_RTQUEUES, MAXPRI]. */
			if ((info->priority < BASEPRI_RTQUEUES) || (info->priority > MAXPRI)) {
				result = KERN_INVALID_ARGUMENT;
				break;
			}
		}

		spl_t s = splsched();
		thread_lock(thread);

		thread->realtime.period          = info->period;
		thread->realtime.computation     = info->computation;
		thread->realtime.constraint      = info->constraint;
		thread->realtime.preemptible     = info->preemptible;

		/*
		 * If the thread has a work interval driven policy, the priority
		 * offset has been set by the work interval.
		 */
		if (!thread->requested_policy.thrp_wi_driven) {
			if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
				thread->realtime.priority_offset = (uint8_t)(info->priority - BASEPRI_RTQUEUES);
			} else {
				thread->realtime.priority_offset = 0;
			}
		}

		thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);

		thread_unlock(thread);
		splx(s);

		/* The thread may need to be undemoted for its new RT parameters. */
		thread_rt_evaluate(thread);

		pend_token.tpt_update_thread_sfi = 1;

		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t info;

		if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}
		info = (thread_precedence_policy_t)policy_info;

		spl_t s = splsched();
		thread_lock(thread);

		thread->importance = info->importance;

		thread_recompute_priority(thread);

		thread_unlock(thread);
		splx(s);

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		extern boolean_t affinity_sets_enabled;
		thread_affinity_policy_t info;

		if (!affinity_sets_enabled) {
			result = KERN_INVALID_POLICY;
			break;
		}

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t) policy_info;
		/*
		 * Unlock the thread mutex here and
		 * return directly after calling thread_affinity_set().
		 * This is necessary for correct lock ordering because
		 * thread_affinity_set() takes the task lock.
		 */
		thread_mtx_unlock(thread);
		return thread_affinity_set(thread, info->affinity_tag);
	}

#if !defined(XNU_TARGET_OS_OSX)
	case THREAD_BACKGROUND_POLICY:
	{
		thread_background_policy_t info;

		if (count < THREAD_BACKGROUND_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Only threads in the calling task may be backgrounded this way. */
		if (get_threadtask(thread) != current_task()) {
			result = KERN_PROTECTION_FAILURE;
			break;
		}

		info = (thread_background_policy_t) policy_info;

		int enable;

		if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG) {
			enable = TASK_POLICY_ENABLE;
		} else {
			enable = TASK_POLICY_DISABLE;
		}

		/* Self-requests count as internal; requests on other threads as external. */
		int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;

		proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token);

		break;
	}
#endif /* !defined(XNU_TARGET_OS_OSX) */

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		thread_throughput_qos_t tqos;

		if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS) {
			break;
		}

		tqos = qos_extract(info->thread_throughput_qos_tier);

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);

		break;
	}

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		thread_latency_qos_t lqos;

		if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS) {
			break;
		}

		lqos = qos_extract(info->thread_latency_qos_tier);

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);

		break;
	}

	case THREAD_QOS_POLICY:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Relative priority must lie in [THREAD_QOS_MIN_TIER_IMPORTANCE, 0]. */
		if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* An unspecified tier cannot carry a relative priority. */
		if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* relprio is stored negated, i.e. as a non-negative value. */
		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
		    info->qos_tier, -info->tier_importance, &pend_token);

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	/* Flush deferred side effects now that the mutex is dropped. */
	thread_policy_update_complete_unlocked(thread, &pend_token);

	return result;
}
631 
632 /*
633  * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
634  * Both result in FIXED mode scheduling.
635  */
636 static sched_mode_t
convert_policy_to_sched_mode(integer_t policy)637 convert_policy_to_sched_mode(integer_t policy)
638 {
639 	switch (policy) {
640 	case POLICY_TIMESHARE:
641 		return TH_MODE_TIMESHARE;
642 	case POLICY_RR:
643 	case POLICY_FIFO:
644 		return TH_MODE_FIXED;
645 	default:
646 		panic("unexpected sched policy: %d", policy);
647 		return TH_MODE_NONE;
648 	}
649 }
650 
/*
 * thread_set_mode_and_absolute_pri_internal:
 *
 * Apply a user sched mode and absolute base priority by reverse
 * engineering the corresponding importance value.  Fails with
 * KERN_FAILURE for realtime threads: this path may not change a
 * thread out of realtime.
 *
 * Called either with the thread mutex locked
 * or from the pthread kext in a 'safe place'.
 */
static kern_return_t
thread_set_mode_and_absolute_pri_internal(thread_t              thread,
    sched_mode_t          mode,
    integer_t             priority,
    task_pend_token_t     pend_token)
{
	kern_return_t kr = KERN_SUCCESS;

	spl_t s = splsched();
	thread_lock(thread);

	/* This path isn't allowed to change a thread out of realtime. */
	if ((thread->sched_mode == TH_MODE_REALTIME) ||
	    (thread->saved_mode == TH_MODE_REALTIME)) {
		kr = KERN_FAILURE;
		goto unlock;
	}

	/* Threads whose policy has been reset are left as-is (reported as success). */
	if (thread->policy_reset) {
		kr = KERN_SUCCESS;
		goto unlock;
	}

	sched_mode_t old_mode = thread->sched_mode;

	/*
	 * Reverse engineer and apply the correct importance value
	 * from the requested absolute priority value.
	 *
	 * TODO: Store the absolute priority value instead
	 */

	if (priority >= thread->max_priority) {
		priority = thread->max_priority - thread->task_priority;
	} else if (priority >= MINPRI_KERNEL) {
		priority -=  MINPRI_KERNEL;
	} else if (priority >= MINPRI_RESERVED) {
		priority -=  MINPRI_RESERVED;
	} else {
		priority -= BASEPRI_DEFAULT;
	}

	priority += thread->task_priority;

	/* Clamp the rebased priority into [MINPRI, max_priority]. */
	if (priority > thread->max_priority) {
		priority = thread->max_priority;
	} else if (priority < MINPRI) {
		priority = MINPRI;
	}

	thread->importance = priority - thread->task_priority;

	thread_set_user_sched_mode_and_recompute_pri(thread, mode);

	/* A mode change may alter the thread's SFI classification. */
	if (mode != old_mode) {
		pend_token->tpt_update_thread_sfi = 1;
	}

unlock:
	thread_unlock(thread);
	splx(s);

	return kr;
}
719 
/*
 * thread_freeze_base_pri:
 *
 * Mark the current thread's base priority frozen by setting
 * TH_SFLAG_BASE_PRI_FROZEN.  While frozen, the requested base pri
 * (req_base_pri) may diverge from the applied one; it is reapplied by
 * thread_unfreeze_base_pri().  Must not be called while already frozen.
 */
void
thread_freeze_base_pri(thread_t thread)
{
	assert(thread == current_thread());

	spl_t s = splsched();
	thread_lock(thread);

	assert((thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN) == 0);
	thread->sched_flags |= TH_SFLAG_BASE_PRI_FROZEN;

	thread_unlock(thread);
	splx(s);
}
734 
/*
 * thread_unfreeze_base_pri:
 *
 * Clear TH_SFLAG_BASE_PRI_FROZEN on the current thread and apply any
 * base-priority change (req_base_pri) accrued while it was frozen.
 * Returns true when the applied change left a preemption AST pending.
 */
bool
thread_unfreeze_base_pri(thread_t thread)
{
	assert(thread == current_thread());
	integer_t base_pri;
	ast_t ast = 0;

	spl_t s = splsched();
	thread_lock(thread);

	assert(thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN);
	thread->sched_flags &= ~TH_SFLAG_BASE_PRI_FROZEN;

	base_pri = thread->req_base_pri;
	if (base_pri != thread->base_pri) {
		/*
		 * This function returns "true" if the base pri change
		 * is the most likely cause for the preemption.
		 */
		sched_set_thread_base_priority(thread, base_pri);
		ast = ast_peek(AST_PREEMPT);
	}

	thread_unlock(thread);
	splx(s);

	return ast != 0;
}
763 
764 uint8_t
thread_workq_pri_for_qos(thread_qos_t qos)765 thread_workq_pri_for_qos(thread_qos_t qos)
766 {
767 	assert(qos < THREAD_QOS_LAST);
768 	return (uint8_t)thread_qos_policy_params.qos_pri[qos];
769 }
770 
771 thread_qos_t
thread_workq_qos_for_pri(int priority)772 thread_workq_qos_for_pri(int priority)
773 {
774 	thread_qos_t qos;
775 	if (priority > thread_qos_policy_params.qos_pri[THREAD_QOS_USER_INTERACTIVE]) {
776 		// indicate that workq should map >UI threads to workq's
777 		// internal notation for above-UI work.
778 		return THREAD_QOS_UNSPECIFIED;
779 	}
780 	for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
781 		// map a given priority up to the next nearest qos band.
782 		if (thread_qos_policy_params.qos_pri[qos - 1] < priority) {
783 			return qos;
784 		}
785 	}
786 	return THREAD_QOS_MAINTENANCE;
787 }
788 
/*
 * private interface for pthread workqueues
 *
 * Reset the thread's requested QoS and clear any workq QoS override
 * May be called with spinlocks held
 * Thread mutex lock is not held
 */
/*
 * Set the requested QoS to `qos` and clear the workq QoS override,
 * both under the thread spinlock; deferred effects are flushed after
 * the lock is dropped.
 */
void
thread_reset_workq_qos(thread_t thread, uint32_t qos)
{
	struct task_pend_token pend_token = {};

	assert(qos < THREAD_QOS_LAST);

	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0,
	    &pend_token);

	/* A pure QoS change must never require a socket rethrottle. */
	assert(pend_token.tpt_update_sockets == 0);

	thread_unlock(thread);
	splx(s);

	thread_policy_update_complete_unlocked(thread, &pend_token);
}
819 
/*
 * private interface for pthread workqueues
 *
 * Apply a workqueue QoS override to the thread
 * May be called with spinlocks held
 * Thread mutex lock is held
 */
/*
 * Set the workq QoS override attribute to `qos` under the thread
 * spinlock; deferred effects are flushed after the lock is dropped.
 */
void
thread_set_workq_override(thread_t thread, uint32_t qos)
{
	struct task_pend_token pend_token = {};

	assert(qos < THREAD_QOS_LAST);

	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_WORKQ_OVERRIDE, qos, 0, &pend_token);

	/* A pure QoS override change must never require a socket rethrottle. */
	assert(pend_token.tpt_update_sockets == 0);

	thread_unlock(thread);
	splx(s);

	thread_policy_update_complete_unlocked(thread, &pend_token);
}
847 
848 /*
849  * private interface for pthread workqueues
850  *
851  * Set scheduling policy & absolute priority for thread
852  * May be called with spinlocks held
853  * Thread mutex lock is not held
854  */
/*
 * Set the QoS, sched policy, and absolute priority of a workqueue
 * thread.  Only valid for active threads whose parameters are marked
 * static.  Clears any workq QoS override before applying the
 * mode/priority via thread_set_mode_and_absolute_pri_internal().
 */
void
thread_set_workq_pri(thread_t  thread,
    thread_qos_t qos,
    integer_t priority,
    integer_t policy)
{
	struct task_pend_token pend_token = {};
	sched_mode_t mode = convert_policy_to_sched_mode(policy);

	assert(qos < THREAD_QOS_LAST);
	assert(thread->static_param);

	/* Silently ignore requests on non-static or terminating threads. */
	if (!thread->static_param || !thread->active) {
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED,
	    0, &pend_token);

	thread_unlock(thread);
	splx(s);

	/* Concern: this doesn't hold the mutex... */

	__assert_only kern_return_t kr;
	kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority,
	    &pend_token);
	assert(kr == KERN_SUCCESS);

	if (pend_token.tpt_update_thread_sfi) {
		sfi_reevaluate(thread);
	}
}
894 
895 /*
896  * thread_set_mode_and_absolute_pri:
897  *
898  * Set scheduling policy & absolute priority for thread, for deprecated
899  * thread_set_policy and thread_policy interfaces.
900  *
901  * Called with nothing locked.
902  */
kern_return_t
thread_set_mode_and_absolute_pri(thread_t   thread,
    integer_t  policy,
    integer_t  priority)
{
	kern_return_t kr = KERN_SUCCESS;
	struct task_pend_token pend_token = {};

	sched_mode_t mode = convert_policy_to_sched_mode(policy);

	thread_mtx_lock(thread);

	if (!thread->active) {
		kr = KERN_TERMINATED;
		goto unlock;
	}

	/* Threads with static scheduling parameters cannot be changed. */
	if (thread_is_static_param(thread)) {
		kr = KERN_POLICY_STATIC;
		goto unlock;
	}

	/* Setting legacy policies on threads kills the current QoS */
	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		thread_remove_qos_policy_locked(thread, &pend_token);
	}

	kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);

unlock:
	thread_mtx_unlock(thread);

	/* Flush deferred policy effects (e.g. SFI) outside the mutex. */
	thread_policy_update_complete_unlocked(thread, &pend_token);

	return kr;
}
939 
940 /*
941  * Set the thread's requested mode and recompute priority
942  * Called with thread mutex and thread locked
943  *
944  * TODO: Mitigate potential problems caused by moving thread to end of runq
945  * whenever its priority is recomputed
946  *      Only remove when it actually changes? Attempt to re-insert at appropriate location?
947  */
static void
thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
{
	/* Threads whose policy has been reset are left untouched. */
	if (thread->policy_reset) {
		return;
	}

	/* Pull the thread off its run queue (if any) across the mode change. */
	boolean_t removed = thread_run_queue_remove(thread);

	sched_set_thread_mode_user(thread, mode);

	thread_recompute_priority(thread);

	/* Reinsert at the tail if it was queued before. */
	if (removed) {
		thread_run_queue_reinsert(thread, SCHED_TAILQ);
	}
}
965 
/*
 * Credit the CPU time this thread has accumulated since the last snapshot
 * to the task-level per-QoS CPU time statistics (both effective and
 * requested QoS buckets).
 *
 * called at splsched with thread lock locked
 */
static void
thread_update_qos_cpu_time_locked(thread_t thread)
{
	task_t task = get_threadtask(thread);
	uint64_t timer_sum, timer_delta;

	/*
	 * This is only as accurate the thread's last context switch or user/kernel
	 * transition (unless precise user/kernel time is disabled).
	 *
	 * TODO: Consider running an update operation here to update it first.
	 *       Maybe doable with interrupts disabled from current thread.
	 *       If the thread is on a different core, may not be easy to get right.
	 */

	/* Only the delta since the previous snapshot is charged below. */
	timer_sum = recount_thread_time_mach(thread);
	timer_delta = timer_sum - thread->vtimer_qos_save;

	/* Remember the new snapshot for the next call. */
	thread->vtimer_qos_save = timer_sum;

	uint64_t* task_counter = NULL;

	/* Update the task-level effective and requested qos stats atomically, because we don't have the task lock. */
	switch (thread->effective_policy.thep_qos) {
	case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default; break;
	case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance; break;
	case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background; break;
	case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility; break;
	case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy; break;
	case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated; break;
	case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break;
	default:
		panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
	}

	OSAddAtomic64(timer_delta, task_counter);

	/* Update the task-level qos stats atomically, because we don't have the task lock. */
	switch (thread->requested_policy.thrp_qos) {
	case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default; break;
	case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance; break;
	case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background; break;
	case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility; break;
	case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy; break;
	case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated; break;
	case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break;
	default:
		panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos);
	}

	OSAddAtomic64(timer_delta, task_counter);
}
1019 
/*
 * Locking wrapper for thread_update_qos_cpu_time_locked():
 * takes the thread mutex, then the thread spinlock at splsched,
 * as the _locked variant requires.
 *
 * called with no thread locks held
 * may hold task lock
 */
void
thread_update_qos_cpu_time(thread_t thread)
{
	thread_mtx_lock(thread);

	/* The _locked variant must run at splsched with the thread lock held. */
	spl_t s = splsched();
	thread_lock(thread);

	thread_update_qos_cpu_time_locked(thread);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);
}
1039 
/*
 * Calculate base priority from thread attributes, and set it on the thread
 *
 * Called with thread_lock and thread mutex held.
 */
void
thread_recompute_priority(
	thread_t                thread)
{
	integer_t               priority;
	integer_t               adj_priority;
	bool                    wi_priority = false;

	if (thread->policy_reset) {
		/* Base priority is frozen once the thread has been policy-reset for exit. */
		return;
	}

	if (thread->sched_mode == TH_MODE_REALTIME) {
		/* Realtime: base priority is BASEPRI_RTQUEUES plus the per-thread offset. */
		uint8_t i = thread->realtime.priority_offset;
		assert((i >= 0) && (i < NRTQS));
		priority = BASEPRI_RTQUEUES + i;

		sched_set_thread_base_priority(thread, priority);
		if (thread->realtime.deadline == RT_DEADLINE_NONE) {
			/* Make sure the thread has a valid deadline */
			uint64_t ctime = mach_absolute_time();
			thread->realtime.deadline = thread->realtime.constraint + ctime;
			KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SET_RT_DEADLINE) | DBG_FUNC_NONE,
			    (uintptr_t)thread_tid(thread), thread->realtime.deadline, thread->realtime.computation, 1);
		}
		/* Realtime threads skip all the clamping logic below. */
		return;

		/*
		 * A thread may have joined a RT work interval but then never
		 * changed its sched mode or have been demoted. RT work
		 * intervals will have RT priorities - ignore the priority if
		 * the thread isn't RT.
		 */
	} else if (thread->effective_policy.thep_wi_driven &&
	    work_interval_get_priority(thread) < BASEPRI_RTQUEUES) {
		/* Priority driven by the work interval the thread has joined. */
		priority = work_interval_get_priority(thread);
		wi_priority = true;
	} else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
		/* QoS world: table lookup for the tier, then scaled relative priority. */
		int qos = thread->effective_policy.thep_qos;
		int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
		int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
		int qos_scaled_relprio;

		assert(qos >= 0 && qos < THREAD_QOS_LAST);
		assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);

		priority = thread_qos_policy_params.qos_pri[qos];
		qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);

		if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) {
			/* Bump priority 46 to 47 when in a frontmost app */
			qos_scaled_relprio += 1;
		}

		/* TODO: factor in renice priority here? */

		priority += qos_scaled_relprio;
	} else {
		/* Legacy path: importance (clamped to +/- MAXPRI) offset from task priority. */
		if (thread->importance > MAXPRI) {
			priority = MAXPRI;
		} else if (thread->importance < -MAXPRI) {
			priority = -MAXPRI;
		} else {
			priority = thread->importance;
		}

		priority += thread->task_priority;
	}

	/* Boost the priority of threads which are RT demoted. */
	if (sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) {
		priority = MAX(priority, MAXPRI_USER);
	}

	/* User promotions act as a floor on the computed priority. */
	priority = MAX(priority, thread->user_promotion_basepri);

	/*
	 * Clamp priority back into the allowed range for this task.
	 *  The initial priority value could be out of this range due to:
	 *      Task clamped to BG or Utility (max-pri is 4, or 20)
	 *      Task is user task (max-pri is 63)
	 *      Task is kernel task (max-pri is 95)
	 * Note that thread->importance is user-settable to any integer
	 * via THREAD_PRECEDENCE_POLICY.
	 */
	adj_priority = priority;
	adj_priority = MIN(adj_priority, thread->max_priority);
	adj_priority = MAX(adj_priority, MINPRI);

	/* Allow workload driven priorities to exceed max_priority. */
	if (wi_priority) {
		adj_priority = MAX(adj_priority, priority);
	}

	/* Allow priority to exceed max_priority for promotions. */
	if (thread->effective_policy.thep_promote_above_task) {
		adj_priority = MAX(adj_priority, thread->user_promotion_basepri);
	}
	priority = adj_priority;
	assert3u(priority, <=, MAXPRI);

	if (thread->saved_mode == TH_MODE_REALTIME &&
	    sched_thread_mode_has_demotion(thread, TH_SFLAG_FAILSAFE)) {
		/* Failsafe-demoted (former realtime) threads run at depressed priority. */
		priority = DEPRESSPRI;
	}

	if (thread->effective_policy.thep_terminated == TRUE) {
		/*
		 * We temporarily want to override the expected priority to
		 * ensure that the thread exits in a timely manner.
		 * Note that this is allowed to exceed thread->max_priority
		 * so that the thread is no longer clamped to background
		 * during the final exit phase.
		 */
		if (priority < thread->task_priority) {
			priority = thread->task_priority;
		}
		if (priority < BASEPRI_DEFAULT) {
			priority = BASEPRI_DEFAULT;
		}
	}

#if !defined(XNU_TARGET_OS_OSX)
	/* No one can have a base priority less than MAXPRI_THROTTLE */
	if (priority < MAXPRI_THROTTLE) {
		priority = MAXPRI_THROTTLE;
	}
#endif /* !defined(XNU_TARGET_OS_OSX) */

	sched_set_thread_base_priority(thread, priority);
}
1176 
/*
 * Push new task-derived priority limits (task_priority / max_priority)
 * down onto a thread and re-run its policy update.
 *
 * Called with the task lock held, but not the thread mutex or spinlock
 */
void
thread_policy_update_tasklocked(
	thread_t           thread,
	integer_t          priority,
	integer_t          max_priority,
	task_pend_token_t  pend_token)
{
	thread_mtx_lock(thread);

	if (!thread->active || thread->policy_reset) {
		/* Exiting or reset threads no longer track task priority changes. */
		thread_mtx_unlock(thread);
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	/* Snapshot the old ceiling before overwriting, for the demotion check below. */
	__unused
	integer_t old_max_priority = thread->max_priority;

	/* Thread priority fields are int16_t; the asserts guard the narrowing casts. */
	assert(priority >= INT16_MIN && priority <= INT16_MAX);
	thread->task_priority = (int16_t)priority;

	assert(max_priority >= INT16_MIN && max_priority <= INT16_MAX);
	thread->max_priority = (int16_t)max_priority;

	/*
	 * When backgrounding a thread, realtime and fixed priority threads
	 * should be demoted to timeshare background threads.
	 *
	 * TODO: Do this inside the thread policy update routine in order to avoid double
	 * remove/reinsert for a runnable thread
	 */
	if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
		sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED);
	} else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
		sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
	}

	thread_policy_update_spinlocked(thread, true, pend_token);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);
}
1224 
/*
 * Reset thread to default state in preparation for termination
 * Called with thread mutex locked
 *
 * Always called on current thread, so we don't need a run queue remove
 */
void
thread_policy_reset(
	thread_t                thread)
{
	spl_t           s;

	assert(thread == current_thread());

	s = splsched();
	thread_lock(thread);

	/* Undo each active demotion before resetting mode and priority. */
	if (thread->sched_flags & TH_SFLAG_FAILSAFE) {
		sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
	}

	if (thread->sched_flags & TH_SFLAG_THROTTLED) {
		sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
	}

	if (thread->sched_flags & TH_SFLAG_RT_DISALLOWED) {
		sched_thread_mode_undemote(thread, TH_SFLAG_RT_DISALLOWED);
	}

	/* At this point, the various demotions should be inactive */
	assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
	assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK));

	/* Reset thread back to task-default basepri and mode  */
	sched_mode_t newmode = SCHED(initial_thread_sched_mode)(get_threadtask(thread));

	sched_set_thread_mode(thread, newmode);

	thread->importance = 0;

	/* Prevent further changes to thread base priority or mode */
	thread->policy_reset = 1;

	sched_set_thread_base_priority(thread, thread->task_priority);

	thread_unlock(thread);
	splx(s);
}
1273 
/*
 * Return the current (or default, if *get_default is set) policy of the
 * given flavor for a thread. On some flavors *get_default may be set on
 * return to indicate the defaults were reported instead.
 *
 * Returns KERN_INVALID_ARGUMENT for a null thread, unknown flavor, or a
 * count too small for the flavor; KERN_TERMINATED for an inactive thread.
 */
kern_return_t
thread_policy_get(
	thread_t                                thread,
	thread_policy_flavor_t  flavor,
	thread_policy_t                 policy_info,
	mach_msg_type_number_t  *count,
	boolean_t                               *get_default)
{
	kern_return_t                   result = KERN_SUCCESS;

	if (thread == THREAD_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return KERN_TERMINATED;
	}

	switch (flavor) {
	case THREAD_EXTENDED_POLICY:
	{
		boolean_t               timeshare = TRUE;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			/* Realtime threads (current or saved mode) report the default instead. */
			if ((thread->sched_mode != TH_MODE_REALTIME) &&
			    (thread->saved_mode != TH_MODE_REALTIME)) {
				/* For demoted threads, the saved mode holds the requested mode. */
				if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
					timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
				} else {
					timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
				}
			} else {
				*get_default = TRUE;
			}

			thread_unlock(thread);
			splx(s);
		}

		if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t        info;

			info = (thread_extended_policy_t)policy_info;
			info->timeshare = timeshare;
		}

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
	{
		thread_time_constraint_with_priority_policy_t         info;

		/* The WITH_PRIORITY flavor carries an extra field, so a larger count. */
		mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
		    THREAD_TIME_CONSTRAINT_POLICY_COUNT :
		    THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);

		if (*count < min_count) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_with_priority_policy_t)policy_info;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			if ((thread->sched_mode == TH_MODE_REALTIME) ||
			    (thread->saved_mode == TH_MODE_REALTIME)) {
				info->period = thread->realtime.period;
				info->computation = thread->realtime.computation;
				info->constraint = thread->realtime.constraint;
				info->preemptible = thread->realtime.preemptible;
				if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
					info->priority = thread->realtime.priority_offset + BASEPRI_RTQUEUES;
				}
			} else {
				/* Not a realtime thread: fall through to reporting defaults. */
				*get_default = TRUE;
			}

			thread_unlock(thread);
			splx(s);
		}

		if (*get_default) {
			info->period = 0;
			info->computation = default_timeshare_computation;
			info->constraint = default_timeshare_constraint;
			info->preemptible = TRUE;
			if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
				info->priority = BASEPRI_RTQUEUES;
			}
		}


		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t              info;

		if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_precedence_policy_t)policy_info;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			info->importance = thread->importance;

			thread_unlock(thread);
			splx(s);
		} else {
			info->importance = 0;
		}

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		thread_affinity_policy_t                info;

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (*count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t)policy_info;

		if (!(*get_default)) {
			info->affinity_tag = thread_affinity_get(thread);
		} else {
			info->affinity_tag = THREAD_AFFINITY_TAG_NULL;
		}

		break;
	}

	case THREAD_POLICY_STATE:
	{
		thread_policy_state_t           info;

		if (*count < THREAD_POLICY_STATE_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Only root can get this info */
		if (!task_is_privileged(current_task())) {
			result = KERN_PROTECTION_FAILURE;
			break;
		}

		info = (thread_policy_state_t)(void*)policy_info;

		if (!(*get_default)) {
			info->flags = 0;

			spl_t s = splsched();
			thread_lock(thread);

			info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0);

			/* Export the raw requested/effective policy bitfields as 64-bit blobs. */
			info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy);
			info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy);

			info->thps_user_promotions          = 0;
			info->thps_user_promotion_basepri   = thread->user_promotion_basepri;
			info->thps_ipc_overrides            = thread->kevent_overrides;

			proc_get_thread_policy_bitfield(thread, info);

			thread_unlock(thread);
			splx(s);
		} else {
			info->requested = 0;
			info->effective = 0;
			info->pending = 0;
		}

		break;
	}

	case THREAD_REQUESTED_STATE_POLICY:
	{
		if (*count < THREAD_REQUESTED_STATE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		thread_requested_qos_policy_t info = (thread_requested_qos_policy_t) policy_info;
		/* Read under the thread mutex (held for the whole switch). */
		struct thread_requested_policy *req_policy = &thread->requested_policy;

		info->thrq_base_qos = req_policy->thrp_qos;
		info->thrq_qos_relprio = req_policy->thrp_qos_relprio;
		info->thrq_qos_override = req_policy->thrp_qos_override;
		info->thrq_qos_promote = req_policy->thrp_qos_promote;
		info->thrq_qos_kevent_override = req_policy->thrp_qos_kevent_override;
		info->thrq_qos_workq_override = req_policy->thrp_qos_workq_override;
		info->thrq_qos_wlsvc_override = req_policy->thrp_qos_wlsvc_override;

		break;
	}

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		thread_latency_qos_t plqos;

		if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			plqos = 0;
		} else {
			plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL);
		}

		/* Convert the internal value back into the userspace tier constant. */
		info->thread_latency_qos_tier = qos_latency_policy_package(plqos);
	}
	break;

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		thread_throughput_qos_t ptqos;

		if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			ptqos = 0;
		} else {
			ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL);
		}

		/* Convert the internal value back into the userspace tier constant. */
		info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);
	}
	break;

	case THREAD_QOS_POLICY:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (*count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (!(*get_default)) {
			int relprio_value = 0;
			info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
			    TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);

			/* Relative priority is stored inverted; flip it back for userspace. */
			info->tier_importance = -relprio_value;
		} else {
			info->qos_tier = THREAD_QOS_UNSPECIFIED;
			info->tier_importance = 0;
		}

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	return result;
}
1568 
/*
 * Run the initial effective-policy computation for a newly created thread,
 * bracketed by IMP_UPDATE_TASK_CREATE importance trace events.
 *
 * NOTE(review): calls the spinlocked update path directly, so presumably
 * invoked with the thread spinlock held - confirm at call sites.
 */
void
thread_policy_create(thread_t thread)
{
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);

	/* We pass a pend token but ignore it */
	struct task_pend_token pend_token = {};

	thread_policy_update_internal_spinlocked(thread, true, &pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);
}
1587 
/*
 * Trace-wrapped entry point for the internal policy update:
 * emits IMP_UPDATE start/end kdebug events around the real work.
 */
static void
thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token)
{
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);

	thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);
}
1603 
1604 
1605 
/*
 * One thread state update function TO RULE THEM ALL
 *
 * This function updates the thread effective policy fields
 * and pushes the results to the relevant subsystems.
 *
 * Called with thread spinlock locked, task may be locked, thread mutex may be locked
 */
static void
thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority,
    task_pend_token_t pend_token)
{
	/*
	 * Step 1:
	 *  Gather requested policy and effective task state
	 */

	const struct thread_requested_policy requested = thread->requested_policy;
	const struct task_effective_policy task_effective = get_threadtask(thread)->effective_policy;

	/*
	 * Step 2:
	 *  Calculate new effective policies from requested policy, task and thread state
	 *  Rules:
	 *      Don't change requested, it won't take effect
	 */

	struct thread_effective_policy next = {};

	next.thep_wi_driven = requested.thrp_wi_driven;

	next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;

	uint32_t next_qos = requested.thrp_qos;

	/* Overrides/promotions can only raise QoS if a base QoS was requested. */
	if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		next_qos = MAX(requested.thrp_qos_override, next_qos);
		next_qos = MAX(requested.thrp_qos_promote, next_qos);
		next_qos = MAX(requested.thrp_qos_kevent_override, next_qos);
		next_qos = MAX(requested.thrp_qos_wlsvc_override, next_qos);
		next_qos = MAX(requested.thrp_qos_workq_override, next_qos);
	}

	if (task_effective.tep_darwinbg && task_effective.tep_adaptive_bg &&
	    requested.thrp_qos_promote > THREAD_QOS_BACKGROUND) {
		/*
		 * This thread is turnstile-boosted higher than the adaptive clamp
		 * by a synchronous waiter. Allow that to override the adaptive
		 * clamp temporarily for this thread only.
		 */
		next.thep_promote_above_task = true;
		next_qos = requested.thrp_qos_promote;
	}

	next.thep_qos = next_qos;

	/* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
	if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
		if (next.thep_qos != THREAD_QOS_UNSPECIFIED) {
			next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
		} else {
			next.thep_qos = task_effective.tep_qos_clamp;
		}
		next.thep_wi_driven = 0;
	}

	/*
	 * Extract outbound-promotion QoS before applying task ceiling or BG clamp
	 * This allows QoS promotions to work properly even after the process is unclamped.
	 */
	next.thep_qos_promote = next.thep_qos;

	/* The ceiling only applies to threads that are in the QoS world */
	/* TODO: is it appropriate for this to limit a turnstile-boosted thread's QoS? */
	if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
	    next.thep_qos != THREAD_QOS_UNSPECIFIED) {
		next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
	}

	/*
	 * The QoS relative priority is only applicable when the original programmer's
	 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
	 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
	 * since otherwise it would be lower than unclamped threads. Similarly, in the
	 * presence of boosting, the programmer doesn't know what other actors
	 * are boosting the thread.
	 */
	if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
	    (requested.thrp_qos == next.thep_qos) &&
	    (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
		next.thep_qos_relprio = requested.thrp_qos_relprio;
	} else {
		next.thep_qos_relprio = 0;
	}

	/* Calculate DARWIN_BG */
	bool wants_darwinbg        = false;
	bool wants_all_sockets_bg  = false; /* Do I want my existing sockets to be bg */

	/* Task-level BG applies unless this thread is promoted above the task. */
	if (task_effective.tep_darwinbg && !next.thep_promote_above_task) {
		wants_darwinbg = true;
	}

	/*
	 * If DARWIN_BG has been requested at either level, it's engaged.
	 * darwinbg threads always create bg sockets,
	 * but only some types of darwinbg change the sockets
	 * after they're created
	 */
	if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg) {
		wants_all_sockets_bg = wants_darwinbg = true;
	}

	if (requested.thrp_pidbind_bg) {
		wants_all_sockets_bg = wants_darwinbg = true;
	}

	/* BG/maintenance QoS implies darwinbg. */
	if (next.thep_qos == THREAD_QOS_BACKGROUND ||
	    next.thep_qos == THREAD_QOS_MAINTENANCE) {
		wants_darwinbg = true;
	}

	/* Calculate side effects of DARWIN_BG */

	if (wants_darwinbg) {
		next.thep_darwinbg = 1;
		next.thep_wi_driven = 0;
	}

	if (next.thep_darwinbg || task_effective.tep_new_sockets_bg) {
		next.thep_new_sockets_bg = 1;
	}

	/* Don't use task_effective.tep_all_sockets_bg here */
	if (wants_all_sockets_bg) {
		next.thep_all_sockets_bg = 1;
	}

	/* darwinbg implies background QOS (or lower) */
	if (next.thep_darwinbg &&
	    (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
		next.thep_qos = THREAD_QOS_BACKGROUND;
		next.thep_qos_relprio = 0;
	}

	/* Calculate IO policy */

	int iopol = THROTTLE_LEVEL_TIER0;

	/* Factor in the task's IO policy */
	if (next.thep_darwinbg) {
		iopol = MAX(iopol, task_effective.tep_bg_iotier);
	}

	if (!next.thep_promote_above_task) {
		iopol = MAX(iopol, task_effective.tep_io_tier);
	}

	/* Look up the associated IO tier value for the QoS class */
	iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);

	iopol = MAX(iopol, requested.thrp_int_iotier);
	iopol = MAX(iopol, requested.thrp_ext_iotier);

	/* Apply the kevent iotier override */
	iopol = MIN(iopol, requested.thrp_iotier_kevent_override);

	next.thep_io_tier = iopol;

	/*
	 * If a QoS override is causing IO to go into a lower tier, we also set
	 * the passive bit so that a thread doesn't end up stuck in its own throttle
	 * window when the override goes away.
	 */

	int next_qos_iotier = thread_qos_policy_params.qos_iotier[next.thep_qos];
	int req_qos_iotier = thread_qos_policy_params.qos_iotier[requested.thrp_qos];
	bool qos_io_override_active = (next_qos_iotier < req_qos_iotier);

	/* Calculate Passive IO policy */
	if (requested.thrp_ext_iopassive ||
	    requested.thrp_int_iopassive ||
	    qos_io_override_active ||
	    task_effective.tep_io_passive) {
		next.thep_io_passive = 1;
	}

	/* Calculate timer QOS */
	uint32_t latency_qos = requested.thrp_latency_qos;

	if (!next.thep_promote_above_task) {
		latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
	}

	latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);

	next.thep_latency_qos = latency_qos;

	/* Calculate throughput QOS */
	uint32_t through_qos = requested.thrp_through_qos;

	if (!next.thep_promote_above_task) {
		through_qos = MAX(through_qos, task_effective.tep_through_qos);
	}

	through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);

	next.thep_through_qos = through_qos;

	if (task_effective.tep_terminated || requested.thrp_terminated) {
		/* Shoot down the throttles that slow down exit or response to SIGTERM */
		next.thep_terminated    = 1;
		next.thep_darwinbg      = 0;
		next.thep_io_tier       = THROTTLE_LEVEL_TIER0;
		next.thep_qos           = THREAD_QOS_UNSPECIFIED;
		next.thep_latency_qos   = LATENCY_QOS_TIER_UNSPECIFIED;
		next.thep_through_qos   = THROUGHPUT_QOS_TIER_UNSPECIFIED;
		next.thep_wi_driven     = 0;
	}

	/*
	 * Step 3:
	 *  Swap out old policy for new policy
	 */

	struct thread_effective_policy prev = thread->effective_policy;

	/* Settle per-QoS CPU time accounting against the outgoing QoS first. */
	thread_update_qos_cpu_time_locked(thread);

	/* This is the point where the new values become visible to other threads */
	thread->effective_policy = next;

	/*
	 * Step 4:
	 *  Pend updates that can't be done while holding the thread lock
	 */

	if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg) {
		pend_token->tpt_update_sockets = 1;
	}

	/* TODO: Doesn't this only need to be done if the throttle went up? */
	if (prev.thep_io_tier != next.thep_io_tier) {
		pend_token->tpt_update_throttle = 1;
	}

	/*
	 * Check for the attributes that sfi_thread_classify() consults,
	 *  and trigger SFI re-evaluation.
	 */
	if (prev.thep_qos != next.thep_qos ||
	    prev.thep_darwinbg != next.thep_darwinbg) {
		pend_token->tpt_update_thread_sfi = 1;
	}

	integer_t old_base_pri = thread->base_pri;

	/*
	 * Step 5:
	 *  Update other subsystems as necessary if something has changed
	 */

	/* Check for the attributes that thread_recompute_priority() consults */
	if (prev.thep_qos != next.thep_qos ||
	    prev.thep_qos_relprio != next.thep_qos_relprio ||
	    prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent ||
	    prev.thep_promote_above_task != next.thep_promote_above_task ||
	    prev.thep_terminated != next.thep_terminated ||
	    prev.thep_wi_driven != next.thep_wi_driven ||
	    pend_token->tpt_force_recompute_pri == 1 ||
	    recompute_priority) {
		thread_recompute_priority(thread);
	}

	/*
	 * Check if the thread is waiting on a turnstile and needs priority propagation.
	 */
	if (pend_token->tpt_update_turnstile &&
	    ((old_base_pri == thread->base_pri) ||
	    !thread_get_waiting_turnstile(thread))) {
		/*
		 * Reset update turnstile pend token since either
		 * the thread priority did not change or thread is
		 * not blocked on a turnstile.
		 */
		pend_token->tpt_update_turnstile = 0;
	}
}
1894 
1895 
1896 /*
1897  * Initiate a thread policy state transition on a thread with its TID
1898  * Useful if you cannot guarantee the thread won't get terminated
1899  * Precondition: No locks are held
1900  * Will take task lock - using the non-tid variant is faster
1901  * if you already have a thread ref.
1902  */
1903 void
proc_set_thread_policy_with_tid(task_t task,uint64_t tid,int category,int flavor,int value)1904 proc_set_thread_policy_with_tid(task_t     task,
1905     uint64_t   tid,
1906     int        category,
1907     int        flavor,
1908     int        value)
1909 {
1910 	/* takes task lock, returns ref'ed thread or NULL */
1911 	thread_t thread = task_findtid(task, tid);
1912 
1913 	if (thread == THREAD_NULL) {
1914 		return;
1915 	}
1916 
1917 	proc_set_thread_policy(thread, category, flavor, value);
1918 
1919 	thread_deallocate(thread);
1920 }
1921 
1922 /*
1923  * Initiate a thread policy transition on a thread
1924  * This path supports networking transitions (i.e. darwinbg transitions)
1925  * Precondition: No locks are held
1926  */
1927 void
proc_set_thread_policy(thread_t thread,int category,int flavor,int value)1928 proc_set_thread_policy(thread_t   thread,
1929     int        category,
1930     int        flavor,
1931     int        value)
1932 {
1933 	proc_set_thread_policy_ext(thread, category, flavor, value, 0);
1934 }
1935 
1936 void
proc_set_thread_policy_ext(thread_t thread,int category,int flavor,int value,int value2)1937 proc_set_thread_policy_ext(thread_t   thread,
1938     int        category,
1939     int        flavor,
1940     int        value,
1941     int        value2)
1942 {
1943 	struct task_pend_token pend_token = {};
1944 
1945 	thread_mtx_lock(thread);
1946 
1947 	proc_set_thread_policy_locked(thread, category, flavor, value, value2, &pend_token);
1948 
1949 	thread_mtx_unlock(thread);
1950 
1951 	thread_policy_update_complete_unlocked(thread, &pend_token);
1952 }
1953 
1954 /*
1955  * Do the things that can't be done while holding a thread mutex.
1956  * These are set up to call back into thread policy to get the latest value,
1957  * so they don't have to be synchronized with the update.
1958  * The only required semantic is 'call this sometime after updating effective policy'
1959  *
1960  * Precondition: Thread mutex is not held
1961  *
1962  * This may be called with the task lock held, but in that case it won't be
1963  * called with tpt_update_sockets set.
1964  */
1965 void
thread_policy_update_complete_unlocked(thread_t thread,task_pend_token_t pend_token)1966 thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
1967 {
1968 #ifdef MACH_BSD
1969 	if (pend_token->tpt_update_sockets) {
1970 		proc_apply_task_networkbg(task_pid(get_threadtask(thread)), thread);
1971 	}
1972 #endif /* MACH_BSD */
1973 
1974 	if (pend_token->tpt_update_throttle) {
1975 		rethrottle_thread(get_bsdthread_info(thread));
1976 	}
1977 
1978 	if (pend_token->tpt_update_thread_sfi) {
1979 		sfi_reevaluate(thread);
1980 	}
1981 
1982 	if (pend_token->tpt_update_turnstile) {
1983 		turnstile_update_thread_priority_chain(thread);
1984 	}
1985 }
1986 
1987 /*
1988  * Set and update thread policy
1989  * Thread mutex might be held
1990  */
1991 static void
proc_set_thread_policy_locked(thread_t thread,int category,int flavor,int value,int value2,task_pend_token_t pend_token)1992 proc_set_thread_policy_locked(thread_t          thread,
1993     int               category,
1994     int               flavor,
1995     int               value,
1996     int               value2,
1997     task_pend_token_t pend_token)
1998 {
1999 	spl_t s = splsched();
2000 	thread_lock(thread);
2001 
2002 	proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
2003 
2004 	thread_unlock(thread);
2005 	splx(s);
2006 }
2007 
/*
 * Set and update thread policy.
 * Records the requested value, then recomputes the thread's effective
 * policy, bracketed by kdebug trace points.
 * Thread spinlock is held.
 */
static void
proc_set_thread_policy_spinlocked(thread_t          thread,
    int               category,
    int               flavor,
    int               value,
    int               value2,
    task_pend_token_t pend_token)
{
	/* Trace the requested-policy words before the change takes effect */
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
	    thread_tid(thread), threquested_0(thread),
	    threquested_1(thread), value, 0);

	thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2, pend_token);

	thread_policy_update_spinlocked(thread, false, pend_token);

	/* Trace again after the update; last arg reports pended follow-up work */
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
	    thread_tid(thread), threquested_0(thread),
	    threquested_1(thread), tpending(pend_token), 0);
}
2034 
/*
 * Set the requested state for a specific flavor to a specific value.
 *
 * Edits a local copy of the thread's requested policy and publishes it
 * with a single struct assignment at the end.  Flavors that can affect
 * the thread's scheduling priority also pend a turnstile update so the
 * new priority can be re-propagated to waiters after locks are dropped.
 *
 * Thread spinlock is held (see proc_set_thread_policy_spinlocked).
 */
static void
thread_set_requested_policy_spinlocked(thread_t     thread,
    int               category,
    int               flavor,
    int               value,
    int               value2,
    task_pend_token_t pend_token)
{
	int tier, passive;

	struct thread_requested_policy requested = thread->requested_policy;

	switch (flavor) {
	/* Category: EXTERNAL and INTERNAL, thread and task */

	case TASK_POLICY_DARWIN_BG:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_darwinbg = value;
		} else {
			requested.thrp_int_darwinbg = value;
		}
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_IOPOL:
		/* An IO policy decomposes into an (iotier, passive) pair */
		proc_iopol_to_tier(value, &tier, &passive);
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iotier  = tier;
			requested.thrp_ext_iopassive = passive;
		} else {
			requested.thrp_int_iotier  = tier;
			requested.thrp_int_iopassive = passive;
		}
		break;

	case TASK_POLICY_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iotier = value;
		} else {
			requested.thrp_int_iotier = value;
		}
		break;

	case TASK_POLICY_PASSIVE_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iopassive = value;
		} else {
			requested.thrp_int_iopassive = value;
		}
		break;

	/* Category: ATTRIBUTE, thread only */

	case TASK_POLICY_PIDBIND_BG:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_pidbind_bg = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_LATENCY_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_latency_qos = value;
		break;

	case TASK_POLICY_THROUGH_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_through_qos = value;
		break;

	case TASK_POLICY_QOS_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_AND_RELPRIO:
		/* Two-value flavor: value is the QoS class, value2 the relative priority */
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos = value;
		requested.thrp_qos_relprio = value2;
		pend_token->tpt_update_turnstile = 1;
		DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
		break;

	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_workq_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_PROMOTE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_promote = value;
		break;

	case TASK_POLICY_QOS_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_kevent_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_SERVICER_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_wlsvc_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_TERMINATED:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_terminated = value;
		break;

	case TASK_POLICY_IOTIER_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_iotier_kevent_override = value;
		break;

	case TASK_POLICY_WI_DRIVEN:
		/* Work-interval-driven policy: value is on/off, value2 is a sched_mode_t */
		assert(category == TASK_POLICY_ATTRIBUTE);
		assert(thread == current_thread());

		const bool set_policy = value;
		const sched_mode_t mode = value2;

		requested.thrp_wi_driven = set_policy ? 1 : 0;

		/*
		 * No sched mode change for REALTIME (threads must explicitly
		 * opt-in), however the priority_offset needs to be updated.
		 */
		if (mode == TH_MODE_REALTIME) {
			const int pri = work_interval_get_priority(thread);
			assert3u(pri, >=, BASEPRI_RTQUEUES);
			thread->realtime.priority_offset = set_policy ?
			    (uint8_t)(pri - BASEPRI_RTQUEUES) : 0;
		} else {
			sched_set_thread_mode_user(thread, mode);
			if (set_policy) {
				thread->static_param = true;
			}
		}
		break;

	default:
		panic("unknown task policy: %d %d %d", category, flavor, value);
		break;
	}

	/* Publish the updated requested policy as a single store */
	thread->requested_policy = requested;
}
2187 
2188 /*
2189  * Gets what you set. Effective values may be different.
2190  * Precondition: No locks are held
2191  */
2192 int
proc_get_thread_policy(thread_t thread,int category,int flavor)2193 proc_get_thread_policy(thread_t   thread,
2194     int        category,
2195     int        flavor)
2196 {
2197 	int value = 0;
2198 	thread_mtx_lock(thread);
2199 	value = proc_get_thread_policy_locked(thread, category, flavor, NULL);
2200 	thread_mtx_unlock(thread);
2201 	return value;
2202 }
2203 
2204 static int
proc_get_thread_policy_locked(thread_t thread,int category,int flavor,int * value2)2205 proc_get_thread_policy_locked(thread_t   thread,
2206     int        category,
2207     int        flavor,
2208     int*       value2)
2209 {
2210 	int value = 0;
2211 
2212 	spl_t s = splsched();
2213 	thread_lock(thread);
2214 
2215 	value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2);
2216 
2217 	thread_unlock(thread);
2218 	splx(s);
2219 
2220 	return value;
2221 }
2222 
2223 /*
2224  * Gets what you set. Effective values may be different.
2225  */
2226 static int
thread_get_requested_policy_spinlocked(thread_t thread,int category,int flavor,int * value2)2227 thread_get_requested_policy_spinlocked(thread_t thread,
2228     int      category,
2229     int      flavor,
2230     int*     value2)
2231 {
2232 	int value = 0;
2233 
2234 	struct thread_requested_policy requested = thread->requested_policy;
2235 
2236 	switch (flavor) {
2237 	case TASK_POLICY_DARWIN_BG:
2238 		if (category == TASK_POLICY_EXTERNAL) {
2239 			value = requested.thrp_ext_darwinbg;
2240 		} else {
2241 			value = requested.thrp_int_darwinbg;
2242 		}
2243 		break;
2244 	case TASK_POLICY_IOPOL:
2245 		if (category == TASK_POLICY_EXTERNAL) {
2246 			value = proc_tier_to_iopol(requested.thrp_ext_iotier,
2247 			    requested.thrp_ext_iopassive);
2248 		} else {
2249 			value = proc_tier_to_iopol(requested.thrp_int_iotier,
2250 			    requested.thrp_int_iopassive);
2251 		}
2252 		break;
2253 	case TASK_POLICY_IO:
2254 		if (category == TASK_POLICY_EXTERNAL) {
2255 			value = requested.thrp_ext_iotier;
2256 		} else {
2257 			value = requested.thrp_int_iotier;
2258 		}
2259 		break;
2260 	case TASK_POLICY_PASSIVE_IO:
2261 		if (category == TASK_POLICY_EXTERNAL) {
2262 			value = requested.thrp_ext_iopassive;
2263 		} else {
2264 			value = requested.thrp_int_iopassive;
2265 		}
2266 		break;
2267 	case TASK_POLICY_QOS:
2268 		assert(category == TASK_POLICY_ATTRIBUTE);
2269 		value = requested.thrp_qos;
2270 		break;
2271 	case TASK_POLICY_QOS_OVERRIDE:
2272 		assert(category == TASK_POLICY_ATTRIBUTE);
2273 		value = requested.thrp_qos_override;
2274 		break;
2275 	case TASK_POLICY_LATENCY_QOS:
2276 		assert(category == TASK_POLICY_ATTRIBUTE);
2277 		value = requested.thrp_latency_qos;
2278 		break;
2279 	case TASK_POLICY_THROUGH_QOS:
2280 		assert(category == TASK_POLICY_ATTRIBUTE);
2281 		value = requested.thrp_through_qos;
2282 		break;
2283 	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
2284 		assert(category == TASK_POLICY_ATTRIBUTE);
2285 		value = requested.thrp_qos_workq_override;
2286 		break;
2287 	case TASK_POLICY_QOS_AND_RELPRIO:
2288 		assert(category == TASK_POLICY_ATTRIBUTE);
2289 		assert(value2 != NULL);
2290 		value = requested.thrp_qos;
2291 		*value2 = requested.thrp_qos_relprio;
2292 		break;
2293 	case TASK_POLICY_QOS_PROMOTE:
2294 		assert(category == TASK_POLICY_ATTRIBUTE);
2295 		value = requested.thrp_qos_promote;
2296 		break;
2297 	case TASK_POLICY_QOS_KEVENT_OVERRIDE:
2298 		assert(category == TASK_POLICY_ATTRIBUTE);
2299 		value = requested.thrp_qos_kevent_override;
2300 		break;
2301 	case TASK_POLICY_QOS_SERVICER_OVERRIDE:
2302 		assert(category == TASK_POLICY_ATTRIBUTE);
2303 		value = requested.thrp_qos_wlsvc_override;
2304 		break;
2305 	case TASK_POLICY_TERMINATED:
2306 		assert(category == TASK_POLICY_ATTRIBUTE);
2307 		value = requested.thrp_terminated;
2308 		break;
2309 	case TASK_POLICY_IOTIER_KEVENT_OVERRIDE:
2310 		assert(category == TASK_POLICY_ATTRIBUTE);
2311 		value = requested.thrp_iotier_kevent_override;
2312 		break;
2313 
2314 	case TASK_POLICY_WI_DRIVEN:
2315 		assert(category == TASK_POLICY_ATTRIBUTE);
2316 		value = requested.thrp_wi_driven;
2317 		break;
2318 
2319 	default:
2320 		panic("unknown policy_flavor %d", flavor);
2321 		break;
2322 	}
2323 
2324 	return value;
2325 }
2326 
/*
 * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
 *
 * NOTE: This accessor does not take the task or thread lock.
 * Notifications of state updates need to be externally synchronized with state queries.
 * This routine *MUST* remain interrupt safe, as it is potentially invoked
 * within the context of a timer interrupt.
 *
 * TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
 *      Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
 *      I don't think that cost is worth not having the right answer.
 */
int
proc_get_effective_thread_policy(thread_t thread,
    int      flavor)
{
	int value = 0;

	switch (flavor) {
	case TASK_POLICY_DARWIN_BG:
		/*
		 * This call is used within the timer layer, as well as
		 * prioritizing requests to the graphics system.
		 * It also informs SFI and originator-bg-state.
		 * Returns 1 for background mode, 0 for normal mode
		 */

		value = thread->effective_policy.thep_darwinbg ? 1 : 0;
		break;
	case TASK_POLICY_IO:
		/*
		 * The I/O system calls here to find out what throttling tier to apply to an operation.
		 * Returns THROTTLE_LEVEL_* values
		 */
		value = thread->effective_policy.thep_io_tier;
		/* A temporary override can only make the tier more aggressive (lower) */
		if (thread->iotier_override != THROTTLE_LEVEL_NONE) {
			value = MIN(value, thread->iotier_override);
		}
		break;
	case TASK_POLICY_PASSIVE_IO:
		/*
		 * The I/O system calls here to find out whether an operation should be passive.
		 * (i.e. not cause operations with lower throttle tiers to be throttled)
		 * Returns 1 for passive mode, 0 for normal mode
		 *
		 * If an override is causing IO to go into a lower tier, we also set
		 * the passive bit so that a thread doesn't end up stuck in its own throttle
		 * window when the override goes away.
		 */
		value = thread->effective_policy.thep_io_passive ? 1 : 0;
		if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
		    thread->iotier_override < thread->effective_policy.thep_io_tier) {
			value = 1;
		}
		break;
	case TASK_POLICY_ALL_SOCKETS_BG:
		/*
		 * do_background_socket() calls this to determine whether
		 * it should change the thread's sockets
		 * Returns 1 for background mode, 0 for normal mode
		 * This consults both thread and task so un-DBGing a thread while the task is BG
		 * doesn't get you out of the network throttle.
		 */
		value = (thread->effective_policy.thep_all_sockets_bg ||
		    get_threadtask(thread)->effective_policy.tep_all_sockets_bg) ? 1 : 0;
		break;
	case TASK_POLICY_NEW_SOCKETS_BG:
		/*
		 * socreate() calls this to determine if it should mark a new socket as background
		 * Returns 1 for background mode, 0 for normal mode
		 */
		value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
		break;
	case TASK_POLICY_LATENCY_QOS:
		/*
		 * timer arming calls into here to find out the timer coalescing level
		 * Returns a latency QoS tier (0-6)
		 */
		value = thread->effective_policy.thep_latency_qos;
		break;
	case TASK_POLICY_THROUGH_QOS:
		/*
		 * This value is passed into the urgency callout from the scheduler
		 * to the performance management subsystem.
		 *
		 * Returns a throughput QoS tier (0-6)
		 */
		value = thread->effective_policy.thep_through_qos;
		break;
	case TASK_POLICY_QOS:
		/*
		 * This is communicated to the performance management layer and SFI.
		 *
		 * Returns a QoS policy tier
		 */
		value = thread->effective_policy.thep_qos;
		break;
	default:
		panic("unknown thread policy flavor %d", flavor);
		break;
	}

	return value;
}
2431 
2432 
/*
 * Pack the thread's requested and effective policy into the legacy
 * thread_policy_state bitfield representation.
 * (integer_t) casts limit the number of bits we can fit here;
 * this interface is deprecated and replaced by the _EXT struct ?
 */
static void
proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
{
	uint64_t bits = 0;
	struct thread_requested_policy requested = thread->requested_policy;

	/* Requested-policy fields at their POLICY_REQ_* bit positions */
	bits |= (requested.thrp_int_darwinbg    ? POLICY_REQ_INT_DARWIN_BG  : 0);
	bits |= (requested.thrp_ext_darwinbg    ? POLICY_REQ_EXT_DARWIN_BG  : 0);
	bits |= (requested.thrp_int_iotier      ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
	bits |= (requested.thrp_ext_iotier      ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
	bits |= (requested.thrp_int_iopassive   ? POLICY_REQ_INT_PASSIVE_IO : 0);
	bits |= (requested.thrp_ext_iopassive   ? POLICY_REQ_EXT_PASSIVE_IO : 0);

	bits |= (requested.thrp_qos             ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
	bits |= (requested.thrp_qos_override    ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT)   : 0);

	bits |= (requested.thrp_pidbind_bg      ? POLICY_REQ_PIDBIND_BG     : 0);

	bits |= (requested.thrp_latency_qos     ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
	bits |= (requested.thrp_through_qos     ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);

	info->requested = (integer_t) bits;
	bits = 0;

	/* Effective-policy fields at their POLICY_EFF_* bit positions */
	struct thread_effective_policy effective = thread->effective_policy;

	bits |= (effective.thep_darwinbg        ? POLICY_EFF_DARWIN_BG      : 0);

	bits |= (effective.thep_io_tier         ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
	bits |= (effective.thep_io_passive      ? POLICY_EFF_IO_PASSIVE     : 0);
	bits |= (effective.thep_all_sockets_bg  ? POLICY_EFF_ALL_SOCKETS_BG : 0);
	bits |= (effective.thep_new_sockets_bg  ? POLICY_EFF_NEW_SOCKETS_BG : 0);

	bits |= (effective.thep_qos             ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);

	bits |= (effective.thep_latency_qos     ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
	bits |= (effective.thep_through_qos     ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);

	info->effective = (integer_t)bits;
	bits = 0;

	/* No pending state is reported through this interface */
	info->pending = 0;
}
2480 
2481 /*
2482  * Sneakily trace either the task and thread requested
2483  * or just the thread requested, depending on if we have enough room.
2484  * We do have room on LP64. On LP32, we have to split it between two uintptr_t's.
2485  *
2486  *                                LP32            LP64
2487  * threquested_0(thread)          thread[0]       task[0]
2488  * threquested_1(thread)          thread[1]       thread[0]
2489  *
2490  */
2491 
2492 uintptr_t
threquested_0(thread_t thread)2493 threquested_0(thread_t thread)
2494 {
2495 	static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");
2496 
2497 	uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2498 
2499 	return raw[0];
2500 }
2501 
2502 uintptr_t
threquested_1(thread_t thread)2503 threquested_1(thread_t thread)
2504 {
2505 #if defined __LP64__
2506 	return *(uintptr_t*)&get_threadtask(thread)->requested_policy;
2507 #else
2508 	uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2509 	return raw[1];
2510 #endif
2511 }
2512 
2513 uintptr_t
theffective_0(thread_t thread)2514 theffective_0(thread_t thread)
2515 {
2516 	static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");
2517 
2518 	uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2519 	return raw[0];
2520 }
2521 
2522 uintptr_t
theffective_1(thread_t thread)2523 theffective_1(thread_t thread)
2524 {
2525 #if defined __LP64__
2526 	return *(uintptr_t*)&get_threadtask(thread)->effective_policy;
2527 #else
2528 	uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2529 	return raw[1];
2530 #endif
2531 }
2532 
2533 
/*
 * Set an override on the thread which is consulted with a
 * higher priority than the task/thread policy. This should
 * only be set for temporary grants until the thread
 * returns to the userspace boundary
 *
 * We use atomic operations to swap in the override, with
 * the assumption that the thread itself can
 * read the override and clear it on return to userspace.
 *
 * No locking is performed, since it is acceptable to see
 * a stale override for one loop through throttle_lowpri_io().
 * However a thread reference must be held on the thread.
 */

void
set_thread_iotier_override(thread_t thread, int policy)
{
	int current_override;

	/* Let most aggressive I/O policy win until user boundary */
	do {
		current_override = thread->iotier_override;

		if (current_override != THROTTLE_LEVEL_NONE) {
			/* Numerically lower tier wins (more aggressive) */
			policy = MIN(current_override, policy);
		}

		if (current_override == policy) {
			/* no effective change */
			return;
		}
		/* CAS fails (and we retry) if another thread raced a different override in */
	} while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));

	/*
	 * Since the thread may be currently throttled,
	 * re-evaluate tiers and potentially break out
	 * of an msleep
	 */
	rethrottle_thread(get_bsdthread_info(thread));
}
2575 
2576 /*
2577  * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
2578  * semaphores, dispatch_sync) may result in priority inversions where a higher priority
2579  * (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower
2580  * priority thread. In these cases, we attempt to propagate the priority token, as long
2581  * as the subsystem informs us of the relationships between the threads. The userspace
2582  * synchronization subsystem should maintain the information of owner->resource and
2583  * resource->waiters itself.
2584  */
2585 
2586 /*
2587  * This helper canonicalizes the resource/resource_type given the current qos_override_mode
2588  * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
2589  * to be handled specially in the future, but for now it's fine to slam
2590  * *resource to USER_ADDR_NULL even if it was previously a wildcard.
2591  */
2592 static void
canonicalize_resource_and_type(user_addr_t * resource,int * resource_type)2593 canonicalize_resource_and_type(user_addr_t *resource, int *resource_type)
2594 {
2595 	if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2596 		/* Map all input resource/type to a single one */
2597 		*resource = USER_ADDR_NULL;
2598 		*resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
2599 	} else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
2600 		/* no transform */
2601 	} else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
2602 		/* Map all mutex overrides to a single one, to avoid memory overhead */
2603 		if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
2604 			*resource = USER_ADDR_NULL;
2605 		}
2606 	}
2607 }
2608 
2609 /* This helper routine finds an existing override if known. Locking should be done by caller */
2610 static struct thread_qos_override *
find_qos_override(thread_t thread,user_addr_t resource,int resource_type)2611 find_qos_override(thread_t thread,
2612     user_addr_t resource,
2613     int resource_type)
2614 {
2615 	struct thread_qos_override *override;
2616 
2617 	override = thread->overrides;
2618 	while (override) {
2619 		if (override->override_resource == resource &&
2620 		    override->override_resource_type == resource_type) {
2621 			return override;
2622 		}
2623 
2624 		override = override->override_next;
2625 	}
2626 
2627 	return NULL;
2628 }
2629 
/*
 * Walk the thread's override list and decrement (or, when reset is
 * TRUE, zero) the contended-resource count of every override matching
 * (resource, resource_type); the WILDCARD constants match anything.
 * Overrides whose count drops to zero are unlinked and chained onto
 * *free_override_list so the caller can zfree() them after dropping
 * locks.  Locking should be done by caller.
 */
static void
find_and_decrement_qos_override(thread_t       thread,
    user_addr_t    resource,
    int            resource_type,
    boolean_t      reset,
    struct thread_qos_override **free_override_list)
{
	struct thread_qos_override *override, *override_prev;

	override_prev = NULL;
	override = thread->overrides;
	while (override) {
		/* Save the successor: the current node may be unlinked below */
		struct thread_qos_override *override_next = override->override_next;

		if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
		    (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) {
			if (reset) {
				override->override_contended_resource_count = 0;
			} else {
				override->override_contended_resource_count--;
			}

			if (override->override_contended_resource_count == 0) {
				/* Unlink from the thread's list */
				if (override_prev == NULL) {
					thread->overrides = override_next;
				} else {
					override_prev->override_next = override_next;
				}

				/* Add to out-param for later zfree */
				override->override_next = *free_override_list;
				*free_override_list = override;
			} else {
				override_prev = override;
			}

			if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
				/* A specific resource matches at most one entry; stop early */
				return;
			}
		} else {
			override_prev = override;
		}

		override = override_next;
	}
}
2676 
2677 /* This helper recalculates the current requested override using the policy selected at boot */
2678 static int
calculate_requested_qos_override(thread_t thread)2679 calculate_requested_qos_override(thread_t thread)
2680 {
2681 	if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2682 		return THREAD_QOS_UNSPECIFIED;
2683 	}
2684 
2685 	/* iterate over all overrides and calculate MAX */
2686 	struct thread_qos_override *override;
2687 	int qos_override = THREAD_QOS_UNSPECIFIED;
2688 
2689 	override = thread->overrides;
2690 	while (override) {
2691 		qos_override = MAX(qos_override, override->override_qos);
2692 		override = override->override_next;
2693 	}
2694 
2695 	return qos_override;
2696 }
2697 
/*
 * Add (or saturate upward) a user-space QoS override on a thread for a
 * given resource, then re-evaluate the thread's combined override QoS.
 *
 * Returns:
 * - 0 on success
 * - EINVAL if some invalid input was passed
 *   (NOTE(review): rc is never set to a nonzero value anywhere in this body,
 *    so the EINVAL case appears unreachable here — confirm against callers.)
 */
static int
proc_thread_qos_add_override_internal(thread_t         thread,
    int              override_qos,
    boolean_t        first_override_for_resource,
    user_addr_t      resource,
    int              resource_type)
{
	struct task_pend_token pend_token = {};
	int rc = 0;

	thread_mtx_lock(thread);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
	    thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);

	DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
	    uint64_t, thread->requested_policy.thrp_qos,
	    uint64_t, thread->effective_policy.thep_qos,
	    int, override_qos, boolean_t, first_override_for_resource);

	struct thread_qos_override *override;
	struct thread_qos_override *override_new = NULL;
	int new_qos_override, prev_qos_override;
	int new_effective_qos;

	canonicalize_resource_and_type(&resource, &resource_type);

	override = find_qos_override(thread, resource, resource_type);
	if (first_override_for_resource && !override) {
		/* We need to allocate a new object. Drop the thread lock and
		 * recheck afterwards in case someone else added the override
		 */
		thread_mtx_unlock(thread);
		override_new = zalloc(thread_qos_override_zone);
		thread_mtx_lock(thread);
		override = find_qos_override(thread, resource, resource_type);
	}
	if (first_override_for_resource && override) {
		/* Someone else already allocated while the thread lock was dropped */
		override->override_contended_resource_count++;
	} else if (!override && override_new) {
		/* Link our freshly allocated override at the head of the thread's list. */
		override = override_new;
		override_new = NULL;
		override->override_next = thread->overrides;
		/* since first_override_for_resource was TRUE */
		override->override_contended_resource_count = 1;
		override->override_resource = resource;
		override->override_resource_type = (int16_t)resource_type;
		override->override_qos = THREAD_QOS_UNSPECIFIED;
		thread->overrides = override;
	}

	if (override) {
		/* Per-resource override QoS only ever moves upward (saturates). */
		if (override->override_qos == THREAD_QOS_UNSPECIFIED) {
			override->override_qos = (int16_t)override_qos;
		} else {
			override->override_qos = MAX(override->override_qos, (int16_t)override_qos);
		}
	}

	/* Determine how to combine the various overrides into a single current
	 * requested override
	 */
	new_qos_override = calculate_requested_qos_override(thread);

	prev_qos_override = proc_get_thread_policy_locked(thread,
	    TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);

	if (new_qos_override != prev_qos_override) {
		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_QOS_OVERRIDE,
		    new_qos_override, 0, &pend_token);
	}

	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

	thread_mtx_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	/* We lost the allocation race while the lock was dropped: discard ours. */
	if (override_new) {
		zfree(thread_qos_override_zone, override_new);
	}

	DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
	    int, new_qos_override, int, new_effective_qos, int, rc);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
	    new_qos_override, resource, resource_type, 0, 0);

	return rc;
}
2795 
2796 int
proc_thread_qos_add_override(task_t task,thread_t thread,uint64_t tid,int override_qos,boolean_t first_override_for_resource,user_addr_t resource,int resource_type)2797 proc_thread_qos_add_override(task_t           task,
2798     thread_t         thread,
2799     uint64_t         tid,
2800     int              override_qos,
2801     boolean_t        first_override_for_resource,
2802     user_addr_t      resource,
2803     int              resource_type)
2804 {
2805 	boolean_t has_thread_reference = FALSE;
2806 	int rc = 0;
2807 
2808 	if (thread == THREAD_NULL) {
2809 		thread = task_findtid(task, tid);
2810 		/* returns referenced thread */
2811 
2812 		if (thread == THREAD_NULL) {
2813 			KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
2814 			    tid, 0, 0xdead, 0, 0);
2815 			return ESRCH;
2816 		}
2817 		has_thread_reference = TRUE;
2818 	} else {
2819 		assert(get_threadtask(thread) == task);
2820 	}
2821 	rc = proc_thread_qos_add_override_internal(thread, override_qos,
2822 	    first_override_for_resource, resource, resource_type);
2823 	if (has_thread_reference) {
2824 		thread_deallocate(thread);
2825 	}
2826 
2827 	return rc;
2828 }
2829 
/*
 * Remove (or decrement) a user-space QoS override on a thread for a given
 * resource and re-evaluate the thread's combined override QoS.
 *
 * 'reset' is forwarded to find_and_decrement_qos_override(); its exact
 * semantics live there (presumably TRUE tears the override down regardless
 * of its contended-resource count — confirm in that helper).
 */
static void
proc_thread_qos_remove_override_internal(thread_t       thread,
    user_addr_t    resource,
    int            resource_type,
    boolean_t      reset)
{
	struct task_pend_token pend_token = {};

	/* Overrides unlinked under the mutex; freed only after all locks drop. */
	struct thread_qos_override *deferred_free_override_list = NULL;
	int new_qos_override, prev_qos_override, new_effective_qos;

	thread_mtx_lock(thread);

	canonicalize_resource_and_type(&resource, &resource_type);

	find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
	    thread_tid(thread), resource, reset, 0, 0);

	DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
	    uint64_t, thread->requested_policy.thrp_qos,
	    uint64_t, thread->effective_policy.thep_qos);

	/* Determine how to combine the various overrides into a single current requested override */
	new_qos_override = calculate_requested_qos_override(thread);

	spl_t s = splsched();
	thread_lock(thread);

	/*
	 * The override chain and therefore the value of the current override is locked with thread mutex,
	 * so we can do a get/set without races.  However, the rest of thread policy is locked under the spinlock.
	 * This means you can't change the current override from a spinlock-only setter.
	 */
	prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);

	if (new_qos_override != prev_qos_override) {
		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token);
	}

	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	/* With no locks held, free whatever find_and_decrement unlinked above. */
	while (deferred_free_override_list) {
		struct thread_qos_override *override_next = deferred_free_override_list->override_next;

		zfree(thread_qos_override_zone, deferred_free_override_list);
		deferred_free_override_list = override_next;
	}

	DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
	    int, new_qos_override, int, new_effective_qos);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
	    thread_tid(thread), 0, 0, 0, 0);
}
2893 
2894 int
proc_thread_qos_remove_override(task_t task,thread_t thread,uint64_t tid,user_addr_t resource,int resource_type)2895 proc_thread_qos_remove_override(task_t      task,
2896     thread_t    thread,
2897     uint64_t    tid,
2898     user_addr_t resource,
2899     int         resource_type)
2900 {
2901 	boolean_t has_thread_reference = FALSE;
2902 
2903 	if (thread == THREAD_NULL) {
2904 		thread = task_findtid(task, tid);
2905 		/* returns referenced thread */
2906 
2907 		if (thread == THREAD_NULL) {
2908 			KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2909 			    tid, 0, 0xdead, 0, 0);
2910 			return ESRCH;
2911 		}
2912 		has_thread_reference = TRUE;
2913 	} else {
2914 		assert(task == get_threadtask(thread));
2915 	}
2916 
2917 	proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE);
2918 
2919 	if (has_thread_reference) {
2920 		thread_deallocate(thread);
2921 	}
2922 
2923 	return 0;
2924 }
2925 
2926 /* Deallocate before thread termination */
2927 void
proc_thread_qos_deallocate(thread_t thread)2928 proc_thread_qos_deallocate(thread_t thread)
2929 {
2930 	/* This thread must have no more IPC overrides. */
2931 	assert(thread->kevent_overrides == 0);
2932 	assert(thread->requested_policy.thrp_qos_kevent_override == THREAD_QOS_UNSPECIFIED);
2933 	assert(thread->requested_policy.thrp_qos_wlsvc_override == THREAD_QOS_UNSPECIFIED);
2934 
2935 	/*
2936 	 * Clear out any lingering override objects.
2937 	 */
2938 	struct thread_qos_override *override;
2939 
2940 	thread_mtx_lock(thread);
2941 	override = thread->overrides;
2942 	thread->overrides = NULL;
2943 	thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2944 	/* We don't need to re-evaluate thread policy here because the thread has already exited */
2945 	thread_mtx_unlock(thread);
2946 
2947 	while (override) {
2948 		struct thread_qos_override *override_next = override->override_next;
2949 
2950 		zfree(thread_qos_override_zone, override);
2951 		override = override_next;
2952 	}
2953 }
2954 
2955 /*
2956  * Set up the primordial thread's QoS
2957  */
2958 void
task_set_main_thread_qos(task_t task,thread_t thread)2959 task_set_main_thread_qos(task_t task, thread_t thread)
2960 {
2961 	struct task_pend_token pend_token = {};
2962 
2963 	assert(get_threadtask(thread) == task);
2964 
2965 	thread_mtx_lock(thread);
2966 
2967 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2968 	    (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
2969 	    thread_tid(thread), threquested_0(thread), threquested_1(thread),
2970 	    thread->requested_policy.thrp_qos, 0);
2971 
2972 	thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
2973 
2974 	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
2975 	    primordial_qos, 0, &pend_token);
2976 
2977 	thread_mtx_unlock(thread);
2978 
2979 	thread_policy_update_complete_unlocked(thread, &pend_token);
2980 
2981 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2982 	    (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
2983 	    thread_tid(thread), threquested_0(thread), threquested_1(thread),
2984 	    primordial_qos, 0);
2985 }
2986 
2987 /*
2988  * KPI for pthread kext
2989  *
2990  * Return a good guess at what the initial manager QoS will be
2991  * Dispatch can override this in userspace if it so chooses
2992  */
2993 thread_qos_t
task_get_default_manager_qos(task_t task)2994 task_get_default_manager_qos(task_t task)
2995 {
2996 	thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
2997 
2998 	if (primordial_qos == THREAD_QOS_LEGACY) {
2999 		primordial_qos = THREAD_QOS_USER_INITIATED;
3000 	}
3001 
3002 	return primordial_qos;
3003 }
3004 
3005 /*
3006  * Check if the kernel promotion on thread has changed
3007  * and apply it.
3008  *
3009  * thread locked on entry and exit
3010  */
3011 boolean_t
thread_recompute_kernel_promotion_locked(thread_t thread)3012 thread_recompute_kernel_promotion_locked(thread_t thread)
3013 {
3014 	boolean_t needs_update = FALSE;
3015 	uint8_t kern_promotion_schedpri = (uint8_t)thread_get_inheritor_turnstile_sched_priority(thread);
3016 
3017 	/*
3018 	 * For now just assert that kern_promotion_schedpri <= MAXPRI_PROMOTE.
3019 	 * TURNSTILE_KERNEL_PROMOTE adds threads on the waitq already capped to MAXPRI_PROMOTE
3020 	 * and propagates the priority through the chain with the same cap, because as of now it does
3021 	 * not differenciate on the kernel primitive.
3022 	 *
3023 	 * If this assumption will change with the adoption of a kernel primitive that does not
3024 	 * cap the when adding/propagating,
3025 	 * then here is the place to put the generic cap for all kernel primitives
3026 	 * (converts the assert to kern_promotion_schedpri = MIN(priority, MAXPRI_PROMOTE))
3027 	 */
3028 	assert(kern_promotion_schedpri <= MAXPRI_PROMOTE);
3029 
3030 	if (kern_promotion_schedpri != thread->kern_promotion_schedpri) {
3031 		KDBG(MACHDBG_CODE(
3032 			    DBG_MACH_SCHED, MACH_TURNSTILE_KERNEL_CHANGE) | DBG_FUNC_NONE,
3033 		    thread_tid(thread),
3034 		    kern_promotion_schedpri,
3035 		    thread->kern_promotion_schedpri);
3036 
3037 		needs_update = TRUE;
3038 		thread->kern_promotion_schedpri = kern_promotion_schedpri;
3039 		thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
3040 	}
3041 
3042 	return needs_update;
3043 }
3044 
/*
 * Check if the user promotion on thread has changed
 * and apply it.
 *
 * thread locked on entry, might drop the thread lock
 * and reacquire it.  Returns TRUE if the caller should
 * re-propagate through the waiting turnstile.
 */
boolean_t
thread_recompute_user_promotion_locked(thread_t thread)
{
	boolean_t needs_update = FALSE;
	struct task_pend_token pend_token = {};
	/* Inheritor base priority, capped to the user priority range. */
	uint8_t user_promotion_basepri = MIN((uint8_t)thread_get_inheritor_turnstile_base_priority(thread), MAXPRI_USER);
	int old_base_pri = thread->base_pri;
	thread_qos_t qos_promotion;

	/* Check if user promotion has changed */
	if (thread->user_promotion_basepri == user_promotion_basepri) {
		return needs_update;
	} else {
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    (TURNSTILE_CODE(TURNSTILE_PRIORITY_OPERATIONS, (THREAD_USER_PROMOTION_CHANGE))) | DBG_FUNC_NONE,
		    thread_tid(thread),
		    user_promotion_basepri,
		    thread->user_promotion_basepri,
		    0, 0);
		KDBG(MACHDBG_CODE(
			    DBG_MACH_SCHED, MACH_TURNSTILE_USER_CHANGE) | DBG_FUNC_NONE,
		    thread_tid(thread),
		    user_promotion_basepri,
		    thread->user_promotion_basepri);
	}

	/* Update the user promotion base pri */
	thread->user_promotion_basepri = user_promotion_basepri;
	pend_token.tpt_force_recompute_pri = 1;

	/* Map the promotion priority to a QoS (unspecified if at/below throttle). */
	if (user_promotion_basepri <= MAXPRI_THROTTLE) {
		qos_promotion = THREAD_QOS_UNSPECIFIED;
	} else {
		qos_promotion = thread_user_promotion_qos_for_pri(user_promotion_basepri);
	}

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_PROMOTE, qos_promotion, 0, &pend_token);

	/* If the base pri moved and we're waiting on a turnstile, the caller
	 * must re-propagate through it. */
	if (thread_get_waiting_turnstile(thread) &&
	    thread->base_pri != old_base_pri) {
		needs_update = TRUE;
	}

	/* Drop the thread lock to complete the policy update, then retake it
	 * so the advertised lock state on exit matches entry. */
	thread_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	thread_lock(thread);

	return needs_update;
}
3104 
3105 /*
3106  * Convert the thread user promotion base pri to qos for threads in qos world.
3107  * For priority above UI qos, the qos would be set to UI.
3108  */
3109 thread_qos_t
thread_user_promotion_qos_for_pri(int priority)3110 thread_user_promotion_qos_for_pri(int priority)
3111 {
3112 	thread_qos_t qos;
3113 	for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
3114 		if (thread_qos_policy_params.qos_pri[qos] <= priority) {
3115 			return qos;
3116 		}
3117 	}
3118 	return THREAD_QOS_MAINTENANCE;
3119 }
3120 
3121 /*
3122  * Set the thread's QoS Kevent override
3123  * Owned by the Kevent subsystem
3124  *
3125  * May be called with spinlocks held, but not spinlocks
3126  * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
3127  *
3128  * One 'add' must be balanced by one 'drop'.
3129  * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
3130  * Before the thread is deallocated, there must be 0 remaining overrides.
3131  */
3132 static void
thread_kevent_override(thread_t thread,uint32_t qos_override,boolean_t is_new_override)3133 thread_kevent_override(thread_t    thread,
3134     uint32_t    qos_override,
3135     boolean_t   is_new_override)
3136 {
3137 	struct task_pend_token pend_token = {};
3138 	boolean_t needs_update;
3139 
3140 	spl_t s = splsched();
3141 	thread_lock(thread);
3142 
3143 	uint32_t old_override = thread->requested_policy.thrp_qos_kevent_override;
3144 
3145 	assert(qos_override > THREAD_QOS_UNSPECIFIED);
3146 	assert(qos_override < THREAD_QOS_LAST);
3147 
3148 	if (is_new_override) {
3149 		if (thread->kevent_overrides++ == 0) {
3150 			/* This add is the first override for this thread */
3151 			assert(old_override == THREAD_QOS_UNSPECIFIED);
3152 		} else {
3153 			/* There are already other overrides in effect for this thread */
3154 			assert(old_override > THREAD_QOS_UNSPECIFIED);
3155 		}
3156 	} else {
3157 		/* There must be at least one override (the previous add call) in effect */
3158 		assert(thread->kevent_overrides > 0);
3159 		assert(old_override > THREAD_QOS_UNSPECIFIED);
3160 	}
3161 
3162 	/*
3163 	 * We can't allow lowering if there are several IPC overrides because
3164 	 * the caller can't possibly know the whole truth
3165 	 */
3166 	if (thread->kevent_overrides == 1) {
3167 		needs_update = qos_override != old_override;
3168 	} else {
3169 		needs_update = qos_override > old_override;
3170 	}
3171 
3172 	if (needs_update) {
3173 		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3174 		    TASK_POLICY_QOS_KEVENT_OVERRIDE,
3175 		    qos_override, 0, &pend_token);
3176 		assert(pend_token.tpt_update_sockets == 0);
3177 	}
3178 
3179 	thread_unlock(thread);
3180 	splx(s);
3181 
3182 	thread_policy_update_complete_unlocked(thread, &pend_token);
3183 }
3184 
/*
 * Establish a new kevent QoS override on the thread
 * (the first half of a balanced add/drop pair).
 */
void
thread_add_kevent_override(thread_t thread, uint32_t qos_override)
{
	thread_kevent_override(thread, qos_override, TRUE);
}
3190 
/*
 * Update the QoS value of an already-established kevent override
 * (must be between an 'add' and its balancing 'drop').
 */
void
thread_update_kevent_override(thread_t thread, uint32_t qos_override)
{
	thread_kevent_override(thread, qos_override, FALSE);
}
3196 
3197 void
thread_drop_kevent_override(thread_t thread)3198 thread_drop_kevent_override(thread_t thread)
3199 {
3200 	struct task_pend_token pend_token = {};
3201 
3202 	spl_t s = splsched();
3203 	thread_lock(thread);
3204 
3205 	assert(thread->kevent_overrides > 0);
3206 
3207 	if (--thread->kevent_overrides == 0) {
3208 		/*
3209 		 * There are no more overrides for this thread, so we should
3210 		 * clear out the saturated override value
3211 		 */
3212 
3213 		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3214 		    TASK_POLICY_QOS_KEVENT_OVERRIDE, THREAD_QOS_UNSPECIFIED,
3215 		    0, &pend_token);
3216 	}
3217 
3218 	thread_unlock(thread);
3219 	splx(s);
3220 
3221 	thread_policy_update_complete_unlocked(thread, &pend_token);
3222 }
3223 
3224 /*
3225  * Set the thread's QoS Workloop Servicer override
3226  * Owned by the Kevent subsystem
3227  *
3228  * May be called with spinlocks held, but not spinlocks
3229  * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
3230  *
3231  * One 'add' must be balanced by one 'drop'.
3232  * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
3233  * Before the thread is deallocated, there must be 0 remaining overrides.
3234  */
3235 static void
thread_servicer_override(thread_t thread,uint32_t qos_override,boolean_t is_new_override)3236 thread_servicer_override(thread_t    thread,
3237     uint32_t    qos_override,
3238     boolean_t   is_new_override)
3239 {
3240 	struct task_pend_token pend_token = {};
3241 
3242 	spl_t s = splsched();
3243 	thread_lock(thread);
3244 
3245 	if (is_new_override) {
3246 		assert(!thread->requested_policy.thrp_qos_wlsvc_override);
3247 	} else {
3248 		assert(thread->requested_policy.thrp_qos_wlsvc_override);
3249 	}
3250 
3251 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3252 	    TASK_POLICY_QOS_SERVICER_OVERRIDE,
3253 	    qos_override, 0, &pend_token);
3254 
3255 	thread_unlock(thread);
3256 	splx(s);
3257 
3258 	assert(pend_token.tpt_update_sockets == 0);
3259 	thread_policy_update_complete_unlocked(thread, &pend_token);
3260 }
3261 
/*
 * Establish a new workloop servicer QoS override on the thread
 * (the first half of a balanced add/drop pair).
 */
void
thread_add_servicer_override(thread_t thread, uint32_t qos_override)
{
	assert(qos_override > THREAD_QOS_UNSPECIFIED);
	assert(qos_override < THREAD_QOS_LAST);

	thread_servicer_override(thread, qos_override, TRUE);
}
3270 
/*
 * Update the QoS value of an already-established workloop servicer
 * override (must be between an 'add' and its balancing 'drop').
 */
void
thread_update_servicer_override(thread_t thread, uint32_t qos_override)
{
	assert(qos_override > THREAD_QOS_UNSPECIFIED);
	assert(qos_override < THREAD_QOS_LAST);

	thread_servicer_override(thread, qos_override, FALSE);
}
3279 
/*
 * Drop the workloop servicer QoS override by resetting it to
 * THREAD_QOS_UNSPECIFIED (balancing a previous 'add').
 */
void
thread_drop_servicer_override(thread_t thread)
{
	thread_servicer_override(thread, THREAD_QOS_UNSPECIFIED, FALSE);
}
3285 
/*
 * Update the servicer's kevent IO tier override; no-op when the requested
 * tier already matches the current one.
 */
void
thread_update_servicer_iotier_override(thread_t thread, uint8_t iotier_override)
{
	struct task_pend_token pend_token = {};
	uint8_t current_iotier;

	/* Check if the update is needed */
	/* NOTE(review): this read occurs before taking the thread lock, so the
	 * early-exit check is racy with concurrent updaters — presumably a
	 * benign optimization; confirm against callers' serialization. */
	current_iotier = (uint8_t)thread_get_requested_policy_spinlocked(thread,
	    TASK_POLICY_ATTRIBUTE, TASK_POLICY_IOTIER_KEVENT_OVERRIDE, NULL);

	if (iotier_override == current_iotier) {
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_IOTIER_KEVENT_OVERRIDE,
	    iotier_override, 0, &pend_token);

	thread_unlock(thread);
	splx(s);

	assert(pend_token.tpt_update_sockets == 0);
	thread_policy_update_complete_unlocked(thread, &pend_token);
}
3313 
3314 /* Get current requested qos / relpri, may be called from spinlock context */
3315 thread_qos_t
thread_get_requested_qos(thread_t thread,int * relpri)3316 thread_get_requested_qos(thread_t thread, int *relpri)
3317 {
3318 	int relprio_value = 0;
3319 	thread_qos_t qos;
3320 
3321 	qos = (thread_qos_t)proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
3322 	    TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
3323 	if (relpri) {
3324 		*relpri = -relprio_value;
3325 	}
3326 	return qos;
3327 }
3328 
/*
 * This function will promote the thread priority
 * since exec could block other threads calling
 * proc_find on the proc. This boost must be removed
 * via call to thread_clear_exec_promotion.
 *
 * This should be replaced with a generic 'priority inheriting gate' mechanism (24194397)
 */
void
thread_set_exec_promotion(thread_t thread)
{
	/* The promotion itself requires the thread lock at splsched. */
	spl_t s = splsched();
	thread_lock(thread);

	sched_thread_promote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);

	thread_unlock(thread);
	splx(s);
}
3348 
/*
 * This function will clear the exec thread
 * promotion set on the thread by thread_set_exec_promotion.
 */
void
thread_clear_exec_promotion(thread_t thread)
{
	/* Undo the promotion under the same thread lock / splsched discipline. */
	spl_t s = splsched();
	thread_lock(thread);

	sched_thread_unpromote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);

	thread_unlock(thread);
	splx(s);
}
3364 
3365 #if CONFIG_SCHED_RT_ALLOW
3366 
/*
 * Flag set by the "-rt-allow_policy-enable" boot-arg (note the underscore —
 * it must match the string below) to restrict use of
 * THREAD_TIME_CONSTRAINT_POLICY and THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY
 * to threads that have joined a workinterval with WORK_INTERVAL_WORKLOAD_ID_RT_ALLOWED.
 */
static TUNABLE(
	bool,
	rt_allow_policy_enabled,
	"-rt-allow_policy-enable",
	false
	);
3378 
/*
 * When the RT allow policy is enabled and a thread allowed to become RT,
 * sometimes (if the processes RT allow policy is restricted) the thread will
 * have a CPU limit enforced. The following two tunables determine the
 * parameters for that CPU limit.
 */

/* % of the interval allowed to run. */
TUNABLE_DEV_WRITEABLE(uint8_t, rt_allow_limit_percent,
    "rt_allow_limit_percent", 70);

/* The length of the interval in milliseconds (consumed as
 * t_ledger_req_interval_ms; the boot-arg name carries no unit suffix). */
TUNABLE_DEV_WRITEABLE(uint16_t, rt_allow_limit_interval_ms,
    "rt_allow_limit_interval", 10);
3393 
3394 static bool
thread_has_rt(thread_t thread)3395 thread_has_rt(thread_t thread)
3396 {
3397 	return
3398 	        thread->sched_mode == TH_MODE_REALTIME ||
3399 	        thread->saved_mode == TH_MODE_REALTIME;
3400 }
3401 
3402 /*
3403  * Set a CPU limit on a thread based on the RT allow policy. This will be picked
3404  * up by the target thread via the ledger AST.
3405  */
3406 static void
thread_rt_set_cpulimit(thread_t thread)3407 thread_rt_set_cpulimit(thread_t thread)
3408 {
3409 	/* Force reasonable values for the cpu limit. */
3410 	const uint8_t percent = MAX(MIN(rt_allow_limit_percent, 99), 1);
3411 	const uint16_t interval_ms = MAX(rt_allow_limit_interval_ms, 1);
3412 
3413 	thread->t_ledger_req_percentage = percent;
3414 	thread->t_ledger_req_interval_ms = interval_ms;
3415 	thread->t_ledger_req_action = THREAD_CPULIMIT_BLOCK;
3416 
3417 	thread->sched_flags |= TH_SFLAG_RT_CPULIMIT;
3418 }
3419 
/* Similar to the above but removes any CPU limit. */
static void
thread_rt_clear_cpulimit(thread_t thread)
{
	thread->sched_flags &= ~TH_SFLAG_RT_CPULIMIT;

	/* Reset the ledger request; DISABLE asks for the limit to be torn down. */
	thread->t_ledger_req_percentage = 0;
	thread->t_ledger_req_interval_ms = 0;
	thread->t_ledger_req_action = THREAD_CPULIMIT_DISABLE;
}
3430 
/*
 * Evaluate RT policy for a thread, demoting and undemoting as needed.
 * Also manages the per-thread CPU limit for non-platform-binary RT threads.
 */
void
thread_rt_evaluate(thread_t thread)
{
	task_t task = get_threadtask(thread);
	bool platform_binary = false;

	/* If the RT allow policy is not enabled - nothing to do. */
	if (!rt_allow_policy_enabled) {
		return;
	}

	/* User threads only. */
	if (task == kernel_task) {
		return;
	}

	/* Check for platform binary. */
	platform_binary = (task_ro_flags_get(task) & TFRO_PLATFORM) != 0;

	spl_t s = splsched();
	thread_lock(thread);

	const thread_work_interval_flags_t wi_flags =
	    os_atomic_load(&thread->th_work_interval_flags, relaxed);

	/*
	 * RT threads which are not joined to a work interval which allows RT
	 * threads are demoted. Once those conditions no longer hold, the thread
	 * is undemoted.
	 */
	if (thread_has_rt(thread) && (wi_flags & TH_WORK_INTERVAL_FLAGS_RT_ALLOWED) == 0) {
		if (!sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) {
			KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RT_DISALLOWED_WORK_INTERVAL),
			    thread_tid(thread));
			sched_thread_mode_demote(thread, TH_SFLAG_RT_DISALLOWED);
		}
	} else {
		if (sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) {
			sched_thread_mode_undemote(thread, TH_SFLAG_RT_DISALLOWED);
		}
	}

	/*
	 * RT threads get a CPU limit unless they're part of a platform binary
	 * task. If the thread is no longer RT, any existing CPU limit should be
	 * removed.
	 */
	bool set_ast = false;
	if (!platform_binary &&
	    thread_has_rt(thread) &&
	    (thread->sched_flags & TH_SFLAG_RT_CPULIMIT) == 0) {
		thread_rt_set_cpulimit(thread);
		set_ast = true;
	}

	if (!platform_binary &&
	    !thread_has_rt(thread) &&
	    (thread->sched_flags & TH_SFLAG_RT_CPULIMIT) != 0) {
		thread_rt_clear_cpulimit(thread);
		set_ast = true;
	}

	thread_unlock(thread);
	splx(s);

	if (set_ast) {
		/* Ensure the target thread picks up any CPU limit change. */
		act_set_astledger(thread);
	}
}
3504 
3505 #else
3506 
/* RT allow policy compiled out: evaluation is a no-op. */
void
thread_rt_evaluate(__unused thread_t thread)
{
}
3511 
3512 #endif /*  CONFIG_SCHED_RT_ALLOW */
3513