xref: /xnu-8792.61.2/osfmk/kern/thread_policy.c (revision 42e220869062b56f8d7d0726fd4c88954f87902c)
1 /*
2  * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <mach/mach_types.h>
30 #include <mach/thread_act_server.h>
31 
32 #include <kern/kern_types.h>
33 #include <kern/processor.h>
34 #include <kern/thread.h>
35 #include <kern/affinity.h>
36 #include <kern/work_interval.h>
37 #include <mach/task_policy.h>
38 #include <kern/sfi.h>
39 #include <kern/policy_internal.h>
40 #include <sys/errno.h>
41 #include <sys/ulock.h>
42 
43 #include <mach/machine/sdt.h>
44 
45 static KALLOC_TYPE_DEFINE(thread_qos_override_zone,
46     struct thread_qos_override, KT_DEFAULT);
47 
48 #ifdef MACH_BSD
49 extern int      proc_selfpid(void);
50 extern char *   proc_name_address(void *p);
51 extern void     rethrottle_thread(void * uthread);
52 #endif /* MACH_BSD */
53 
54 #define QOS_EXTRACT(q)        ((q) & 0xff)
55 
56 #define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
57 #define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
58 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
59 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 3
60 
61 TUNABLE(uint32_t, qos_override_mode, "qos_override_mode",
62     QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE);
63 
64 static void
65 proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset);
66 
/* Default kevent IO-tier override: THROTTLE_LEVEL_END means no override in effect. */
const int thread_default_iotier_override  = THROTTLE_LEVEL_END;

/* Template requested-policy for a new thread; only the kevent IO-tier
 * override needs a non-zero initial value. */
const struct thread_requested_policy default_thread_requested_policy = {
	.thrp_iotier_kevent_override = thread_default_iotier_override
};
72 
/*
 * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
 * to threads that don't have a QoS class set.
 */
const qos_policy_params_t thread_qos_policy_params = {
	/*
	 * This table defines the starting base priority of the thread,
	 * which will be modified by the thread importance and the task max priority
	 * before being applied.
	 */
	.qos_pri[THREAD_QOS_UNSPECIFIED]                = 0, /* not consulted */
	.qos_pri[THREAD_QOS_USER_INTERACTIVE]           = BASEPRI_BACKGROUND, /* i.e. 46 */
	.qos_pri[THREAD_QOS_USER_INITIATED]             = BASEPRI_USER_INITIATED,
	.qos_pri[THREAD_QOS_LEGACY]                     = BASEPRI_DEFAULT,
	.qos_pri[THREAD_QOS_UTILITY]                    = BASEPRI_UTILITY,
	.qos_pri[THREAD_QOS_BACKGROUND]                 = MAXPRI_THROTTLE,
	.qos_pri[THREAD_QOS_MAINTENANCE]                = MAXPRI_THROTTLE,

	/*
	 * This table defines the highest IO priority that a thread marked with this
	 * QoS class can have.
	 */
	.qos_iotier[THREAD_QOS_UNSPECIFIED]             = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_USER_INTERACTIVE]        = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_USER_INITIATED]          = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_LEGACY]                  = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_UTILITY]                 = THROTTLE_LEVEL_TIER1,
	.qos_iotier[THREAD_QOS_BACKGROUND]              = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */
	.qos_iotier[THREAD_QOS_MAINTENANCE]             = THROTTLE_LEVEL_TIER3,

	/*
	 * This table maps each QoS class to the throughput QoS tier
	 * assigned to threads of that class.
	 */

	.qos_through_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
	.qos_through_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
	.qos_through_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
	.qos_through_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
	.qos_through_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
	.qos_through_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
	.qos_through_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),

	/*
	 * This table maps each QoS class to the latency QoS tier
	 * assigned to threads of that class.
	 */
	.qos_latency_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
	.qos_latency_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(LATENCY_QOS_TIER_0),
	.qos_latency_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(LATENCY_QOS_TIER_1),
	.qos_latency_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(LATENCY_QOS_TIER_1),
	.qos_latency_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(LATENCY_QOS_TIER_3),
	.qos_latency_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(LATENCY_QOS_TIER_3),
	.qos_latency_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(LATENCY_QOS_TIER_3),
};
124 
125 static void
126 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);
127 
128 static int
129 thread_qos_scaled_relative_priority(int qos, int qos_relprio);
130 
131 static void
132 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);
133 
134 static void
135 proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
136 
137 static void
138 proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
139 
140 static void
141 thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
142 
143 static int
144 thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);
145 
146 static int
147 proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);
148 
149 static void
150 thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
151 
152 static void
153 thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
154 
155 boolean_t
thread_has_qos_policy(thread_t thread)156 thread_has_qos_policy(thread_t thread)
157 {
158 	return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
159 }
160 
161 
162 static void
thread_remove_qos_policy_locked(thread_t thread,task_pend_token_t pend_token)163 thread_remove_qos_policy_locked(thread_t thread,
164     task_pend_token_t pend_token)
165 {
166 	__unused int prev_qos = thread->requested_policy.thrp_qos;
167 
168 	DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);
169 
170 	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
171 	    THREAD_QOS_UNSPECIFIED, 0, pend_token);
172 }
173 
174 kern_return_t
thread_remove_qos_policy(thread_t thread)175 thread_remove_qos_policy(thread_t thread)
176 {
177 	struct task_pend_token pend_token = {};
178 
179 	thread_mtx_lock(thread);
180 	if (!thread->active) {
181 		thread_mtx_unlock(thread);
182 		return KERN_TERMINATED;
183 	}
184 
185 	thread_remove_qos_policy_locked(thread, &pend_token);
186 
187 	thread_mtx_unlock(thread);
188 
189 	thread_policy_update_complete_unlocked(thread, &pend_token);
190 
191 	return KERN_SUCCESS;
192 }
193 
194 
195 boolean_t
thread_is_static_param(thread_t thread)196 thread_is_static_param(thread_t thread)
197 {
198 	if (thread->static_param) {
199 		DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
200 		return TRUE;
201 	}
202 	return FALSE;
203 }
204 
205 /*
206  * Relative priorities can range between 0REL and -15REL. These
207  * map to QoS-specific ranges, to create non-overlapping priority
208  * ranges.
209  */
210 static int
thread_qos_scaled_relative_priority(int qos,int qos_relprio)211 thread_qos_scaled_relative_priority(int qos, int qos_relprio)
212 {
213 	int next_lower_qos;
214 
215 	/* Fast path, since no validation or scaling is needed */
216 	if (qos_relprio == 0) {
217 		return 0;
218 	}
219 
220 	switch (qos) {
221 	case THREAD_QOS_USER_INTERACTIVE:
222 		next_lower_qos = THREAD_QOS_USER_INITIATED;
223 		break;
224 	case THREAD_QOS_USER_INITIATED:
225 		next_lower_qos = THREAD_QOS_LEGACY;
226 		break;
227 	case THREAD_QOS_LEGACY:
228 		next_lower_qos = THREAD_QOS_UTILITY;
229 		break;
230 	case THREAD_QOS_UTILITY:
231 		next_lower_qos = THREAD_QOS_BACKGROUND;
232 		break;
233 	case THREAD_QOS_MAINTENANCE:
234 	case THREAD_QOS_BACKGROUND:
235 		next_lower_qos = 0;
236 		break;
237 	default:
238 		panic("Unrecognized QoS %d", qos);
239 		return 0;
240 	}
241 
242 	int prio_range_max = thread_qos_policy_params.qos_pri[qos];
243 	int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0;
244 
245 	/*
246 	 * We now have the valid range that the scaled relative priority can map to. Note
247 	 * that the lower bound is exclusive, but the upper bound is inclusive. If the
248 	 * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
249 	 * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
250 	 * remainder.
251 	 */
252 	int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4);
253 
254 	return scaled_relprio;
255 }
256 
257 /*
258  * flag set by -qos-policy-allow boot-arg to allow
259  * testing thread qos policy from userspace
260  */
261 static TUNABLE(bool, allow_qos_policy_set, "-qos-policy-allow", false);
262 
/*
 * thread_policy_set:
 *
 * MIG entry point for setting a scheduling policy on a thread.
 *
 * Unless the -qos-policy-allow boot-arg is set, this rejects:
 *  - changes to threads with static scheduling parameters,
 *  - direct THREAD_QOS_POLICY requests from userspace,
 *  - THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY requests that ask for
 *    a priority other than BASEPRI_RTQUEUES.
 *
 * Any existing QoS on the thread is removed before applying the new
 * policy; if the new policy fails to apply, the old QoS is restored.
 */
kern_return_t
thread_policy_set(
	thread_t                                thread,
	thread_policy_flavor_t  flavor,
	thread_policy_t                 policy_info,
	mach_msg_type_number_t  count)
{
	thread_qos_policy_data_t req_qos;
	kern_return_t kr;

	/* UNSPECIFIED doubles as "no saved QoS to restore" below. */
	req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;

	if (thread == THREAD_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (!allow_qos_policy_set) {
		if (thread_is_static_param(thread)) {
			return KERN_POLICY_STATIC;
		}

		if (flavor == THREAD_QOS_POLICY) {
			return KERN_INVALID_ARGUMENT;
		}

		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			if (count < THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT) {
				return KERN_INVALID_ARGUMENT;
			}
			thread_time_constraint_with_priority_policy_t info = (thread_time_constraint_with_priority_policy_t)policy_info;
			if (info->priority != BASEPRI_RTQUEUES) {
				return KERN_INVALID_ARGUMENT;
			}
		}
	}

	if (flavor == THREAD_TIME_CONSTRAINT_POLICY || flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
		thread_work_interval_flags_t th_wi_flags = os_atomic_load(
			&thread->th_work_interval_flags, relaxed);
		if ((th_wi_flags & TH_WORK_INTERVAL_FLAGS_HAS_WORKLOAD_ID) &&
		    !(th_wi_flags & TH_WORK_INTERVAL_FLAGS_RT_ALLOWED)) {
			/* Fail requests to become realtime for threads having joined workintervals
			 * with workload ID that don't have the rt-allowed flag. */
			return KERN_INVALID_POLICY;
		}
	}

	/* Threads without static_param set reset their QoS when other policies are applied. */
	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		/* Store the existing tier, if we fail this call it is used to reset back. */
		req_qos.qos_tier = thread->requested_policy.thrp_qos;
		req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;

		kr = thread_remove_qos_policy(thread);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
	}

	kr = thread_policy_set_internal(thread, flavor, policy_info, count);

	if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
		if (kr != KERN_SUCCESS) {
			/* Reset back to our original tier as the set failed. */
			(void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
		}
	}

	return kr;
}
333 
/*
 * thread_policy_set_internal() handles THREAD_TIME_CONSTRAINT_POLICY and
 * THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY through a single cast to the
 * "with priority" type; these asserts guarantee the shared leading fields
 * of the two structs remain at identical offsets so that cast stays valid.
 */
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, period) == offsetof(thread_time_constraint_policy_data_t, period));
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, computation) == offsetof(thread_time_constraint_policy_data_t, computation));
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, constraint) == offsetof(thread_time_constraint_policy_data_t, constraint));
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, preemptible) == offsetof(thread_time_constraint_policy_data_t, preemptible));
338 
/*
 * thread_policy_set_internal:
 *
 * Worker for thread_policy_set() (also reachable from in-kernel callers
 * that bypass the userspace restrictions). Takes the thread mutex, applies
 * the requested flavor, then flushes deferred side effects via
 * thread_policy_update_complete_unlocked().
 *
 * Note the THREAD_AFFINITY_POLICY case drops the mutex and returns early;
 * see the comment there for the lock-ordering reason.
 */
kern_return_t
thread_policy_set_internal(
	thread_t                     thread,
	thread_policy_flavor_t       flavor,
	thread_policy_t              policy_info,
	mach_msg_type_number_t       count)
{
	kern_return_t result = KERN_SUCCESS;
	struct task_pend_token pend_token = {};

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return KERN_TERMINATED;
	}

	switch (flavor) {
	case THREAD_EXTENDED_POLICY:
	{
		/* With no (or short) policy_info, this flavor defaults to timeshare. */
		boolean_t timeshare = TRUE;

		if (count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t info;

			info = (thread_extended_policy_t)policy_info;
			timeshare = info->timeshare;
		}

		sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;

		spl_t s = splsched();
		thread_lock(thread);

		/*
		 * If the thread has previously requested realtime but is
		 * demoted with RT_RESTRICTED, undemote the thread before
		 * applying the new user sched mode. This prevents the thread
		 * being stuck at TIMESHARE or being made realtime unexpectedly
		 * (when undemoted).
		 */
		if ((thread->sched_flags & TH_SFLAG_RT_RESTRICTED) != 0) {
			sched_thread_mode_undemote(thread, TH_SFLAG_RT_RESTRICTED);
		}

		thread_set_user_sched_mode_and_recompute_pri(thread, mode);

		thread_unlock(thread);
		splx(s);

		/* Mode changes can alter the thread's SFI class; re-evaluate later. */
		pend_token.tpt_update_thread_sfi = 1;

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
	{
		thread_time_constraint_with_priority_policy_t info;

		/* The "with priority" flavor carries one extra field; require the larger count. */
		mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
		    THREAD_TIME_CONSTRAINT_POLICY_COUNT :
		    THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);

		if (count < min_count) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Safe for both flavors: shared fields are at identical offsets
		 * (see the static_asserts above). */
		info = (thread_time_constraint_with_priority_policy_t)policy_info;

		/* Computation must fit inside the constraint and within RT quantum bounds. */
		if (info->constraint < info->computation ||
		    info->computation > max_rt_quantum ||
		    info->computation < min_rt_quantum) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Clamp computation up to at least half the constraint (capped at max_rt_quantum). */
		if (info->computation < (info->constraint / 2)) {
			info->computation = (info->constraint / 2);
			if (info->computation > max_rt_quantum) {
				info->computation = max_rt_quantum;
			}
		}

		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			if ((info->priority < BASEPRI_RTQUEUES) || (info->priority > MAXPRI)) {
				result = KERN_INVALID_ARGUMENT;
				break;
			}
		}

		spl_t s = splsched();
		thread_lock(thread);

		thread->realtime.period          = info->period;
		thread->realtime.computation     = info->computation;
		thread->realtime.constraint      = info->constraint;
		thread->realtime.preemptible     = info->preemptible;

		thread_work_interval_flags_t th_wi_flags = os_atomic_load(
			&thread->th_work_interval_flags, relaxed);

		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			/* Priority offset above BASEPRI_RTQUEUES; explicitly requested. */
			thread->realtime.priority_offset = (uint8_t)(info->priority - BASEPRI_RTQUEUES);
		} else if ((th_wi_flags & TH_WORK_INTERVAL_FLAGS_HAS_WORKLOAD_ID) &&
		    (th_wi_flags & TH_WORK_INTERVAL_FLAGS_RT_CRITICAL)) {
			/* N.B. that criticality/realtime priority offset is currently not adjusted when the
			 * thread leaves the work interval, or only joins it after already having become realtime */
			thread->realtime.priority_offset = 1;
		} else {
			thread->realtime.priority_offset = 0;
		}

		thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);

		thread_unlock(thread);
		splx(s);

		thread_rt_evaluate(thread);

		pend_token.tpt_update_thread_sfi = 1;

		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t info;

		if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}
		info = (thread_precedence_policy_t)policy_info;

		spl_t s = splsched();
		thread_lock(thread);

		/* Importance feeds into base priority; recompute immediately. */
		thread->importance = info->importance;

		thread_recompute_priority(thread);

		thread_unlock(thread);
		splx(s);

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		extern boolean_t affinity_sets_enabled;
		thread_affinity_policy_t info;

		if (!affinity_sets_enabled) {
			result = KERN_INVALID_POLICY;
			break;
		}

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t) policy_info;
		/*
		 * Unlock the thread mutex here and
		 * return directly after calling thread_affinity_set().
		 * This is necessary for correct lock ordering because
		 * thread_affinity_set() takes the task lock.
		 */
		thread_mtx_unlock(thread);
		return thread_affinity_set(thread, info->affinity_tag);
	}

#if !defined(XNU_TARGET_OS_OSX)
	case THREAD_BACKGROUND_POLICY:
	{
		thread_background_policy_t info;

		if (count < THREAD_BACKGROUND_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Only a task's own threads may be backgrounded this way. */
		if (get_threadtask(thread) != current_task()) {
			result = KERN_PROTECTION_FAILURE;
			break;
		}

		info = (thread_background_policy_t) policy_info;

		int enable;

		if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG) {
			enable = TASK_POLICY_ENABLE;
		} else {
			enable = TASK_POLICY_DISABLE;
		}

		/* Self-set DARWIN_BG is tracked as internal; set-by-others as external. */
		int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;

		proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token);

		break;
	}
#endif /* !defined(XNU_TARGET_OS_OSX) */

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		thread_throughput_qos_t tqos;

		if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS) {
			break;
		}

		tqos = qos_extract(info->thread_throughput_qos_tier);

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);

		break;
	}

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		thread_latency_qos_t lqos;

		if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS) {
			break;
		}

		lqos = qos_extract(info->thread_latency_qos_tier);

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);

		break;
	}

	case THREAD_QOS_POLICY:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Relative priority must lie in [THREAD_QOS_MIN_TIER_IMPORTANCE, 0]. */
		if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* No relative priority without a QoS tier to anchor it. */
		if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Relative priority is stored negated (as a positive magnitude). */
		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
		    info->qos_tier, -info->tier_importance, &pend_token);

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	return result;
}
637 
638 /*
639  * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
640  * Both result in FIXED mode scheduling.
641  */
642 static sched_mode_t
convert_policy_to_sched_mode(integer_t policy)643 convert_policy_to_sched_mode(integer_t policy)
644 {
645 	switch (policy) {
646 	case POLICY_TIMESHARE:
647 		return TH_MODE_TIMESHARE;
648 	case POLICY_RR:
649 	case POLICY_FIFO:
650 		return TH_MODE_FIXED;
651 	default:
652 		panic("unexpected sched policy: %d", policy);
653 		return TH_MODE_NONE;
654 	}
655 }
656 
/*
 * Called either with the thread mutex locked
 * or from the pthread kext in a 'safe place'.
 *
 * Applies a user-requested sched mode and absolute priority by converting
 * the priority into an importance value relative to the task's priority.
 * Refuses to move a thread out of realtime (KERN_FAILURE), and silently
 * succeeds without changes on a policy-reset (terminating) thread.
 */
static kern_return_t
thread_set_mode_and_absolute_pri_internal(thread_t              thread,
    sched_mode_t          mode,
    integer_t             priority,
    task_pend_token_t     pend_token)
{
	kern_return_t kr = KERN_SUCCESS;

	spl_t s = splsched();
	thread_lock(thread);

	/* This path isn't allowed to change a thread out of realtime. */
	if ((thread->sched_mode == TH_MODE_REALTIME) ||
	    (thread->saved_mode == TH_MODE_REALTIME)) {
		kr = KERN_FAILURE;
		goto unlock;
	}

	if (thread->policy_reset) {
		kr = KERN_SUCCESS;
		goto unlock;
	}

	sched_mode_t old_mode = thread->sched_mode;

	/*
	 * Reverse engineer and apply the correct importance value
	 * from the requested absolute priority value.
	 *
	 * TODO: Store the absolute priority value instead
	 */

	/* Rebase the requested priority against the band it falls into... */
	if (priority >= thread->max_priority) {
		priority = thread->max_priority - thread->task_priority;
	} else if (priority >= MINPRI_KERNEL) {
		priority -=  MINPRI_KERNEL;
	} else if (priority >= MINPRI_RESERVED) {
		priority -=  MINPRI_RESERVED;
	} else {
		priority -= BASEPRI_DEFAULT;
	}

	/* ...then re-anchor it at the task's priority and clamp to valid range. */
	priority += thread->task_priority;

	if (priority > thread->max_priority) {
		priority = thread->max_priority;
	} else if (priority < MINPRI) {
		priority = MINPRI;
	}

	thread->importance = priority - thread->task_priority;

	thread_set_user_sched_mode_and_recompute_pri(thread, mode);

	/* A mode change can alter the thread's SFI class; note it for the caller. */
	if (mode != old_mode) {
		pend_token->tpt_update_thread_sfi = 1;
	}

unlock:
	thread_unlock(thread);
	splx(s);

	return kr;
}
725 
726 void
thread_freeze_base_pri(thread_t thread)727 thread_freeze_base_pri(thread_t thread)
728 {
729 	assert(thread == current_thread());
730 
731 	spl_t s = splsched();
732 	thread_lock(thread);
733 
734 	assert((thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN) == 0);
735 	thread->sched_flags |= TH_SFLAG_BASE_PRI_FROZEN;
736 
737 	thread_unlock(thread);
738 	splx(s);
739 }
740 
/*
 * thread_unfreeze_base_pri:
 *
 * Undo thread_freeze_base_pri() on the current thread and apply any
 * requested base priority change that accumulated while frozen.
 *
 * Returns true if the applied base-pri change is the most likely cause
 * of a pending preemption (an AST_PREEMPT is now visible).
 */
bool
thread_unfreeze_base_pri(thread_t thread)
{
	assert(thread == current_thread());
	integer_t base_pri;
	ast_t ast = 0;

	spl_t s = splsched();
	thread_lock(thread);

	assert(thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN);
	/* Clear the flag first so the priority update below takes effect. */
	thread->sched_flags &= ~TH_SFLAG_BASE_PRI_FROZEN;

	base_pri = thread->req_base_pri;
	if (base_pri != thread->base_pri) {
		/*
		 * This function returns "true" if the base pri change
		 * is the most likely cause for the preemption.
		 */
		sched_set_thread_base_priority(thread, base_pri);
		ast = ast_peek(AST_PREEMPT);
	}

	thread_unlock(thread);
	splx(s);

	return ast != 0;
}
769 
770 uint8_t
thread_workq_pri_for_qos(thread_qos_t qos)771 thread_workq_pri_for_qos(thread_qos_t qos)
772 {
773 	assert(qos < THREAD_QOS_LAST);
774 	return (uint8_t)thread_qos_policy_params.qos_pri[qos];
775 }
776 
777 thread_qos_t
thread_workq_qos_for_pri(int priority)778 thread_workq_qos_for_pri(int priority)
779 {
780 	thread_qos_t qos;
781 	if (priority > thread_qos_policy_params.qos_pri[THREAD_QOS_USER_INTERACTIVE]) {
782 		// indicate that workq should map >UI threads to workq's
783 		// internal notation for above-UI work.
784 		return THREAD_QOS_UNSPECIFIED;
785 	}
786 	for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
787 		// map a given priority up to the next nearest qos band.
788 		if (thread_qos_policy_params.qos_pri[qos - 1] < priority) {
789 			return qos;
790 		}
791 	}
792 	return THREAD_QOS_MAINTENANCE;
793 }
794 
/*
 * private interface for pthread workqueues
 *
 * Reset the thread's requested QoS to 'qos' (with no relative priority)
 * and clear any workqueue QoS override.
 * May be called with spinlocks held
 * Thread mutex lock is not held
 */
void
thread_reset_workq_qos(thread_t thread, uint32_t qos)
{
	struct task_pend_token pend_token = {};

	assert(qos < THREAD_QOS_LAST);

	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0,
	    &pend_token);

	/* QoS changes must not require socket rethrottling here (would need proc lock). */
	assert(pend_token.tpt_update_sockets == 0);

	thread_unlock(thread);
	splx(s);

	thread_policy_update_complete_unlocked(thread, &pend_token);
}
825 
826 /*
827  * private interface for pthread workqueues
828  *
829  * Set scheduling policy & absolute priority for thread
830  * May be called with spinlocks held
831  * Thread mutex lock is held
832  */
833 void
thread_set_workq_override(thread_t thread,uint32_t qos)834 thread_set_workq_override(thread_t thread, uint32_t qos)
835 {
836 	struct task_pend_token pend_token = {};
837 
838 	assert(qos < THREAD_QOS_LAST);
839 
840 	spl_t s = splsched();
841 	thread_lock(thread);
842 
843 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
844 	    TASK_POLICY_QOS_WORKQ_OVERRIDE, qos, 0, &pend_token);
845 
846 	assert(pend_token.tpt_update_sockets == 0);
847 
848 	thread_unlock(thread);
849 	splx(s);
850 
851 	thread_policy_update_complete_unlocked(thread, &pend_token);
852 }
853 
/*
 * private interface for pthread workqueues
 *
 * Set scheduling policy & absolute priority for thread
 * May be called with spinlocks held
 * Thread mutex lock is not held
 *
 * Sets the thread's QoS, clears any workqueue override, then applies the
 * requested sched mode and absolute priority. Only valid on active
 * static-param threads (silently returns otherwise).
 */
void
thread_set_workq_pri(thread_t  thread,
    thread_qos_t qos,
    integer_t priority,
    integer_t policy)
{
	struct task_pend_token pend_token = {};
	sched_mode_t mode = convert_policy_to_sched_mode(policy);

	assert(qos < THREAD_QOS_LAST);
	assert(thread->static_param);

	if (!thread->static_param || !thread->active) {
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED,
	    0, &pend_token);

	thread_unlock(thread);
	splx(s);

	/* Concern: this doesn't hold the mutex... */

	__assert_only kern_return_t kr;
	kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority,
	    &pend_token);
	assert(kr == KERN_SUCCESS);

	if (pend_token.tpt_update_thread_sfi) {
		sfi_reevaluate(thread);
	}
}
900 
/*
 * thread_set_mode_and_absolute_pri:
 *
 * Set scheduling policy & absolute priority for thread, for deprecated
 * thread_set_policy and thread_policy interfaces.
 *
 * Called with nothing locked.
 *
 * Fails with KERN_TERMINATED on inactive threads and KERN_POLICY_STATIC
 * on threads with static scheduling parameters.
 */
kern_return_t
thread_set_mode_and_absolute_pri(thread_t   thread,
    integer_t  policy,
    integer_t  priority)
{
	kern_return_t kr = KERN_SUCCESS;
	struct task_pend_token pend_token = {};

	sched_mode_t mode = convert_policy_to_sched_mode(policy);

	thread_mtx_lock(thread);

	if (!thread->active) {
		kr = KERN_TERMINATED;
		goto unlock;
	}

	if (thread_is_static_param(thread)) {
		kr = KERN_POLICY_STATIC;
		goto unlock;
	}

	/* Setting legacy policies on threads kills the current QoS */
	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		thread_remove_qos_policy_locked(thread, &pend_token);
	}

	kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);

unlock:
	thread_mtx_unlock(thread);

	/* Flush deferred side effects even on failure (QoS removal may have pended work). */
	thread_policy_update_complete_unlocked(thread, &pend_token);

	return kr;
}
945 
946 /*
947  * Set the thread's requested mode and recompute priority
948  * Called with thread mutex and thread locked
949  *
950  * TODO: Mitigate potential problems caused by moving thread to end of runq
951  * whenever its priority is recomputed
952  *      Only remove when it actually changes? Attempt to re-insert at appropriate location?
953  */
static void
thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
{
	/* Policy is frozen once the thread is being torn down (see thread_policy_reset) */
	if (thread->policy_reset) {
		return;
	}

	/*
	 * Pull the thread off its run queue (if it is runnable) so that changing
	 * the mode and base priority cannot corrupt run-queue ordering; reinsert
	 * at the tail afterwards.
	 */
	boolean_t removed = thread_run_queue_remove(thread);

	sched_set_thread_mode_user(thread, mode);

	thread_recompute_priority(thread);

	if (removed) {
		thread_run_queue_reinsert(thread, SCHED_TAILQ);
	}
}
971 
972 /* called at splsched with thread lock locked */
static void
thread_update_qos_cpu_time_locked(thread_t thread)
{
	task_t task = get_threadtask(thread);
	uint64_t timer_sum, timer_delta;

	/*
	 * This is only as accurate the thread's last context switch or user/kernel
	 * transition (unless precise user/kernel time is disabled).
	 *
	 * TODO: Consider running an update operation here to update it first.
	 *       Maybe doable with interrupts disabled from current thread.
	 *       If the thread is on a different core, may not be easy to get right.
	 */

	/* Charge only the CPU time accrued since the previous accounting pass */
	timer_sum = recount_thread_time_mach(thread);
	timer_delta = timer_sum - thread->vtimer_qos_save;

	thread->vtimer_qos_save = timer_sum;

	uint64_t* task_counter = NULL;

	/* Update the task-level effective and requested qos stats atomically, because we don't have the task lock. */
	switch (thread->effective_policy.thep_qos) {
	case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default; break;
	case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance; break;
	case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background; break;
	case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility; break;
	case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy; break;
	case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated; break;
	case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break;
	default:
		panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
	}

	OSAddAtomic64(timer_delta, task_counter);

	/* Update the task-level qos stats atomically, because we don't have the task lock. */
	switch (thread->requested_policy.thrp_qos) {
	case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default; break;
	case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance; break;
	case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background; break;
	case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility; break;
	case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy; break;
	case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated; break;
	case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break;
	default:
		panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos);
	}

	OSAddAtomic64(timer_delta, task_counter);
}
1025 
1026 /*
1027  * called with no thread locks held
1028  * may hold task lock
1029  */
void
thread_update_qos_cpu_time(thread_t thread)
{
	/* Lock order: thread mutex first, then spl raise, then thread spinlock */
	thread_mtx_lock(thread);

	spl_t s = splsched();
	thread_lock(thread);

	thread_update_qos_cpu_time_locked(thread);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);
}
1045 
1046 /*
1047  * Calculate base priority from thread attributes, and set it on the thread
1048  *
1049  * Called with thread_lock and thread mutex held.
1050  */
1051 extern boolean_t vps_dynamic_priority_enabled;
1052 
void
thread_recompute_priority(
	thread_t                thread)
{
	integer_t               priority;
	integer_t               adj_priority;

	/* Policy is frozen during termination (see thread_policy_reset) */
	if (thread->policy_reset) {
		return;
	}

	if (thread->sched_mode == TH_MODE_REALTIME) {
		/* Realtime threads live above the timeshare bands at BASEPRI_RTQUEUES + offset */
		uint8_t i = thread->realtime.priority_offset;
		assert((i >= 0) && (i < NRTQS));
		priority = BASEPRI_RTQUEUES + i;
		sched_set_thread_base_priority(thread, priority);
		if (thread->realtime.deadline == RT_DEADLINE_NONE) {
			/* Make sure the thread has a valid deadline */
			uint64_t ctime = mach_absolute_time();
			thread->realtime.deadline = thread->realtime.constraint + ctime;
			KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SET_RT_DEADLINE) | DBG_FUNC_NONE,
			    (uintptr_t)thread_tid(thread), thread->realtime.deadline, thread->realtime.computation, 1);
		}
		/* Realtime threads skip all of the clamping logic below */
		return;
	} else if (thread->effective_policy.thep_wi_driven) {
		/* Work-interval-driven threads take their priority from the work interval */
		priority = work_interval_get_priority(thread);
	} else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
		/* QoS threads: table lookup for the tier, then apply relative priority */
		int qos = thread->effective_policy.thep_qos;
		int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
		int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
		int qos_scaled_relprio;

		assert(qos >= 0 && qos < THREAD_QOS_LAST);
		assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);

		priority = thread_qos_policy_params.qos_pri[qos];
		qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);

		if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) {
			/* Bump priority 46 to 47 when in a frontmost app */
			qos_scaled_relprio += 1;
		}

		/* TODO: factor in renice priority here? */

		priority += qos_scaled_relprio;
	} else {
		/* Legacy path: task priority plus user-supplied importance, pre-clamped */
		if (thread->importance > MAXPRI) {
			priority = MAXPRI;
		} else if (thread->importance < -MAXPRI) {
			priority = -MAXPRI;
		} else {
			priority = thread->importance;
		}

		priority += thread->task_priority;
	}

	/* A user promotion (e.g. turnstile boost) can raise the floor */
	priority = MAX(priority, thread->user_promotion_basepri);

	/*
	 * Clamp priority back into the allowed range for this task.
	 *  The initial priority value could be out of this range due to:
	 *      Task clamped to BG or Utility (max-pri is 4, or 20)
	 *      Task is user task (max-pri is 63)
	 *      Task is kernel task (max-pri is 95)
	 * Note that thread->importance is user-settable to any integer
	 * via THREAD_PRECEDENCE_POLICY.
	 */
	adj_priority = priority;
	adj_priority = MIN(adj_priority, thread->max_priority);
	adj_priority = MAX(adj_priority, MINPRI);

	/* Allow workload driven priorities to exceed max_priority. */
	if (thread->effective_policy.thep_wi_driven) {
		adj_priority = MAX(adj_priority, priority);
	}

	/* Allow priority to exceed max_priority for promotions. */
	if (thread->effective_policy.thep_promote_above_task) {
		adj_priority = MAX(adj_priority, thread->user_promotion_basepri);
	}
	priority = adj_priority;
	assert3u(priority, <=, MAXPRI);

	/* A demoted realtime thread under the failsafe runs depressed instead */
	if (thread->saved_mode == TH_MODE_REALTIME &&
	    thread->sched_flags & TH_SFLAG_FAILSAFE) {
		priority = DEPRESSPRI;
	}

	if (thread->effective_policy.thep_terminated == TRUE) {
		/*
		 * We temporarily want to override the expected priority to
		 * ensure that the thread exits in a timely manner.
		 * Note that this is allowed to exceed thread->max_priority
		 * so that the thread is no longer clamped to background
		 * during the final exit phase.
		 */
		if (priority < thread->task_priority) {
			priority = thread->task_priority;
		}
		if (priority < BASEPRI_DEFAULT) {
			priority = BASEPRI_DEFAULT;
		}
	}

#if !defined(XNU_TARGET_OS_OSX)
	/* No one can have a base priority less than MAXPRI_THROTTLE */
	if (priority < MAXPRI_THROTTLE) {
		priority = MAXPRI_THROTTLE;
	}
#endif /* !defined(XNU_TARGET_OS_OSX) */

	sched_set_thread_base_priority(thread, priority);
}
1168 
1169 /* Called with the task lock held, but not the thread mutex or spinlock */
1170 void
thread_policy_update_tasklocked(thread_t thread,integer_t priority,integer_t max_priority,task_pend_token_t pend_token)1171 thread_policy_update_tasklocked(
1172 	thread_t           thread,
1173 	integer_t          priority,
1174 	integer_t          max_priority,
1175 	task_pend_token_t  pend_token)
1176 {
1177 	thread_mtx_lock(thread);
1178 
1179 	if (!thread->active || thread->policy_reset) {
1180 		thread_mtx_unlock(thread);
1181 		return;
1182 	}
1183 
1184 	spl_t s = splsched();
1185 	thread_lock(thread);
1186 
1187 	__unused
1188 	integer_t old_max_priority = thread->max_priority;
1189 
1190 	assert(priority >= INT16_MIN && priority <= INT16_MAX);
1191 	thread->task_priority = (int16_t)priority;
1192 
1193 	assert(max_priority >= INT16_MIN && max_priority <= INT16_MAX);
1194 	thread->max_priority = (int16_t)max_priority;
1195 
1196 	/*
1197 	 * When backgrounding a thread, realtime and fixed priority threads
1198 	 * should be demoted to timeshare background threads.
1199 	 *
1200 	 * TODO: Do this inside the thread policy update routine in order to avoid double
1201 	 * remove/reinsert for a runnable thread
1202 	 */
1203 	if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
1204 		sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED);
1205 	} else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
1206 		sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1207 	}
1208 
1209 	thread_policy_update_spinlocked(thread, true, pend_token);
1210 
1211 	thread_unlock(thread);
1212 	splx(s);
1213 
1214 	thread_mtx_unlock(thread);
1215 }
1216 
1217 /*
1218  * Reset thread to default state in preparation for termination
1219  * Called with thread mutex locked
1220  *
1221  * Always called on current thread, so we don't need a run queue remove
1222  */
void
thread_policy_reset(
	thread_t                thread)
{
	spl_t           s;

	/* Only ever called on the current thread, so no run-queue remove is needed */
	assert(thread == current_thread());

	s = splsched();
	thread_lock(thread);

	/* Undo each active scheduler demotion before resetting mode/priority */
	if (thread->sched_flags & TH_SFLAG_FAILSAFE) {
		sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
	}

	if (thread->sched_flags & TH_SFLAG_THROTTLED) {
		sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
	}

	if (thread->sched_flags & TH_SFLAG_RT_RESTRICTED) {
		sched_thread_mode_undemote(thread, TH_SFLAG_RT_RESTRICTED);
	}

	/* At this point, the various demotions should be inactive */
	assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
	assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK));

	/* Reset thread back to task-default basepri and mode  */
	sched_mode_t newmode = SCHED(initial_thread_sched_mode)(get_threadtask(thread));

	sched_set_thread_mode(thread, newmode);

	thread->importance = 0;

	/* Prevent further changes to thread base priority or mode */
	thread->policy_reset = 1;

	sched_set_thread_base_priority(thread, thread->task_priority);

	thread_unlock(thread);
	splx(s);
}
1265 
/*
 * thread_policy_get:
 *
 * Fetch scheduling-policy state for a thread, one flavor at a time.
 * If *get_default is TRUE on entry (or becomes TRUE because the thread is
 * not in the mode being queried), default values for the flavor are
 * returned instead of the live state.
 *
 * Called with nothing locked; takes the thread mutex, and the thread
 * spinlock for flavors that read scheduler state.
 */
kern_return_t
thread_policy_get(
	thread_t                                thread,
	thread_policy_flavor_t  flavor,
	thread_policy_t                 policy_info,
	mach_msg_type_number_t  *count,
	boolean_t                               *get_default)
{
	kern_return_t                   result = KERN_SUCCESS;

	if (thread == THREAD_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return KERN_TERMINATED;
	}

	switch (flavor) {
	case THREAD_EXTENDED_POLICY:
	{
		boolean_t               timeshare = TRUE;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			/*
			 * A realtime thread (current or saved mode) has no meaningful
			 * timeshare answer, so report the default instead.  For demoted
			 * threads, report the mode they will return to (saved_mode).
			 */
			if ((thread->sched_mode != TH_MODE_REALTIME) &&
			    (thread->saved_mode != TH_MODE_REALTIME)) {
				if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
					timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
				} else {
					timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
				}
			} else {
				*get_default = TRUE;
			}

			thread_unlock(thread);
			splx(s);
		}

		if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t        info;

			info = (thread_extended_policy_t)policy_info;
			info->timeshare = timeshare;
		}

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
	{
		/* The WITH_PRIORITY variant is a superset; share one code path */
		thread_time_constraint_with_priority_policy_t         info;

		mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
		    THREAD_TIME_CONSTRAINT_POLICY_COUNT :
		    THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);

		if (*count < min_count) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_with_priority_policy_t)policy_info;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			/* Only meaningful for realtime threads (including demoted ones) */
			if ((thread->sched_mode == TH_MODE_REALTIME) ||
			    (thread->saved_mode == TH_MODE_REALTIME)) {
				info->period = thread->realtime.period;
				info->computation = thread->realtime.computation;
				info->constraint = thread->realtime.constraint;
				info->preemptible = thread->realtime.preemptible;
				if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
					info->priority = thread->realtime.priority_offset + BASEPRI_RTQUEUES;
				}
			} else {
				*get_default = TRUE;
			}

			thread_unlock(thread);
			splx(s);
		}

		if (*get_default) {
			/* Default realtime parameters derived from the timeshare quantum */
			info->period = 0;
			info->computation = default_timeshare_computation;
			info->constraint = default_timeshare_constraint;
			info->preemptible = TRUE;
			if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
				info->priority = BASEPRI_RTQUEUES;
			}
		}


		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t              info;

		if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_precedence_policy_t)policy_info;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			info->importance = thread->importance;

			thread_unlock(thread);
			splx(s);
		} else {
			info->importance = 0;
		}

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		thread_affinity_policy_t                info;

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (*count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t)policy_info;

		if (!(*get_default)) {
			info->affinity_tag = thread_affinity_get(thread);
		} else {
			info->affinity_tag = THREAD_AFFINITY_TAG_NULL;
		}

		break;
	}

	case THREAD_POLICY_STATE:
	{
		thread_policy_state_t           info;

		if (*count < THREAD_POLICY_STATE_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Only root can get this info */
		if (!task_is_privileged(current_task())) {
			result = KERN_PROTECTION_FAILURE;
			break;
		}

		info = (thread_policy_state_t)(void*)policy_info;

		if (!(*get_default)) {
			info->flags = 0;

			spl_t s = splsched();
			thread_lock(thread);

			info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0);

			/* Export the raw policy bitfields for debugging tools */
			info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy);
			info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy);

			info->thps_user_promotions          = 0;
			info->thps_user_promotion_basepri   = thread->user_promotion_basepri;
			info->thps_ipc_overrides            = thread->kevent_overrides;

			proc_get_thread_policy_bitfield(thread, info);

			thread_unlock(thread);
			splx(s);
		} else {
			info->requested = 0;
			info->effective = 0;
			info->pending = 0;
		}

		break;
	}

	case THREAD_REQUESTED_STATE_POLICY:
	{
		if (*count < THREAD_REQUESTED_STATE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		thread_requested_qos_policy_t info = (thread_requested_qos_policy_t) policy_info;
		/*
		 * NOTE(review): requested_policy is read here under the thread mutex
		 * only — unlike the other flavors, no splsched/thread_lock is taken,
		 * and *get_default is not consulted.  Confirm the mutex alone is
		 * sufficient for a consistent snapshot here.
		 */
		struct thread_requested_policy *req_policy = &thread->requested_policy;

		info->thrq_base_qos = req_policy->thrp_qos;
		info->thrq_qos_relprio = req_policy->thrp_qos_relprio;
		info->thrq_qos_override = req_policy->thrp_qos_override;
		info->thrq_qos_promote = req_policy->thrp_qos_promote;
		info->thrq_qos_kevent_override = req_policy->thrp_qos_kevent_override;
		info->thrq_qos_workq_override = req_policy->thrp_qos_workq_override;
		info->thrq_qos_wlsvc_override = req_policy->thrp_qos_wlsvc_override;

		break;
	}

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		thread_latency_qos_t plqos;

		if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			plqos = 0;
		} else {
			plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL);
		}

		/* Convert internal latency QoS value to the external tier encoding */
		info->thread_latency_qos_tier = qos_latency_policy_package(plqos);
	}
	break;

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		thread_throughput_qos_t ptqos;

		if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			ptqos = 0;
		} else {
			ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL);
		}

		/* Convert internal throughput QoS value to the external tier encoding */
		info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);
	}
	break;

	case THREAD_QOS_POLICY:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (*count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (!(*get_default)) {
			int relprio_value = 0;
			info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
			    TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);

			/* relprio is stored inverted; flip it back for the caller */
			info->tier_importance = -relprio_value;
		} else {
			info->qos_tier = THREAD_QOS_UNSPECIFIED;
			info->tier_importance = 0;
		}

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	return result;
}
1560 
1561 void
thread_policy_create(thread_t thread)1562 thread_policy_create(thread_t thread)
1563 {
1564 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1565 	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1566 	    thread_tid(thread), theffective_0(thread),
1567 	    theffective_1(thread), thread->base_pri, 0);
1568 
1569 	/* We pass a pend token but ignore it */
1570 	struct task_pend_token pend_token = {};
1571 
1572 	thread_policy_update_internal_spinlocked(thread, true, &pend_token);
1573 
1574 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1575 	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1576 	    thread_tid(thread), theffective_0(thread),
1577 	    theffective_1(thread), thread->base_pri, 0);
1578 }
1579 
1580 static void
thread_policy_update_spinlocked(thread_t thread,bool recompute_priority,task_pend_token_t pend_token)1581 thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token)
1582 {
1583 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1584 	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
1585 	    thread_tid(thread), theffective_0(thread),
1586 	    theffective_1(thread), thread->base_pri, 0);
1587 
1588 	thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);
1589 
1590 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1591 	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
1592 	    thread_tid(thread), theffective_0(thread),
1593 	    theffective_1(thread), thread->base_pri, 0);
1594 }
1595 
1596 
1597 
1598 /*
1599  * One thread state update function TO RULE THEM ALL
1600  *
1601  * This function updates the thread effective policy fields
1602  * and pushes the results to the relevant subsystems.
1603  *
1604  * Called with thread spinlock locked, task may be locked, thread mutex may be locked
1605  */
static void
thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority,
    task_pend_token_t pend_token)
{
	/*
	 * Step 1:
	 *  Gather requested policy and effective task state
	 */

	const struct thread_requested_policy requested = thread->requested_policy;
	const struct task_effective_policy task_effective = get_threadtask(thread)->effective_policy;

	/*
	 * Step 2:
	 *  Calculate new effective policies from requested policy, task and thread state
	 *  Rules:
	 *      Don't change requested, it won't take effect
	 */

	struct thread_effective_policy next = {};

	next.thep_wi_driven = requested.thrp_wi_driven;

	next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;

	uint32_t next_qos = requested.thrp_qos;

	/* Overrides/promotions can only raise QoS, and only for threads opted into QoS */
	if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		next_qos = MAX(requested.thrp_qos_override, next_qos);
		next_qos = MAX(requested.thrp_qos_promote, next_qos);
		next_qos = MAX(requested.thrp_qos_kevent_override, next_qos);
		next_qos = MAX(requested.thrp_qos_wlsvc_override, next_qos);
		next_qos = MAX(requested.thrp_qos_workq_override, next_qos);
	}

	if (task_effective.tep_darwinbg && task_effective.tep_adaptive_bg &&
	    requested.thrp_qos_promote > THREAD_QOS_BACKGROUND) {
		/*
		 * This thread is turnstile-boosted higher than the adaptive clamp
		 * by a synchronous waiter. Allow that to override the adaptive
		 * clamp temporarily for this thread only.
		 */
		next.thep_promote_above_task = true;
		next_qos = requested.thrp_qos_promote;
	}

	next.thep_qos = next_qos;

	/* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
	if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
		if (next.thep_qos != THREAD_QOS_UNSPECIFIED) {
			next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
		} else {
			next.thep_qos = task_effective.tep_qos_clamp;
		}
		/* A clamped thread cannot also be work-interval driven */
		next.thep_wi_driven = 0;
	}

	/*
	 * Extract outbound-promotion QoS before applying task ceiling or BG clamp
	 * This allows QoS promotions to work properly even after the process is unclamped.
	 */
	next.thep_qos_promote = next.thep_qos;

	/* The ceiling only applies to threads that are in the QoS world */
	/* TODO: is it appropriate for this to limit a turnstile-boosted thread's QoS? */
	if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
	    next.thep_qos != THREAD_QOS_UNSPECIFIED) {
		next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
	}

	/*
	 * The QoS relative priority is only applicable when the original programmer's
	 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
	 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
	 * since otherwise it would be lower than unclamped threads. Similarly, in the
	 * presence of boosting, the programmer doesn't know what other actors
	 * are boosting the thread.
	 */
	if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
	    (requested.thrp_qos == next.thep_qos) &&
	    (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
		next.thep_qos_relprio = requested.thrp_qos_relprio;
	} else {
		next.thep_qos_relprio = 0;
	}

	/* Calculate DARWIN_BG */
	bool wants_darwinbg        = false;
	bool wants_all_sockets_bg  = false; /* Do I want my existing sockets to be bg */

	if (task_effective.tep_darwinbg && !next.thep_promote_above_task) {
		wants_darwinbg = true;
	}

	/*
	 * If DARWIN_BG has been requested at either level, it's engaged.
	 * darwinbg threads always create bg sockets,
	 * but only some types of darwinbg change the sockets
	 * after they're created
	 */
	if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg) {
		wants_all_sockets_bg = wants_darwinbg = true;
	}

	if (requested.thrp_pidbind_bg) {
		wants_all_sockets_bg = wants_darwinbg = true;
	}

	/* BG and maintenance QoS imply darwinbg, but without re-marking existing sockets */
	if (next.thep_qos == THREAD_QOS_BACKGROUND ||
	    next.thep_qos == THREAD_QOS_MAINTENANCE) {
		wants_darwinbg = true;
	}

	/* Calculate side effects of DARWIN_BG */

	if (wants_darwinbg) {
		next.thep_darwinbg = 1;
		next.thep_wi_driven = 0;
	}

	if (next.thep_darwinbg || task_effective.tep_new_sockets_bg) {
		next.thep_new_sockets_bg = 1;
	}

	/* Don't use task_effective.tep_all_sockets_bg here */
	if (wants_all_sockets_bg) {
		next.thep_all_sockets_bg = 1;
	}

	/* darwinbg implies background QOS (or lower) */
	if (next.thep_darwinbg &&
	    (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
		next.thep_qos = THREAD_QOS_BACKGROUND;
		next.thep_qos_relprio = 0;
	}

	/* Calculate IO policy */

	int iopol = THROTTLE_LEVEL_TIER0;

	/* Factor in the task's IO policy */
	if (next.thep_darwinbg) {
		iopol = MAX(iopol, task_effective.tep_bg_iotier);
	}

	if (!next.thep_promote_above_task) {
		iopol = MAX(iopol, task_effective.tep_io_tier);
	}

	/* Look up the associated IO tier value for the QoS class */
	iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);

	iopol = MAX(iopol, requested.thrp_int_iotier);
	iopol = MAX(iopol, requested.thrp_ext_iotier);

	/* Apply the kevent iotier override */
	iopol = MIN(iopol, requested.thrp_iotier_kevent_override);

	next.thep_io_tier = iopol;

	/*
	 * If a QoS override is causing IO to go into a lower tier, we also set
	 * the passive bit so that a thread doesn't end up stuck in its own throttle
	 * window when the override goes away.
	 */

	int next_qos_iotier = thread_qos_policy_params.qos_iotier[next.thep_qos];
	int req_qos_iotier = thread_qos_policy_params.qos_iotier[requested.thrp_qos];
	bool qos_io_override_active = (next_qos_iotier < req_qos_iotier);

	/* Calculate Passive IO policy */
	if (requested.thrp_ext_iopassive ||
	    requested.thrp_int_iopassive ||
	    qos_io_override_active ||
	    task_effective.tep_io_passive) {
		next.thep_io_passive = 1;
	}

	/* Calculate timer QOS */
	uint32_t latency_qos = requested.thrp_latency_qos;

	if (!next.thep_promote_above_task) {
		latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
	}

	latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);

	next.thep_latency_qos = latency_qos;

	/* Calculate throughput QOS */
	uint32_t through_qos = requested.thrp_through_qos;

	if (!next.thep_promote_above_task) {
		through_qos = MAX(through_qos, task_effective.tep_through_qos);
	}

	through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);

	next.thep_through_qos = through_qos;

	if (task_effective.tep_terminated || requested.thrp_terminated) {
		/* Shoot down the throttles that slow down exit or response to SIGTERM */
		next.thep_terminated    = 1;
		next.thep_darwinbg      = 0;
		next.thep_io_tier       = THROTTLE_LEVEL_TIER0;
		next.thep_qos           = THREAD_QOS_UNSPECIFIED;
		next.thep_latency_qos   = LATENCY_QOS_TIER_UNSPECIFIED;
		next.thep_through_qos   = THROUGHPUT_QOS_TIER_UNSPECIFIED;
		next.thep_wi_driven     = 0;
	}

	/*
	 * Step 3:
	 *  Swap out old policy for new policy
	 */

	struct thread_effective_policy prev = thread->effective_policy;

	/* Settle the per-QoS CPU-time accounting under the old policy before switching */
	thread_update_qos_cpu_time_locked(thread);

	/* This is the point where the new values become visible to other threads */
	thread->effective_policy = next;

	/*
	 * Step 4:
	 *  Pend updates that can't be done while holding the thread lock
	 */

	if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg) {
		pend_token->tpt_update_sockets = 1;
	}

	/* TODO: Doesn't this only need to be done if the throttle went up? */
	if (prev.thep_io_tier != next.thep_io_tier) {
		pend_token->tpt_update_throttle = 1;
	}

	/*
	 * Check for the attributes that sfi_thread_classify() consults,
	 *  and trigger SFI re-evaluation.
	 */
	if (prev.thep_qos != next.thep_qos ||
	    prev.thep_darwinbg != next.thep_darwinbg) {
		pend_token->tpt_update_thread_sfi = 1;
	}

	/* Remember the old base priority so we can tell whether it changed below */
	integer_t old_base_pri = thread->base_pri;

	/*
	 * Step 5:
	 *  Update other subsystems as necessary if something has changed
	 */

	/* Check for the attributes that thread_recompute_priority() consults */
	if (prev.thep_qos != next.thep_qos ||
	    prev.thep_qos_relprio != next.thep_qos_relprio ||
	    prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent ||
	    prev.thep_promote_above_task != next.thep_promote_above_task ||
	    prev.thep_terminated != next.thep_terminated ||
	    prev.thep_wi_driven != next.thep_wi_driven ||
	    pend_token->tpt_force_recompute_pri == 1 ||
	    recompute_priority) {
		thread_recompute_priority(thread);
	}

	/*
	 * Check if the thread is waiting on a turnstile and needs priority propagation.
	 */
	if (pend_token->tpt_update_turnstile &&
	    ((old_base_pri == thread->base_pri) ||
	    !thread_get_waiting_turnstile(thread))) {
		/*
		 * Reset update turnstile pend token since either
		 * the thread priority did not change or thread is
		 * not blocked on a turnstile.
		 */
		pend_token->tpt_update_turnstile = 0;
	}
}
1886 
1887 
1888 /*
1889  * Initiate a thread policy state transition on a thread with its TID
1890  * Useful if you cannot guarantee the thread won't get terminated
1891  * Precondition: No locks are held
1892  * Will take task lock - using the non-tid variant is faster
1893  * if you already have a thread ref.
1894  */
1895 void
proc_set_thread_policy_with_tid(task_t task,uint64_t tid,int category,int flavor,int value)1896 proc_set_thread_policy_with_tid(task_t     task,
1897     uint64_t   tid,
1898     int        category,
1899     int        flavor,
1900     int        value)
1901 {
1902 	/* takes task lock, returns ref'ed thread or NULL */
1903 	thread_t thread = task_findtid(task, tid);
1904 
1905 	if (thread == THREAD_NULL) {
1906 		return;
1907 	}
1908 
1909 	proc_set_thread_policy(thread, category, flavor, value);
1910 
1911 	thread_deallocate(thread);
1912 }
1913 
/*
 * Initiate a thread policy transition on a thread
 * This path supports networking transitions (i.e. darwinbg transitions)
 * Precondition: No locks are held
 */
void
proc_set_thread_policy(thread_t   thread,
    int        category,
    int        flavor,
    int        value)
{
	/* Accumulates work that must be done after all locks are dropped */
	struct task_pend_token pend_token = {};

	thread_mtx_lock(thread);

	proc_set_thread_policy_locked(thread, category, flavor, value, 0, &pend_token);

	thread_mtx_unlock(thread);

	/* Perform the deferred updates now that the thread mutex is released */
	thread_policy_update_complete_unlocked(thread, &pend_token);
}
1935 
/*
 * Do the things that can't be done while holding a thread mutex.
 * These are set up to call back into thread policy to get the latest value,
 * so they don't have to be synchronized with the update.
 * The only required semantic is 'call this sometime after updating effective policy'
 *
 * Precondition: Thread mutex is not held
 *
 * This may be called with the task lock held, but in that case it won't be
 * called with tpt_update_sockets set.
 */
void
thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
{
#ifdef MACH_BSD
	/* Push the new background state down onto the thread's existing sockets */
	if (pend_token->tpt_update_sockets) {
		proc_apply_task_networkbg(task_pid(get_threadtask(thread)), thread);
	}
#endif /* MACH_BSD */

	/* Re-evaluate the thread's I/O throttle tier (set when thep_io_tier changed) */
	if (pend_token->tpt_update_throttle) {
		rethrottle_thread(get_bsdthread_info(thread));
	}

	/* Re-run SFI classification (set when QoS or darwinbg changed) */
	if (pend_token->tpt_update_thread_sfi) {
		sfi_reevaluate(thread);
	}

	/* Propagate a priority change through the turnstile the thread waits on */
	if (pend_token->tpt_update_turnstile) {
		turnstile_update_thread_priority_chain(thread);
	}
}
1968 
/*
 * Set and update thread policy
 * Thread mutex might be held
 */
static void
proc_set_thread_policy_locked(thread_t          thread,
    int               category,
    int               flavor,
    int               value,
    int               value2,
    task_pend_token_t pend_token)
{
	/* Take the thread's scheduling spinlock at splsched around the update */
	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);

	thread_unlock(thread);
	splx(s);
}
1989 
/*
 * Set and update thread policy
 * Thread spinlock is held
 */
static void
proc_set_thread_policy_spinlocked(thread_t          thread,
    int               category,
    int               flavor,
    int               value,
    int               value2,
    task_pend_token_t pend_token)
{
	/* Trace the requested-policy words before the update */
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
	    thread_tid(thread), threquested_0(thread),
	    threquested_1(thread), value, 0);

	thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2, pend_token);

	/* Recompute effective policy; deferred work accumulates in pend_token */
	thread_policy_update_spinlocked(thread, false, pend_token);

	/* Trace the post-update state, including the pending-work bits */
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
	    thread_tid(thread), threquested_0(thread),
	    threquested_1(thread), tpending(pend_token), 0);
}
2016 
/*
 * Set the requested state for a specific flavor to a specific value.
 * Thread spinlock is held. Writes only requested policy; the caller is
 * responsible for recomputing effective policy afterwards.
 */
static void
thread_set_requested_policy_spinlocked(thread_t     thread,
    int               category,
    int               flavor,
    int               value,
    int               value2,
    task_pend_token_t pend_token)
{
	int tier, passive;

	/* Work on a local copy; the single store at the end publishes it */
	struct thread_requested_policy requested = thread->requested_policy;

	switch (flavor) {
	/* Category: EXTERNAL and INTERNAL, thread and task */

	case TASK_POLICY_DARWIN_BG:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_darwinbg = value;
		} else {
			requested.thrp_int_darwinbg = value;
		}
		break;

	case TASK_POLICY_IOPOL:
		/* Translate the I/O policy into a tier/passive pair before storing */
		proc_iopol_to_tier(value, &tier, &passive);
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iotier  = tier;
			requested.thrp_ext_iopassive = passive;
		} else {
			requested.thrp_int_iotier  = tier;
			requested.thrp_int_iopassive = passive;
		}
		break;

	case TASK_POLICY_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iotier = value;
		} else {
			requested.thrp_int_iotier = value;
		}
		break;

	case TASK_POLICY_PASSIVE_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iopassive = value;
		} else {
			requested.thrp_int_iopassive = value;
		}
		break;

	/* Category: ATTRIBUTE, thread only */

	case TASK_POLICY_PIDBIND_BG:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_pidbind_bg = value;
		break;

	case TASK_POLICY_LATENCY_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_latency_qos = value;
		break;

	case TASK_POLICY_THROUGH_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_through_qos = value;
		break;

	case TASK_POLICY_QOS_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_override = value;
		/* QoS changes may need to propagate through a turnstile */
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_AND_RELPRIO:
		/* Two-value flavor: value2 carries the relative priority */
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos = value;
		requested.thrp_qos_relprio = value2;
		pend_token->tpt_update_turnstile = 1;
		DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
		break;

	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_workq_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_PROMOTE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_promote = value;
		break;

	case TASK_POLICY_QOS_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_kevent_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_SERVICER_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_wlsvc_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_TERMINATED:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_terminated = value;
		break;

	case TASK_POLICY_IOTIER_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_iotier_kevent_override = value;
		break;

	case TASK_POLICY_WI_DRIVEN:
		/* Work-interval-driven mode: also flips the scheduler mode */
		assert(category == TASK_POLICY_ATTRIBUTE);
		assert(thread == current_thread());

		if (value != TH_MODE_NONE) {
			thread->static_param = true;
			sched_set_thread_mode_user(thread, value);
			requested.thrp_wi_driven = 1;
		} else {
			sched_set_thread_mode_user(thread, TH_MODE_TIMESHARE);
			requested.thrp_wi_driven = 0;
		}
		break;

	default:
		panic("unknown task policy: %d %d %d", category, flavor, value);
		break;
	}

	thread->requested_policy = requested;
}
2155 
2156 /*
2157  * Gets what you set. Effective values may be different.
2158  * Precondition: No locks are held
2159  */
2160 int
proc_get_thread_policy(thread_t thread,int category,int flavor)2161 proc_get_thread_policy(thread_t   thread,
2162     int        category,
2163     int        flavor)
2164 {
2165 	int value = 0;
2166 	thread_mtx_lock(thread);
2167 	value = proc_get_thread_policy_locked(thread, category, flavor, NULL);
2168 	thread_mtx_unlock(thread);
2169 	return value;
2170 }
2171 
/*
 * Read a requested policy value under the thread spinlock.
 * value2 (may be NULL) receives the secondary value for two-value flavors.
 */
static int
proc_get_thread_policy_locked(thread_t   thread,
    int        category,
    int        flavor,
    int*       value2)
{
	int value = 0;

	/* Read under the thread's scheduling spinlock at splsched */
	spl_t s = splsched();
	thread_lock(thread);

	value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2);

	thread_unlock(thread);
	splx(s);

	return value;
}
2190 
/*
 * Gets what you set. Effective values may be different.
 * Thread spinlock is held. For TASK_POLICY_QOS_AND_RELPRIO the relative
 * priority is returned through value2, which must be non-NULL.
 */
static int
thread_get_requested_policy_spinlocked(thread_t thread,
    int      category,
    int      flavor,
    int*     value2)
{
	int value = 0;

	struct thread_requested_policy requested = thread->requested_policy;

	switch (flavor) {
	case TASK_POLICY_DARWIN_BG:
		if (category == TASK_POLICY_EXTERNAL) {
			value = requested.thrp_ext_darwinbg;
		} else {
			value = requested.thrp_int_darwinbg;
		}
		break;
	case TASK_POLICY_IOPOL:
		/* Translate the stored tier/passive pair back into an I/O policy value */
		if (category == TASK_POLICY_EXTERNAL) {
			value = proc_tier_to_iopol(requested.thrp_ext_iotier,
			    requested.thrp_ext_iopassive);
		} else {
			value = proc_tier_to_iopol(requested.thrp_int_iotier,
			    requested.thrp_int_iopassive);
		}
		break;
	case TASK_POLICY_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			value = requested.thrp_ext_iotier;
		} else {
			value = requested.thrp_int_iotier;
		}
		break;
	case TASK_POLICY_PASSIVE_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			value = requested.thrp_ext_iopassive;
		} else {
			value = requested.thrp_int_iopassive;
		}
		break;
	case TASK_POLICY_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos;
		break;
	case TASK_POLICY_QOS_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_override;
		break;
	case TASK_POLICY_LATENCY_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_latency_qos;
		break;
	case TASK_POLICY_THROUGH_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_through_qos;
		break;
	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_workq_override;
		break;
	case TASK_POLICY_QOS_AND_RELPRIO:
		/* Two-value flavor: relative priority comes back through value2 */
		assert(category == TASK_POLICY_ATTRIBUTE);
		assert(value2 != NULL);
		value = requested.thrp_qos;
		*value2 = requested.thrp_qos_relprio;
		break;
	case TASK_POLICY_QOS_PROMOTE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_promote;
		break;
	case TASK_POLICY_QOS_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_kevent_override;
		break;
	case TASK_POLICY_QOS_SERVICER_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_wlsvc_override;
		break;
	case TASK_POLICY_TERMINATED:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_terminated;
		break;
	case TASK_POLICY_IOTIER_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_iotier_kevent_override;
		break;

	case TASK_POLICY_WI_DRIVEN:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_wi_driven;
		break;

	default:
		panic("unknown policy_flavor %d", flavor);
		break;
	}

	return value;
}
2294 
/*
 * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
 *
 * NOTE: This accessor does not take the task or thread lock.
 * Notifications of state updates need to be externally synchronized with state queries.
 * This routine *MUST* remain interrupt safe, as it is potentially invoked
 * within the context of a timer interrupt.
 *
 * TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
 *      Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
 *      I don't think that cost is worth not having the right answer.
 */
int
proc_get_effective_thread_policy(thread_t thread,
    int      flavor)
{
	int value = 0;

	switch (flavor) {
	case TASK_POLICY_DARWIN_BG:
		/*
		 * This call is used within the timer layer, as well as
		 * prioritizing requests to the graphics system.
		 * It also informs SFI and originator-bg-state.
		 * Returns 1 for background mode, 0 for normal mode
		 */

		value = thread->effective_policy.thep_darwinbg ? 1 : 0;
		break;
	case TASK_POLICY_IO:
		/*
		 * The I/O system calls here to find out what throttling tier to apply to an operation.
		 * Returns THROTTLE_LEVEL_* values
		 *
		 * An iotier_override (set lock-free via set_thread_iotier_override)
		 * can only make the tier more aggressive, never less.
		 */
		value = thread->effective_policy.thep_io_tier;
		if (thread->iotier_override != THROTTLE_LEVEL_NONE) {
			value = MIN(value, thread->iotier_override);
		}
		break;
	case TASK_POLICY_PASSIVE_IO:
		/*
		 * The I/O system calls here to find out whether an operation should be passive.
		 * (i.e. not cause operations with lower throttle tiers to be throttled)
		 * Returns 1 for passive mode, 0 for normal mode
		 *
		 * If an override is causing IO to go into a lower tier, we also set
		 * the passive bit so that a thread doesn't end up stuck in its own throttle
		 * window when the override goes away.
		 */
		value = thread->effective_policy.thep_io_passive ? 1 : 0;
		if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
		    thread->iotier_override < thread->effective_policy.thep_io_tier) {
			value = 1;
		}
		break;
	case TASK_POLICY_ALL_SOCKETS_BG:
		/*
		 * do_background_socket() calls this to determine whether
		 * it should change the thread's sockets
		 * Returns 1 for background mode, 0 for normal mode
		 * This consults both thread and task so un-DBGing a thread while the task is BG
		 * doesn't get you out of the network throttle.
		 */
		value = (thread->effective_policy.thep_all_sockets_bg ||
		    get_threadtask(thread)->effective_policy.tep_all_sockets_bg) ? 1 : 0;
		break;
	case TASK_POLICY_NEW_SOCKETS_BG:
		/*
		 * socreate() calls this to determine if it should mark a new socket as background
		 * Returns 1 for background mode, 0 for normal mode
		 */
		value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
		break;
	case TASK_POLICY_LATENCY_QOS:
		/*
		 * timer arming calls into here to find out the timer coalescing level
		 * Returns a latency QoS tier (0-6)
		 */
		value = thread->effective_policy.thep_latency_qos;
		break;
	case TASK_POLICY_THROUGH_QOS:
		/*
		 * This value is passed into the urgency callout from the scheduler
		 * to the performance management subsystem.
		 *
		 * Returns a throughput QoS tier (0-6)
		 */
		value = thread->effective_policy.thep_through_qos;
		break;
	case TASK_POLICY_QOS:
		/*
		 * This is communicated to the performance management layer and SFI.
		 *
		 * Returns a QoS policy tier
		 */
		value = thread->effective_policy.thep_qos;
		break;
	default:
		panic("unknown thread policy flavor %d", flavor);
		break;
	}

	return value;
}
2399 
2400 
2401 /*
2402  * (integer_t) casts limit the number of bits we can fit here
2403  * this interface is deprecated and replaced by the _EXT struct ?
2404  */
2405 static void
proc_get_thread_policy_bitfield(thread_t thread,thread_policy_state_t info)2406 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
2407 {
2408 	uint64_t bits = 0;
2409 	struct thread_requested_policy requested = thread->requested_policy;
2410 
2411 	bits |= (requested.thrp_int_darwinbg    ? POLICY_REQ_INT_DARWIN_BG  : 0);
2412 	bits |= (requested.thrp_ext_darwinbg    ? POLICY_REQ_EXT_DARWIN_BG  : 0);
2413 	bits |= (requested.thrp_int_iotier      ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
2414 	bits |= (requested.thrp_ext_iotier      ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
2415 	bits |= (requested.thrp_int_iopassive   ? POLICY_REQ_INT_PASSIVE_IO : 0);
2416 	bits |= (requested.thrp_ext_iopassive   ? POLICY_REQ_EXT_PASSIVE_IO : 0);
2417 
2418 	bits |= (requested.thrp_qos             ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
2419 	bits |= (requested.thrp_qos_override    ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT)   : 0);
2420 
2421 	bits |= (requested.thrp_pidbind_bg      ? POLICY_REQ_PIDBIND_BG     : 0);
2422 
2423 	bits |= (requested.thrp_latency_qos     ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
2424 	bits |= (requested.thrp_through_qos     ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);
2425 
2426 	info->requested = (integer_t) bits;
2427 	bits = 0;
2428 
2429 	struct thread_effective_policy effective = thread->effective_policy;
2430 
2431 	bits |= (effective.thep_darwinbg        ? POLICY_EFF_DARWIN_BG      : 0);
2432 
2433 	bits |= (effective.thep_io_tier         ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
2434 	bits |= (effective.thep_io_passive      ? POLICY_EFF_IO_PASSIVE     : 0);
2435 	bits |= (effective.thep_all_sockets_bg  ? POLICY_EFF_ALL_SOCKETS_BG : 0);
2436 	bits |= (effective.thep_new_sockets_bg  ? POLICY_EFF_NEW_SOCKETS_BG : 0);
2437 
2438 	bits |= (effective.thep_qos             ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);
2439 
2440 	bits |= (effective.thep_latency_qos     ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
2441 	bits |= (effective.thep_through_qos     ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);
2442 
2443 	info->effective = (integer_t)bits;
2444 	bits = 0;
2445 
2446 	info->pending = 0;
2447 }
2448 
2449 /*
2450  * Sneakily trace either the task and thread requested
2451  * or just the thread requested, depending on if we have enough room.
2452  * We do have room on LP64. On LP32, we have to split it between two uintptr_t's.
2453  *
2454  *                                LP32            LP64
2455  * threquested_0(thread)          thread[0]       task[0]
2456  * threquested_1(thread)          thread[1]       thread[0]
2457  *
2458  */
2459 
2460 uintptr_t
threquested_0(thread_t thread)2461 threquested_0(thread_t thread)
2462 {
2463 	static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");
2464 
2465 	uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2466 
2467 	return raw[0];
2468 }
2469 
2470 uintptr_t
threquested_1(thread_t thread)2471 threquested_1(thread_t thread)
2472 {
2473 #if defined __LP64__
2474 	return *(uintptr_t*)&get_threadtask(thread)->requested_policy;
2475 #else
2476 	uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2477 	return raw[1];
2478 #endif
2479 }
2480 
2481 uintptr_t
theffective_0(thread_t thread)2482 theffective_0(thread_t thread)
2483 {
2484 	static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");
2485 
2486 	uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2487 	return raw[0];
2488 }
2489 
2490 uintptr_t
theffective_1(thread_t thread)2491 theffective_1(thread_t thread)
2492 {
2493 #if defined __LP64__
2494 	return *(uintptr_t*)&get_threadtask(thread)->effective_policy;
2495 #else
2496 	uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2497 	return raw[1];
2498 #endif
2499 }
2500 
2501 
/*
 * Set an override on the thread which is consulted with a
 * higher priority than the task/thread policy. This should
 * only be set for temporary grants until the thread
 * returns to the userspace boundary
 *
 * We use atomic operations to swap in the override, with
 * the assumption that the thread itself can
 * read the override and clear it on return to userspace.
 *
 * No locking is performed, since it is acceptable to see
 * a stale override for one loop through throttle_lowpri_io().
 * However a thread reference must be held on the thread.
 */

void
set_thread_iotier_override(thread_t thread, int policy)
{
	int current_override;

	/* Let most aggressive I/O policy win until user boundary */
	do {
		current_override = thread->iotier_override;

		/* Lower tier number is more aggressive; never weaken an existing override */
		if (current_override != THROTTLE_LEVEL_NONE) {
			policy = MIN(current_override, policy);
		}

		if (current_override == policy) {
			/* no effective change */
			return;
		}
		/* CAS retry loop: another CPU may have raced in a new override */
	} while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));

	/*
	 * Since the thread may be currently throttled,
	 * re-evaluate tiers and potentially break out
	 * of an msleep
	 */
	rethrottle_thread(get_bsdthread_info(thread));
}
2543 
2544 /*
2545  * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
2546  * semaphores, dispatch_sync) may result in priority inversions where a higher priority
2547  * (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower
2548  * priority thread. In these cases, we attempt to propagate the priority token, as long
2549  * as the subsystem informs us of the relationships between the threads. The userspace
2550  * synchronization subsystem should maintain the information of owner->resource and
2551  * resource->waiters itself.
2552  */
2553 
2554 /*
2555  * This helper canonicalizes the resource/resource_type given the current qos_override_mode
2556  * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
2557  * to be handled specially in the future, but for now it's fine to slam
2558  * *resource to USER_ADDR_NULL even if it was previously a wildcard.
2559  */
2560 static void
canonicalize_resource_and_type(user_addr_t * resource,int * resource_type)2561 canonicalize_resource_and_type(user_addr_t *resource, int *resource_type)
2562 {
2563 	if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2564 		/* Map all input resource/type to a single one */
2565 		*resource = USER_ADDR_NULL;
2566 		*resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
2567 	} else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
2568 		/* no transform */
2569 	} else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
2570 		/* Map all mutex overrides to a single one, to avoid memory overhead */
2571 		if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
2572 			*resource = USER_ADDR_NULL;
2573 		}
2574 	}
2575 }
2576 
2577 /* This helper routine finds an existing override if known. Locking should be done by caller */
2578 static struct thread_qos_override *
find_qos_override(thread_t thread,user_addr_t resource,int resource_type)2579 find_qos_override(thread_t thread,
2580     user_addr_t resource,
2581     int resource_type)
2582 {
2583 	struct thread_qos_override *override;
2584 
2585 	override = thread->overrides;
2586 	while (override) {
2587 		if (override->override_resource == resource &&
2588 		    override->override_resource_type == resource_type) {
2589 			return override;
2590 		}
2591 
2592 		override = override->override_next;
2593 	}
2594 
2595 	return NULL;
2596 }
2597 
/*
 * Decrement (or zero, if 'reset' is set) the contended-resource count of
 * every override matching resource/resource_type. The WILDCARD resource
 * and type values match all entries. Entries whose count reaches zero are
 * unlinked and chained onto *free_override_list so the caller can zfree
 * them after dropping locks. Locking should be done by the caller.
 */
static void
find_and_decrement_qos_override(thread_t       thread,
    user_addr_t    resource,
    int            resource_type,
    boolean_t      reset,
    struct thread_qos_override **free_override_list)
{
	struct thread_qos_override *override, *override_prev;

	override_prev = NULL;
	override = thread->overrides;
	while (override) {
		/* Capture next up front: this node may be unlinked below */
		struct thread_qos_override *override_next = override->override_next;

		if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
		    (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) {
			if (reset) {
				override->override_contended_resource_count = 0;
			} else {
				override->override_contended_resource_count--;
			}

			if (override->override_contended_resource_count == 0) {
				/* Unlink from the thread's list */
				if (override_prev == NULL) {
					thread->overrides = override_next;
				} else {
					override_prev->override_next = override_next;
				}

				/* Add to out-param for later zfree */
				override->override_next = *free_override_list;
				*free_override_list = override;
			} else {
				override_prev = override;
			}

			/* A non-wildcard resource matches at most one entry; stop early */
			if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
				return;
			}
		} else {
			override_prev = override;
		}

		override = override_next;
	}
}
2644 
2645 /* This helper recalculates the current requested override using the policy selected at boot */
2646 static int
calculate_requested_qos_override(thread_t thread)2647 calculate_requested_qos_override(thread_t thread)
2648 {
2649 	if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2650 		return THREAD_QOS_UNSPECIFIED;
2651 	}
2652 
2653 	/* iterate over all overrides and calculate MAX */
2654 	struct thread_qos_override *override;
2655 	int qos_override = THREAD_QOS_UNSPECIFIED;
2656 
2657 	override = thread->overrides;
2658 	while (override) {
2659 		qos_override = MAX(qos_override, override->override_qos);
2660 		override = override->override_next;
2661 	}
2662 
2663 	return qos_override;
2664 }
2665 
/*
 * Add (or bump) a QoS override on 'thread' for the given resource.
 *
 * Returns:
 * - 0 on success
 * - EINVAL if some invalid input was passed
 */
static int
proc_thread_qos_add_override_internal(thread_t         thread,
    int              override_qos,
    boolean_t        first_override_for_resource,
    user_addr_t      resource,
    int              resource_type)
{
	struct task_pend_token pend_token = {};
	int rc = 0;

	thread_mtx_lock(thread);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
	    thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);

	DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
	    uint64_t, thread->requested_policy.thrp_qos,
	    uint64_t, thread->effective_policy.thep_qos,
	    int, override_qos, boolean_t, first_override_for_resource);

	struct thread_qos_override *override;
	struct thread_qos_override *override_new = NULL;
	int new_qos_override, prev_qos_override;
	int new_effective_qos;

	/* Collapse resource/type per the boot-selected override mode */
	canonicalize_resource_and_type(&resource, &resource_type);

	override = find_qos_override(thread, resource, resource_type);
	if (first_override_for_resource && !override) {
		/* We need to allocate a new object. Drop the thread lock and
		 * recheck afterwards in case someone else added the override
		 */
		thread_mtx_unlock(thread);
		override_new = zalloc(thread_qos_override_zone);
		thread_mtx_lock(thread);
		override = find_qos_override(thread, resource, resource_type);
	}
	if (first_override_for_resource && override) {
		/* Someone else already allocated while the thread lock was dropped */
		override->override_contended_resource_count++;
	} else if (!override && override_new) {
		/* Link the freshly allocated entry at the head of the list */
		override = override_new;
		override_new = NULL;
		override->override_next = thread->overrides;
		/* since first_override_for_resource was TRUE */
		override->override_contended_resource_count = 1;
		override->override_resource = resource;
		override->override_resource_type = (int16_t)resource_type;
		override->override_qos = THREAD_QOS_UNSPECIFIED;
		thread->overrides = override;
	}

	/* An override's QoS only ratchets upward until it is removed */
	if (override) {
		if (override->override_qos == THREAD_QOS_UNSPECIFIED) {
			override->override_qos = (int16_t)override_qos;
		} else {
			override->override_qos = MAX(override->override_qos, (int16_t)override_qos);
		}
	}

	/* Determine how to combine the various overrides into a single current
	 * requested override
	 */
	new_qos_override = calculate_requested_qos_override(thread);

	prev_qos_override = proc_get_thread_policy_locked(thread,
	    TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);

	if (new_qos_override != prev_qos_override) {
		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_QOS_OVERRIDE,
		    new_qos_override, 0, &pend_token);
	}

	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

	thread_mtx_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	/* Free the allocation if the recheck found an existing entry */
	if (override_new) {
		zfree(thread_qos_override_zone, override_new);
	}

	DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
	    int, new_qos_override, int, new_effective_qos, int, rc);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
	    new_qos_override, resource, resource_type, 0, 0);

	return rc;
}
2763 
2764 int
proc_thread_qos_add_override(task_t task,thread_t thread,uint64_t tid,int override_qos,boolean_t first_override_for_resource,user_addr_t resource,int resource_type)2765 proc_thread_qos_add_override(task_t           task,
2766     thread_t         thread,
2767     uint64_t         tid,
2768     int              override_qos,
2769     boolean_t        first_override_for_resource,
2770     user_addr_t      resource,
2771     int              resource_type)
2772 {
2773 	boolean_t has_thread_reference = FALSE;
2774 	int rc = 0;
2775 
2776 	if (thread == THREAD_NULL) {
2777 		thread = task_findtid(task, tid);
2778 		/* returns referenced thread */
2779 
2780 		if (thread == THREAD_NULL) {
2781 			KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
2782 			    tid, 0, 0xdead, 0, 0);
2783 			return ESRCH;
2784 		}
2785 		has_thread_reference = TRUE;
2786 	} else {
2787 		assert(get_threadtask(thread) == task);
2788 	}
2789 	rc = proc_thread_qos_add_override_internal(thread, override_qos,
2790 	    first_override_for_resource, resource, resource_type);
2791 	if (has_thread_reference) {
2792 		thread_deallocate(thread);
2793 	}
2794 
2795 	return rc;
2796 }
2797 
/*
 * Remove (or, when 'reset' is TRUE, fully clear) a usync QoS override for
 * (resource, resource_type) on 'thread', then recompute and apply the
 * combined requested override.
 *
 * Lock ordering: thread mutex protects the override chain and the current
 * override value; the thread spinlock (under splsched) protects the rest of
 * thread policy.  Freed override objects are deferred until all locks drop.
 */
static void
proc_thread_qos_remove_override_internal(thread_t       thread,
    user_addr_t    resource,
    int            resource_type,
    boolean_t      reset)
{
	struct task_pend_token pend_token = {};

	/* Override objects unlinked below; freed only after all locks are dropped */
	struct thread_qos_override *deferred_free_override_list = NULL;
	int new_qos_override, prev_qos_override, new_effective_qos;

	thread_mtx_lock(thread);

	canonicalize_resource_and_type(&resource, &resource_type);

	/* Drops (or resets) the contended count; fully-released objects land on the deferred list */
	find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
	    thread_tid(thread), resource, reset, 0, 0);

	DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
	    uint64_t, thread->requested_policy.thrp_qos,
	    uint64_t, thread->effective_policy.thep_qos);

	/* Determine how to combine the various overrides into a single current requested override */
	new_qos_override = calculate_requested_qos_override(thread);

	spl_t s = splsched();
	thread_lock(thread);

	/*
	 * The override chain and therefore the value of the current override is locked with thread mutex,
	 * so we can do a get/set without races.  However, the rest of thread policy is locked under the spinlock.
	 * This means you can't change the current override from a spinlock-only setter.
	 */
	prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);

	if (new_qos_override != prev_qos_override) {
		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token);
	}

	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);

	/* Perform any policy work pended while the locks were held */
	thread_policy_update_complete_unlocked(thread, &pend_token);

	/* Safe to free now: no locks held */
	while (deferred_free_override_list) {
		struct thread_qos_override *override_next = deferred_free_override_list->override_next;

		zfree(thread_qos_override_zone, deferred_free_override_list);
		deferred_free_override_list = override_next;
	}

	DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
	    int, new_qos_override, int, new_effective_qos);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
	    thread_tid(thread), 0, 0, 0, 0);
}
2861 
2862 int
proc_thread_qos_remove_override(task_t task,thread_t thread,uint64_t tid,user_addr_t resource,int resource_type)2863 proc_thread_qos_remove_override(task_t      task,
2864     thread_t    thread,
2865     uint64_t    tid,
2866     user_addr_t resource,
2867     int         resource_type)
2868 {
2869 	boolean_t has_thread_reference = FALSE;
2870 
2871 	if (thread == THREAD_NULL) {
2872 		thread = task_findtid(task, tid);
2873 		/* returns referenced thread */
2874 
2875 		if (thread == THREAD_NULL) {
2876 			KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2877 			    tid, 0, 0xdead, 0, 0);
2878 			return ESRCH;
2879 		}
2880 		has_thread_reference = TRUE;
2881 	} else {
2882 		assert(task == get_threadtask(thread));
2883 	}
2884 
2885 	proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE);
2886 
2887 	if (has_thread_reference) {
2888 		thread_deallocate(thread);
2889 	}
2890 
2891 	return 0;
2892 }
2893 
2894 /* Deallocate before thread termination */
2895 void
proc_thread_qos_deallocate(thread_t thread)2896 proc_thread_qos_deallocate(thread_t thread)
2897 {
2898 	/* This thread must have no more IPC overrides. */
2899 	assert(thread->kevent_overrides == 0);
2900 	assert(thread->requested_policy.thrp_qos_kevent_override == THREAD_QOS_UNSPECIFIED);
2901 	assert(thread->requested_policy.thrp_qos_wlsvc_override == THREAD_QOS_UNSPECIFIED);
2902 
2903 	/*
2904 	 * Clear out any lingering override objects.
2905 	 */
2906 	struct thread_qos_override *override;
2907 
2908 	thread_mtx_lock(thread);
2909 	override = thread->overrides;
2910 	thread->overrides = NULL;
2911 	thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2912 	/* We don't need to re-evaluate thread policy here because the thread has already exited */
2913 	thread_mtx_unlock(thread);
2914 
2915 	while (override) {
2916 		struct thread_qos_override *override_next = override->override_next;
2917 
2918 		zfree(thread_qos_override_zone, override);
2919 		override = override_next;
2920 	}
2921 }
2922 
/*
 * Set up the primordial thread's QoS
 *
 * Computes the main thread QoS from the task's policy and applies it
 * (QoS + relative priority) under the thread mutex, completing any
 * pended policy work after the lock is dropped.
 */
void
task_set_main_thread_qos(task_t task, thread_t thread)
{
	struct task_pend_token pend_token = {};

	assert(get_threadtask(thread) == task);

	thread_mtx_lock(thread);

	/* Trace the requested policy before and after the change (DBG_FUNC_START/END pair) */
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
	    thread_tid(thread), threquested_0(thread), threquested_1(thread),
	    thread->requested_policy.thrp_qos, 0);

	thread_qos_t primordial_qos = task_compute_main_thread_qos(task);

	/* Relative priority is 0 for the main thread's initial QoS */
	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
	    primordial_qos, 0, &pend_token);

	thread_mtx_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
	    thread_tid(thread), threquested_0(thread), threquested_1(thread),
	    primordial_qos, 0);
}
2954 
2955 /*
2956  * KPI for pthread kext
2957  *
2958  * Return a good guess at what the initial manager QoS will be
2959  * Dispatch can override this in userspace if it so chooses
2960  */
2961 thread_qos_t
task_get_default_manager_qos(task_t task)2962 task_get_default_manager_qos(task_t task)
2963 {
2964 	thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
2965 
2966 	if (primordial_qos == THREAD_QOS_LEGACY) {
2967 		primordial_qos = THREAD_QOS_USER_INITIATED;
2968 	}
2969 
2970 	return primordial_qos;
2971 }
2972 
/*
 * Check if the kernel promotion on thread has changed
 * and apply it.
 *
 * Returns TRUE if the promotion value changed (and the scheduler
 * priority was recomputed), FALSE otherwise.
 *
 * thread locked on entry and exit
 */
boolean_t
thread_recompute_kernel_promotion_locked(thread_t thread)
{
	boolean_t needs_update = FALSE;
	/* The turnstile inheritor sched priority is the source of truth for the kernel promotion */
	uint8_t kern_promotion_schedpri = (uint8_t)thread_get_inheritor_turnstile_sched_priority(thread);

	/*
	 * For now just assert that kern_promotion_schedpri <= MAXPRI_PROMOTE.
	 * TURNSTILE_KERNEL_PROMOTE adds threads on the waitq already capped to MAXPRI_PROMOTE
	 * and propagates the priority through the chain with the same cap, because as of now it does
	 * not differenciate on the kernel primitive.
	 *
	 * If this assumption will change with the adoption of a kernel primitive that does not
	 * cap the when adding/propagating,
	 * then here is the place to put the generic cap for all kernel primitives
	 * (converts the assert to kern_promotion_schedpri = MIN(priority, MAXPRI_PROMOTE))
	 */
	assert(kern_promotion_schedpri <= MAXPRI_PROMOTE);

	if (kern_promotion_schedpri != thread->kern_promotion_schedpri) {
		KDBG(MACHDBG_CODE(
			    DBG_MACH_SCHED, MACH_TURNSTILE_KERNEL_CHANGE) | DBG_FUNC_NONE,
		    thread_tid(thread),
		    kern_promotion_schedpri,
		    thread->kern_promotion_schedpri);

		needs_update = TRUE;
		thread->kern_promotion_schedpri = kern_promotion_schedpri;
		/* Fold the new promotion into the thread's scheduled priority */
		thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
	}

	return needs_update;
}
3012 
/*
 * Check if the user promotion on thread has changed
 * and apply it.
 *
 * Returns TRUE if the thread is waiting on a turnstile and its base
 * priority changed (so the caller may need to propagate further).
 *
 * thread locked on entry, might drop the thread lock
 * and reacquire it.
 */
boolean_t
thread_recompute_user_promotion_locked(thread_t thread)
{
	boolean_t needs_update = FALSE;
	struct task_pend_token pend_token = {};
	/* User promotions are capped at MAXPRI_USER */
	uint8_t user_promotion_basepri = MIN((uint8_t)thread_get_inheritor_turnstile_base_priority(thread), MAXPRI_USER);
	int old_base_pri = thread->base_pri;
	thread_qos_t qos_promotion;

	/* Check if user promotion has changed */
	if (thread->user_promotion_basepri == user_promotion_basepri) {
		return needs_update;
	} else {
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    (TURNSTILE_CODE(TURNSTILE_PRIORITY_OPERATIONS, (THREAD_USER_PROMOTION_CHANGE))) | DBG_FUNC_NONE,
		    thread_tid(thread),
		    user_promotion_basepri,
		    thread->user_promotion_basepri,
		    0, 0);
		KDBG(MACHDBG_CODE(
			    DBG_MACH_SCHED, MACH_TURNSTILE_USER_CHANGE) | DBG_FUNC_NONE,
		    thread_tid(thread),
		    user_promotion_basepri,
		    thread->user_promotion_basepri);
	}

	/* Update the user promotion base pri */
	thread->user_promotion_basepri = user_promotion_basepri;
	pend_token.tpt_force_recompute_pri = 1;

	/* Map the promotion base pri to a QoS promotion for QoS-managed threads */
	if (user_promotion_basepri <= MAXPRI_THROTTLE) {
		qos_promotion = THREAD_QOS_UNSPECIFIED;
	} else {
		qos_promotion = thread_user_promotion_qos_for_pri(user_promotion_basepri);
	}

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_PROMOTE, qos_promotion, 0, &pend_token);

	if (thread_get_waiting_turnstile(thread) &&
	    thread->base_pri != old_base_pri) {
		needs_update = TRUE;
	}

	/* Drop the thread lock to complete pended policy work, then reacquire */
	thread_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	thread_lock(thread);

	return needs_update;
}
3072 
3073 /*
3074  * Convert the thread user promotion base pri to qos for threads in qos world.
3075  * For priority above UI qos, the qos would be set to UI.
3076  */
3077 thread_qos_t
thread_user_promotion_qos_for_pri(int priority)3078 thread_user_promotion_qos_for_pri(int priority)
3079 {
3080 	thread_qos_t qos;
3081 	for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
3082 		if (thread_qos_policy_params.qos_pri[qos] <= priority) {
3083 			return qos;
3084 		}
3085 	}
3086 	return THREAD_QOS_MAINTENANCE;
3087 }
3088 
/*
 * Set the thread's QoS Kevent override
 * Owned by the Kevent subsystem
 *
 * May be called with spinlocks held, but not spinlocks
 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
 *
 * One 'add' must be balanced by one 'drop'.
 * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
 * Before the thread is deallocated, there must be 0 remaining overrides.
 */
static void
thread_kevent_override(thread_t    thread,
    uint32_t    qos_override,
    boolean_t   is_new_override)
{
	struct task_pend_token pend_token = {};
	boolean_t needs_update;

	spl_t s = splsched();
	thread_lock(thread);

	uint32_t old_override = thread->requested_policy.thrp_qos_kevent_override;

	/* The override value must be a real QoS class */
	assert(qos_override > THREAD_QOS_UNSPECIFIED);
	assert(qos_override < THREAD_QOS_LAST);

	if (is_new_override) {
		if (thread->kevent_overrides++ == 0) {
			/* This add is the first override for this thread */
			assert(old_override == THREAD_QOS_UNSPECIFIED);
		} else {
			/* There are already other overrides in effect for this thread */
			assert(old_override > THREAD_QOS_UNSPECIFIED);
		}
	} else {
		/* There must be at least one override (the previous add call) in effect */
		assert(thread->kevent_overrides > 0);
		assert(old_override > THREAD_QOS_UNSPECIFIED);
	}

	/*
	 * We can't allow lowering if there are several IPC overrides because
	 * the caller can't possibly know the whole truth
	 */
	if (thread->kevent_overrides == 1) {
		/* Sole override: the caller's value is authoritative, raise or lower */
		needs_update = qos_override != old_override;
	} else {
		/* Multiple overrides: the stored value is saturated, only raise */
		needs_update = qos_override > old_override;
	}

	if (needs_update) {
		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_QOS_KEVENT_OVERRIDE,
		    qos_override, 0, &pend_token);
		/* A QoS override must never require socket updates */
		assert(pend_token.tpt_update_sockets == 0);
	}

	thread_unlock(thread);
	splx(s);

	thread_policy_update_complete_unlocked(thread, &pend_token);
}
3152 
/* Add a new kevent QoS override; must be balanced by thread_drop_kevent_override */
void
thread_add_kevent_override(thread_t thread, uint32_t qos_override)
{
	thread_kevent_override(thread, qos_override, TRUE);
}
3158 
/* Update the value of an existing kevent QoS override (between add and drop) */
void
thread_update_kevent_override(thread_t thread, uint32_t qos_override)
{
	thread_kevent_override(thread, qos_override, FALSE);
}
3164 
/*
 * Drop one kevent QoS override (balancing a prior add).
 * When the last override is dropped, the saturated override value is cleared.
 */
void
thread_drop_kevent_override(thread_t thread)
{
	struct task_pend_token pend_token = {};

	spl_t s = splsched();
	thread_lock(thread);

	/* Drops must be balanced against prior adds */
	assert(thread->kevent_overrides > 0);

	if (--thread->kevent_overrides == 0) {
		/*
		 * There are no more overrides for this thread, so we should
		 * clear out the saturated override value
		 */

		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_QOS_KEVENT_OVERRIDE, THREAD_QOS_UNSPECIFIED,
		    0, &pend_token);
	}

	thread_unlock(thread);
	splx(s);

	thread_policy_update_complete_unlocked(thread, &pend_token);
}
3191 
/*
 * Set the thread's QoS Workloop Servicer override
 * Owned by the Kevent subsystem
 *
 * May be called with spinlocks held, but not spinlocks
 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
 *
 * One 'add' must be balanced by one 'drop'.
 * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
 * Before the thread is deallocated, there must be 0 remaining overrides.
 */
static void
thread_servicer_override(thread_t    thread,
    uint32_t    qos_override,
    boolean_t   is_new_override)
{
	struct task_pend_token pend_token = {};

	spl_t s = splsched();
	thread_lock(thread);

	/* Unlike kevent overrides, at most one servicer override is in effect at a time */
	if (is_new_override) {
		assert(!thread->requested_policy.thrp_qos_wlsvc_override);
	} else {
		assert(thread->requested_policy.thrp_qos_wlsvc_override);
	}

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_SERVICER_OVERRIDE,
	    qos_override, 0, &pend_token);

	thread_unlock(thread);
	splx(s);

	/* A QoS override must never require socket updates */
	assert(pend_token.tpt_update_sockets == 0);
	thread_policy_update_complete_unlocked(thread, &pend_token);
}
3229 
/* Add a servicer QoS override; must be balanced by thread_drop_servicer_override */
void
thread_add_servicer_override(thread_t thread, uint32_t qos_override)
{
	/* The override value must be a real QoS class */
	assert(qos_override > THREAD_QOS_UNSPECIFIED);
	assert(qos_override < THREAD_QOS_LAST);

	thread_servicer_override(thread, qos_override, TRUE);
}
3238 
/* Update the value of an existing servicer QoS override (between add and drop) */
void
thread_update_servicer_override(thread_t thread, uint32_t qos_override)
{
	/* The override value must be a real QoS class */
	assert(qos_override > THREAD_QOS_UNSPECIFIED);
	assert(qos_override < THREAD_QOS_LAST);

	thread_servicer_override(thread, qos_override, FALSE);
}
3247 
/* Drop the servicer QoS override by resetting it to THREAD_QOS_UNSPECIFIED */
void
thread_drop_servicer_override(thread_t thread)
{
	thread_servicer_override(thread, THREAD_QOS_UNSPECIFIED, FALSE);
}
3253 
/*
 * Update the thread's kevent-servicer IO tier override, skipping the
 * policy machinery entirely when the value is unchanged.
 */
void
thread_update_servicer_iotier_override(thread_t thread, uint8_t iotier_override)
{
	struct task_pend_token pend_token = {};
	uint8_t current_iotier;

	/* Check if the update is needed */
	/* NOTE(review): this read uses the _spinlocked getter without taking the
	 * thread lock — presumably an intentional racy fast-path check, with the
	 * authoritative set happening under the lock below; confirm. */
	current_iotier = (uint8_t)thread_get_requested_policy_spinlocked(thread,
	    TASK_POLICY_ATTRIBUTE, TASK_POLICY_IOTIER_KEVENT_OVERRIDE, NULL);

	if (iotier_override == current_iotier) {
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_IOTIER_KEVENT_OVERRIDE,
	    iotier_override, 0, &pend_token);

	thread_unlock(thread);
	splx(s);

	/* An IO tier override must never require socket updates */
	assert(pend_token.tpt_update_sockets == 0);
	thread_policy_update_complete_unlocked(thread, &pend_token);
}
3281 
3282 /* Get current requested qos / relpri, may be called from spinlock context */
3283 thread_qos_t
thread_get_requested_qos(thread_t thread,int * relpri)3284 thread_get_requested_qos(thread_t thread, int *relpri)
3285 {
3286 	int relprio_value = 0;
3287 	thread_qos_t qos;
3288 
3289 	qos = (thread_qos_t)proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
3290 	    TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
3291 	if (relpri) {
3292 		*relpri = -relprio_value;
3293 	}
3294 	return qos;
3295 }
3296 
/*
 * This function will promote the thread priority
 * since exec could block other threads calling
 * proc_find on the proc. This boost must be removed
 * via call to thread_clear_exec_promotion.
 *
 * This should be replaced with a generic 'priority inheriting gate' mechanism (24194397)
 */
void
thread_set_exec_promotion(thread_t thread)
{
	spl_t s = splsched();
	thread_lock(thread);

	/* Apply the exec promotion flag under the thread lock */
	sched_thread_promote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);

	thread_unlock(thread);
	splx(s);
}
3316 
/*
 * This function will clear the exec thread
 * promotion set on the thread by thread_set_exec_promotion.
 */
void
thread_clear_exec_promotion(thread_t thread)
{
	spl_t s = splsched();
	thread_lock(thread);

	/* Remove the exec promotion flag under the thread lock */
	sched_thread_unpromote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);

	thread_unlock(thread);
	splx(s);
}
3332 
#if CONFIG_SCHED_RT_RESTRICT
/*
 * flag set by -time-constraint-policy-restrict boot-arg to restrict use of
 * THREAD_TIME_CONSTRAINT_POLICY and THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY
 * to threads that have joined a workinterval with WORK_INTERVAL_WORKLOAD_ID_RT_ALLOWED.
 */
static TUNABLE(
	bool,
	restrict_time_constraint_policy,
	"-time-constraint-policy-restrict",
	false
	);

/*
 * Re-evaluate whether a user thread may keep its realtime scheduling mode,
 * demoting or undemoting it based on its work interval's RT_ALLOWED flag.
 */
void
thread_rt_evaluate(thread_t thread)
{
	/* If no restrictions are configured - nothing to do. */
	if (!restrict_time_constraint_policy) {
		return;
	}

	/* User threads only. */
	if (get_threadtask(thread) == kernel_task) {
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	const thread_work_interval_flags_t flags =
	    os_atomic_load(&thread->th_work_interval_flags, relaxed);

	/*
	 * RT threads are demoted if they are no longer joined to a work
	 * interval which has the RT_ALLOWED flag set (and not already demoted).
	 */
	if (((thread->sched_flags & TH_SFLAG_RT_RESTRICTED) == 0) &&
	    ((flags & TH_WORK_INTERVAL_FLAGS_RT_ALLOWED) == 0) &&
	    (thread->sched_mode == TH_MODE_REALTIME || thread->saved_mode == TH_MODE_REALTIME)) {
		sched_thread_mode_demote(thread, TH_SFLAG_RT_RESTRICTED);
	}

	/*
	 * If demoted and joined to a work interval which allows RT threads,
	 * then undemote.
	 */
	if (((thread->sched_flags & TH_SFLAG_RT_RESTRICTED) != 0) &&
	    ((flags & TH_WORK_INTERVAL_FLAGS_RT_ALLOWED) != 0)) {
		sched_thread_mode_undemote(thread, TH_SFLAG_RT_RESTRICTED);
	}

	thread_unlock(thread);
	splx(s);
}
#else

/* No-op when RT restriction support is compiled out */
void
thread_rt_evaluate(__unused thread_t thread)
{
}
#endif /*  CONFIG_SCHED_RT_RESTRICT */
3394