xref: /xnu-8020.140.41/osfmk/kern/thread_policy.c (revision 27b03b360a988dfd3dfdf34262bb0042026747cc)
1 /*
2  * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <mach/mach_types.h>
30 #include <mach/thread_act_server.h>
31 
32 #include <kern/kern_types.h>
33 #include <kern/processor.h>
34 #include <kern/thread.h>
35 #include <kern/affinity.h>
36 #include <mach/task_policy.h>
37 #include <kern/sfi.h>
38 #include <kern/policy_internal.h>
39 #include <sys/errno.h>
40 #include <sys/ulock.h>
41 
42 #include <mach/machine/sdt.h>
43 
/* Typed zone backing per-thread QoS override records. */
static KALLOC_TYPE_DEFINE(thread_qos_override_zone,
    struct thread_qos_override, KT_DEFAULT);

#ifdef MACH_BSD
/* BSD-layer helpers used for tracing and for rethrottling uthreads. */
extern int      proc_selfpid(void);
extern char *   proc_name_address(void *p);
extern void     rethrottle_thread(void * uthread);
#endif /* MACH_BSD */

/* Strip a QOS_TIER_* value down to its low-byte tier index. */
#define QOS_EXTRACT(q)        ((q) & 0xff)

/* Selectable strategies for combining multiple QoS overrides on a thread. */
#define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
#define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 3

/* Boot-arg-tunable override mode; defaults to fine-grained w/ single mutex override. */
TUNABLE(uint32_t, qos_override_mode, "qos_override_mode",
    QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE);
62 
63 static void
64 proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset);
65 
/*
 * Default value for the kevent IO-tier override field.
 * THROTTLE_LEVEL_END presumably denotes "no override in effect" —
 * TODO(review): confirm against the throttle-level definitions.
 */
const int thread_default_iotier_override  = THROTTLE_LEVEL_END;

/* Template requested-policy for a new thread: only the kevent IO-tier
 * override deviates from all-zeroes. */
const struct thread_requested_policy default_thread_requested_policy = {
	.thrp_iotier_kevent_override = thread_default_iotier_override
};
71 
72 /*
73  * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
74  * to threads that don't have a QoS class set.
75  */
76 const qos_policy_params_t thread_qos_policy_params = {
77 	/*
78 	 * This table defines the starting base priority of the thread,
79 	 * which will be modified by the thread importance and the task max priority
80 	 * before being applied.
81 	 */
82 	.qos_pri[THREAD_QOS_UNSPECIFIED]                = 0, /* not consulted */
83 	.qos_pri[THREAD_QOS_USER_INTERACTIVE]           = BASEPRI_BACKGROUND, /* i.e. 46 */
84 	.qos_pri[THREAD_QOS_USER_INITIATED]             = BASEPRI_USER_INITIATED,
85 	.qos_pri[THREAD_QOS_LEGACY]                     = BASEPRI_DEFAULT,
86 	.qos_pri[THREAD_QOS_UTILITY]                    = BASEPRI_UTILITY,
87 	.qos_pri[THREAD_QOS_BACKGROUND]                 = MAXPRI_THROTTLE,
88 	.qos_pri[THREAD_QOS_MAINTENANCE]                = MAXPRI_THROTTLE,
89 
90 	/*
91 	 * This table defines the highest IO priority that a thread marked with this
92 	 * QoS class can have.
93 	 */
94 	.qos_iotier[THREAD_QOS_UNSPECIFIED]             = THROTTLE_LEVEL_TIER0,
95 	.qos_iotier[THREAD_QOS_USER_INTERACTIVE]        = THROTTLE_LEVEL_TIER0,
96 	.qos_iotier[THREAD_QOS_USER_INITIATED]          = THROTTLE_LEVEL_TIER0,
97 	.qos_iotier[THREAD_QOS_LEGACY]                  = THROTTLE_LEVEL_TIER0,
98 	.qos_iotier[THREAD_QOS_UTILITY]                 = THROTTLE_LEVEL_TIER1,
99 	.qos_iotier[THREAD_QOS_BACKGROUND]              = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */
100 	.qos_iotier[THREAD_QOS_MAINTENANCE]             = THROTTLE_LEVEL_TIER3,
101 
102 	/*
103 	 * This table defines the highest QoS level that
104 	 * a thread marked with this QoS class can have.
105 	 */
106 
107 	.qos_through_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
108 	.qos_through_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
109 	.qos_through_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
110 	.qos_through_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
111 	.qos_through_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
112 	.qos_through_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
113 	.qos_through_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
114 
115 	.qos_latency_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
116 	.qos_latency_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(LATENCY_QOS_TIER_0),
117 	.qos_latency_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(LATENCY_QOS_TIER_1),
118 	.qos_latency_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(LATENCY_QOS_TIER_1),
119 	.qos_latency_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(LATENCY_QOS_TIER_3),
120 	.qos_latency_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(LATENCY_QOS_TIER_3),
121 	.qos_latency_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(LATENCY_QOS_TIER_3),
122 };
123 
124 static void
125 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);
126 
127 static int
128 thread_qos_scaled_relative_priority(int qos, int qos_relprio);
129 
130 static void
131 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);
132 
133 static void
134 proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
135 
136 static void
137 proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
138 
139 static void
140 thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
141 
142 static int
143 thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);
144 
145 static int
146 proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);
147 
148 static void
149 thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
150 
151 static void
152 thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
153 
154 boolean_t
thread_has_qos_policy(thread_t thread)155 thread_has_qos_policy(thread_t thread)
156 {
157 	return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
158 }
159 
160 
161 static void
thread_remove_qos_policy_locked(thread_t thread,task_pend_token_t pend_token)162 thread_remove_qos_policy_locked(thread_t thread,
163     task_pend_token_t pend_token)
164 {
165 	__unused int prev_qos = thread->requested_policy.thrp_qos;
166 
167 	DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);
168 
169 	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
170 	    THREAD_QOS_UNSPECIFIED, 0, pend_token);
171 }
172 
173 kern_return_t
thread_remove_qos_policy(thread_t thread)174 thread_remove_qos_policy(thread_t thread)
175 {
176 	struct task_pend_token pend_token = {};
177 
178 	thread_mtx_lock(thread);
179 	if (!thread->active) {
180 		thread_mtx_unlock(thread);
181 		return KERN_TERMINATED;
182 	}
183 
184 	thread_remove_qos_policy_locked(thread, &pend_token);
185 
186 	thread_mtx_unlock(thread);
187 
188 	thread_policy_update_complete_unlocked(thread, &pend_token);
189 
190 	return KERN_SUCCESS;
191 }
192 
193 
194 boolean_t
thread_is_static_param(thread_t thread)195 thread_is_static_param(thread_t thread)
196 {
197 	if (thread->static_param) {
198 		DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
199 		return TRUE;
200 	}
201 	return FALSE;
202 }
203 
204 /*
205  * Relative priorities can range between 0REL and -15REL. These
206  * map to QoS-specific ranges, to create non-overlapping priority
207  * ranges.
208  */
209 static int
thread_qos_scaled_relative_priority(int qos,int qos_relprio)210 thread_qos_scaled_relative_priority(int qos, int qos_relprio)
211 {
212 	int next_lower_qos;
213 
214 	/* Fast path, since no validation or scaling is needed */
215 	if (qos_relprio == 0) {
216 		return 0;
217 	}
218 
219 	switch (qos) {
220 	case THREAD_QOS_USER_INTERACTIVE:
221 		next_lower_qos = THREAD_QOS_USER_INITIATED;
222 		break;
223 	case THREAD_QOS_USER_INITIATED:
224 		next_lower_qos = THREAD_QOS_LEGACY;
225 		break;
226 	case THREAD_QOS_LEGACY:
227 		next_lower_qos = THREAD_QOS_UTILITY;
228 		break;
229 	case THREAD_QOS_UTILITY:
230 		next_lower_qos = THREAD_QOS_BACKGROUND;
231 		break;
232 	case THREAD_QOS_MAINTENANCE:
233 	case THREAD_QOS_BACKGROUND:
234 		next_lower_qos = 0;
235 		break;
236 	default:
237 		panic("Unrecognized QoS %d", qos);
238 		return 0;
239 	}
240 
241 	int prio_range_max = thread_qos_policy_params.qos_pri[qos];
242 	int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0;
243 
244 	/*
245 	 * We now have the valid range that the scaled relative priority can map to. Note
246 	 * that the lower bound is exclusive, but the upper bound is inclusive. If the
247 	 * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
248 	 * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
249 	 * remainder.
250 	 */
251 	int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4);
252 
253 	return scaled_relprio;
254 }
255 
256 /*
257  * flag set by -qos-policy-allow boot-arg to allow
258  * testing thread qos policy from userspace
259  */
260 static TUNABLE(bool, allow_qos_policy_set, "-qos-policy-allow", false);
261 
/*
 * Mach entry point for setting a thread's scheduling policy.
 *
 * Unless the -qos-policy-allow boot-arg is set, rejects static-param
 * threads, direct THREAD_QOS_POLICY sets, and time-constraint-with-priority
 * requests that don't use the base RT priority.  If the thread currently
 * has a requested QoS, that QoS is removed before the new policy is
 * applied and restored if the apply fails.
 */
kern_return_t
thread_policy_set(
	thread_t                                thread,
	thread_policy_flavor_t  flavor,
	thread_policy_t                 policy_info,
	mach_msg_type_number_t  count)
{
	thread_qos_policy_data_t req_qos;
	kern_return_t kr;

	req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;

	if (thread == THREAD_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (!allow_qos_policy_set) {
		if (thread_is_static_param(thread)) {
			return KERN_POLICY_STATIC;
		}

		/* Userspace may not set QoS directly through this interface. */
		if (flavor == THREAD_QOS_POLICY) {
			return KERN_INVALID_ARGUMENT;
		}

		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			if (count < THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT) {
				return KERN_INVALID_ARGUMENT;
			}
			thread_time_constraint_with_priority_policy_t info = (thread_time_constraint_with_priority_policy_t)policy_info;
			/* Only the base RT priority is permitted without the boot-arg. */
			if (info->priority != BASEPRI_RTQUEUES) {
				return KERN_INVALID_ARGUMENT;
			}
		}
	}

	/* Threads without static_param set reset their QoS when other policies are applied. */
	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		/* Store the existing tier, if we fail this call it is used to reset back. */
		req_qos.qos_tier = thread->requested_policy.thrp_qos;
		req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;

		kr = thread_remove_qos_policy(thread);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
	}

	kr = thread_policy_set_internal(thread, flavor, policy_info, count);

	/*
	 * Return KERN_QOS_REMOVED instead of KERN_SUCCESS if we succeeded.
	 * NOTE(review): the code below does not actually rewrite kr on
	 * success — it only restores the saved QoS tier on failure; the
	 * comment above appears stale.
	 */
	if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
		if (kr != KERN_SUCCESS) {
			/* Reset back to our original tier as the set failed. */
			(void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
		}
	}

	return kr;
}
322 
/*
 * thread_policy_set_internal() handles THREAD_TIME_CONSTRAINT_POLICY and
 * THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY through one cast to the
 * larger "with priority" struct; these asserts pin the shared field
 * layout that makes that cast safe.
 */
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, period) == offsetof(thread_time_constraint_policy_data_t, period));
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, computation) == offsetof(thread_time_constraint_policy_data_t, computation));
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, constraint) == offsetof(thread_time_constraint_policy_data_t, constraint));
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, preemptible) == offsetof(thread_time_constraint_policy_data_t, preemptible));
327 
/*
 * Apply one thread-policy flavor to a thread, without the QoS-stripping
 * wrapper behavior of thread_policy_set().
 *
 * Takes and drops the thread mutex; per-flavor scheduler state is
 * modified at splsched under the thread (spin) lock, and deferred
 * effects accumulated in pend_token are flushed via
 * thread_policy_update_complete_unlocked() once all locks are dropped.
 */
kern_return_t
thread_policy_set_internal(
	thread_t                     thread,
	thread_policy_flavor_t       flavor,
	thread_policy_t              policy_info,
	mach_msg_type_number_t       count)
{
	kern_return_t result = KERN_SUCCESS;
	struct task_pend_token pend_token = {};

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return KERN_TERMINATED;
	}

	switch (flavor) {
	case THREAD_EXTENDED_POLICY:
	{
		/* With no (or short) info, default to timeshare. */
		boolean_t timeshare = TRUE;

		if (count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t info;

			info = (thread_extended_policy_t)policy_info;
			timeshare = info->timeshare;
		}

		sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;

		spl_t s = splsched();
		thread_lock(thread);

		thread_set_user_sched_mode_and_recompute_pri(thread, mode);

		thread_unlock(thread);
		splx(s);

		/* Mode changes may alter the thread's SFI classification. */
		pend_token.tpt_update_thread_sfi = 1;

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
	{
		thread_time_constraint_with_priority_policy_t info;

		/* Both flavors share a layout prefix (see static_asserts above). */
		mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
		    THREAD_TIME_CONSTRAINT_POLICY_COUNT :
		    THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);

		if (count < min_count) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_with_priority_policy_t)policy_info;


		/* computation must fit within the constraint and the RT quantum bounds. */
		if (info->constraint < info->computation ||
		    info->computation > max_rt_quantum ||
		    info->computation < min_rt_quantum) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Bump computation up to at least half the constraint, capped at max_rt_quantum. */
		if (info->computation < (info->constraint / 2)) {
			info->computation = (info->constraint / 2);
			if (info->computation > max_rt_quantum) {
				info->computation = max_rt_quantum;
			}
		}

		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			/* Explicit RT priority must lie in [BASEPRI_RTQUEUES, MAXPRI]. */
			if ((info->priority < BASEPRI_RTQUEUES) || (info->priority > MAXPRI)) {
				result = KERN_INVALID_ARGUMENT;
				break;
			}
		}

		spl_t s = splsched();
		thread_lock(thread);

		thread->realtime.period          = info->period;
		thread->realtime.computation     = info->computation;
		thread->realtime.constraint      = info->constraint;
		thread->realtime.preemptible     = info->preemptible;
		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			/* Stored as an offset above the RT base priority. */
			thread->realtime.priority_offset = (uint8_t)(info->priority - BASEPRI_RTQUEUES);
		} else {
			thread->realtime.priority_offset = 0;
			/* Or check for override from allowed thread group? */
		}

		thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);

		thread_unlock(thread);
		splx(s);

		pend_token.tpt_update_thread_sfi = 1;

		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t info;

		if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}
		info = (thread_precedence_policy_t)policy_info;

		spl_t s = splsched();
		thread_lock(thread);

		/* Importance feeds directly into base-priority computation. */
		thread->importance = info->importance;

		thread_recompute_priority(thread);

		thread_unlock(thread);
		splx(s);

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		thread_affinity_policy_t info;

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t) policy_info;
		/*
		 * Unlock the thread mutex here and
		 * return directly after calling thread_affinity_set().
		 * This is necessary for correct lock ordering because
		 * thread_affinity_set() takes the task lock.
		 */
		thread_mtx_unlock(thread);
		return thread_affinity_set(thread, info->affinity_tag);
	}

#if !defined(XNU_TARGET_OS_OSX)
	case THREAD_BACKGROUND_POLICY:
	{
		thread_background_policy_t info;

		if (count < THREAD_BACKGROUND_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Only a task's own threads may be backgrounded this way. */
		if (get_threadtask(thread) != current_task()) {
			result = KERN_PROTECTION_FAILURE;
			break;
		}

		info = (thread_background_policy_t) policy_info;

		int enable;

		if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG) {
			enable = TASK_POLICY_ENABLE;
		} else {
			enable = TASK_POLICY_DISABLE;
		}

		/* Self-sets are "internal"; sets on other threads are "external". */
		int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;

		proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token);

		break;
	}
#endif /* !defined(XNU_TARGET_OS_OSX) */

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		thread_throughput_qos_t tqos;

		if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS) {
			break;
		}

		tqos = qos_extract(info->thread_throughput_qos_tier);

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);

		break;
	}

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		thread_latency_qos_t lqos;

		if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS) {
			break;
		}

		lqos = qos_extract(info->thread_latency_qos_tier);

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);

		break;
	}

	case THREAD_QOS_POLICY:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* tier_importance is a relative priority in [THREAD_QOS_MIN_TIER_IMPORTANCE, 0]. */
		if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* No relative priority allowed without a QoS tier to scale it against. */
		if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
		    info->qos_tier, -info->tier_importance, &pend_token);

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	return result;
}
599 
600 /*
601  * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
602  * Both result in FIXED mode scheduling.
603  */
604 static sched_mode_t
convert_policy_to_sched_mode(integer_t policy)605 convert_policy_to_sched_mode(integer_t policy)
606 {
607 	switch (policy) {
608 	case POLICY_TIMESHARE:
609 		return TH_MODE_TIMESHARE;
610 	case POLICY_RR:
611 	case POLICY_FIFO:
612 		return TH_MODE_FIXED;
613 	default:
614 		panic("unexpected sched policy: %d", policy);
615 		return TH_MODE_NONE;
616 	}
617 }
618 
619 /*
620  * Called either with the thread mutex locked
621  * or from the pthread kext in a 'safe place'.
622  */
/*
 * Set a thread's user scheduling mode and its absolute priority by
 * reverse-engineering an importance value from the requested priority.
 *
 * Returns KERN_FAILURE if the thread is (or was demoted from) realtime,
 * and silently succeeds without changes if the thread's policy has been
 * reset (policy_reset).  Sets tpt_update_thread_sfi in pend_token when
 * the mode actually changes.
 */
static kern_return_t
thread_set_mode_and_absolute_pri_internal(thread_t              thread,
    sched_mode_t          mode,
    integer_t             priority,
    task_pend_token_t     pend_token)
{
	kern_return_t kr = KERN_SUCCESS;

	spl_t s = splsched();
	thread_lock(thread);

	/* This path isn't allowed to change a thread out of realtime. */
	if ((thread->sched_mode == TH_MODE_REALTIME) ||
	    (thread->saved_mode == TH_MODE_REALTIME)) {
		kr = KERN_FAILURE;
		goto unlock;
	}

	if (thread->policy_reset) {
		kr = KERN_SUCCESS;
		goto unlock;
	}

	sched_mode_t old_mode = thread->sched_mode;

	/*
	 * Reverse engineer and apply the correct importance value
	 * from the requested absolute priority value.
	 *
	 * TODO: Store the absolute priority value instead
	 */

	/* Convert the absolute priority into an offset relative to its band. */
	if (priority >= thread->max_priority) {
		priority = thread->max_priority - thread->task_priority;
	} else if (priority >= MINPRI_KERNEL) {
		priority -=  MINPRI_KERNEL;
	} else if (priority >= MINPRI_RESERVED) {
		priority -=  MINPRI_RESERVED;
	} else {
		priority -= BASEPRI_DEFAULT;
	}

	/* Re-anchor the offset on the task's priority, then clamp. */
	priority += thread->task_priority;

	if (priority > thread->max_priority) {
		priority = thread->max_priority;
	} else if (priority < MINPRI) {
		priority = MINPRI;
	}

	thread->importance = priority - thread->task_priority;

	thread_set_user_sched_mode_and_recompute_pri(thread, mode);

	if (mode != old_mode) {
		pend_token->tpt_update_thread_sfi = 1;
	}

unlock:
	thread_unlock(thread);
	splx(s);

	return kr;
}
687 
688 void
thread_freeze_base_pri(thread_t thread)689 thread_freeze_base_pri(thread_t thread)
690 {
691 	assert(thread == current_thread());
692 
693 	spl_t s = splsched();
694 	thread_lock(thread);
695 
696 	assert((thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN) == 0);
697 	thread->sched_flags |= TH_SFLAG_BASE_PRI_FROZEN;
698 
699 	thread_unlock(thread);
700 	splx(s);
701 }
702 
703 bool
thread_unfreeze_base_pri(thread_t thread)704 thread_unfreeze_base_pri(thread_t thread)
705 {
706 	assert(thread == current_thread());
707 	integer_t base_pri;
708 	ast_t ast = 0;
709 
710 	spl_t s = splsched();
711 	thread_lock(thread);
712 
713 	assert(thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN);
714 	thread->sched_flags &= ~TH_SFLAG_BASE_PRI_FROZEN;
715 
716 	base_pri = thread->req_base_pri;
717 	if (base_pri != thread->base_pri) {
718 		/*
719 		 * This function returns "true" if the base pri change
720 		 * is the most likely cause for the preemption.
721 		 */
722 		sched_set_thread_base_priority(thread, base_pri);
723 		ast = ast_peek(AST_PREEMPT);
724 	}
725 
726 	thread_unlock(thread);
727 	splx(s);
728 
729 	return ast != 0;
730 }
731 
732 uint8_t
thread_workq_pri_for_qos(thread_qos_t qos)733 thread_workq_pri_for_qos(thread_qos_t qos)
734 {
735 	assert(qos < THREAD_QOS_LAST);
736 	return (uint8_t)thread_qos_policy_params.qos_pri[qos];
737 }
738 
739 thread_qos_t
thread_workq_qos_for_pri(int priority)740 thread_workq_qos_for_pri(int priority)
741 {
742 	thread_qos_t qos;
743 	if (priority > thread_qos_policy_params.qos_pri[THREAD_QOS_USER_INTERACTIVE]) {
744 		// indicate that workq should map >UI threads to workq's
745 		// internal notation for above-UI work.
746 		return THREAD_QOS_UNSPECIFIED;
747 	}
748 	for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
749 		// map a given priority up to the next nearest qos band.
750 		if (thread_qos_policy_params.qos_pri[qos - 1] < priority) {
751 			return qos;
752 		}
753 	}
754 	return THREAD_QOS_MAINTENANCE;
755 }
756 
757 /*
758  * private interface for pthread workqueues
759  *
760  * Set scheduling policy & absolute priority for thread
761  * May be called with spinlocks held
762  * Thread mutex lock is not held
763  */
764 void
thread_reset_workq_qos(thread_t thread,uint32_t qos)765 thread_reset_workq_qos(thread_t thread, uint32_t qos)
766 {
767 	struct task_pend_token pend_token = {};
768 
769 	assert(qos < THREAD_QOS_LAST);
770 
771 	spl_t s = splsched();
772 	thread_lock(thread);
773 
774 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
775 	    TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
776 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
777 	    TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0,
778 	    &pend_token);
779 
780 	assert(pend_token.tpt_update_sockets == 0);
781 
782 	thread_unlock(thread);
783 	splx(s);
784 
785 	thread_policy_update_complete_unlocked(thread, &pend_token);
786 }
787 
788 /*
789  * private interface for pthread workqueues
790  *
791  * Set scheduling policy & absolute priority for thread
792  * May be called with spinlocks held
793  * Thread mutex lock is held
794  */
795 void
thread_set_workq_override(thread_t thread,uint32_t qos)796 thread_set_workq_override(thread_t thread, uint32_t qos)
797 {
798 	struct task_pend_token pend_token = {};
799 
800 	assert(qos < THREAD_QOS_LAST);
801 
802 	spl_t s = splsched();
803 	thread_lock(thread);
804 
805 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
806 	    TASK_POLICY_QOS_WORKQ_OVERRIDE, qos, 0, &pend_token);
807 
808 	assert(pend_token.tpt_update_sockets == 0);
809 
810 	thread_unlock(thread);
811 	splx(s);
812 
813 	thread_policy_update_complete_unlocked(thread, &pend_token);
814 }
815 
816 /*
817  * private interface for pthread workqueues
818  *
819  * Set scheduling policy & absolute priority for thread
820  * May be called with spinlocks held
821  * Thread mutex lock is not held
822  */
823 void
thread_set_workq_pri(thread_t thread,thread_qos_t qos,integer_t priority,integer_t policy)824 thread_set_workq_pri(thread_t  thread,
825     thread_qos_t qos,
826     integer_t priority,
827     integer_t policy)
828 {
829 	struct task_pend_token pend_token = {};
830 	sched_mode_t mode = convert_policy_to_sched_mode(policy);
831 
832 	assert(qos < THREAD_QOS_LAST);
833 	assert(thread->static_param);
834 
835 	if (!thread->static_param || !thread->active) {
836 		return;
837 	}
838 
839 	spl_t s = splsched();
840 	thread_lock(thread);
841 
842 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
843 	    TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
844 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
845 	    TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED,
846 	    0, &pend_token);
847 
848 	thread_unlock(thread);
849 	splx(s);
850 
851 	/* Concern: this doesn't hold the mutex... */
852 
853 	__assert_only kern_return_t kr;
854 	kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority,
855 	    &pend_token);
856 	assert(kr == KERN_SUCCESS);
857 
858 	if (pend_token.tpt_update_thread_sfi) {
859 		sfi_reevaluate(thread);
860 	}
861 }
862 
863 /*
864  * thread_set_mode_and_absolute_pri:
865  *
866  * Set scheduling policy & absolute priority for thread, for deprecated
867  * thread_set_policy and thread_policy interfaces.
868  *
869  * Called with nothing locked.
870  */
kern_return_t
thread_set_mode_and_absolute_pri(thread_t   thread,
    integer_t  policy,
    integer_t  priority)
{
	kern_return_t kr = KERN_SUCCESS;
	struct task_pend_token pend_token = {};

	sched_mode_t mode = convert_policy_to_sched_mode(policy);

	thread_mtx_lock(thread);

	if (!thread->active) {
		kr = KERN_TERMINATED;
		goto unlock;
	}

	/* Static-param threads may not have their policy changed here. */
	if (thread_is_static_param(thread)) {
		kr = KERN_POLICY_STATIC;
		goto unlock;
	}

	/* Setting legacy policies on threads kills the current QoS */
	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		thread_remove_qos_policy_locked(thread, &pend_token);
	}

	kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);

unlock:
	thread_mtx_unlock(thread);

	/* Flush deferred policy effects now that the mutex is dropped. */
	thread_policy_update_complete_unlocked(thread, &pend_token);

	return kr;
}
907 
908 /*
909  * Set the thread's requested mode and recompute priority
910  * Called with thread mutex and thread locked
911  *
912  * TODO: Mitigate potential problems caused by moving thread to end of runq
913  * whenever its priority is recomputed
914  *      Only remove when it actually changes? Attempt to re-insert at appropriate location?
915  */
static void
thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
{
	/* Policy-reset threads keep their current (reset) mode. */
	if (thread->policy_reset) {
		return;
	}

	/* Pull the thread off its run queue while its priority is in flux. */
	boolean_t removed = thread_run_queue_remove(thread);

	/*
	 * TODO: Instead of having saved mode, have 'user mode' and 'true mode'.
	 * That way there's zero confusion over which the user wants
	 * and which the kernel wants.
	 */
	if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK) {
		/* Demoted threads stash the user's mode until the demotion lifts. */
		thread->saved_mode = mode;
	} else {
		sched_set_thread_mode(thread, mode);
	}

	thread_recompute_priority(thread);

	if (removed) {
		thread_run_queue_reinsert(thread, SCHED_TAILQ);
	}
}
942 
/*
 * Credit the CPU time this thread has accumulated since the previous call
 * to the owning task's per-QoS time buckets, once under the thread's
 * current *effective* QoS and once under its *requested* QoS.
 *
 * called at splsched with thread lock locked
 */
static void
thread_update_qos_cpu_time_locked(thread_t thread)
{
	task_t task = get_threadtask(thread);
	uint64_t timer_sum, timer_delta;

	/*
	 * This is only as accurate as the distance between
	 * last context switch (embedded) or last user/kernel boundary transition (desktop)
	 * because user_timer and system_timer are only updated then.
	 *
	 * TODO: Consider running a timer_update operation here to update it first.
	 *       Maybe doable with interrupts disabled from current thread.
	 *       If the thread is on a different core, may not be easy to get right.
	 *
	 * TODO: There should be a function for this in timer.c
	 */

	timer_sum = timer_grab(&thread->user_timer);
	timer_sum += timer_grab(&thread->system_timer);
	timer_delta = timer_sum - thread->vtimer_qos_save;

	/* Remember the running total so the next call only charges the new delta. */
	thread->vtimer_qos_save = timer_sum;

	uint64_t* task_counter = NULL;

	/* Update the task-level effective and requested qos stats atomically, because we don't have the task lock. */
	switch (thread->effective_policy.thep_qos) {
	case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default; break;
	case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance; break;
	case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background; break;
	case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility; break;
	case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy; break;
	case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated; break;
	case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break;
	default:
		panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
	}

	OSAddAtomic64(timer_delta, task_counter);

	/* Update the task-level qos stats atomically, because we don't have the task lock. */
	switch (thread->requested_policy.thrp_qos) {
	case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default; break;
	case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance; break;
	case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background; break;
	case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility; break;
	case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy; break;
	case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated; break;
	case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break;
	default:
		panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos);
	}

	OSAddAtomic64(timer_delta, task_counter);
}
1000 
1001 /*
1002  * called with no thread locks held
1003  * may hold task lock
1004  */
1005 void
thread_update_qos_cpu_time(thread_t thread)1006 thread_update_qos_cpu_time(thread_t thread)
1007 {
1008 	thread_mtx_lock(thread);
1009 
1010 	spl_t s = splsched();
1011 	thread_lock(thread);
1012 
1013 	thread_update_qos_cpu_time_locked(thread);
1014 
1015 	thread_unlock(thread);
1016 	splx(s);
1017 
1018 	thread_mtx_unlock(thread);
1019 }
1020 
1021 /*
1022  * Calculate base priority from thread attributes, and set it on the thread
1023  *
1024  * Called with thread_lock and thread mutex held.
1025  */
1026 extern boolean_t vps_dynamic_priority_enabled;
1027 
1028 void
thread_recompute_priority(thread_t thread)1029 thread_recompute_priority(
1030 	thread_t                thread)
1031 {
1032 	integer_t               priority;
1033 
1034 	if (thread->policy_reset) {
1035 		return;
1036 	}
1037 
1038 	if (thread->sched_mode == TH_MODE_REALTIME) {
1039 		uint8_t i = thread->realtime.priority_offset;
1040 		assert((i >= 0) && (i < NRTQS));
1041 		priority = BASEPRI_RTQUEUES + i;
1042 		sched_set_thread_base_priority(thread, priority);
1043 		if (thread->realtime.deadline == RT_DEADLINE_NONE) {
1044 			/* Make sure the thread has a valid deadline */
1045 			uint64_t ctime = mach_absolute_time();
1046 			thread->realtime.deadline = thread->realtime.constraint + ctime;
1047 			KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SET_RT_DEADLINE) | DBG_FUNC_NONE,
1048 			    (uintptr_t)thread_tid(thread), thread->realtime.deadline, thread->realtime.computation, 1);
1049 		}
1050 		return;
1051 	} else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
1052 		int qos = thread->effective_policy.thep_qos;
1053 		int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
1054 		int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
1055 		int qos_scaled_relprio;
1056 
1057 		assert(qos >= 0 && qos < THREAD_QOS_LAST);
1058 		assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);
1059 
1060 		priority = thread_qos_policy_params.qos_pri[qos];
1061 		qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);
1062 
1063 		if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) {
1064 			/* Bump priority 46 to 47 when in a frontmost app */
1065 			qos_scaled_relprio += 1;
1066 		}
1067 
1068 		/* TODO: factor in renice priority here? */
1069 
1070 		priority += qos_scaled_relprio;
1071 	} else {
1072 		if (thread->importance > MAXPRI) {
1073 			priority = MAXPRI;
1074 		} else if (thread->importance < -MAXPRI) {
1075 			priority = -MAXPRI;
1076 		} else {
1077 			priority = thread->importance;
1078 		}
1079 
1080 		priority += thread->task_priority;
1081 	}
1082 
1083 	priority = MAX(priority, thread->user_promotion_basepri);
1084 
1085 	/*
1086 	 * Clamp priority back into the allowed range for this task.
1087 	 *  The initial priority value could be out of this range due to:
1088 	 *      Task clamped to BG or Utility (max-pri is 4, or 20)
1089 	 *      Task is user task (max-pri is 63)
1090 	 *      Task is kernel task (max-pri is 95)
1091 	 * Note that thread->importance is user-settable to any integer
1092 	 * via THREAD_PRECEDENCE_POLICY.
1093 	 */
1094 	if (priority > thread->max_priority) {
1095 		if (thread->effective_policy.thep_promote_above_task) {
1096 			priority = MAX(thread->max_priority, thread->user_promotion_basepri);
1097 		} else {
1098 			priority = thread->max_priority;
1099 		}
1100 	} else if (priority < MINPRI) {
1101 		priority = MINPRI;
1102 	}
1103 
1104 	if (thread->saved_mode == TH_MODE_REALTIME &&
1105 	    thread->sched_flags & TH_SFLAG_FAILSAFE) {
1106 		priority = DEPRESSPRI;
1107 	}
1108 
1109 	if (thread->effective_policy.thep_terminated == TRUE) {
1110 		/*
1111 		 * We temporarily want to override the expected priority to
1112 		 * ensure that the thread exits in a timely manner.
1113 		 * Note that this is allowed to exceed thread->max_priority
1114 		 * so that the thread is no longer clamped to background
1115 		 * during the final exit phase.
1116 		 */
1117 		if (priority < thread->task_priority) {
1118 			priority = thread->task_priority;
1119 		}
1120 		if (priority < BASEPRI_DEFAULT) {
1121 			priority = BASEPRI_DEFAULT;
1122 		}
1123 	}
1124 
1125 #if !defined(XNU_TARGET_OS_OSX)
1126 	/* No one can have a base priority less than MAXPRI_THROTTLE */
1127 	if (priority < MAXPRI_THROTTLE) {
1128 		priority = MAXPRI_THROTTLE;
1129 	}
1130 #endif /* !defined(XNU_TARGET_OS_OSX) */
1131 
1132 	sched_set_thread_base_priority(thread, priority);
1133 }
1134 
/*
 * Push new task-level priority bounds down onto a thread, applying or
 * lifting the background throttle demotion as the max priority crosses
 * MAXPRI_THROTTLE, then re-run the thread policy update.
 *
 * Called with the task lock held, but not the thread mutex or spinlock
 */
void
thread_policy_update_tasklocked(
	thread_t           thread,
	integer_t          priority,
	integer_t          max_priority,
	task_pend_token_t  pend_token)
{
	thread_mtx_lock(thread);

	/* Dead or terminating threads keep their frozen policy */
	if (!thread->active || thread->policy_reset) {
		thread_mtx_unlock(thread);
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	/*
	 * NOTE(review): old_max_priority is read by the throttle checks
	 * below, so the __unused annotation looks stale here — confirm
	 * whether any build configuration compiles those checks out.
	 */
	__unused
	integer_t old_max_priority = thread->max_priority;

	/* task_priority/max_priority are int16_t fields; range-check first */
	assert(priority >= INT16_MIN && priority <= INT16_MAX);
	thread->task_priority = (int16_t)priority;

	assert(max_priority >= INT16_MIN && max_priority <= INT16_MAX);
	thread->max_priority = (int16_t)max_priority;

	/*
	 * When backgrounding a thread, realtime and fixed priority threads
	 * should be demoted to timeshare background threads.
	 *
	 * TODO: Do this inside the thread policy update routine in order to avoid double
	 * remove/reinsert for a runnable thread
	 */
	if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
		sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED);
	} else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
		sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
	}

	thread_policy_update_spinlocked(thread, true, pend_token);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);
}
1182 
1183 /*
1184  * Reset thread to default state in preparation for termination
1185  * Called with thread mutex locked
1186  *
1187  * Always called on current thread, so we don't need a run queue remove
1188  */
1189 void
thread_policy_reset(thread_t thread)1190 thread_policy_reset(
1191 	thread_t                thread)
1192 {
1193 	spl_t           s;
1194 
1195 	assert(thread == current_thread());
1196 
1197 	s = splsched();
1198 	thread_lock(thread);
1199 
1200 	if (thread->sched_flags & TH_SFLAG_FAILSAFE) {
1201 		sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
1202 	}
1203 
1204 	if (thread->sched_flags & TH_SFLAG_THROTTLED) {
1205 		sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1206 	}
1207 
1208 	/* At this point, the various demotions should be inactive */
1209 	assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
1210 	assert(!(thread->sched_flags & TH_SFLAG_THROTTLED));
1211 	assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK));
1212 
1213 	/* Reset thread back to task-default basepri and mode  */
1214 	sched_mode_t newmode = SCHED(initial_thread_sched_mode)(get_threadtask(thread));
1215 
1216 	sched_set_thread_mode(thread, newmode);
1217 
1218 	thread->importance = 0;
1219 
1220 	/* Prevent further changes to thread base priority or mode */
1221 	thread->policy_reset = 1;
1222 
1223 	sched_set_thread_base_priority(thread, thread->task_priority);
1224 
1225 	thread_unlock(thread);
1226 	splx(s);
1227 }
1228 
/*
 * Fetch one flavor of scheduling policy from a thread.
 *
 * If *get_default is TRUE on entry (or becomes TRUE because the thread is
 * not in the state the flavor describes, e.g. not realtime), the flavor's
 * default values are returned instead of the thread's current settings.
 *
 * Returns KERN_INVALID_ARGUMENT for a null thread, undersized *count, or
 * unknown flavor; KERN_TERMINATED for an inactive thread.
 */
kern_return_t
thread_policy_get(
	thread_t                                thread,
	thread_policy_flavor_t  flavor,
	thread_policy_t                 policy_info,
	mach_msg_type_number_t  *count,
	boolean_t                               *get_default)
{
	kern_return_t                   result = KERN_SUCCESS;

	if (thread == THREAD_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return KERN_TERMINATED;
	}

	switch (flavor) {
	case THREAD_EXTENDED_POLICY:
	{
		boolean_t               timeshare = TRUE;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			/* Realtime threads fall back to the default answer */
			if ((thread->sched_mode != TH_MODE_REALTIME) &&
			    (thread->saved_mode != TH_MODE_REALTIME)) {
				/* While demoted, the user's mode lives in saved_mode */
				if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
					timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
				} else {
					timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
				}
			} else {
				*get_default = TRUE;
			}

			thread_unlock(thread);
			splx(s);
		}

		if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t        info;

			info = (thread_extended_policy_t)policy_info;
			info->timeshare = timeshare;
		}

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
	{
		thread_time_constraint_with_priority_policy_t         info;

		/* The WITH_PRIORITY variant returns one extra field */
		mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
		    THREAD_TIME_CONSTRAINT_POLICY_COUNT :
		    THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);

		if (*count < min_count) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_with_priority_policy_t)policy_info;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			if ((thread->sched_mode == TH_MODE_REALTIME) ||
			    (thread->saved_mode == TH_MODE_REALTIME)) {
				info->period = thread->realtime.period;
				info->computation = thread->realtime.computation;
				info->constraint = thread->realtime.constraint;
				info->preemptible = thread->realtime.preemptible;
				if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
					info->priority = thread->realtime.priority_offset + BASEPRI_RTQUEUES;
				}
			} else {
				/* Not a realtime thread: report the defaults below */
				*get_default = TRUE;
			}

			thread_unlock(thread);
			splx(s);
		}

		if (*get_default) {
			info->period = 0;
			info->computation = default_timeshare_computation;
			info->constraint = default_timeshare_constraint;
			info->preemptible = TRUE;
			if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
				info->priority = BASEPRI_RTQUEUES;
			}
		}


		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t              info;

		if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_precedence_policy_t)policy_info;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			info->importance = thread->importance;

			thread_unlock(thread);
			splx(s);
		} else {
			info->importance = 0;
		}

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		thread_affinity_policy_t                info;

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (*count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t)policy_info;

		if (!(*get_default)) {
			info->affinity_tag = thread_affinity_get(thread);
		} else {
			info->affinity_tag = THREAD_AFFINITY_TAG_NULL;
		}

		break;
	}

	case THREAD_POLICY_STATE:
	{
		thread_policy_state_t           info;

		if (*count < THREAD_POLICY_STATE_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Only root can get this info */
		if (!task_is_privileged(current_task())) {
			result = KERN_PROTECTION_FAILURE;
			break;
		}

		info = (thread_policy_state_t)(void*)policy_info;

		if (!(*get_default)) {
			info->flags = 0;

			spl_t s = splsched();
			thread_lock(thread);

			info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0);

			/* Raw snapshots of the requested/effective policy bitfields */
			info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy);
			info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy);

			info->thps_user_promotions          = 0;
			info->thps_user_promotion_basepri   = thread->user_promotion_basepri;
			info->thps_ipc_overrides            = thread->kevent_overrides;

			proc_get_thread_policy_bitfield(thread, info);

			thread_unlock(thread);
			splx(s);
		} else {
			info->requested = 0;
			info->effective = 0;
			info->pending = 0;
		}

		break;
	}

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		thread_latency_qos_t plqos;

		if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			plqos = 0;
		} else {
			plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL);
		}

		info->thread_latency_qos_tier = qos_latency_policy_package(plqos);
	}
	break;

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		thread_throughput_qos_t ptqos;

		if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			ptqos = 0;
		} else {
			ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL);
		}

		info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);
	}
	break;

	case THREAD_QOS_POLICY:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (*count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (!(*get_default)) {
			int relprio_value = 0;
			info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
			    TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);

			/* relprio is stored inverted; flip it back for the caller */
			info->tier_importance = -relprio_value;
		} else {
			info->qos_tier = THREAD_QOS_UNSPECIFIED;
			info->tier_importance = 0;
		}

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	return result;
}
1502 
/*
 * Compute the initial effective policy for a newly created thread,
 * bracketed by importance-update trace points. The pend token is
 * intentionally discarded: there is nothing to push to other
 * subsystems for a thread that has not run yet.
 */
void
thread_policy_create(thread_t thread)
{
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);

	/* We pass a pend token but ignore it */
	struct task_pend_token pend_token = {};

	thread_policy_update_internal_spinlocked(thread, true, &pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);
}
1521 
/*
 * Trace-bracketed wrapper around thread_policy_update_internal_spinlocked().
 * Emits importance-update START/END kdebug events around the policy
 * recomputation so the before/after effective state is visible in traces.
 *
 * Called with the thread spinlock held (same requirements as the
 * internal update routine).
 */
static void
thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token)
{
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);

	thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);
}
1537 
1538 
1539 
/*
 * One thread state update function TO RULE THEM ALL
 *
 * This function updates the thread effective policy fields
 * and pushes the results to the relevant subsystems.
 *
 * Actions that cannot be performed under the thread lock are recorded
 * in *pend_token for the caller to run later via
 * thread_policy_update_complete_unlocked().
 * (The old comment claimed a TRUE return, but this function is void.)
 *
 * Called with thread spinlock locked, task may be locked, thread mutex may be locked
 */
static void
thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority,
    task_pend_token_t pend_token)
{
	/*
	 * Step 1:
	 *  Gather requested policy and effective task state
	 */

	struct thread_requested_policy requested = thread->requested_policy;
	struct task_effective_policy task_effective = get_threadtask(thread)->effective_policy;

	/*
	 * Step 2:
	 *  Calculate new effective policies from requested policy, task and thread state
	 *  Rules:
	 *      Don't change requested, it won't take effect
	 */

	struct thread_effective_policy next = {};

	next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;

	uint32_t next_qos = requested.thrp_qos;

	/* Overrides/promotions only raise QoS for threads already in the QoS world */
	if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		next_qos = MAX(requested.thrp_qos_override, next_qos);
		next_qos = MAX(requested.thrp_qos_promote, next_qos);
		next_qos = MAX(requested.thrp_qos_kevent_override, next_qos);
		next_qos = MAX(requested.thrp_qos_wlsvc_override, next_qos);
		next_qos = MAX(requested.thrp_qos_workq_override, next_qos);
	}

	if (task_effective.tep_darwinbg && task_effective.tep_adaptive_bg &&
	    requested.thrp_qos_promote > THREAD_QOS_BACKGROUND) {
		/*
		 * This thread is turnstile-boosted higher than the adaptive clamp
		 * by a synchronous waiter. Allow that to override the adaptive
		 * clamp temporarily for this thread only.
		 */
		next.thep_promote_above_task = true;
		next_qos = requested.thrp_qos_promote;
	}

	next.thep_qos = next_qos;

	/* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
	if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
		if (next.thep_qos != THREAD_QOS_UNSPECIFIED) {
			next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
		} else {
			next.thep_qos = task_effective.tep_qos_clamp;
		}
	}

	/*
	 * Extract outbound-promotion QoS before applying task ceiling or BG clamp
	 * This allows QoS promotions to work properly even after the process is unclamped.
	 */
	next.thep_qos_promote = next.thep_qos;

	/* The ceiling only applies to threads that are in the QoS world */
	/* TODO: is it appropriate for this to limit a turnstile-boosted thread's QoS? */
	if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
	    next.thep_qos != THREAD_QOS_UNSPECIFIED) {
		next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
	}

	/*
	 * The QoS relative priority is only applicable when the original programmer's
	 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
	 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
	 * since otherwise it would be lower than unclamped threads. Similarly, in the
	 * presence of boosting, the programmer doesn't know what other actors
	 * are boosting the thread.
	 */
	if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
	    (requested.thrp_qos == next.thep_qos) &&
	    (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
		next.thep_qos_relprio = requested.thrp_qos_relprio;
	} else {
		next.thep_qos_relprio = 0;
	}

	/* Calculate DARWIN_BG */
	bool wants_darwinbg        = false;
	bool wants_all_sockets_bg  = false; /* Do I want my existing sockets to be bg */

	if (task_effective.tep_darwinbg && !next.thep_promote_above_task) {
		wants_darwinbg = true;
	}

	/*
	 * If DARWIN_BG has been requested at either level, it's engaged.
	 * darwinbg threads always create bg sockets,
	 * but only some types of darwinbg change the sockets
	 * after they're created
	 */
	if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg) {
		wants_all_sockets_bg = wants_darwinbg = true;
	}

	if (requested.thrp_pidbind_bg) {
		wants_all_sockets_bg = wants_darwinbg = true;
	}

	/* BG and maintenance QoS imply darwinbg */
	if (next.thep_qos == THREAD_QOS_BACKGROUND ||
	    next.thep_qos == THREAD_QOS_MAINTENANCE) {
		wants_darwinbg = true;
	}

	/* Calculate side effects of DARWIN_BG */

	if (wants_darwinbg) {
		next.thep_darwinbg = 1;
	}

	if (next.thep_darwinbg || task_effective.tep_new_sockets_bg) {
		next.thep_new_sockets_bg = 1;
	}

	/* Don't use task_effective.tep_all_sockets_bg here */
	if (wants_all_sockets_bg) {
		next.thep_all_sockets_bg = 1;
	}

	/* darwinbg implies background QOS (or lower) */
	if (next.thep_darwinbg &&
	    (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
		next.thep_qos = THREAD_QOS_BACKGROUND;
		next.thep_qos_relprio = 0;
	}

	/* Calculate IO policy */

	int iopol = THROTTLE_LEVEL_TIER0;

	/* Factor in the task's IO policy */
	if (next.thep_darwinbg) {
		iopol = MAX(iopol, task_effective.tep_bg_iotier);
	}

	if (!next.thep_promote_above_task) {
		iopol = MAX(iopol, task_effective.tep_io_tier);
	}

	/* Look up the associated IO tier value for the QoS class */
	iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);

	iopol = MAX(iopol, requested.thrp_int_iotier);
	iopol = MAX(iopol, requested.thrp_ext_iotier);

	/* Apply the kevent iotier override */
	iopol = MIN(iopol, requested.thrp_iotier_kevent_override);

	next.thep_io_tier = iopol;

	/*
	 * If a QoS override is causing IO to go into a lower tier, we also set
	 * the passive bit so that a thread doesn't end up stuck in its own throttle
	 * window when the override goes away.
	 */

	int next_qos_iotier = thread_qos_policy_params.qos_iotier[next.thep_qos];
	int req_qos_iotier = thread_qos_policy_params.qos_iotier[requested.thrp_qos];
	bool qos_io_override_active = (next_qos_iotier < req_qos_iotier);

	/* Calculate Passive IO policy */
	if (requested.thrp_ext_iopassive ||
	    requested.thrp_int_iopassive ||
	    qos_io_override_active ||
	    task_effective.tep_io_passive) {
		next.thep_io_passive = 1;
	}

	/* Calculate timer QOS */
	uint32_t latency_qos = requested.thrp_latency_qos;

	if (!next.thep_promote_above_task) {
		latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
	}

	latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);

	next.thep_latency_qos = latency_qos;

	/* Calculate throughput QOS */
	uint32_t through_qos = requested.thrp_through_qos;

	if (!next.thep_promote_above_task) {
		through_qos = MAX(through_qos, task_effective.tep_through_qos);
	}

	through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);

	next.thep_through_qos = through_qos;

	if (task_effective.tep_terminated || requested.thrp_terminated) {
		/* Shoot down the throttles that slow down exit or response to SIGTERM */
		next.thep_terminated    = 1;
		next.thep_darwinbg      = 0;
		next.thep_io_tier       = THROTTLE_LEVEL_TIER0;
		next.thep_qos           = THREAD_QOS_UNSPECIFIED;
		next.thep_latency_qos   = LATENCY_QOS_TIER_UNSPECIFIED;
		next.thep_through_qos   = THROUGHPUT_QOS_TIER_UNSPECIFIED;
	}

	/*
	 * Step 3:
	 *  Swap out old policy for new policy
	 */

	struct thread_effective_policy prev = thread->effective_policy;

	/*
	 * Charge CPU time accumulated so far before the swap, so it lands in
	 * the buckets for the outgoing effective QoS.
	 */
	thread_update_qos_cpu_time_locked(thread);

	/* This is the point where the new values become visible to other threads */
	thread->effective_policy = next;

	/*
	 * Step 4:
	 *  Pend updates that can't be done while holding the thread lock
	 */

	if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg) {
		pend_token->tpt_update_sockets = 1;
	}

	/* TODO: Doesn't this only need to be done if the throttle went up? */
	if (prev.thep_io_tier != next.thep_io_tier) {
		pend_token->tpt_update_throttle = 1;
	}

	/*
	 * Check for the attributes that sfi_thread_classify() consults,
	 *  and trigger SFI re-evaluation.
	 */
	if (prev.thep_qos != next.thep_qos ||
	    prev.thep_darwinbg != next.thep_darwinbg) {
		pend_token->tpt_update_thread_sfi = 1;
	}

	integer_t old_base_pri = thread->base_pri;

	/*
	 * Step 5:
	 *  Update other subsystems as necessary if something has changed
	 */

	/* Check for the attributes that thread_recompute_priority() consults */
	if (prev.thep_qos != next.thep_qos ||
	    prev.thep_qos_relprio != next.thep_qos_relprio ||
	    prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent ||
	    prev.thep_promote_above_task != next.thep_promote_above_task ||
	    prev.thep_terminated != next.thep_terminated ||
	    pend_token->tpt_force_recompute_pri == 1 ||
	    recompute_priority) {
		thread_recompute_priority(thread);
	}

	/*
	 * Check if the thread is waiting on a turnstile and needs priority propagation.
	 */
	if (pend_token->tpt_update_turnstile &&
	    ((old_base_pri == thread->base_pri) ||
	    !thread_get_waiting_turnstile(thread))) {
		/*
		 * Reset update turnstile pend token since either
		 * the thread priority did not change or thread is
		 * not blocked on a turnstile.
		 */
		pend_token->tpt_update_turnstile = 0;
	}
}
1824 
1825 
1826 /*
1827  * Initiate a thread policy state transition on a thread with its TID
1828  * Useful if you cannot guarantee the thread won't get terminated
1829  * Precondition: No locks are held
1830  * Will take task lock - using the non-tid variant is faster
1831  * if you already have a thread ref.
1832  */
1833 void
proc_set_thread_policy_with_tid(task_t task,uint64_t tid,int category,int flavor,int value)1834 proc_set_thread_policy_with_tid(task_t     task,
1835     uint64_t   tid,
1836     int        category,
1837     int        flavor,
1838     int        value)
1839 {
1840 	/* takes task lock, returns ref'ed thread or NULL */
1841 	thread_t thread = task_findtid(task, tid);
1842 
1843 	if (thread == THREAD_NULL) {
1844 		return;
1845 	}
1846 
1847 	proc_set_thread_policy(thread, category, flavor, value);
1848 
1849 	thread_deallocate(thread);
1850 }
1851 
1852 /*
1853  * Initiate a thread policy transition on a thread
1854  * This path supports networking transitions (i.e. darwinbg transitions)
1855  * Precondition: No locks are held
1856  */
1857 void
proc_set_thread_policy(thread_t thread,int category,int flavor,int value)1858 proc_set_thread_policy(thread_t   thread,
1859     int        category,
1860     int        flavor,
1861     int        value)
1862 {
1863 	struct task_pend_token pend_token = {};
1864 
1865 	thread_mtx_lock(thread);
1866 
1867 	proc_set_thread_policy_locked(thread, category, flavor, value, 0, &pend_token);
1868 
1869 	thread_mtx_unlock(thread);
1870 
1871 	thread_policy_update_complete_unlocked(thread, &pend_token);
1872 }
1873 
1874 /*
1875  * Do the things that can't be done while holding a thread mutex.
1876  * These are set up to call back into thread policy to get the latest value,
1877  * so they don't have to be synchronized with the update.
1878  * The only required semantic is 'call this sometime after updating effective policy'
1879  *
1880  * Precondition: Thread mutex is not held
1881  *
1882  * This may be called with the task lock held, but in that case it won't be
1883  * called with tpt_update_sockets set.
1884  */
void
thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
{
#ifdef MACH_BSD
	/* Push the new darwinbg state down to the thread's existing sockets via BSD. */
	if (pend_token->tpt_update_sockets) {
		proc_apply_task_networkbg(task_pid(get_threadtask(thread)), thread);
	}
#endif /* MACH_BSD */

	/* Re-evaluate the thread's IO throttle tier. */
	if (pend_token->tpt_update_throttle) {
		rethrottle_thread(get_bsdthread_info(thread));
	}

	/* Re-evaluate the thread's SFI class assignment. */
	if (pend_token->tpt_update_thread_sfi) {
		sfi_reevaluate(thread);
	}

	/* Propagate the new priority through any turnstile chain the thread blocks on. */
	if (pend_token->tpt_update_turnstile) {
		turnstile_update_thread_priority_chain(thread);
	}
}
1906 
1907 /*
1908  * Set and update thread policy
1909  * Thread mutex might be held
1910  */
1911 static void
proc_set_thread_policy_locked(thread_t thread,int category,int flavor,int value,int value2,task_pend_token_t pend_token)1912 proc_set_thread_policy_locked(thread_t          thread,
1913     int               category,
1914     int               flavor,
1915     int               value,
1916     int               value2,
1917     task_pend_token_t pend_token)
1918 {
1919 	spl_t s = splsched();
1920 	thread_lock(thread);
1921 
1922 	proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
1923 
1924 	thread_unlock(thread);
1925 	splx(s);
1926 }
1927 
1928 /*
1929  * Set and update thread policy
1930  * Thread spinlock is held
1931  */
static void
proc_set_thread_policy_spinlocked(thread_t          thread,
    int               category,
    int               flavor,
    int               value,
    int               value2,
    task_pend_token_t pend_token)
{
	/* Trace the thread's requested state before the update. */
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
	    thread_tid(thread), threquested_0(thread),
	    threquested_1(thread), value, 0);

	/* Store the new requested value... */
	thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2, pend_token);

	/* ...then recompute effective policy, accumulating deferred work in pend_token. */
	thread_policy_update_spinlocked(thread, false, pend_token);

	/* Trace the requested state and the set of pending actions after the update. */
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
	    thread_tid(thread), threquested_0(thread),
	    threquested_1(thread), tpending(pend_token), 0);
}
1954 
1955 /*
1956  * Set the requested state for a specific flavor to a specific value.
1957  */
static void
thread_set_requested_policy_spinlocked(thread_t     thread,
    int               category,
    int               flavor,
    int               value,
    int               value2,
    task_pend_token_t pend_token)
{
	int tier, passive;

	/* Work on a local copy; the whole struct is published in one store at the end. */
	struct thread_requested_policy requested = thread->requested_policy;

	switch (flavor) {
	/* Category: EXTERNAL and INTERNAL, thread and task */

	case TASK_POLICY_DARWIN_BG:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_darwinbg = value;
		} else {
			requested.thrp_int_darwinbg = value;
		}
		break;

	case TASK_POLICY_IOPOL:
		/* Convert an IOPOL_* value into an IO tier and a passive bit. */
		proc_iopol_to_tier(value, &tier, &passive);
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iotier  = tier;
			requested.thrp_ext_iopassive = passive;
		} else {
			requested.thrp_int_iotier  = tier;
			requested.thrp_int_iopassive = passive;
		}
		break;

	case TASK_POLICY_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iotier = value;
		} else {
			requested.thrp_int_iotier = value;
		}
		break;

	case TASK_POLICY_PASSIVE_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iopassive = value;
		} else {
			requested.thrp_int_iopassive = value;
		}
		break;

	/* Category: ATTRIBUTE, thread only */

	case TASK_POLICY_PIDBIND_BG:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_pidbind_bg = value;
		break;

	case TASK_POLICY_LATENCY_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_latency_qos = value;
		break;

	case TASK_POLICY_THROUGH_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_through_qos = value;
		break;

	/*
	 * For the QoS-related flavors below, a turnstile update is queued on
	 * the pend token so a blocked thread's priority chain is re-evaluated.
	 */
	case TASK_POLICY_QOS_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_AND_RELPRIO:
		assert(category == TASK_POLICY_ATTRIBUTE);
		/* value is the QoS tier, value2 the relative priority within it */
		requested.thrp_qos = value;
		requested.thrp_qos_relprio = value2;
		pend_token->tpt_update_turnstile = 1;
		DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
		break;

	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_workq_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_PROMOTE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_promote = value;
		break;

	case TASK_POLICY_QOS_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_kevent_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_SERVICER_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_wlsvc_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_TERMINATED:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_terminated = value;
		break;

	case TASK_POLICY_IOTIER_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_iotier_kevent_override = value;
		break;

	default:
		panic("unknown task policy: %d %d %d", category, flavor, value);
		break;
	}

	/* Publish the updated requested policy as a single struct store. */
	thread->requested_policy = requested;
}
2079 
2080 /*
2081  * Gets what you set. Effective values may be different.
2082  * Precondition: No locks are held
2083  */
2084 int
proc_get_thread_policy(thread_t thread,int category,int flavor)2085 proc_get_thread_policy(thread_t   thread,
2086     int        category,
2087     int        flavor)
2088 {
2089 	int value = 0;
2090 	thread_mtx_lock(thread);
2091 	value = proc_get_thread_policy_locked(thread, category, flavor, NULL);
2092 	thread_mtx_unlock(thread);
2093 	return value;
2094 }
2095 
2096 static int
proc_get_thread_policy_locked(thread_t thread,int category,int flavor,int * value2)2097 proc_get_thread_policy_locked(thread_t   thread,
2098     int        category,
2099     int        flavor,
2100     int*       value2)
2101 {
2102 	int value = 0;
2103 
2104 	spl_t s = splsched();
2105 	thread_lock(thread);
2106 
2107 	value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2);
2108 
2109 	thread_unlock(thread);
2110 	splx(s);
2111 
2112 	return value;
2113 }
2114 
2115 /*
2116  * Gets what you set. Effective values may be different.
2117  */
static int
thread_get_requested_policy_spinlocked(thread_t thread,
    int      category,
    int      flavor,
    int*     value2)
{
	int value = 0;

	/* Snapshot the requested policy struct once. */
	struct thread_requested_policy requested = thread->requested_policy;

	switch (flavor) {
	case TASK_POLICY_DARWIN_BG:
		if (category == TASK_POLICY_EXTERNAL) {
			value = requested.thrp_ext_darwinbg;
		} else {
			value = requested.thrp_int_darwinbg;
		}
		break;
	case TASK_POLICY_IOPOL:
		/* Convert stored tier/passive pair back to an IOPOL_* value. */
		if (category == TASK_POLICY_EXTERNAL) {
			value = proc_tier_to_iopol(requested.thrp_ext_iotier,
			    requested.thrp_ext_iopassive);
		} else {
			value = proc_tier_to_iopol(requested.thrp_int_iotier,
			    requested.thrp_int_iopassive);
		}
		break;
	case TASK_POLICY_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			value = requested.thrp_ext_iotier;
		} else {
			value = requested.thrp_int_iotier;
		}
		break;
	case TASK_POLICY_PASSIVE_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			value = requested.thrp_ext_iopassive;
		} else {
			value = requested.thrp_int_iopassive;
		}
		break;
	case TASK_POLICY_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos;
		break;
	case TASK_POLICY_QOS_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_override;
		break;
	case TASK_POLICY_LATENCY_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_latency_qos;
		break;
	case TASK_POLICY_THROUGH_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_through_qos;
		break;
	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_workq_override;
		break;
	case TASK_POLICY_QOS_AND_RELPRIO:
		/* Two-value flavor: QoS tier returned, relative priority via *value2. */
		assert(category == TASK_POLICY_ATTRIBUTE);
		assert(value2 != NULL);
		value = requested.thrp_qos;
		*value2 = requested.thrp_qos_relprio;
		break;
	case TASK_POLICY_QOS_PROMOTE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_promote;
		break;
	case TASK_POLICY_QOS_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_kevent_override;
		break;
	case TASK_POLICY_QOS_SERVICER_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_wlsvc_override;
		break;
	case TASK_POLICY_TERMINATED:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_terminated;
		break;
	case TASK_POLICY_IOTIER_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_iotier_kevent_override;
		break;

	default:
		panic("unknown policy_flavor %d", flavor);
		break;
	}

	return value;
}
2213 
2214 /*
2215  * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
2216  *
2217  * NOTE: This accessor does not take the task or thread lock.
2218  * Notifications of state updates need to be externally synchronized with state queries.
2219  * This routine *MUST* remain interrupt safe, as it is potentially invoked
2220  * within the context of a timer interrupt.
2221  *
2222  * TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
2223  *      Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
2224  *      I don't think that cost is worth not having the right answer.
2225  */
int
proc_get_effective_thread_policy(thread_t thread,
    int      flavor)
{
	int value = 0;

	switch (flavor) {
	case TASK_POLICY_DARWIN_BG:
		/*
		 * This call is used within the timer layer, as well as
		 * prioritizing requests to the graphics system.
		 * It also informs SFI and originator-bg-state.
		 * Returns 1 for background mode, 0 for normal mode
		 */

		value = thread->effective_policy.thep_darwinbg ? 1 : 0;
		break;
	case TASK_POLICY_IO:
		/*
		 * The I/O system calls here to find out what throttling tier to apply to an operation.
		 * Returns THROTTLE_LEVEL_* values
		 *
		 * An active iotier_override can only make the result more aggressive
		 * (MIN of the two, i.e. the numerically lower tier wins).
		 */
		value = thread->effective_policy.thep_io_tier;
		if (thread->iotier_override != THROTTLE_LEVEL_NONE) {
			value = MIN(value, thread->iotier_override);
		}
		break;
	case TASK_POLICY_PASSIVE_IO:
		/*
		 * The I/O system calls here to find out whether an operation should be passive.
		 * (i.e. not cause operations with lower throttle tiers to be throttled)
		 * Returns 1 for passive mode, 0 for normal mode
		 *
		 * If an override is causing IO to go into a lower tier, we also set
		 * the passive bit so that a thread doesn't end up stuck in its own throttle
		 * window when the override goes away.
		 */
		value = thread->effective_policy.thep_io_passive ? 1 : 0;
		if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
		    thread->iotier_override < thread->effective_policy.thep_io_tier) {
			value = 1;
		}
		break;
	case TASK_POLICY_ALL_SOCKETS_BG:
		/*
		 * do_background_socket() calls this to determine whether
		 * it should change the thread's sockets
		 * Returns 1 for background mode, 0 for normal mode
		 * This consults both thread and task so un-DBGing a thread while the task is BG
		 * doesn't get you out of the network throttle.
		 */
		value = (thread->effective_policy.thep_all_sockets_bg ||
		    get_threadtask(thread)->effective_policy.tep_all_sockets_bg) ? 1 : 0;
		break;
	case TASK_POLICY_NEW_SOCKETS_BG:
		/*
		 * socreate() calls this to determine if it should mark a new socket as background
		 * Returns 1 for background mode, 0 for normal mode
		 */
		value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
		break;
	case TASK_POLICY_LATENCY_QOS:
		/*
		 * timer arming calls into here to find out the timer coalescing level
		 * Returns a latency QoS tier (0-6)
		 */
		value = thread->effective_policy.thep_latency_qos;
		break;
	case TASK_POLICY_THROUGH_QOS:
		/*
		 * This value is passed into the urgency callout from the scheduler
		 * to the performance management subsystem.
		 *
		 * Returns a throughput QoS tier (0-6)
		 */
		value = thread->effective_policy.thep_through_qos;
		break;
	case TASK_POLICY_QOS:
		/*
		 * This is communicated to the performance management layer and SFI.
		 *
		 * Returns a QoS policy tier
		 */
		value = thread->effective_policy.thep_qos;
		break;
	default:
		panic("unknown thread policy flavor %d", flavor);
		break;
	}

	return value;
}
2318 
2319 
2320 /*
2321  * (integer_t) casts limit the number of bits we can fit here
2322  * this interface is deprecated and replaced by the _EXT struct ?
2323  */
2324 static void
proc_get_thread_policy_bitfield(thread_t thread,thread_policy_state_t info)2325 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
2326 {
2327 	uint64_t bits = 0;
2328 	struct thread_requested_policy requested = thread->requested_policy;
2329 
2330 	bits |= (requested.thrp_int_darwinbg    ? POLICY_REQ_INT_DARWIN_BG  : 0);
2331 	bits |= (requested.thrp_ext_darwinbg    ? POLICY_REQ_EXT_DARWIN_BG  : 0);
2332 	bits |= (requested.thrp_int_iotier      ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
2333 	bits |= (requested.thrp_ext_iotier      ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
2334 	bits |= (requested.thrp_int_iopassive   ? POLICY_REQ_INT_PASSIVE_IO : 0);
2335 	bits |= (requested.thrp_ext_iopassive   ? POLICY_REQ_EXT_PASSIVE_IO : 0);
2336 
2337 	bits |= (requested.thrp_qos             ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
2338 	bits |= (requested.thrp_qos_override    ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT)   : 0);
2339 
2340 	bits |= (requested.thrp_pidbind_bg      ? POLICY_REQ_PIDBIND_BG     : 0);
2341 
2342 	bits |= (requested.thrp_latency_qos     ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
2343 	bits |= (requested.thrp_through_qos     ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);
2344 
2345 	info->requested = (integer_t) bits;
2346 	bits = 0;
2347 
2348 	struct thread_effective_policy effective = thread->effective_policy;
2349 
2350 	bits |= (effective.thep_darwinbg        ? POLICY_EFF_DARWIN_BG      : 0);
2351 
2352 	bits |= (effective.thep_io_tier         ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
2353 	bits |= (effective.thep_io_passive      ? POLICY_EFF_IO_PASSIVE     : 0);
2354 	bits |= (effective.thep_all_sockets_bg  ? POLICY_EFF_ALL_SOCKETS_BG : 0);
2355 	bits |= (effective.thep_new_sockets_bg  ? POLICY_EFF_NEW_SOCKETS_BG : 0);
2356 
2357 	bits |= (effective.thep_qos             ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);
2358 
2359 	bits |= (effective.thep_latency_qos     ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
2360 	bits |= (effective.thep_through_qos     ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);
2361 
2362 	info->effective = (integer_t)bits;
2363 	bits = 0;
2364 
2365 	info->pending = 0;
2366 }
2367 
2368 /*
2369  * Sneakily trace either the task and thread requested
2370  * or just the thread requested, depending on if we have enough room.
2371  * We do have room on LP64. On LP32, we have to split it between two uintptr_t's.
2372  *
2373  *                                LP32            LP64
2374  * threquested_0(thread)          thread[0]       task[0]
2375  * threquested_1(thread)          thread[1]       thread[0]
2376  *
2377  */
2378 
uintptr_t
threquested_0(thread_t thread)
{
	static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");

	/* Reinterpret the 64-bit requested-policy struct as raw machine words for tracing. */
	uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;

	return raw[0];
}
2388 
uintptr_t
threquested_1(thread_t thread)
{
#if defined __LP64__
	/*
	 * On LP64 the full thread requested policy already fits in threquested_0,
	 * so this slot traces the task's requested policy instead.
	 */
	return *(uintptr_t*)&get_threadtask(thread)->requested_policy;
#else
	/* On LP32, return the upper half of the thread's 64-bit requested policy. */
	uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
	return raw[1];
#endif
}
2399 
uintptr_t
theffective_0(thread_t thread)
{
	static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");

	/* Reinterpret the 64-bit effective-policy struct as raw machine words for tracing. */
	uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
	return raw[0];
}
2408 
uintptr_t
theffective_1(thread_t thread)
{
#if defined __LP64__
	/*
	 * On LP64 the full thread effective policy already fits in theffective_0,
	 * so this slot traces the task's effective policy instead.
	 */
	return *(uintptr_t*)&get_threadtask(thread)->effective_policy;
#else
	/* On LP32, return the upper half of the thread's 64-bit effective policy. */
	uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
	return raw[1];
#endif
}
2419 
2420 
2421 /*
2422  * Set an override on the thread which is consulted with a
2423  * higher priority than the task/thread policy. This should
2424  * only be set for temporary grants until the thread
2425  * returns to the userspace boundary
2426  *
2427  * We use atomic operations to swap in the override, with
2428  * the assumption that the thread itself can
2429  * read the override and clear it on return to userspace.
2430  *
2431  * No locking is performed, since it is acceptable to see
2432  * a stale override for one loop through throttle_lowpri_io().
2433  * However a thread reference must be held on the thread.
2434  */
2435 
void
set_thread_iotier_override(thread_t thread, int policy)
{
	int current_override;

	/* Let most aggressive I/O policy win until user boundary */
	do {
		current_override = thread->iotier_override;

		/* Lower tier numbers are more aggressive; never weaken an existing override. */
		if (current_override != THROTTLE_LEVEL_NONE) {
			policy = MIN(current_override, policy);
		}

		if (current_override == policy) {
			/* no effective change */
			return;
		}

		/* CAS publishes the override lock-free; retry if another thread raced in. */
	} while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));

	/*
	 * Since the thread may be currently throttled,
	 * re-evaluate tiers and potentially break out
	 * of an msleep
	 */
	rethrottle_thread(get_bsdthread_info(thread));
}
2462 
2463 /*
2464  * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
2465  * semaphores, dispatch_sync) may result in priority inversions where a higher priority
2466  * (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower
2467  * priority thread. In these cases, we attempt to propagate the priority token, as long
2468  * as the subsystem informs us of the relationships between the threads. The userspace
2469  * synchronization subsystem should maintain the information of owner->resource and
2470  * resource->waiters itself.
2471  */
2472 
2473 /*
2474  * This helper canonicalizes the resource/resource_type given the current qos_override_mode
2475  * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
2476  * to be handled specially in the future, but for now it's fine to slam
2477  * *resource to USER_ADDR_NULL even if it was previously a wildcard.
2478  */
2479 static void
canonicalize_resource_and_type(user_addr_t * resource,int * resource_type)2480 canonicalize_resource_and_type(user_addr_t *resource, int *resource_type)
2481 {
2482 	if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2483 		/* Map all input resource/type to a single one */
2484 		*resource = USER_ADDR_NULL;
2485 		*resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
2486 	} else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
2487 		/* no transform */
2488 	} else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
2489 		/* Map all mutex overrides to a single one, to avoid memory overhead */
2490 		if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
2491 			*resource = USER_ADDR_NULL;
2492 		}
2493 	}
2494 }
2495 
2496 /* This helper routine finds an existing override if known. Locking should be done by caller */
2497 static struct thread_qos_override *
find_qos_override(thread_t thread,user_addr_t resource,int resource_type)2498 find_qos_override(thread_t thread,
2499     user_addr_t resource,
2500     int resource_type)
2501 {
2502 	struct thread_qos_override *override;
2503 
2504 	override = thread->overrides;
2505 	while (override) {
2506 		if (override->override_resource == resource &&
2507 		    override->override_resource_type == resource_type) {
2508 			return override;
2509 		}
2510 
2511 		override = override->override_next;
2512 	}
2513 
2514 	return NULL;
2515 }
2516 
static void
find_and_decrement_qos_override(thread_t       thread,
    user_addr_t    resource,
    int            resource_type,
    boolean_t      reset,
    struct thread_qos_override **free_override_list)
{
	struct thread_qos_override *override, *override_prev;

	override_prev = NULL;
	override = thread->overrides;
	while (override) {
		struct thread_qos_override *override_next = override->override_next;

		/* Wildcard resource and/or type matches every node. */
		if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
		    (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) {
			if (reset) {
				override->override_contended_resource_count = 0;
			} else {
				override->override_contended_resource_count--;
			}

			if (override->override_contended_resource_count == 0) {
				/* Unlink the node from the list... */
				if (override_prev == NULL) {
					thread->overrides = override_next;
				} else {
					override_prev->override_next = override_next;
				}

				/* Add to out-param for later zfree */
				override->override_next = *free_override_list;
				*free_override_list = override;
			} else {
				override_prev = override;
			}

			/* A non-wildcard resource matches at most one node; stop early. */
			if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
				return;
			}
		} else {
			override_prev = override;
		}

		override = override_next;
	}
}
2563 
2564 /* This helper recalculates the current requested override using the policy selected at boot */
2565 static int
calculate_requested_qos_override(thread_t thread)2566 calculate_requested_qos_override(thread_t thread)
2567 {
2568 	if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2569 		return THREAD_QOS_UNSPECIFIED;
2570 	}
2571 
2572 	/* iterate over all overrides and calculate MAX */
2573 	struct thread_qos_override *override;
2574 	int qos_override = THREAD_QOS_UNSPECIFIED;
2575 
2576 	override = thread->overrides;
2577 	while (override) {
2578 		qos_override = MAX(qos_override, override->override_qos);
2579 		override = override->override_next;
2580 	}
2581 
2582 	return qos_override;
2583 }
2584 
2585 /*
2586  * Returns:
2587  * - 0 on success
2588  * - EINVAL if some invalid input was passed
2589  */
static int
proc_thread_qos_add_override_internal(thread_t         thread,
    int              override_qos,
    boolean_t        first_override_for_resource,
    user_addr_t      resource,
    int              resource_type)
{
	struct task_pend_token pend_token = {};
	int rc = 0;

	thread_mtx_lock(thread);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
	    thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);

	DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
	    uint64_t, thread->requested_policy.thrp_qos,
	    uint64_t, thread->effective_policy.thep_qos,
	    int, override_qos, boolean_t, first_override_for_resource);

	struct thread_qos_override *override;
	struct thread_qos_override *override_new = NULL;
	int new_qos_override, prev_qos_override;
	int new_effective_qos;

	/* Collapse (resource, type) per the boot-selected override mode. */
	canonicalize_resource_and_type(&resource, &resource_type);

	override = find_qos_override(thread, resource, resource_type);
	if (first_override_for_resource && !override) {
		/* We need to allocate a new object. Drop the thread lock and
		 * recheck afterwards in case someone else added the override
		 */
		thread_mtx_unlock(thread);
		override_new = zalloc(thread_qos_override_zone);
		thread_mtx_lock(thread);
		override = find_qos_override(thread, resource, resource_type);
	}
	if (first_override_for_resource && override) {
		/* Someone else already allocated while the thread lock was dropped */
		override->override_contended_resource_count++;
	} else if (!override && override_new) {
		/* Link the freshly allocated node at the head of the override list. */
		override = override_new;
		override_new = NULL;
		override->override_next = thread->overrides;
		/* since first_override_for_resource was TRUE */
		override->override_contended_resource_count = 1;
		override->override_resource = resource;
		override->override_resource_type = (int16_t)resource_type;
		override->override_qos = THREAD_QOS_UNSPECIFIED;
		thread->overrides = override;
	}

	/* An existing override only ever ratchets up to the higher QoS. */
	if (override) {
		if (override->override_qos == THREAD_QOS_UNSPECIFIED) {
			override->override_qos = (int16_t)override_qos;
		} else {
			override->override_qos = MAX(override->override_qos, (int16_t)override_qos);
		}
	}

	/* Determine how to combine the various overrides into a single current
	 * requested override
	 */
	new_qos_override = calculate_requested_qos_override(thread);

	prev_qos_override = proc_get_thread_policy_locked(thread,
	    TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);

	if (new_qos_override != prev_qos_override) {
		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_QOS_OVERRIDE,
		    new_qos_override, 0, &pend_token);
	}

	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

	thread_mtx_unlock(thread);

	/* Run deferred policy side effects with no locks held. */
	thread_policy_update_complete_unlocked(thread, &pend_token);

	/* Free the speculative allocation if it lost the race above. */
	if (override_new) {
		zfree(thread_qos_override_zone, override_new);
	}

	DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
	    int, new_qos_override, int, new_effective_qos, int, rc);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
	    new_qos_override, resource, resource_type, 0, 0);

	return rc;
}
2682 
2683 int
proc_thread_qos_add_override(task_t task,thread_t thread,uint64_t tid,int override_qos,boolean_t first_override_for_resource,user_addr_t resource,int resource_type)2684 proc_thread_qos_add_override(task_t           task,
2685     thread_t         thread,
2686     uint64_t         tid,
2687     int              override_qos,
2688     boolean_t        first_override_for_resource,
2689     user_addr_t      resource,
2690     int              resource_type)
2691 {
2692 	boolean_t has_thread_reference = FALSE;
2693 	int rc = 0;
2694 
2695 	if (thread == THREAD_NULL) {
2696 		thread = task_findtid(task, tid);
2697 		/* returns referenced thread */
2698 
2699 		if (thread == THREAD_NULL) {
2700 			KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
2701 			    tid, 0, 0xdead, 0, 0);
2702 			return ESRCH;
2703 		}
2704 		has_thread_reference = TRUE;
2705 	} else {
2706 		assert(get_threadtask(thread) == task);
2707 	}
2708 	rc = proc_thread_qos_add_override_internal(thread, override_qos,
2709 	    first_override_for_resource, resource, resource_type);
2710 	if (has_thread_reference) {
2711 		thread_deallocate(thread);
2712 	}
2713 
2714 	return rc;
2715 }
2716 
/*
 * Remove one reference (or, with 'reset', all references) to the QoS
 * override registered on 'thread' for (resource, resource_type), then
 * recompute the combined requested override and re-apply thread policy.
 *
 * Locking: takes the thread mutex, then the thread spinlock.  Override
 * objects unlinked by the decrement are freed only after all locks have
 * been dropped.
 */
static void
proc_thread_qos_remove_override_internal(thread_t       thread,
    user_addr_t    resource,
    int            resource_type,
    boolean_t      reset)
{
	struct task_pend_token pend_token = {};

	/* Overrides unlinked under the mutex, to be zfree'd after unlock */
	struct thread_qos_override *deferred_free_override_list = NULL;
	int new_qos_override, prev_qos_override, new_effective_qos;

	thread_mtx_lock(thread);

	canonicalize_resource_and_type(&resource, &resource_type);

	find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
	    thread_tid(thread), resource, reset, 0, 0);

	DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
	    uint64_t, thread->requested_policy.thrp_qos,
	    uint64_t, thread->effective_policy.thep_qos);

	/* Determine how to combine the various overrides into a single current requested override */
	new_qos_override = calculate_requested_qos_override(thread);

	spl_t s = splsched();
	thread_lock(thread);

	/*
	 * The override chain and therefore the value of the current override is locked with thread mutex,
	 * so we can do a get/set without races.  However, the rest of thread policy is locked under the spinlock.
	 * This means you can't change the current override from a spinlock-only setter.
	 */
	prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);

	/* Only write the policy when the combined override actually changed */
	if (new_qos_override != prev_qos_override) {
		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token);
	}

	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);

	/* Perform any pending policy side effects now that all locks are dropped */
	thread_policy_update_complete_unlocked(thread, &pend_token);

	/* Free the override objects unlinked above, outside all locks */
	while (deferred_free_override_list) {
		struct thread_qos_override *override_next = deferred_free_override_list->override_next;

		zfree(thread_qos_override_zone, deferred_free_override_list);
		deferred_free_override_list = override_next;
	}

	DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
	    int, new_qos_override, int, new_effective_qos);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
	    thread_tid(thread), 0, 0, 0, 0);
}
2780 
2781 int
proc_thread_qos_remove_override(task_t task,thread_t thread,uint64_t tid,user_addr_t resource,int resource_type)2782 proc_thread_qos_remove_override(task_t      task,
2783     thread_t    thread,
2784     uint64_t    tid,
2785     user_addr_t resource,
2786     int         resource_type)
2787 {
2788 	boolean_t has_thread_reference = FALSE;
2789 
2790 	if (thread == THREAD_NULL) {
2791 		thread = task_findtid(task, tid);
2792 		/* returns referenced thread */
2793 
2794 		if (thread == THREAD_NULL) {
2795 			KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2796 			    tid, 0, 0xdead, 0, 0);
2797 			return ESRCH;
2798 		}
2799 		has_thread_reference = TRUE;
2800 	} else {
2801 		assert(task == get_threadtask(thread));
2802 	}
2803 
2804 	proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE);
2805 
2806 	if (has_thread_reference) {
2807 		thread_deallocate(thread);
2808 	}
2809 
2810 	return 0;
2811 }
2812 
2813 /* Deallocate before thread termination */
2814 void
proc_thread_qos_deallocate(thread_t thread)2815 proc_thread_qos_deallocate(thread_t thread)
2816 {
2817 	/* This thread must have no more IPC overrides. */
2818 	assert(thread->kevent_overrides == 0);
2819 	assert(thread->requested_policy.thrp_qos_kevent_override == THREAD_QOS_UNSPECIFIED);
2820 	assert(thread->requested_policy.thrp_qos_wlsvc_override == THREAD_QOS_UNSPECIFIED);
2821 
2822 	/*
2823 	 * Clear out any lingering override objects.
2824 	 */
2825 	struct thread_qos_override *override;
2826 
2827 	thread_mtx_lock(thread);
2828 	override = thread->overrides;
2829 	thread->overrides = NULL;
2830 	thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2831 	/* We don't need to re-evaluate thread policy here because the thread has already exited */
2832 	thread_mtx_unlock(thread);
2833 
2834 	while (override) {
2835 		struct thread_qos_override *override_next = override->override_next;
2836 
2837 		zfree(thread_qos_override_zone, override);
2838 		override = override_next;
2839 	}
2840 }
2841 
/*
 * Set up the primordial thread's QoS
 *
 * Derives the initial QoS from the task's attributes and applies it as
 * the thread's requested QoS (with relative priority 0), bracketed by
 * kdebug trace events.
 */
void
task_set_main_thread_qos(task_t task, thread_t thread)
{
	struct task_pend_token pend_token = {};

	assert(get_threadtask(thread) == task);

	thread_mtx_lock(thread);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
	    thread_tid(thread), threquested_0(thread), threquested_1(thread),
	    thread->requested_policy.thrp_qos, 0);

	/* Compute the task-derived QoS for the main thread */
	thread_qos_t primordial_qos = task_compute_main_thread_qos(task);

	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
	    primordial_qos, 0, &pend_token);

	thread_mtx_unlock(thread);

	/* Perform any pending policy side effects outside the mutex */
	thread_policy_update_complete_unlocked(thread, &pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
	    thread_tid(thread), threquested_0(thread), threquested_1(thread),
	    primordial_qos, 0);
}
2873 
2874 /*
2875  * KPI for pthread kext
2876  *
2877  * Return a good guess at what the initial manager QoS will be
2878  * Dispatch can override this in userspace if it so chooses
2879  */
2880 thread_qos_t
task_get_default_manager_qos(task_t task)2881 task_get_default_manager_qos(task_t task)
2882 {
2883 	thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
2884 
2885 	if (primordial_qos == THREAD_QOS_LEGACY) {
2886 		primordial_qos = THREAD_QOS_USER_INITIATED;
2887 	}
2888 
2889 	return primordial_qos;
2890 }
2891 
/*
 * Check if the kernel promotion on thread has changed
 * and apply it.
 *
 * thread locked on entry and exit
 *
 * Returns TRUE when the promotion priority actually changed, so the
 * caller knows further propagation may be required.
 */
boolean_t
thread_recompute_kernel_promotion_locked(thread_t thread)
{
	boolean_t needs_update = FALSE;
	/* Current promotion priority inherited through the turnstile chain */
	uint8_t kern_promotion_schedpri = (uint8_t)thread_get_inheritor_turnstile_sched_priority(thread);

	/*
	 * For now just assert that kern_promotion_schedpri <= MAXPRI_PROMOTE.
	 * TURNSTILE_KERNEL_PROMOTE adds threads on the waitq already capped to MAXPRI_PROMOTE
	 * and propagates the priority through the chain with the same cap, because as of now it does
	 * not differenciate on the kernel primitive.
	 *
	 * If this assumption will change with the adoption of a kernel primitive that does not
	 * cap the when adding/propagating,
	 * then here is the place to put the generic cap for all kernel primitives
	 * (converts the assert to kern_promotion_schedpri = MIN(priority, MAXPRI_PROMOTE))
	 */
	assert(kern_promotion_schedpri <= MAXPRI_PROMOTE);

	if (kern_promotion_schedpri != thread->kern_promotion_schedpri) {
		KDBG(MACHDBG_CODE(
			    DBG_MACH_SCHED, MACH_TURNSTILE_KERNEL_CHANGE) | DBG_FUNC_NONE,
		    thread_tid(thread),
		    kern_promotion_schedpri,
		    thread->kern_promotion_schedpri);

		needs_update = TRUE;
		thread->kern_promotion_schedpri = kern_promotion_schedpri;
		/* Re-derive the scheduled priority with the new promotion in effect */
		thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
	}

	return needs_update;
}
2931 
/*
 * Check if the user promotion on thread has changed
 * and apply it.
 *
 * thread locked on entry, might drop the thread lock
 * and reacquire it.
 *
 * Returns TRUE when the thread's base priority changed while it is
 * waiting on a turnstile (i.e. the caller may need to propagate).
 */
boolean_t
thread_recompute_user_promotion_locked(thread_t thread)
{
	boolean_t needs_update = FALSE;
	struct task_pend_token pend_token = {};
	/* Inherited turnstile base priority, capped at the user maximum */
	uint8_t user_promotion_basepri = MIN((uint8_t)thread_get_inheritor_turnstile_base_priority(thread), MAXPRI_USER);
	int old_base_pri = thread->base_pri;
	thread_qos_t qos_promotion;

	/* Check if user promotion has changed */
	if (thread->user_promotion_basepri == user_promotion_basepri) {
		return needs_update;
	} else {
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    (TURNSTILE_CODE(TURNSTILE_PRIORITY_OPERATIONS, (THREAD_USER_PROMOTION_CHANGE))) | DBG_FUNC_NONE,
		    thread_tid(thread),
		    user_promotion_basepri,
		    thread->user_promotion_basepri,
		    0, 0);
		KDBG(MACHDBG_CODE(
			    DBG_MACH_SCHED, MACH_TURNSTILE_USER_CHANGE) | DBG_FUNC_NONE,
		    thread_tid(thread),
		    user_promotion_basepri,
		    thread->user_promotion_basepri);
	}

	/* Update the user promotion base pri */
	thread->user_promotion_basepri = user_promotion_basepri;
	pend_token.tpt_force_recompute_pri = 1;

	/* Translate the promotion priority into a QoS promotion level */
	if (user_promotion_basepri <= MAXPRI_THROTTLE) {
		qos_promotion = THREAD_QOS_UNSPECIFIED;
	} else {
		qos_promotion = thread_user_promotion_qos_for_pri(user_promotion_basepri);
	}

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_PROMOTE, qos_promotion, 0, &pend_token);

	if (thread_get_waiting_turnstile(thread) &&
	    thread->base_pri != old_base_pri) {
		needs_update = TRUE;
	}

	/* Drop the thread lock to complete the policy update, then reacquire */
	thread_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	thread_lock(thread);

	return needs_update;
}
2991 
2992 /*
2993  * Convert the thread user promotion base pri to qos for threads in qos world.
2994  * For priority above UI qos, the qos would be set to UI.
2995  */
2996 thread_qos_t
thread_user_promotion_qos_for_pri(int priority)2997 thread_user_promotion_qos_for_pri(int priority)
2998 {
2999 	thread_qos_t qos;
3000 	for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
3001 		if (thread_qos_policy_params.qos_pri[qos] <= priority) {
3002 			return qos;
3003 		}
3004 	}
3005 	return THREAD_QOS_MAINTENANCE;
3006 }
3007 
/*
 * Set the thread's QoS Kevent override
 * Owned by the Kevent subsystem
 *
 * May be called with spinlocks held, but not spinlocks
 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
 *
 * One 'add' must be balanced by one 'drop'.
 * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
 * Before the thread is deallocated, there must be 0 remaining overrides.
 */
static void
thread_kevent_override(thread_t    thread,
    uint32_t    qos_override,
    boolean_t   is_new_override)
{
	struct task_pend_token pend_token = {};
	boolean_t needs_update;

	spl_t s = splsched();
	thread_lock(thread);

	uint32_t old_override = thread->requested_policy.thrp_qos_kevent_override;

	/* The override value must be a real QoS class */
	assert(qos_override > THREAD_QOS_UNSPECIFIED);
	assert(qos_override < THREAD_QOS_LAST);

	if (is_new_override) {
		if (thread->kevent_overrides++ == 0) {
			/* This add is the first override for this thread */
			assert(old_override == THREAD_QOS_UNSPECIFIED);
		} else {
			/* There are already other overrides in effect for this thread */
			assert(old_override > THREAD_QOS_UNSPECIFIED);
		}
	} else {
		/* There must be at least one override (the previous add call) in effect */
		assert(thread->kevent_overrides > 0);
		assert(old_override > THREAD_QOS_UNSPECIFIED);
	}

	/*
	 * We can't allow lowering if there are several IPC overrides because
	 * the caller can't possibly know the whole truth
	 */
	if (thread->kevent_overrides == 1) {
		needs_update = qos_override != old_override;
	} else {
		needs_update = qos_override > old_override;
	}

	if (needs_update) {
		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_QOS_KEVENT_OVERRIDE,
		    qos_override, 0, &pend_token);
		/* This policy change must not require socket updates */
		assert(pend_token.tpt_update_sockets == 0);
	}

	thread_unlock(thread);
	splx(s);

	thread_policy_update_complete_unlocked(thread, &pend_token);
}
3071 
/* Register a new kevent QoS override on 'thread'; must be balanced by a drop. */
void
thread_add_kevent_override(thread_t thread, uint32_t qos_override)
{
	thread_kevent_override(thread, qos_override, TRUE);
}
3077 
/* Update the QoS value of an existing kevent override on 'thread'. */
void
thread_update_kevent_override(thread_t thread, uint32_t qos_override)
{
	thread_kevent_override(thread, qos_override, FALSE);
}
3083 
/*
 * Drop one kevent QoS override previously added with
 * thread_add_kevent_override().  When the last override is dropped, the
 * saturated override value is cleared back to THREAD_QOS_UNSPECIFIED.
 */
void
thread_drop_kevent_override(thread_t thread)
{
	struct task_pend_token pend_token = {};

	spl_t s = splsched();
	thread_lock(thread);

	/* A drop must be balanced by a prior add */
	assert(thread->kevent_overrides > 0);

	if (--thread->kevent_overrides == 0) {
		/*
		 * There are no more overrides for this thread, so we should
		 * clear out the saturated override value
		 */

		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_QOS_KEVENT_OVERRIDE, THREAD_QOS_UNSPECIFIED,
		    0, &pend_token);
	}

	thread_unlock(thread);
	splx(s);

	thread_policy_update_complete_unlocked(thread, &pend_token);
}
3110 
/*
 * Set the thread's QoS Workloop Servicer override
 * Owned by the Kevent subsystem
 *
 * May be called with spinlocks held, but not spinlocks
 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
 *
 * One 'add' must be balanced by one 'drop'.
 * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
 * Before the thread is deallocated, there must be 0 remaining overrides.
 *
 * Unlike the kevent override, this one is not counted: a drop passes
 * qos_override == THREAD_QOS_UNSPECIFIED through this same path.
 */
static void
thread_servicer_override(thread_t    thread,
    uint32_t    qos_override,
    boolean_t   is_new_override)
{
	struct task_pend_token pend_token = {};

	spl_t s = splsched();
	thread_lock(thread);

	/* An add requires no existing override; update/drop require one */
	if (is_new_override) {
		assert(!thread->requested_policy.thrp_qos_wlsvc_override);
	} else {
		assert(thread->requested_policy.thrp_qos_wlsvc_override);
	}

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_SERVICER_OVERRIDE,
	    qos_override, 0, &pend_token);

	thread_unlock(thread);
	splx(s);

	/* This policy change must not require socket updates */
	assert(pend_token.tpt_update_sockets == 0);
	thread_policy_update_complete_unlocked(thread, &pend_token);
}
3148 
/* Add a workloop servicer QoS override; must be balanced by a drop. */
void
thread_add_servicer_override(thread_t thread, uint32_t qos_override)
{
	/* The override value must be a real QoS class */
	assert(qos_override > THREAD_QOS_UNSPECIFIED);
	assert(qos_override < THREAD_QOS_LAST);

	thread_servicer_override(thread, qos_override, TRUE);
}
3157 
/* Update the QoS value of an existing workloop servicer override. */
void
thread_update_servicer_override(thread_t thread, uint32_t qos_override)
{
	/* The override value must be a real QoS class */
	assert(qos_override > THREAD_QOS_UNSPECIFIED);
	assert(qos_override < THREAD_QOS_LAST);

	thread_servicer_override(thread, qos_override, FALSE);
}
3166 
/* Drop the workloop servicer override by clearing it to UNSPECIFIED. */
void
thread_drop_servicer_override(thread_t thread)
{
	thread_servicer_override(thread, THREAD_QOS_UNSPECIFIED, FALSE);
}
3172 
/*
 * Update the kevent I/O tier override on 'thread', skipping the policy
 * machinery entirely when the requested tier is already in effect.
 *
 * NOTE(review): the early-out read calls the "_spinlocked" getter before
 * the thread lock is taken — presumably a benign racy fast path since the
 * authoritative set happens under the lock below; confirm.
 */
void
thread_update_servicer_iotier_override(thread_t thread, uint8_t iotier_override)
{
	struct task_pend_token pend_token = {};
	uint8_t current_iotier;

	/* Check if the update is needed */
	current_iotier = (uint8_t)thread_get_requested_policy_spinlocked(thread,
	    TASK_POLICY_ATTRIBUTE, TASK_POLICY_IOTIER_KEVENT_OVERRIDE, NULL);

	if (iotier_override == current_iotier) {
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_IOTIER_KEVENT_OVERRIDE,
	    iotier_override, 0, &pend_token);

	thread_unlock(thread);
	splx(s);

	/* This policy change must not require socket updates */
	assert(pend_token.tpt_update_sockets == 0);
	thread_policy_update_complete_unlocked(thread, &pend_token);
}
3200 
3201 /* Get current requested qos / relpri, may be called from spinlock context */
3202 thread_qos_t
thread_get_requested_qos(thread_t thread,int * relpri)3203 thread_get_requested_qos(thread_t thread, int *relpri)
3204 {
3205 	int relprio_value = 0;
3206 	thread_qos_t qos;
3207 
3208 	qos = (thread_qos_t)proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
3209 	    TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
3210 	if (relpri) {
3211 		*relpri = -relprio_value;
3212 	}
3213 	return qos;
3214 }
3215 
3216 /*
3217  * This function will promote the thread priority
3218  * since exec could block other threads calling
3219  * proc_find on the proc. This boost must be removed
3220  * via call to thread_clear_exec_promotion.
3221  *
3222  * This should be replaced with a generic 'priority inheriting gate' mechanism (24194397)
3223  */
3224 void
thread_set_exec_promotion(thread_t thread)3225 thread_set_exec_promotion(thread_t thread)
3226 {
3227 	spl_t s = splsched();
3228 	thread_lock(thread);
3229 
3230 	sched_thread_promote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3231 
3232 	thread_unlock(thread);
3233 	splx(s);
3234 }
3235 
3236 /*
3237  * This function will clear the exec thread
3238  * promotion set on the thread by thread_set_exec_promotion.
3239  */
3240 void
thread_clear_exec_promotion(thread_t thread)3241 thread_clear_exec_promotion(thread_t thread)
3242 {
3243 	spl_t s = splsched();
3244 	thread_lock(thread);
3245 
3246 	sched_thread_unpromote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3247 
3248 	thread_unlock(thread);
3249 	splx(s);
3250 }
3251