xref: /xnu-8019.80.24/osfmk/kern/thread_policy.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1 /*
2  * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <mach/mach_types.h>
30 #include <mach/thread_act_server.h>
31 
32 #include <kern/kern_types.h>
33 #include <kern/processor.h>
34 #include <kern/thread.h>
35 #include <kern/affinity.h>
36 #include <mach/task_policy.h>
37 #include <kern/sfi.h>
38 #include <kern/policy_internal.h>
39 #include <sys/errno.h>
40 #include <sys/ulock.h>
41 
42 #include <mach/machine/sdt.h>
43 
/* Typed zone backing per-override tracking records (struct thread_qos_override). */
static KALLOC_TYPE_DEFINE(thread_qos_override_zone,
    struct thread_qos_override, KT_DEFAULT);

#ifdef MACH_BSD
/* Helpers implemented in the BSD layer; only available when MACH_BSD is built. */
extern int      proc_selfpid(void);
extern char *   proc_name_address(void *p);
extern void     rethrottle_thread(void * uthread);
#endif /* MACH_BSD */

/* Extract the low byte of a QoS tier constant (tier values encode extra bits above 0xff). */
#define QOS_EXTRACT(q)        ((q) & 0xff)

/*
 * Selectable strategies for combining QoS overrides.  The default is set
 * below and may be changed at boot via the "qos_override_mode" boot-arg.
 */
#define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
#define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 3

TUNABLE(uint32_t, qos_override_mode, "qos_override_mode",
    QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE);

static void
proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset);
65 
66 /*
67  * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
68  * to threads that don't have a QoS class set.
69  */
/* Per-QoS-class scheduling parameters; each table below is indexed by thread_qos_t. */
const qos_policy_params_t thread_qos_policy_params = {
	/*
	 * This table defines the starting base priority of the thread,
	 * which will be modified by the thread importance and the task max priority
	 * before being applied.
	 */
	.qos_pri[THREAD_QOS_UNSPECIFIED]                = 0, /* not consulted */
	.qos_pri[THREAD_QOS_USER_INTERACTIVE]           = BASEPRI_BACKGROUND, /* i.e. 46 */
	.qos_pri[THREAD_QOS_USER_INITIATED]             = BASEPRI_USER_INITIATED,
	.qos_pri[THREAD_QOS_LEGACY]                     = BASEPRI_DEFAULT,
	.qos_pri[THREAD_QOS_UTILITY]                    = BASEPRI_UTILITY,
	.qos_pri[THREAD_QOS_BACKGROUND]                 = MAXPRI_THROTTLE,
	.qos_pri[THREAD_QOS_MAINTENANCE]                = MAXPRI_THROTTLE,

	/*
	 * This table defines the highest IO priority that a thread marked with this
	 * QoS class can have.
	 */
	.qos_iotier[THREAD_QOS_UNSPECIFIED]             = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_USER_INTERACTIVE]        = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_USER_INITIATED]          = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_LEGACY]                  = THROTTLE_LEVEL_TIER0,
	.qos_iotier[THREAD_QOS_UTILITY]                 = THROTTLE_LEVEL_TIER1,
	.qos_iotier[THREAD_QOS_BACKGROUND]              = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */
	.qos_iotier[THREAD_QOS_MAINTENANCE]             = THROTTLE_LEVEL_TIER3,

	/*
	 * This table defines the highest throughput QoS tier that
	 * a thread marked with this QoS class can have.
	 */

	.qos_through_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
	.qos_through_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
	.qos_through_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
	.qos_through_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
	.qos_through_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
	.qos_through_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
	.qos_through_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),

	/* Latency QoS tier associated with each QoS class. */
	.qos_latency_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
	.qos_latency_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(LATENCY_QOS_TIER_0),
	.qos_latency_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(LATENCY_QOS_TIER_1),
	.qos_latency_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(LATENCY_QOS_TIER_1),
	.qos_latency_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(LATENCY_QOS_TIER_3),
	.qos_latency_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(LATENCY_QOS_TIER_3),
	.qos_latency_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(LATENCY_QOS_TIER_3),
};
117 
/*
 * Forward declarations for the policy engine below.
 * Naming convention (matching the call sites in this file):
 *   *_locked     - caller holds the thread mutex
 *   *_spinlocked - caller holds the thread (spin) lock at splsched
 */
static void
thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);

static int
thread_qos_scaled_relative_priority(int qos, int qos_relprio);

static void
proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);

static void
proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);

static void
proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);

static void
thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);

static int
thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);

static int
proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);

static void
thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);

static void
thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
147 
148 boolean_t
thread_has_qos_policy(thread_t thread)149 thread_has_qos_policy(thread_t thread)
150 {
151 	return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
152 }
153 
154 
/*
 * Strip the requested QoS class (and its relative priority) from a thread.
 * Caller holds the thread mutex; deferred follow-up work is accumulated in
 * *pend_token for the caller to flush once all locks are dropped.
 */
static void
thread_remove_qos_policy_locked(thread_t thread,
    task_pend_token_t pend_token)
{
	/* Captured only for the DTrace probe; unused on non-DTrace builds. */
	__unused int prev_qos = thread->requested_policy.thrp_qos;

	DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);

	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
	    THREAD_QOS_UNSPECIFIED, 0, pend_token);
}
166 
167 kern_return_t
thread_remove_qos_policy(thread_t thread)168 thread_remove_qos_policy(thread_t thread)
169 {
170 	struct task_pend_token pend_token = {};
171 
172 	thread_mtx_lock(thread);
173 	if (!thread->active) {
174 		thread_mtx_unlock(thread);
175 		return KERN_TERMINATED;
176 	}
177 
178 	thread_remove_qos_policy_locked(thread, &pend_token);
179 
180 	thread_mtx_unlock(thread);
181 
182 	thread_policy_update_complete_unlocked(thread, &pend_token);
183 
184 	return KERN_SUCCESS;
185 }
186 
187 
188 boolean_t
thread_is_static_param(thread_t thread)189 thread_is_static_param(thread_t thread)
190 {
191 	if (thread->static_param) {
192 		DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
193 		return TRUE;
194 	}
195 	return FALSE;
196 }
197 
198 /*
199  * Relative priorities can range between 0REL and -15REL. These
200  * map to QoS-specific ranges, to create non-overlapping priority
201  * ranges.
202  */
203 static int
thread_qos_scaled_relative_priority(int qos,int qos_relprio)204 thread_qos_scaled_relative_priority(int qos, int qos_relprio)
205 {
206 	int next_lower_qos;
207 
208 	/* Fast path, since no validation or scaling is needed */
209 	if (qos_relprio == 0) {
210 		return 0;
211 	}
212 
213 	switch (qos) {
214 	case THREAD_QOS_USER_INTERACTIVE:
215 		next_lower_qos = THREAD_QOS_USER_INITIATED;
216 		break;
217 	case THREAD_QOS_USER_INITIATED:
218 		next_lower_qos = THREAD_QOS_LEGACY;
219 		break;
220 	case THREAD_QOS_LEGACY:
221 		next_lower_qos = THREAD_QOS_UTILITY;
222 		break;
223 	case THREAD_QOS_UTILITY:
224 		next_lower_qos = THREAD_QOS_BACKGROUND;
225 		break;
226 	case THREAD_QOS_MAINTENANCE:
227 	case THREAD_QOS_BACKGROUND:
228 		next_lower_qos = 0;
229 		break;
230 	default:
231 		panic("Unrecognized QoS %d", qos);
232 		return 0;
233 	}
234 
235 	int prio_range_max = thread_qos_policy_params.qos_pri[qos];
236 	int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0;
237 
238 	/*
239 	 * We now have the valid range that the scaled relative priority can map to. Note
240 	 * that the lower bound is exclusive, but the upper bound is inclusive. If the
241 	 * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
242 	 * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
243 	 * remainder.
244 	 */
245 	int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4);
246 
247 	return scaled_relprio;
248 }
249 
/*
 * Flag set by the -qos-policy-allow boot-arg to allow testing thread QoS
 * policy from userspace; it bypasses the static-param and QoS-flavor
 * restrictions in thread_policy_set() below.
 */
static TUNABLE(bool, allow_qos_policy_set, "-qos-policy-allow", false);
255 
/*
 * thread_policy_set:
 *
 * Userspace-facing entry point for applying a scheduling policy flavor.
 * Unless the -qos-policy-allow boot-arg is set, static-param threads are
 * rejected, direct THREAD_QOS_POLICY sets are disallowed, and the
 * WITH_PRIORITY time-constraint flavor is restricted to BASEPRI_RTQUEUES.
 *
 * Any existing requested QoS is removed before the new policy is applied;
 * if the apply fails, the previous QoS tier/importance is restored.
 */
kern_return_t
thread_policy_set(
	thread_t                                thread,
	thread_policy_flavor_t  flavor,
	thread_policy_t                 policy_info,
	mach_msg_type_number_t  count)
{
	thread_qos_policy_data_t req_qos;
	kern_return_t kr;

	req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;

	if (thread == THREAD_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (!allow_qos_policy_set) {
		if (thread_is_static_param(thread)) {
			return KERN_POLICY_STATIC;
		}

		/* QoS may not be set directly through this interface. */
		if (flavor == THREAD_QOS_POLICY) {
			return KERN_INVALID_ARGUMENT;
		}

		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			if (count < THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT) {
				return KERN_INVALID_ARGUMENT;
			}
			/* Userspace may only request the base realtime priority. */
			thread_time_constraint_with_priority_policy_t info = (thread_time_constraint_with_priority_policy_t)policy_info;
			if (info->priority != BASEPRI_RTQUEUES) {
				return KERN_INVALID_ARGUMENT;
			}
		}
	}

	/* Threads without static_param set reset their QoS when other policies are applied. */
	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		/* Store the existing tier, if we fail this call it is used to reset back. */
		req_qos.qos_tier = thread->requested_policy.thrp_qos;
		req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;

		kr = thread_remove_qos_policy(thread);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
	}

	kr = thread_policy_set_internal(thread, flavor, policy_info, count);

	/*
	 * NOTE(review): despite the historical comment here ("Return
	 * KERN_QOS_REMOVED instead of KERN_SUCCESS"), success still returns
	 * KERN_SUCCESS; on failure the previously-removed QoS tier is restored.
	 */
	if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
		if (kr != KERN_SUCCESS) {
			/* Reset back to our original tier as the set failed. */
			(void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
		}
	}

	return kr;
}
316 
/*
 * thread_policy_set_internal() reads both time-constraint flavors through
 * thread_time_constraint_with_priority_policy_t; these asserts guarantee
 * the shared leading fields of the two structs stay at identical offsets.
 */
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, period) == offsetof(thread_time_constraint_policy_data_t, period));
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, computation) == offsetof(thread_time_constraint_policy_data_t, computation));
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, constraint) == offsetof(thread_time_constraint_policy_data_t, constraint));
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, preemptible) == offsetof(thread_time_constraint_policy_data_t, preemptible));
321 
/*
 * thread_policy_set_internal:
 *
 * Apply a policy flavor to a thread without the userspace-facing
 * restrictions enforced by thread_policy_set().
 *
 * Called with nothing locked.  Takes the thread mutex; scheduler-visible
 * state is modified under the thread spin lock at splsched.  Work that must
 * run with no locks held (e.g. SFI re-evaluation) is accumulated in
 * pend_token and flushed by thread_policy_update_complete_unlocked().
 *
 * Returns KERN_TERMINATED for inactive threads, KERN_INVALID_ARGUMENT for
 * malformed or out-of-range policy data, otherwise KERN_SUCCESS (or the
 * result of thread_affinity_set() for THREAD_AFFINITY_POLICY).
 */
kern_return_t
thread_policy_set_internal(
	thread_t                     thread,
	thread_policy_flavor_t       flavor,
	thread_policy_t              policy_info,
	mach_msg_type_number_t       count)
{
	kern_return_t result = KERN_SUCCESS;
	struct task_pend_token pend_token = {};

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return KERN_TERMINATED;
	}

	switch (flavor) {
	case THREAD_EXTENDED_POLICY:
	{
		/* With no (or short) policy data, default to timeshare. */
		boolean_t timeshare = TRUE;

		if (count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t info;

			info = (thread_extended_policy_t)policy_info;
			timeshare = info->timeshare;
		}

		sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;

		spl_t s = splsched();
		thread_lock(thread);

		thread_set_user_sched_mode_and_recompute_pri(thread, mode);

		thread_unlock(thread);
		splx(s);

		/* A mode change may affect the thread's SFI class; re-evaluate once unlocked. */
		pend_token.tpt_update_thread_sfi = 1;

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
	{
		thread_time_constraint_with_priority_policy_t info;

		/*
		 * Both flavors share a layout prefix (see the static_asserts
		 * above), so each can be read through the wider struct once the
		 * count has been validated for the requested flavor.
		 */
		mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
		    THREAD_TIME_CONSTRAINT_POLICY_COUNT :
		    THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);

		if (count < min_count) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_with_priority_policy_t)policy_info;


		/* Computation must fit inside the constraint and within the RT quantum bounds. */
		if (info->constraint < info->computation ||
		    info->computation > max_rt_quantum ||
		    info->computation < min_rt_quantum) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Raise computation to at least half the constraint, capped at max_rt_quantum. */
		if (info->computation < (info->constraint / 2)) {
			info->computation = (info->constraint / 2);
			if (info->computation > max_rt_quantum) {
				info->computation = max_rt_quantum;
			}
		}

		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			/* Explicit realtime priorities must lie in [BASEPRI_RTQUEUES, MAXPRI]. */
			if ((info->priority < BASEPRI_RTQUEUES) || (info->priority > MAXPRI)) {
				result = KERN_INVALID_ARGUMENT;
				break;
			}
		}

		spl_t s = splsched();
		thread_lock(thread);

		thread->realtime.period          = info->period;
		thread->realtime.computation     = info->computation;
		thread->realtime.constraint      = info->constraint;
		thread->realtime.preemptible     = info->preemptible;
		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			/* Offset is stored relative to BASEPRI_RTQUEUES; range-checked above. */
			thread->realtime.priority_offset = (uint8_t)(info->priority - BASEPRI_RTQUEUES);
		} else {
			thread->realtime.priority_offset = 0;
			/* Or check for override from allowed thread group? */
		}

		thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);

		thread_unlock(thread);
		splx(s);

		pend_token.tpt_update_thread_sfi = 1;

		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t info;

		if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}
		info = (thread_precedence_policy_t)policy_info;

		spl_t s = splsched();
		thread_lock(thread);

		/* Importance feeds directly into the base priority computation. */
		thread->importance = info->importance;

		thread_recompute_priority(thread);

		thread_unlock(thread);
		splx(s);

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		thread_affinity_policy_t info;

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t) policy_info;
		/*
		 * Unlock the thread mutex here and
		 * return directly after calling thread_affinity_set().
		 * This is necessary for correct lock ordering because
		 * thread_affinity_set() takes the task lock.
		 */
		thread_mtx_unlock(thread);
		return thread_affinity_set(thread, info->affinity_tag);
	}

#if !defined(XNU_TARGET_OS_OSX)
	case THREAD_BACKGROUND_POLICY:
	{
		thread_background_policy_t info;

		if (count < THREAD_BACKGROUND_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Only the thread's own task may background it through this flavor. */
		if (get_threadtask(thread) != current_task()) {
			result = KERN_PROTECTION_FAILURE;
			break;
		}

		info = (thread_background_policy_t) policy_info;

		int enable;

		if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG) {
			enable = TASK_POLICY_ENABLE;
		} else {
			enable = TASK_POLICY_DISABLE;
		}

		/* Self-set counts as internal; a set by another thread is external. */
		int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;

		proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token);

		break;
	}
#endif /* !defined(XNU_TARGET_OS_OSX) */

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		thread_throughput_qos_t tqos;

		if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS) {
			break;
		}

		tqos = qos_extract(info->thread_throughput_qos_tier);

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);

		break;
	}

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		thread_latency_qos_t lqos;

		if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS) {
			break;
		}

		lqos = qos_extract(info->thread_latency_qos_tier);

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);

		break;
	}

	case THREAD_QOS_POLICY:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Tier importance must lie in [THREAD_QOS_MIN_TIER_IMPORTANCE, 0]. */
		if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* A relative priority is meaningless without a QoS tier to anchor it. */
		if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* The relprio value passed down is the negated (non-negative) importance. */
		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
		    info->qos_tier, -info->tier_importance, &pend_token);

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	/* Perform deferred policy work (sockets, SFI, ...) with no locks held. */
	thread_policy_update_complete_unlocked(thread, &pend_token);

	return result;
}
593 
594 /*
595  * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
596  * Both result in FIXED mode scheduling.
597  */
598 static sched_mode_t
convert_policy_to_sched_mode(integer_t policy)599 convert_policy_to_sched_mode(integer_t policy)
600 {
601 	switch (policy) {
602 	case POLICY_TIMESHARE:
603 		return TH_MODE_TIMESHARE;
604 	case POLICY_RR:
605 	case POLICY_FIFO:
606 		return TH_MODE_FIXED;
607 	default:
608 		panic("unexpected sched policy: %d", policy);
609 		return TH_MODE_NONE;
610 	}
611 }
612 
613 /*
614  * Called either with the thread mutex locked
615  * or from the pthread kext in a 'safe place'.
616  */
/*
 * Convert an absolute priority request into an importance value relative to
 * the thread's task priority, then apply the requested scheduling mode.
 * Fails with KERN_FAILURE if the thread is (or is demoted from) realtime,
 * because this path may not move a thread out of TH_MODE_REALTIME.
 */
static kern_return_t
thread_set_mode_and_absolute_pri_internal(thread_t              thread,
    sched_mode_t          mode,
    integer_t             priority,
    task_pend_token_t     pend_token)
{
	kern_return_t kr = KERN_SUCCESS;

	spl_t s = splsched();
	thread_lock(thread);

	/* This path isn't allowed to change a thread out of realtime. */
	if ((thread->sched_mode == TH_MODE_REALTIME) ||
	    (thread->saved_mode == TH_MODE_REALTIME)) {
		kr = KERN_FAILURE;
		goto unlock;
	}

	/* policy_reset threads silently ignore policy changes but report success. */
	if (thread->policy_reset) {
		kr = KERN_SUCCESS;
		goto unlock;
	}

	sched_mode_t old_mode = thread->sched_mode;

	/*
	 * Reverse engineer and apply the correct importance value
	 * from the requested absolute priority value.
	 *
	 * TODO: Store the absolute priority value instead
	 */

	/* Rebase the priority into an offset depending on which band it falls in. */
	if (priority >= thread->max_priority) {
		priority = thread->max_priority - thread->task_priority;
	} else if (priority >= MINPRI_KERNEL) {
		priority -=  MINPRI_KERNEL;
	} else if (priority >= MINPRI_RESERVED) {
		priority -=  MINPRI_RESERVED;
	} else {
		priority -= BASEPRI_DEFAULT;
	}

	priority += thread->task_priority;

	/* Clamp the rebased priority to [MINPRI, max_priority]. */
	if (priority > thread->max_priority) {
		priority = thread->max_priority;
	} else if (priority < MINPRI) {
		priority = MINPRI;
	}

	thread->importance = priority - thread->task_priority;

	thread_set_user_sched_mode_and_recompute_pri(thread, mode);

	/* Only re-evaluate SFI if the scheduling mode actually changed. */
	if (mode != old_mode) {
		pend_token->tpt_update_thread_sfi = 1;
	}

unlock:
	thread_unlock(thread);
	splx(s);

	return kr;
}
681 
/*
 * Freeze the current thread's base priority: while TH_SFLAG_BASE_PRI_FROZEN
 * is set, base-priority updates are deferred and applied later by
 * thread_unfreeze_base_pri().  Only valid on the current thread.
 */
void
thread_freeze_base_pri(thread_t thread)
{
	assert(thread == current_thread());

	spl_t s = splsched();
	thread_lock(thread);

	/* Freezing is not re-entrant. */
	assert((thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN) == 0);
	thread->sched_flags |= TH_SFLAG_BASE_PRI_FROZEN;

	thread_unlock(thread);
	splx(s);
}
696 
/*
 * Undo thread_freeze_base_pri(): apply any base-priority change that was
 * deferred (latched in req_base_pri) while frozen.  Returns true if applying
 * the change left a preemption AST pending, i.e. the base-pri change is the
 * most likely cause of an imminent preemption.  Only valid on the current
 * thread.
 */
bool
thread_unfreeze_base_pri(thread_t thread)
{
	assert(thread == current_thread());
	integer_t base_pri;
	ast_t ast = 0;

	spl_t s = splsched();
	thread_lock(thread);

	assert(thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN);
	thread->sched_flags &= ~TH_SFLAG_BASE_PRI_FROZEN;

	base_pri = thread->req_base_pri;
	if (base_pri != thread->base_pri) {
		/*
		 * This function returns "true" if the base pri change
		 * is the most likely cause for the preemption.
		 */
		sched_set_thread_base_priority(thread, base_pri);
		ast = ast_peek(AST_PREEMPT);
	}

	thread_unlock(thread);
	splx(s);

	return ast != 0;
}
725 
726 uint8_t
thread_workq_pri_for_qos(thread_qos_t qos)727 thread_workq_pri_for_qos(thread_qos_t qos)
728 {
729 	assert(qos < THREAD_QOS_LAST);
730 	return (uint8_t)thread_qos_policy_params.qos_pri[qos];
731 }
732 
733 thread_qos_t
thread_workq_qos_for_pri(int priority)734 thread_workq_qos_for_pri(int priority)
735 {
736 	thread_qos_t qos;
737 	if (priority > thread_qos_policy_params.qos_pri[THREAD_QOS_USER_INTERACTIVE]) {
738 		// indicate that workq should map >UI threads to workq's
739 		// internal notation for above-UI work.
740 		return THREAD_QOS_UNSPECIFIED;
741 	}
742 	for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
743 		// map a given priority up to the next nearest qos band.
744 		if (thread_qos_policy_params.qos_pri[qos - 1] < priority) {
745 			return qos;
746 		}
747 	}
748 	return THREAD_QOS_MAINTENANCE;
749 }
750 
/*
 * private interface for pthread workqueues
 *
 * Reset a workqueue thread to the given requested QoS, clearing any
 * workqueue QoS override in the same critical section.
 * May be called with spinlocks held
 * Thread mutex lock is not held
 */
void
thread_reset_workq_qos(thread_t thread, uint32_t qos)
{
	struct task_pend_token pend_token = {};

	assert(qos < THREAD_QOS_LAST);

	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0,
	    &pend_token);

	/* QoS-only changes should never require socket rethrottling. */
	assert(pend_token.tpt_update_sockets == 0);

	thread_unlock(thread);
	splx(s);

	thread_policy_update_complete_unlocked(thread, &pend_token);
}
781 
/*
 * private interface for pthread workqueues
 *
 * Apply a workqueue QoS override to a thread
 * (THREAD_QOS_UNSPECIFIED clears the override).
 * May be called with spinlocks held
 * Thread mutex lock is held
 */
void
thread_set_workq_override(thread_t thread, uint32_t qos)
{
	struct task_pend_token pend_token = {};

	assert(qos < THREAD_QOS_LAST);

	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_WORKQ_OVERRIDE, qos, 0, &pend_token);

	/* QoS-only changes should never require socket rethrottling. */
	assert(pend_token.tpt_update_sockets == 0);

	thread_unlock(thread);
	splx(s);

	thread_policy_update_complete_unlocked(thread, &pend_token);
}
809 
810 /*
811  * private interface for pthread workqueues
812  *
813  * Set scheduling policy & absolute priority for thread
814  * May be called with spinlocks held
815  * Thread mutex lock is not held
816  */
/*
 * Set a workqueue thread's QoS, sched mode, and absolute priority in one
 * shot, clearing any workqueue QoS override.  Only valid on static-param
 * (workqueue-owned) threads; misuse or a racing termination is silently
 * ignored when asserts are compiled out.
 */
void
thread_set_workq_pri(thread_t  thread,
    thread_qos_t qos,
    integer_t priority,
    integer_t policy)
{
	struct task_pend_token pend_token = {};
	sched_mode_t mode = convert_policy_to_sched_mode(policy);

	assert(qos < THREAD_QOS_LAST);
	assert(thread->static_param);

	/* Guard the same conditions at runtime for builds without asserts. */
	if (!thread->static_param || !thread->active) {
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	/* Set the requested QoS and clear any workq override in one critical section. */
	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED,
	    0, &pend_token);

	thread_unlock(thread);
	splx(s);

	/* Concern: this doesn't hold the mutex... */

	__assert_only kern_return_t kr;
	kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority,
	    &pend_token);
	assert(kr == KERN_SUCCESS);

	if (pend_token.tpt_update_thread_sfi) {
		sfi_reevaluate(thread);
	}
}
856 
857 /*
858  * thread_set_mode_and_absolute_pri:
859  *
860  * Set scheduling policy & absolute priority for thread, for deprecated
861  * thread_set_policy and thread_policy interfaces.
862  *
863  * Called with nothing locked.
864  */
/*
 * Validate the thread, strip any requested QoS (legacy policies and QoS are
 * mutually exclusive), then delegate to
 * thread_set_mode_and_absolute_pri_internal().  Called with nothing locked;
 * returns KERN_TERMINATED for inactive threads and KERN_POLICY_STATIC for
 * static-param threads.
 */
kern_return_t
thread_set_mode_and_absolute_pri(thread_t   thread,
    integer_t  policy,
    integer_t  priority)
{
	kern_return_t kr = KERN_SUCCESS;
	struct task_pend_token pend_token = {};

	sched_mode_t mode = convert_policy_to_sched_mode(policy);

	thread_mtx_lock(thread);

	if (!thread->active) {
		kr = KERN_TERMINATED;
		goto unlock;
	}

	if (thread_is_static_param(thread)) {
		kr = KERN_POLICY_STATIC;
		goto unlock;
	}

	/* Setting legacy policies on threads kills the current QoS */
	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		thread_remove_qos_policy_locked(thread, &pend_token);
	}

	kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);

unlock:
	thread_mtx_unlock(thread);

	/* Flush any deferred policy work (safe even when pend_token is empty). */
	thread_policy_update_complete_unlocked(thread, &pend_token);

	return kr;
}
901 
902 /*
903  * Set the thread's requested mode and recompute priority
904  * Called with thread mutex and thread locked
905  *
906  * TODO: Mitigate potential problems caused by moving thread to end of runq
907  * whenever its priority is recomputed
908  *      Only remove when it actually changes? Attempt to re-insert at appropriate location?
909  */
static void
thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
{
	/* policy_reset threads ignore requested mode changes. */
	if (thread->policy_reset) {
		return;
	}

	/* Pull the thread off its run queue (if queued) so its priority can change safely. */
	boolean_t removed = thread_run_queue_remove(thread);

	/*
	 * TODO: Instead of having saved mode, have 'user mode' and 'true mode'.
	 * That way there's zero confusion over which the user wants
	 * and which the kernel wants.
	 */
	if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK) {
		/* While demoted, only record the user's desired mode for later restore. */
		thread->saved_mode = mode;
	} else {
		sched_set_thread_mode(thread, mode);
	}

	thread_recompute_priority(thread);

	/* Re-queue at the tail if we removed the thread above. */
	if (removed) {
		thread_run_queue_reinsert(thread, SCHED_TAILQ);
	}
}
936 
/* called at splsched with thread lock locked */
static void
thread_update_qos_cpu_time_locked(thread_t thread)
{
	task_t task = get_threadtask(thread);
	uint64_t timer_sum, timer_delta;

	/*
	 * This is only as accurate as the distance between
	 * last context switch (embedded) or last user/kernel boundary transition (desktop)
	 * because user_timer and system_timer are only updated then.
	 *
	 * TODO: Consider running a timer_update operation here to update it first.
	 *       Maybe doable with interrupts disabled from current thread.
	 *       If the thread is on a different core, may not be easy to get right.
	 *
	 * TODO: There should be a function for this in timer.c
	 */

	/* Total CPU time this thread has consumed (user + system) */
	timer_sum = timer_grab(&thread->user_timer);
	timer_sum += timer_grab(&thread->system_timer);
	/* Amount consumed since this accounting last ran for this thread */
	timer_delta = timer_sum - thread->vtimer_qos_save;

	thread->vtimer_qos_save = timer_sum;

	/* Points at the per-QoS bucket the delta is charged to, below */
	uint64_t* task_counter = NULL;

	/* Update the task-level effective and requested qos stats atomically, because we don't have the task lock. */
	switch (thread->effective_policy.thep_qos) {
	case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default; break;
	case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance; break;
	case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background; break;
	case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility; break;
	case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy; break;
	case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated; break;
	case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break;
	default:
		panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
	}

	OSAddAtomic64(timer_delta, task_counter);

	/* Update the task-level qos stats atomically, because we don't have the task lock. */
	switch (thread->requested_policy.thrp_qos) {
	case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default; break;
	case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance; break;
	case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background; break;
	case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility; break;
	case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy; break;
	case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated; break;
	case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break;
	default:
		panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos);
	}

	OSAddAtomic64(timer_delta, task_counter);
}
994 
995 /*
996  * called with no thread locks held
997  * may hold task lock
998  */
999 void
thread_update_qos_cpu_time(thread_t thread)1000 thread_update_qos_cpu_time(thread_t thread)
1001 {
1002 	thread_mtx_lock(thread);
1003 
1004 	spl_t s = splsched();
1005 	thread_lock(thread);
1006 
1007 	thread_update_qos_cpu_time_locked(thread);
1008 
1009 	thread_unlock(thread);
1010 	splx(s);
1011 
1012 	thread_mtx_unlock(thread);
1013 }
1014 
1015 /*
1016  * Calculate base priority from thread attributes, and set it on the thread
1017  *
1018  * Called with thread_lock and thread mutex held.
1019  */
1020 extern boolean_t vps_dynamic_priority_enabled;
1021 
void
thread_recompute_priority(
	thread_t                thread)
{
	integer_t               priority;

	/* Priority is frozen once the thread has been reset for termination */
	if (thread->policy_reset) {
		return;
	}

	if (thread->sched_mode == TH_MODE_REALTIME) {
		/* Realtime: base priority is BASEPRI_RTQUEUES plus the RT offset */
		uint8_t i = thread->realtime.priority_offset;
		assert((i >= 0) && (i < NRTQS));
		priority = BASEPRI_RTQUEUES + i;
		sched_set_thread_base_priority(thread, priority);
		if (thread->realtime.deadline == RT_DEADLINE_NONE) {
			/* Make sure the thread has a valid deadline */
			uint64_t ctime = mach_absolute_time();
			thread->realtime.deadline = thread->realtime.constraint + ctime;
			KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SET_RT_DEADLINE) | DBG_FUNC_NONE,
			    (uintptr_t)thread_tid(thread), thread->realtime.deadline, thread->realtime.computation, 1);
		}
		/* Realtime threads skip all of the clamping logic below */
		return;
	} else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
		/* QoS-based priority: table lookup plus scaled relative priority */
		int qos = thread->effective_policy.thep_qos;
		int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
		int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
		int qos_scaled_relprio;

		assert(qos >= 0 && qos < THREAD_QOS_LAST);
		assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);

		priority = thread_qos_policy_params.qos_pri[qos];
		qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);

		if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) {
			/* Bump priority 46 to 47 when in a frontmost app */
			qos_scaled_relprio += 1;
		}

		/* TODO: factor in renice priority here? */

		priority += qos_scaled_relprio;
	} else {
		/* Legacy path: task priority plus clamped thread importance */
		if (thread->importance > MAXPRI) {
			priority = MAXPRI;
		} else if (thread->importance < -MAXPRI) {
			priority = -MAXPRI;
		} else {
			priority = thread->importance;
		}

		priority += thread->task_priority;
	}

	/* A user promotion (e.g. turnstile push) can raise the floor */
	priority = MAX(priority, thread->user_promotion_basepri);

	/*
	 * Clamp priority back into the allowed range for this task.
	 *  The initial priority value could be out of this range due to:
	 *      Task clamped to BG or Utility (max-pri is 4, or 20)
	 *      Task is user task (max-pri is 63)
	 *      Task is kernel task (max-pri is 95)
	 * Note that thread->importance is user-settable to any integer
	 * via THREAD_PRECEDENCE_POLICY.
	 */
	if (priority > thread->max_priority) {
		if (thread->effective_policy.thep_promote_above_task) {
			/* Promotions are allowed to pierce the task's ceiling */
			priority = MAX(thread->max_priority, thread->user_promotion_basepri);
		} else {
			priority = thread->max_priority;
		}
	} else if (priority < MINPRI) {
		priority = MINPRI;
	}

	/* A failsafe-demoted realtime thread runs at depressed priority */
	if (thread->saved_mode == TH_MODE_REALTIME &&
	    thread->sched_flags & TH_SFLAG_FAILSAFE) {
		priority = DEPRESSPRI;
	}

	if (thread->effective_policy.thep_terminated == TRUE) {
		/*
		 * We temporarily want to override the expected priority to
		 * ensure that the thread exits in a timely manner.
		 * Note that this is allowed to exceed thread->max_priority
		 * so that the thread is no longer clamped to background
		 * during the final exit phase.
		 */
		if (priority < thread->task_priority) {
			priority = thread->task_priority;
		}
		if (priority < BASEPRI_DEFAULT) {
			priority = BASEPRI_DEFAULT;
		}
	}

#if !defined(XNU_TARGET_OS_OSX)
	/* No one can have a base priority less than MAXPRI_THROTTLE */
	if (priority < MAXPRI_THROTTLE) {
		priority = MAXPRI_THROTTLE;
	}
#endif /* !defined(XNU_TARGET_OS_OSX) */

	sched_set_thread_base_priority(thread, priority);
}
1128 
/* Called with the task lock held, but not the thread mutex or spinlock */
void
thread_policy_update_tasklocked(
	thread_t           thread,
	integer_t          priority,
	integer_t          max_priority,
	task_pend_token_t  pend_token)
{
	thread_mtx_lock(thread);

	if (!thread->active || thread->policy_reset) {
		/* Thread is terminating or frozen; don't touch its policy */
		thread_mtx_unlock(thread);
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	/*
	 * NOTE(review): tagged __unused although it is read by the
	 * demote/undemote checks below -- presumably for build configs
	 * where those checks compile out; confirm against full source.
	 */
	__unused
	integer_t old_max_priority = thread->max_priority;

	/* The thread fields are int16_t; validate the wider inputs first */
	assert(priority >= INT16_MIN && priority <= INT16_MAX);
	thread->task_priority = (int16_t)priority;

	assert(max_priority >= INT16_MIN && max_priority <= INT16_MAX);
	thread->max_priority = (int16_t)max_priority;

	/*
	 * When backgrounding a thread, realtime and fixed priority threads
	 * should be demoted to timeshare background threads.
	 *
	 * TODO: Do this inside the thread policy update routine in order to avoid double
	 * remove/reinsert for a runnable thread
	 */
	if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
		/* Crossed into the throttled band: demote */
		sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED);
	} else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
		/* Crossed out of the throttled band: undemote */
		sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
	}

	thread_policy_update_spinlocked(thread, true, pend_token);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);
}
1176 
1177 /*
1178  * Reset thread to default state in preparation for termination
1179  * Called with thread mutex locked
1180  *
1181  * Always called on current thread, so we don't need a run queue remove
1182  */
void
thread_policy_reset(
	thread_t                thread)
{
	spl_t           s;

	/* Only valid on the current thread, so no run queue remove is needed */
	assert(thread == current_thread());

	s = splsched();
	thread_lock(thread);

	/* Lift any outstanding demotions before resetting state */
	if (thread->sched_flags & TH_SFLAG_FAILSAFE) {
		sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
	}

	if (thread->sched_flags & TH_SFLAG_THROTTLED) {
		sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
	}

	/* At this point, the various demotions should be inactive */
	assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
	assert(!(thread->sched_flags & TH_SFLAG_THROTTLED));
	assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK));

	/* Reset thread back to task-default basepri and mode  */
	sched_mode_t newmode = SCHED(initial_thread_sched_mode)(get_threadtask(thread));

	sched_set_thread_mode(thread, newmode);

	thread->importance = 0;

	/* Prevent further changes to thread base priority or mode */
	thread->policy_reset = 1;

	sched_set_thread_base_priority(thread, thread->task_priority);

	thread_unlock(thread);
	splx(s);
}
1222 
/*
 * Return a thread's policy parameters for the given flavor.
 *
 * policy_info/count describe the caller's buffer; *count is validated
 * against the flavor's minimum size. If *get_default is TRUE on entry
 * (or the thread is not in the mode the flavor describes), default
 * values are reported instead and *get_default may be set TRUE on
 * return to tell the caller so.
 */
kern_return_t
thread_policy_get(
	thread_t                                thread,
	thread_policy_flavor_t  flavor,
	thread_policy_t                 policy_info,
	mach_msg_type_number_t  *count,
	boolean_t                               *get_default)
{
	kern_return_t                   result = KERN_SUCCESS;

	if (thread == THREAD_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return KERN_TERMINATED;
	}

	switch (flavor) {
	case THREAD_EXTENDED_POLICY:
	{
		boolean_t               timeshare = TRUE;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			if ((thread->sched_mode != TH_MODE_REALTIME) &&
			    (thread->saved_mode != TH_MODE_REALTIME)) {
				if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
					timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
				} else {
					/* Demoted: report the mode the thread will return to */
					timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
				}
			} else {
				/* Realtime threads report the default extended policy */
				*get_default = TRUE;
			}

			thread_unlock(thread);
			splx(s);
		}

		if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t        info;

			info = (thread_extended_policy_t)policy_info;
			info->timeshare = timeshare;
		}

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
	{
		/*
		 * Both flavors share the larger struct; the WITH_PRIORITY
		 * variant additionally fills in info->priority.
		 */
		thread_time_constraint_with_priority_policy_t         info;

		mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
		    THREAD_TIME_CONSTRAINT_POLICY_COUNT :
		    THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);

		if (*count < min_count) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_with_priority_policy_t)policy_info;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			if ((thread->sched_mode == TH_MODE_REALTIME) ||
			    (thread->saved_mode == TH_MODE_REALTIME)) {
				info->period = thread->realtime.period;
				info->computation = thread->realtime.computation;
				info->constraint = thread->realtime.constraint;
				info->preemptible = thread->realtime.preemptible;
				if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
					info->priority = thread->realtime.priority_offset + BASEPRI_RTQUEUES;
				}
			} else {
				/* Not realtime: fall through to the defaults below */
				*get_default = TRUE;
			}

			thread_unlock(thread);
			splx(s);
		}

		if (*get_default) {
			info->period = 0;
			info->computation = default_timeshare_computation;
			info->constraint = default_timeshare_constraint;
			info->preemptible = TRUE;
			if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
				info->priority = BASEPRI_RTQUEUES;
			}
		}


		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t              info;

		if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_precedence_policy_t)policy_info;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			info->importance = thread->importance;

			thread_unlock(thread);
			splx(s);
		} else {
			info->importance = 0;
		}

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		thread_affinity_policy_t                info;

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (*count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t)policy_info;

		if (!(*get_default)) {
			info->affinity_tag = thread_affinity_get(thread);
		} else {
			info->affinity_tag = THREAD_AFFINITY_TAG_NULL;
		}

		break;
	}

	case THREAD_POLICY_STATE:
	{
		thread_policy_state_t           info;

		if (*count < THREAD_POLICY_STATE_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Only root can get this info */
		if (!task_is_privileged(current_task())) {
			result = KERN_PROTECTION_FAILURE;
			break;
		}

		info = (thread_policy_state_t)(void*)policy_info;

		if (!(*get_default)) {
			info->flags = 0;

			spl_t s = splsched();
			thread_lock(thread);

			info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0);

			/* Export the raw policy bitfields for debugging tools */
			info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy);
			info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy);

			info->thps_user_promotions          = 0;
			info->thps_user_promotion_basepri   = thread->user_promotion_basepri;
			info->thps_ipc_overrides            = thread->kevent_overrides;

			proc_get_thread_policy_bitfield(thread, info);

			thread_unlock(thread);
			splx(s);
		} else {
			info->requested = 0;
			info->effective = 0;
			info->pending = 0;
		}

		break;
	}

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		thread_latency_qos_t plqos;

		if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			plqos = 0;
		} else {
			plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL);
		}

		info->thread_latency_qos_tier = qos_latency_policy_package(plqos);
	}
	break;

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		thread_throughput_qos_t ptqos;

		if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			ptqos = 0;
		} else {
			ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL);
		}

		info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);
	}
	break;

	case THREAD_QOS_POLICY:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (*count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (!(*get_default)) {
			int relprio_value = 0;
			info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
			    TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);

			/* Relative priority is stored inverted; flip it back for the caller */
			info->tier_importance = -relprio_value;
		} else {
			info->qos_tier = THREAD_QOS_UNSPECIFIED;
			info->tier_importance = 0;
		}

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	return result;
}
1496 
/*
 * Run the initial effective-policy calculation for a newly created
 * thread, bracketed by importance kdebug trace events.
 */
void
thread_policy_create(thread_t thread)
{
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);

	/* We pass a pend token but ignore it */
	struct task_pend_token pend_token = {};

	thread_policy_update_internal_spinlocked(thread, true, &pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);
}
1515 
/*
 * Recompute the thread's effective policy, emitting kdebug start/end
 * trace events around the internal update.
 */
static void
thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token)
{
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);

	thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);
}
1531 
1532 
1533 
1534 /*
1535  * One thread state update function TO RULE THEM ALL
1536  *
1537  * This function updates the thread effective policy fields
1538  * and pushes the results to the relevant subsystems.
1539  *
1540  * Returns TRUE if a pended action needs to be run.
1541  *
1542  * Called with thread spinlock locked, task may be locked, thread mutex may be locked
1543  */
static void
thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority,
    task_pend_token_t pend_token)
{
	/*
	 * Step 1:
	 *  Gather requested policy and effective task state
	 */

	struct thread_requested_policy requested = thread->requested_policy;
	struct task_effective_policy task_effective = get_threadtask(thread)->effective_policy;

	/*
	 * Step 2:
	 *  Calculate new effective policies from requested policy, task and thread state
	 *  Rules:
	 *      Don't change requested, it won't take effect
	 */

	struct thread_effective_policy next = {};

	next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;

	uint32_t next_qos = requested.thrp_qos;

	/* Overrides/promotions only apply if the thread has a requested QoS at all */
	if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		next_qos = MAX(requested.thrp_qos_override, next_qos);
		next_qos = MAX(requested.thrp_qos_promote, next_qos);
		next_qos = MAX(requested.thrp_qos_kevent_override, next_qos);
		next_qos = MAX(requested.thrp_qos_wlsvc_override, next_qos);
		next_qos = MAX(requested.thrp_qos_workq_override, next_qos);
	}

	if (task_effective.tep_darwinbg && task_effective.tep_adaptive_bg &&
	    requested.thrp_qos_promote > THREAD_QOS_BACKGROUND) {
		/*
		 * This thread is turnstile-boosted higher than the adaptive clamp
		 * by a synchronous waiter. Allow that to override the adaptive
		 * clamp temporarily for this thread only.
		 */
		next.thep_promote_above_task = true;
		next_qos = requested.thrp_qos_promote;
	}

	next.thep_qos = next_qos;

	/* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
	if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
		if (next.thep_qos != THREAD_QOS_UNSPECIFIED) {
			next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
		} else {
			next.thep_qos = task_effective.tep_qos_clamp;
		}
	}

	/*
	 * Extract outbound-promotion QoS before applying task ceiling or BG clamp
	 * This allows QoS promotions to work properly even after the process is unclamped.
	 */
	next.thep_qos_promote = next.thep_qos;

	/* The ceiling only applies to threads that are in the QoS world */
	/* TODO: is it appropriate for this to limit a turnstile-boosted thread's QoS? */
	if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
	    next.thep_qos != THREAD_QOS_UNSPECIFIED) {
		next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
	}

	/*
	 * The QoS relative priority is only applicable when the original programmer's
	 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
	 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
	 * since otherwise it would be lower than unclamped threads. Similarly, in the
	 * presence of boosting, the programmer doesn't know what other actors
	 * are boosting the thread.
	 */
	if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
	    (requested.thrp_qos == next.thep_qos) &&
	    (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
		next.thep_qos_relprio = requested.thrp_qos_relprio;
	} else {
		next.thep_qos_relprio = 0;
	}

	/* Calculate DARWIN_BG */
	bool wants_darwinbg        = false;
	bool wants_all_sockets_bg  = false; /* Do I want my existing sockets to be bg */

	/* Task-level BG applies unless this thread is promoted above the task */
	if (task_effective.tep_darwinbg && !next.thep_promote_above_task) {
		wants_darwinbg = true;
	}

	/*
	 * If DARWIN_BG has been requested at either level, it's engaged.
	 * darwinbg threads always create bg sockets,
	 * but only some types of darwinbg change the sockets
	 * after they're created
	 */
	if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg) {
		wants_all_sockets_bg = wants_darwinbg = true;
	}

	if (requested.thrp_pidbind_bg) {
		wants_all_sockets_bg = wants_darwinbg = true;
	}

	/* BG and MAINTENANCE QoS imply darwinbg */
	if (next.thep_qos == THREAD_QOS_BACKGROUND ||
	    next.thep_qos == THREAD_QOS_MAINTENANCE) {
		wants_darwinbg = true;
	}

	/* Calculate side effects of DARWIN_BG */

	if (wants_darwinbg) {
		next.thep_darwinbg = 1;
	}

	if (next.thep_darwinbg || task_effective.tep_new_sockets_bg) {
		next.thep_new_sockets_bg = 1;
	}

	/* Don't use task_effective.tep_all_sockets_bg here */
	if (wants_all_sockets_bg) {
		next.thep_all_sockets_bg = 1;
	}

	/* darwinbg implies background QOS (or lower) */
	if (next.thep_darwinbg &&
	    (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
		next.thep_qos = THREAD_QOS_BACKGROUND;
		next.thep_qos_relprio = 0;
	}

	/* Calculate IO policy */

	int iopol = THROTTLE_LEVEL_TIER0;

	/* Factor in the task's IO policy */
	if (next.thep_darwinbg) {
		iopol = MAX(iopol, task_effective.tep_bg_iotier);
	}

	if (!next.thep_promote_above_task) {
		iopol = MAX(iopol, task_effective.tep_io_tier);
	}

	/* Look up the associated IO tier value for the QoS class */
	iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);

	iopol = MAX(iopol, requested.thrp_int_iotier);
	iopol = MAX(iopol, requested.thrp_ext_iotier);

	next.thep_io_tier = iopol;

	/*
	 * If a QoS override is causing IO to go into a lower tier, we also set
	 * the passive bit so that a thread doesn't end up stuck in its own throttle
	 * window when the override goes away.
	 */

	int next_qos_iotier = thread_qos_policy_params.qos_iotier[next.thep_qos];
	int req_qos_iotier = thread_qos_policy_params.qos_iotier[requested.thrp_qos];
	bool qos_io_override_active = (next_qos_iotier < req_qos_iotier);

	/* Calculate Passive IO policy */
	if (requested.thrp_ext_iopassive ||
	    requested.thrp_int_iopassive ||
	    qos_io_override_active ||
	    task_effective.tep_io_passive) {
		next.thep_io_passive = 1;
	}

	/* Calculate timer QOS */
	uint32_t latency_qos = requested.thrp_latency_qos;

	if (!next.thep_promote_above_task) {
		latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
	}

	latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);

	next.thep_latency_qos = latency_qos;

	/* Calculate throughput QOS */
	uint32_t through_qos = requested.thrp_through_qos;

	if (!next.thep_promote_above_task) {
		through_qos = MAX(through_qos, task_effective.tep_through_qos);
	}

	through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);

	next.thep_through_qos = through_qos;

	if (task_effective.tep_terminated || requested.thrp_terminated) {
		/* Shoot down the throttles that slow down exit or response to SIGTERM */
		next.thep_terminated    = 1;
		next.thep_darwinbg      = 0;
		next.thep_io_tier       = THROTTLE_LEVEL_TIER0;
		next.thep_qos           = THREAD_QOS_UNSPECIFIED;
		next.thep_latency_qos   = LATENCY_QOS_TIER_UNSPECIFIED;
		next.thep_through_qos   = THROUGHPUT_QOS_TIER_UNSPECIFIED;
	}

	/*
	 * Step 3:
	 *  Swap out old policy for new policy
	 */

	struct thread_effective_policy prev = thread->effective_policy;

	/* Flush CPU-time accounting into the old QoS bucket before switching */
	thread_update_qos_cpu_time_locked(thread);

	/* This is the point where the new values become visible to other threads */
	thread->effective_policy = next;

	/*
	 * Step 4:
	 *  Pend updates that can't be done while holding the thread lock
	 */

	if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg) {
		pend_token->tpt_update_sockets = 1;
	}

	/* TODO: Doesn't this only need to be done if the throttle went up? */
	if (prev.thep_io_tier != next.thep_io_tier) {
		pend_token->tpt_update_throttle = 1;
	}

	/*
	 * Check for the attributes that sfi_thread_classify() consults,
	 *  and trigger SFI re-evaluation.
	 */
	if (prev.thep_qos != next.thep_qos ||
	    prev.thep_darwinbg != next.thep_darwinbg) {
		pend_token->tpt_update_thread_sfi = 1;
	}

	integer_t old_base_pri = thread->base_pri;

	/*
	 * Step 5:
	 *  Update other subsystems as necessary if something has changed
	 */

	/* Check for the attributes that thread_recompute_priority() consults */
	if (prev.thep_qos != next.thep_qos ||
	    prev.thep_qos_relprio != next.thep_qos_relprio ||
	    prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent ||
	    prev.thep_promote_above_task != next.thep_promote_above_task ||
	    prev.thep_terminated != next.thep_terminated ||
	    pend_token->tpt_force_recompute_pri == 1 ||
	    recompute_priority) {
		thread_recompute_priority(thread);
	}

	/*
	 * Check if the thread is waiting on a turnstile and needs priority propagation.
	 */
	if (pend_token->tpt_update_turnstile &&
	    ((old_base_pri == thread->base_pri) ||
	    !thread_get_waiting_turnstile(thread))) {
		/*
		 * Reset update turnstile pend token since either
		 * the thread priority did not change or thread is
		 * not blocked on a turnstile.
		 */
		pend_token->tpt_update_turnstile = 0;
	}
}
1815 
1816 
1817 /*
1818  * Initiate a thread policy state transition on a thread with its TID
1819  * Useful if you cannot guarantee the thread won't get terminated
1820  * Precondition: No locks are held
1821  * Will take task lock - using the non-tid variant is faster
1822  * if you already have a thread ref.
1823  */
1824 void
proc_set_thread_policy_with_tid(task_t task,uint64_t tid,int category,int flavor,int value)1825 proc_set_thread_policy_with_tid(task_t     task,
1826     uint64_t   tid,
1827     int        category,
1828     int        flavor,
1829     int        value)
1830 {
1831 	/* takes task lock, returns ref'ed thread or NULL */
1832 	thread_t thread = task_findtid(task, tid);
1833 
1834 	if (thread == THREAD_NULL) {
1835 		return;
1836 	}
1837 
1838 	proc_set_thread_policy(thread, category, flavor, value);
1839 
1840 	thread_deallocate(thread);
1841 }
1842 
1843 /*
1844  * Initiate a thread policy transition on a thread
1845  * This path supports networking transitions (i.e. darwinbg transitions)
1846  * Precondition: No locks are held
1847  */
1848 void
proc_set_thread_policy(thread_t thread,int category,int flavor,int value)1849 proc_set_thread_policy(thread_t   thread,
1850     int        category,
1851     int        flavor,
1852     int        value)
1853 {
1854 	struct task_pend_token pend_token = {};
1855 
1856 	thread_mtx_lock(thread);
1857 
1858 	proc_set_thread_policy_locked(thread, category, flavor, value, 0, &pend_token);
1859 
1860 	thread_mtx_unlock(thread);
1861 
1862 	thread_policy_update_complete_unlocked(thread, &pend_token);
1863 }
1864 
/*
 * Do the things that can't be done while holding a thread mutex.
 * These are set up to call back into thread policy to get the latest value,
 * so they don't have to be synchronized with the update.
 * The only required semantic is 'call this sometime after updating effective policy'
 *
 * Precondition: Thread mutex is not held
 *
 * This may be called with the task lock held, but in that case it won't be
 * called with tpt_update_sockets set.
 */
void
thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
{
#ifdef MACH_BSD
	/* Propagate the background state down onto the thread's existing sockets */
	if (pend_token->tpt_update_sockets) {
		proc_apply_task_networkbg(task_pid(get_threadtask(thread)), thread);
	}
#endif /* MACH_BSD */

	/* Re-evaluate the thread's I/O throttle tier */
	if (pend_token->tpt_update_throttle) {
		rethrottle_thread(get_bsdthread_info(thread));
	}

	/* Re-evaluate the thread's SFI state */
	if (pend_token->tpt_update_thread_sfi) {
		sfi_reevaluate(thread);
	}

	/* Push any priority change through the thread's turnstile chain */
	if (pend_token->tpt_update_turnstile) {
		turnstile_update_thread_priority_chain(thread);
	}
}
1897 
1898 /*
1899  * Set and update thread policy
1900  * Thread mutex might be held
1901  */
1902 static void
proc_set_thread_policy_locked(thread_t thread,int category,int flavor,int value,int value2,task_pend_token_t pend_token)1903 proc_set_thread_policy_locked(thread_t          thread,
1904     int               category,
1905     int               flavor,
1906     int               value,
1907     int               value2,
1908     task_pend_token_t pend_token)
1909 {
1910 	spl_t s = splsched();
1911 	thread_lock(thread);
1912 
1913 	proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
1914 
1915 	thread_unlock(thread);
1916 	splx(s);
1917 }
1918 
/*
 * Set and update thread policy
 * Thread spinlock is held
 */
static void
proc_set_thread_policy_spinlocked(thread_t          thread,
    int               category,
    int               flavor,
    int               value,
    int               value2,
    task_pend_token_t pend_token)
{
	/* Trace the before-state of the thread's requested policy words */
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
	    thread_tid(thread), threquested_0(thread),
	    threquested_1(thread), value, 0);

	/* Record the new requested value, then recompute effective policy */
	thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2, pend_token);

	thread_policy_update_spinlocked(thread, false, pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
	    thread_tid(thread), threquested_0(thread),
	    threquested_1(thread), tpending(pend_token), 0);
}
1945 
/*
 * Set the requested state for a specific flavor to a specific value.
 *
 * Works on a local copy of the requested policy and writes it back in
 * one assignment at the end; flavors that can affect turnstile-based
 * priority inheritance set tpt_update_turnstile so the caller later
 * pushes the change through the turnstile chain.
 */
static void
thread_set_requested_policy_spinlocked(thread_t     thread,
    int               category,
    int               flavor,
    int               value,
    int               value2,
    task_pend_token_t pend_token)
{
	int tier, passive;

	struct thread_requested_policy requested = thread->requested_policy;

	switch (flavor) {
	/* Category: EXTERNAL and INTERNAL, thread and task */

	case TASK_POLICY_DARWIN_BG:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_darwinbg = value;
		} else {
			requested.thrp_int_darwinbg = value;
		}
		break;

	case TASK_POLICY_IOPOL:
		/* An I/O policy expands into a (tier, passive) pair */
		proc_iopol_to_tier(value, &tier, &passive);
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iotier  = tier;
			requested.thrp_ext_iopassive = passive;
		} else {
			requested.thrp_int_iotier  = tier;
			requested.thrp_int_iopassive = passive;
		}
		break;

	case TASK_POLICY_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iotier = value;
		} else {
			requested.thrp_int_iotier = value;
		}
		break;

	case TASK_POLICY_PASSIVE_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iopassive = value;
		} else {
			requested.thrp_int_iopassive = value;
		}
		break;

	/* Category: ATTRIBUTE, thread only */

	case TASK_POLICY_PIDBIND_BG:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_pidbind_bg = value;
		break;

	case TASK_POLICY_LATENCY_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_latency_qos = value;
		break;

	case TASK_POLICY_THROUGH_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_through_qos = value;
		break;

	case TASK_POLICY_QOS_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_AND_RELPRIO:
		/* value is the QoS class, value2 is the relative priority */
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos = value;
		requested.thrp_qos_relprio = value2;
		pend_token->tpt_update_turnstile = 1;
		DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
		break;

	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_workq_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_PROMOTE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_promote = value;
		break;

	case TASK_POLICY_QOS_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_kevent_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_SERVICER_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_wlsvc_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_TERMINATED:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_terminated = value;
		break;

	default:
		panic("unknown task policy: %d %d %d", category, flavor, value);
		break;
	}

	/* Publish the updated requested policy in a single store */
	thread->requested_policy = requested;
}
2065 
2066 /*
2067  * Gets what you set. Effective values may be different.
2068  * Precondition: No locks are held
2069  */
2070 int
proc_get_thread_policy(thread_t thread,int category,int flavor)2071 proc_get_thread_policy(thread_t   thread,
2072     int        category,
2073     int        flavor)
2074 {
2075 	int value = 0;
2076 	thread_mtx_lock(thread);
2077 	value = proc_get_thread_policy_locked(thread, category, flavor, NULL);
2078 	thread_mtx_unlock(thread);
2079 	return value;
2080 }
2081 
2082 static int
proc_get_thread_policy_locked(thread_t thread,int category,int flavor,int * value2)2083 proc_get_thread_policy_locked(thread_t   thread,
2084     int        category,
2085     int        flavor,
2086     int*       value2)
2087 {
2088 	int value = 0;
2089 
2090 	spl_t s = splsched();
2091 	thread_lock(thread);
2092 
2093 	value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2);
2094 
2095 	thread_unlock(thread);
2096 	splx(s);
2097 
2098 	return value;
2099 }
2100 
/*
 * Gets what you set. Effective values may be different.
 *
 * Reads a single flavor out of the thread's requested policy.
 * value2 is only consulted for TASK_POLICY_QOS_AND_RELPRIO, where the
 * relative priority is returned through it.
 */
static int
thread_get_requested_policy_spinlocked(thread_t thread,
    int      category,
    int      flavor,
    int*     value2)
{
	int value = 0;

	struct thread_requested_policy requested = thread->requested_policy;

	switch (flavor) {
	case TASK_POLICY_DARWIN_BG:
		if (category == TASK_POLICY_EXTERNAL) {
			value = requested.thrp_ext_darwinbg;
		} else {
			value = requested.thrp_int_darwinbg;
		}
		break;
	case TASK_POLICY_IOPOL:
		/* Collapse the (tier, passive) pair back into an I/O policy */
		if (category == TASK_POLICY_EXTERNAL) {
			value = proc_tier_to_iopol(requested.thrp_ext_iotier,
			    requested.thrp_ext_iopassive);
		} else {
			value = proc_tier_to_iopol(requested.thrp_int_iotier,
			    requested.thrp_int_iopassive);
		}
		break;
	case TASK_POLICY_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			value = requested.thrp_ext_iotier;
		} else {
			value = requested.thrp_int_iotier;
		}
		break;
	case TASK_POLICY_PASSIVE_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			value = requested.thrp_ext_iopassive;
		} else {
			value = requested.thrp_int_iopassive;
		}
		break;
	case TASK_POLICY_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos;
		break;
	case TASK_POLICY_QOS_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_override;
		break;
	case TASK_POLICY_LATENCY_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_latency_qos;
		break;
	case TASK_POLICY_THROUGH_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_through_qos;
		break;
	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_workq_override;
		break;
	case TASK_POLICY_QOS_AND_RELPRIO:
		assert(category == TASK_POLICY_ATTRIBUTE);
		assert(value2 != NULL);
		value = requested.thrp_qos;
		*value2 = requested.thrp_qos_relprio;
		break;
	case TASK_POLICY_QOS_PROMOTE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_promote;
		break;
	case TASK_POLICY_QOS_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_kevent_override;
		break;
	case TASK_POLICY_QOS_SERVICER_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_wlsvc_override;
		break;
	case TASK_POLICY_TERMINATED:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_terminated;
		break;

	default:
		panic("unknown policy_flavor %d", flavor);
		break;
	}

	return value;
}
2195 
/*
 * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
 *
 * NOTE: This accessor does not take the task or thread lock.
 * Notifications of state updates need to be externally synchronized with state queries.
 * This routine *MUST* remain interrupt safe, as it is potentially invoked
 * within the context of a timer interrupt.
 *
 * TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
 *      Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
 *      I don't think that cost is worth not having the right answer.
 */
int
proc_get_effective_thread_policy(thread_t thread,
    int      flavor)
{
	int value = 0;

	switch (flavor) {
	case TASK_POLICY_DARWIN_BG:
		/*
		 * This call is used within the timer layer, as well as
		 * prioritizing requests to the graphics system.
		 * It also informs SFI and originator-bg-state.
		 * Returns 1 for background mode, 0 for normal mode
		 */

		value = thread->effective_policy.thep_darwinbg ? 1 : 0;
		break;
	case TASK_POLICY_IO:
		/*
		 * The I/O system calls here to find out what throttling tier to apply to an operation.
		 * Returns THROTTLE_LEVEL_* values
		 */
		value = thread->effective_policy.thep_io_tier;
		/* An iotier override can only make the tier more aggressive (numerically lower) */
		if (thread->iotier_override != THROTTLE_LEVEL_NONE) {
			value = MIN(value, thread->iotier_override);
		}
		break;
	case TASK_POLICY_PASSIVE_IO:
		/*
		 * The I/O system calls here to find out whether an operation should be passive.
		 * (i.e. not cause operations with lower throttle tiers to be throttled)
		 * Returns 1 for passive mode, 0 for normal mode
		 *
		 * If an override is causing IO to go into a lower tier, we also set
		 * the passive bit so that a thread doesn't end up stuck in its own throttle
		 * window when the override goes away.
		 */
		value = thread->effective_policy.thep_io_passive ? 1 : 0;
		if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
		    thread->iotier_override < thread->effective_policy.thep_io_tier) {
			value = 1;
		}
		break;
	case TASK_POLICY_ALL_SOCKETS_BG:
		/*
		 * do_background_socket() calls this to determine whether
		 * it should change the thread's sockets
		 * Returns 1 for background mode, 0 for normal mode
		 * This consults both thread and task so un-DBGing a thread while the task is BG
		 * doesn't get you out of the network throttle.
		 */
		value = (thread->effective_policy.thep_all_sockets_bg ||
		    get_threadtask(thread)->effective_policy.tep_all_sockets_bg) ? 1 : 0;
		break;
	case TASK_POLICY_NEW_SOCKETS_BG:
		/*
		 * socreate() calls this to determine if it should mark a new socket as background
		 * Returns 1 for background mode, 0 for normal mode
		 */
		value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
		break;
	case TASK_POLICY_LATENCY_QOS:
		/*
		 * timer arming calls into here to find out the timer coalescing level
		 * Returns a latency QoS tier (0-6)
		 */
		value = thread->effective_policy.thep_latency_qos;
		break;
	case TASK_POLICY_THROUGH_QOS:
		/*
		 * This value is passed into the urgency callout from the scheduler
		 * to the performance management subsystem.
		 *
		 * Returns a throughput QoS tier (0-6)
		 */
		value = thread->effective_policy.thep_through_qos;
		break;
	case TASK_POLICY_QOS:
		/*
		 * This is communicated to the performance management layer and SFI.
		 *
		 * Returns a QoS policy tier
		 */
		value = thread->effective_policy.thep_qos;
		break;
	default:
		panic("unknown thread policy flavor %d", flavor);
		break;
	}

	return value;
}
2300 
2301 
/*
 * (integer_t) casts limit the number of bits we can fit here
 * this interface is deprecated and replaced by the _EXT struct ?
 *
 * Packs the thread's requested and effective policies into the legacy
 * POLICY_REQ_* / POLICY_EFF_* bitfield layout of thread_policy_state_t.
 * Each field is only OR'd in when nonzero, so a zero field never sets bits.
 */
static void
proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
{
	uint64_t bits = 0;
	struct thread_requested_policy requested = thread->requested_policy;

	bits |= (requested.thrp_int_darwinbg    ? POLICY_REQ_INT_DARWIN_BG  : 0);
	bits |= (requested.thrp_ext_darwinbg    ? POLICY_REQ_EXT_DARWIN_BG  : 0);
	bits |= (requested.thrp_int_iotier      ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
	bits |= (requested.thrp_ext_iotier      ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
	bits |= (requested.thrp_int_iopassive   ? POLICY_REQ_INT_PASSIVE_IO : 0);
	bits |= (requested.thrp_ext_iopassive   ? POLICY_REQ_EXT_PASSIVE_IO : 0);

	bits |= (requested.thrp_qos             ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
	bits |= (requested.thrp_qos_override    ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT)   : 0);

	bits |= (requested.thrp_pidbind_bg      ? POLICY_REQ_PIDBIND_BG     : 0);

	bits |= (requested.thrp_latency_qos     ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
	bits |= (requested.thrp_through_qos     ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);

	/* NOTE: truncates to 32 bits; high bits of the packed word are lost */
	info->requested = (integer_t) bits;
	bits = 0;

	struct thread_effective_policy effective = thread->effective_policy;

	bits |= (effective.thep_darwinbg        ? POLICY_EFF_DARWIN_BG      : 0);

	bits |= (effective.thep_io_tier         ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
	bits |= (effective.thep_io_passive      ? POLICY_EFF_IO_PASSIVE     : 0);
	bits |= (effective.thep_all_sockets_bg  ? POLICY_EFF_ALL_SOCKETS_BG : 0);
	bits |= (effective.thep_new_sockets_bg  ? POLICY_EFF_NEW_SOCKETS_BG : 0);

	bits |= (effective.thep_qos             ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);

	bits |= (effective.thep_latency_qos     ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
	bits |= (effective.thep_through_qos     ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);

	info->effective = (integer_t)bits;
	bits = 0;

	info->pending = 0;
}
2349 
2350 /*
2351  * Sneakily trace either the task and thread requested
2352  * or just the thread requested, depending on if we have enough room.
2353  * We do have room on LP64. On LP32, we have to split it between two uintptr_t's.
2354  *
2355  *                                LP32            LP64
2356  * threquested_0(thread)          thread[0]       task[0]
2357  * threquested_1(thread)          thread[1]       thread[0]
2358  *
2359  */
2360 
2361 uintptr_t
threquested_0(thread_t thread)2362 threquested_0(thread_t thread)
2363 {
2364 	static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");
2365 
2366 	uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2367 
2368 	return raw[0];
2369 }
2370 
2371 uintptr_t
threquested_1(thread_t thread)2372 threquested_1(thread_t thread)
2373 {
2374 #if defined __LP64__
2375 	return *(uintptr_t*)&get_threadtask(thread)->requested_policy;
2376 #else
2377 	uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2378 	return raw[1];
2379 #endif
2380 }
2381 
2382 uintptr_t
theffective_0(thread_t thread)2383 theffective_0(thread_t thread)
2384 {
2385 	static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");
2386 
2387 	uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2388 	return raw[0];
2389 }
2390 
2391 uintptr_t
theffective_1(thread_t thread)2392 theffective_1(thread_t thread)
2393 {
2394 #if defined __LP64__
2395 	return *(uintptr_t*)&get_threadtask(thread)->effective_policy;
2396 #else
2397 	uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2398 	return raw[1];
2399 #endif
2400 }
2401 
2402 
/*
 * Set an override on the thread which is consulted with a
 * higher priority than the task/thread policy. This should
 * only be set for temporary grants until the thread
 * returns to the userspace boundary
 *
 * We use atomic operations to swap in the override, with
 * the assumption that the thread itself can
 * read the override and clear it on return to userspace.
 *
 * No locking is performed, since it is acceptable to see
 * a stale override for one loop through throttle_lowpri_io().
 * However a thread reference must be held on the thread.
 */

void
set_thread_iotier_override(thread_t thread, int policy)
{
	int current_override;

	/* Let most aggressive I/O policy win until user boundary */
	do {
		current_override = thread->iotier_override;

		/* Lower tier number wins; clamp the request to the stronger of the two */
		if (current_override != THROTTLE_LEVEL_NONE) {
			policy = MIN(current_override, policy);
		}

		if (current_override == policy) {
			/* no effective change */
			return;
		}
		/* CAS publishes the new override; retry if another CPU raced us */
	} while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));

	/*
	 * Since the thread may be currently throttled,
	 * re-evaluate tiers and potentially break out
	 * of an msleep
	 */
	rethrottle_thread(get_bsdthread_info(thread));
}
2444 
2445 /*
2446  * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
2447  * semaphores, dispatch_sync) may result in priority inversions where a higher priority
2448  * (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower
2449  * priority thread. In these cases, we attempt to propagate the priority token, as long
2450  * as the subsystem informs us of the relationships between the threads. The userspace
2451  * synchronization subsystem should maintain the information of owner->resource and
2452  * resource->waiters itself.
2453  */
2454 
2455 /*
2456  * This helper canonicalizes the resource/resource_type given the current qos_override_mode
2457  * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
2458  * to be handled specially in the future, but for now it's fine to slam
2459  * *resource to USER_ADDR_NULL even if it was previously a wildcard.
2460  */
2461 static void
canonicalize_resource_and_type(user_addr_t * resource,int * resource_type)2462 canonicalize_resource_and_type(user_addr_t *resource, int *resource_type)
2463 {
2464 	if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2465 		/* Map all input resource/type to a single one */
2466 		*resource = USER_ADDR_NULL;
2467 		*resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
2468 	} else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
2469 		/* no transform */
2470 	} else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
2471 		/* Map all mutex overrides to a single one, to avoid memory overhead */
2472 		if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
2473 			*resource = USER_ADDR_NULL;
2474 		}
2475 	}
2476 }
2477 
2478 /* This helper routine finds an existing override if known. Locking should be done by caller */
2479 static struct thread_qos_override *
find_qos_override(thread_t thread,user_addr_t resource,int resource_type)2480 find_qos_override(thread_t thread,
2481     user_addr_t resource,
2482     int resource_type)
2483 {
2484 	struct thread_qos_override *override;
2485 
2486 	override = thread->overrides;
2487 	while (override) {
2488 		if (override->override_resource == resource &&
2489 		    override->override_resource_type == resource_type) {
2490 			return override;
2491 		}
2492 
2493 		override = override->override_next;
2494 	}
2495 
2496 	return NULL;
2497 }
2498 
/*
 * Decrement (or, with reset, zero) the contended-resource count on every
 * override matching resource/resource_type; wildcards match everything.
 * Overrides that drop to zero are unlinked from the thread's list and
 * chained onto *free_override_list so the caller can zfree them after
 * dropping locks. Locking should be done by caller.
 */
static void
find_and_decrement_qos_override(thread_t       thread,
    user_addr_t    resource,
    int            resource_type,
    boolean_t      reset,
    struct thread_qos_override **free_override_list)
{
	struct thread_qos_override *override, *override_prev;

	override_prev = NULL;
	override = thread->overrides;
	while (override) {
		/* Capture the successor before any unlink below */
		struct thread_qos_override *override_next = override->override_next;

		if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
		    (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) {
			if (reset) {
				override->override_contended_resource_count = 0;
			} else {
				override->override_contended_resource_count--;
			}

			if (override->override_contended_resource_count == 0) {
				/* Unlink from the thread's list */
				if (override_prev == NULL) {
					thread->overrides = override_next;
				} else {
					override_prev->override_next = override_next;
				}

				/* Add to out-param for later zfree */
				override->override_next = *free_override_list;
				*free_override_list = override;
			} else {
				override_prev = override;
			}

			/* A non-wildcard resource matches at most one entry */
			if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
				return;
			}
		} else {
			override_prev = override;
		}

		override = override_next;
	}
}
2545 
2546 /* This helper recalculates the current requested override using the policy selected at boot */
2547 static int
calculate_requested_qos_override(thread_t thread)2548 calculate_requested_qos_override(thread_t thread)
2549 {
2550 	if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2551 		return THREAD_QOS_UNSPECIFIED;
2552 	}
2553 
2554 	/* iterate over all overrides and calculate MAX */
2555 	struct thread_qos_override *override;
2556 	int qos_override = THREAD_QOS_UNSPECIFIED;
2557 
2558 	override = thread->overrides;
2559 	while (override) {
2560 		qos_override = MAX(qos_override, override->override_qos);
2561 		override = override->override_next;
2562 	}
2563 
2564 	return qos_override;
2565 }
2566 
/*
 * Returns:
 * - 0 on success
 * - EINVAL if some invalid input was passed
 */
static int
proc_thread_qos_add_override_internal(thread_t         thread,
    int              override_qos,
    boolean_t        first_override_for_resource,
    user_addr_t      resource,
    int              resource_type)
{
	struct task_pend_token pend_token = {};
	int rc = 0;

	thread_mtx_lock(thread);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
	    thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);

	DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
	    uint64_t, thread->requested_policy.thrp_qos,
	    uint64_t, thread->effective_policy.thep_qos,
	    int, override_qos, boolean_t, first_override_for_resource);

	struct thread_qos_override *override;
	struct thread_qos_override *override_new = NULL;
	int new_qos_override, prev_qos_override;
	int new_effective_qos;

	canonicalize_resource_and_type(&resource, &resource_type);

	override = find_qos_override(thread, resource, resource_type);
	if (first_override_for_resource && !override) {
		/* We need to allocate a new object. Drop the thread lock and
		 * recheck afterwards in case someone else added the override
		 */
		thread_mtx_unlock(thread);
		override_new = zalloc(thread_qos_override_zone);
		thread_mtx_lock(thread);
		override = find_qos_override(thread, resource, resource_type);
	}
	if (first_override_for_resource && override) {
		/* Someone else already allocated while the thread lock was dropped */
		override->override_contended_resource_count++;
	} else if (!override && override_new) {
		/* Initialize and link the freshly allocated override at the list head */
		override = override_new;
		override_new = NULL;
		override->override_next = thread->overrides;
		/* since first_override_for_resource was TRUE */
		override->override_contended_resource_count = 1;
		override->override_resource = resource;
		override->override_resource_type = (int16_t)resource_type;
		override->override_qos = THREAD_QOS_UNSPECIFIED;
		thread->overrides = override;
	}

	if (override) {
		/* Overrides only ever ratchet the QoS upward for this resource */
		if (override->override_qos == THREAD_QOS_UNSPECIFIED) {
			override->override_qos = (int16_t)override_qos;
		} else {
			override->override_qos = MAX(override->override_qos, (int16_t)override_qos);
		}
	}

	/* Determine how to combine the various overrides into a single current
	 * requested override
	 */
	new_qos_override = calculate_requested_qos_override(thread);

	prev_qos_override = proc_get_thread_policy_locked(thread,
	    TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);

	if (new_qos_override != prev_qos_override) {
		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_QOS_OVERRIDE,
		    new_qos_override, 0, &pend_token);
	}

	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

	thread_mtx_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	/* The allocation lost the race above; return it to the zone */
	if (override_new) {
		zfree(thread_qos_override_zone, override_new);
	}

	DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
	    int, new_qos_override, int, new_effective_qos, int, rc);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
	    new_qos_override, resource, resource_type, 0, 0);

	return rc;
}
2664 
2665 int
proc_thread_qos_add_override(task_t task,thread_t thread,uint64_t tid,int override_qos,boolean_t first_override_for_resource,user_addr_t resource,int resource_type)2666 proc_thread_qos_add_override(task_t           task,
2667     thread_t         thread,
2668     uint64_t         tid,
2669     int              override_qos,
2670     boolean_t        first_override_for_resource,
2671     user_addr_t      resource,
2672     int              resource_type)
2673 {
2674 	boolean_t has_thread_reference = FALSE;
2675 	int rc = 0;
2676 
2677 	if (thread == THREAD_NULL) {
2678 		thread = task_findtid(task, tid);
2679 		/* returns referenced thread */
2680 
2681 		if (thread == THREAD_NULL) {
2682 			KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
2683 			    tid, 0, 0xdead, 0, 0);
2684 			return ESRCH;
2685 		}
2686 		has_thread_reference = TRUE;
2687 	} else {
2688 		assert(get_threadtask(thread) == task);
2689 	}
2690 	rc = proc_thread_qos_add_override_internal(thread, override_qos,
2691 	    first_override_for_resource, resource, resource_type);
2692 	if (has_thread_reference) {
2693 		thread_deallocate(thread);
2694 	}
2695 
2696 	return rc;
2697 }
2698 
/*
 * Remove (or, with reset, clear) QoS overrides matching resource/resource_type
 * on the thread, recompute the combined requested override, and push any
 * resulting policy change. Overrides freed by the removal are zfree'd only
 * after all locks are dropped.
 */
static void
proc_thread_qos_remove_override_internal(thread_t       thread,
    user_addr_t    resource,
    int            resource_type,
    boolean_t      reset)
{
	struct task_pend_token pend_token = {};

	struct thread_qos_override *deferred_free_override_list = NULL;
	int new_qos_override, prev_qos_override, new_effective_qos;

	thread_mtx_lock(thread);

	canonicalize_resource_and_type(&resource, &resource_type);

	find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
	    thread_tid(thread), resource, reset, 0, 0);

	DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
	    uint64_t, thread->requested_policy.thrp_qos,
	    uint64_t, thread->effective_policy.thep_qos);

	/* Determine how to combine the various overrides into a single current requested override */
	new_qos_override = calculate_requested_qos_override(thread);

	spl_t s = splsched();
	thread_lock(thread);

	/*
	 * The override chain and therefore the value of the current override is locked with thread mutex,
	 * so we can do a get/set without races.  However, the rest of thread policy is locked under the spinlock.
	 * This means you can't change the current override from a spinlock-only setter.
	 */
	prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);

	if (new_qos_override != prev_qos_override) {
		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token);
	}

	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	/* Free any overrides that were unlinked above, now that no locks are held */
	while (deferred_free_override_list) {
		struct thread_qos_override *override_next = deferred_free_override_list->override_next;

		zfree(thread_qos_override_zone, deferred_free_override_list);
		deferred_free_override_list = override_next;
	}

	DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
	    int, new_qos_override, int, new_effective_qos);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
	    thread_tid(thread), 0, 0, 0, 0);
}
2762 
2763 int
proc_thread_qos_remove_override(task_t task,thread_t thread,uint64_t tid,user_addr_t resource,int resource_type)2764 proc_thread_qos_remove_override(task_t      task,
2765     thread_t    thread,
2766     uint64_t    tid,
2767     user_addr_t resource,
2768     int         resource_type)
2769 {
2770 	boolean_t has_thread_reference = FALSE;
2771 
2772 	if (thread == THREAD_NULL) {
2773 		thread = task_findtid(task, tid);
2774 		/* returns referenced thread */
2775 
2776 		if (thread == THREAD_NULL) {
2777 			KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2778 			    tid, 0, 0xdead, 0, 0);
2779 			return ESRCH;
2780 		}
2781 		has_thread_reference = TRUE;
2782 	} else {
2783 		assert(task == get_threadtask(thread));
2784 	}
2785 
2786 	proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE);
2787 
2788 	if (has_thread_reference) {
2789 		thread_deallocate(thread);
2790 	}
2791 
2792 	return 0;
2793 }
2794 
2795 /* Deallocate before thread termination */
2796 void
proc_thread_qos_deallocate(thread_t thread)2797 proc_thread_qos_deallocate(thread_t thread)
2798 {
2799 	/* This thread must have no more IPC overrides. */
2800 	assert(thread->kevent_overrides == 0);
2801 	assert(thread->requested_policy.thrp_qos_kevent_override == THREAD_QOS_UNSPECIFIED);
2802 	assert(thread->requested_policy.thrp_qos_wlsvc_override == THREAD_QOS_UNSPECIFIED);
2803 
2804 	/*
2805 	 * Clear out any lingering override objects.
2806 	 */
2807 	struct thread_qos_override *override;
2808 
2809 	thread_mtx_lock(thread);
2810 	override = thread->overrides;
2811 	thread->overrides = NULL;
2812 	thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2813 	/* We don't need to re-evaluate thread policy here because the thread has already exited */
2814 	thread_mtx_unlock(thread);
2815 
2816 	while (override) {
2817 		struct thread_qos_override *override_next = override->override_next;
2818 
2819 		zfree(thread_qos_override_zone, override);
2820 		override = override_next;
2821 	}
2822 }
2823 
2824 /*
2825  * Set up the primordial thread's QoS
2826  */
2827 void
task_set_main_thread_qos(task_t task,thread_t thread)2828 task_set_main_thread_qos(task_t task, thread_t thread)
2829 {
2830 	struct task_pend_token pend_token = {};
2831 
2832 	assert(get_threadtask(thread) == task);
2833 
2834 	thread_mtx_lock(thread);
2835 
2836 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2837 	    (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
2838 	    thread_tid(thread), threquested_0(thread), threquested_1(thread),
2839 	    thread->requested_policy.thrp_qos, 0);
2840 
2841 	thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
2842 
2843 	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
2844 	    primordial_qos, 0, &pend_token);
2845 
2846 	thread_mtx_unlock(thread);
2847 
2848 	thread_policy_update_complete_unlocked(thread, &pend_token);
2849 
2850 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2851 	    (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
2852 	    thread_tid(thread), threquested_0(thread), threquested_1(thread),
2853 	    primordial_qos, 0);
2854 }
2855 
2856 /*
2857  * KPI for pthread kext
2858  *
2859  * Return a good guess at what the initial manager QoS will be
2860  * Dispatch can override this in userspace if it so chooses
2861  */
2862 thread_qos_t
task_get_default_manager_qos(task_t task)2863 task_get_default_manager_qos(task_t task)
2864 {
2865 	thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
2866 
2867 	if (primordial_qos == THREAD_QOS_LEGACY) {
2868 		primordial_qos = THREAD_QOS_USER_INITIATED;
2869 	}
2870 
2871 	return primordial_qos;
2872 }
2873 
2874 /*
2875  * Check if the kernel promotion on thread has changed
2876  * and apply it.
2877  *
2878  * thread locked on entry and exit
2879  */
2880 boolean_t
thread_recompute_kernel_promotion_locked(thread_t thread)2881 thread_recompute_kernel_promotion_locked(thread_t thread)
2882 {
2883 	boolean_t needs_update = FALSE;
2884 	uint8_t kern_promotion_schedpri = (uint8_t)thread_get_inheritor_turnstile_sched_priority(thread);
2885 
2886 	/*
2887 	 * For now just assert that kern_promotion_schedpri <= MAXPRI_PROMOTE.
2888 	 * TURNSTILE_KERNEL_PROMOTE adds threads on the waitq already capped to MAXPRI_PROMOTE
2889 	 * and propagates the priority through the chain with the same cap, because as of now it does
2890 	 * not differenciate on the kernel primitive.
2891 	 *
2892 	 * If this assumption will change with the adoption of a kernel primitive that does not
2893 	 * cap the when adding/propagating,
2894 	 * then here is the place to put the generic cap for all kernel primitives
2895 	 * (converts the assert to kern_promotion_schedpri = MIN(priority, MAXPRI_PROMOTE))
2896 	 */
2897 	assert(kern_promotion_schedpri <= MAXPRI_PROMOTE);
2898 
2899 	if (kern_promotion_schedpri != thread->kern_promotion_schedpri) {
2900 		KDBG(MACHDBG_CODE(
2901 			    DBG_MACH_SCHED, MACH_TURNSTILE_KERNEL_CHANGE) | DBG_FUNC_NONE,
2902 		    thread_tid(thread),
2903 		    kern_promotion_schedpri,
2904 		    thread->kern_promotion_schedpri);
2905 
2906 		needs_update = TRUE;
2907 		thread->kern_promotion_schedpri = kern_promotion_schedpri;
2908 		thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
2909 	}
2910 
2911 	return needs_update;
2912 }
2913 
/*
 * Check if the user promotion on thread has changed
 * and apply it.
 *
 * thread locked on entry, might drop the thread lock
 * and reacquire it.
 *
 * Returns TRUE when the base priority moved while the thread is waiting
 * on a turnstile, i.e. when the caller needs to continue propagation.
 */
boolean_t
thread_recompute_user_promotion_locked(thread_t thread)
{
	boolean_t needs_update = FALSE;
	struct task_pend_token pend_token = {};
	/* Turnstile-inherited base priority, capped to the user priority range */
	uint8_t user_promotion_basepri = MIN((uint8_t)thread_get_inheritor_turnstile_base_priority(thread), MAXPRI_USER);
	int old_base_pri = thread->base_pri;
	thread_qos_t qos_promotion;

	/* Check if user promotion has changed */
	if (thread->user_promotion_basepri == user_promotion_basepri) {
		return needs_update;
	} else {
		/* Trace the change before the stored value is overwritten */
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    (TURNSTILE_CODE(TURNSTILE_PRIORITY_OPERATIONS, (THREAD_USER_PROMOTION_CHANGE))) | DBG_FUNC_NONE,
		    thread_tid(thread),
		    user_promotion_basepri,
		    thread->user_promotion_basepri,
		    0, 0);
		KDBG(MACHDBG_CODE(
			    DBG_MACH_SCHED, MACH_TURNSTILE_USER_CHANGE) | DBG_FUNC_NONE,
		    thread_tid(thread),
		    user_promotion_basepri,
		    thread->user_promotion_basepri);
	}

	/* Update the user promotion base pri */
	thread->user_promotion_basepri = user_promotion_basepri;
	pend_token.tpt_force_recompute_pri = 1;

	/* Map the promotion priority to a QoS class for QoS-based threads */
	if (user_promotion_basepri <= MAXPRI_THROTTLE) {
		qos_promotion = THREAD_QOS_UNSPECIFIED;
	} else {
		qos_promotion = thread_user_promotion_qos_for_pri(user_promotion_basepri);
	}

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_PROMOTE, qos_promotion, 0, &pend_token);

	if (thread_get_waiting_turnstile(thread) &&
	    thread->base_pri != old_base_pri) {
		needs_update = TRUE;
	}

	/* Drop the thread lock to flush pending policy work, then retake it */
	thread_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	thread_lock(thread);

	return needs_update;
}
2973 
2974 /*
2975  * Convert the thread user promotion base pri to qos for threads in qos world.
2976  * For priority above UI qos, the qos would be set to UI.
2977  */
2978 thread_qos_t
thread_user_promotion_qos_for_pri(int priority)2979 thread_user_promotion_qos_for_pri(int priority)
2980 {
2981 	thread_qos_t qos;
2982 	for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
2983 		if (thread_qos_policy_params.qos_pri[qos] <= priority) {
2984 			return qos;
2985 		}
2986 	}
2987 	return THREAD_QOS_MAINTENANCE;
2988 }
2989 
/*
 * Set the thread's QoS Kevent override
 * Owned by the Kevent subsystem
 *
 * May be called with spinlocks held, but not spinlocks
 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
 *
 * One 'add' must be balanced by one 'drop'.
 * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
 * Before the thread is deallocated, there must be 0 remaining overrides.
 *
 * qos_override must be a valid QoS class (not UNSPECIFIED); is_new_override
 * distinguishes the first 'add' from a subsequent 'update'.
 */
static void
thread_kevent_override(thread_t    thread,
    uint32_t    qos_override,
    boolean_t   is_new_override)
{
	struct task_pend_token pend_token = {};
	boolean_t needs_update;

	spl_t s = splsched();
	thread_lock(thread);

	uint32_t old_override = thread->requested_policy.thrp_qos_kevent_override;

	assert(qos_override > THREAD_QOS_UNSPECIFIED);
	assert(qos_override < THREAD_QOS_LAST);

	/* Validate the override count against the currently stored override */
	if (is_new_override) {
		if (thread->kevent_overrides++ == 0) {
			/* This add is the first override for this thread */
			assert(old_override == THREAD_QOS_UNSPECIFIED);
		} else {
			/* There are already other overrides in effect for this thread */
			assert(old_override > THREAD_QOS_UNSPECIFIED);
		}
	} else {
		/* There must be at least one override (the previous add call) in effect */
		assert(thread->kevent_overrides > 0);
		assert(old_override > THREAD_QOS_UNSPECIFIED);
	}

	/*
	 * We can't allow lowering if there are several IPC overrides because
	 * the caller can't possibly know the whole truth
	 */
	if (thread->kevent_overrides == 1) {
		needs_update = qos_override != old_override;
	} else {
		needs_update = qos_override > old_override;
	}

	if (needs_update) {
		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_QOS_KEVENT_OVERRIDE,
		    qos_override, 0, &pend_token);
		/* This attribute change must not queue socket-policy updates */
		assert(pend_token.tpt_update_sockets == 0);
	}

	thread_unlock(thread);
	splx(s);

	thread_policy_update_complete_unlocked(thread, &pend_token);
}
3053 
/* First 'add' of a kevent QoS override; balanced by thread_drop_kevent_override() */
void
thread_add_kevent_override(thread_t thread, uint32_t qos_override)
{
	thread_kevent_override(thread, qos_override, TRUE);
}
3059 
/* Update the QoS value of an already-added kevent override */
void
thread_update_kevent_override(thread_t thread, uint32_t qos_override)
{
	thread_kevent_override(thread, qos_override, FALSE);
}
3065 
3066 void
thread_drop_kevent_override(thread_t thread)3067 thread_drop_kevent_override(thread_t thread)
3068 {
3069 	struct task_pend_token pend_token = {};
3070 
3071 	spl_t s = splsched();
3072 	thread_lock(thread);
3073 
3074 	assert(thread->kevent_overrides > 0);
3075 
3076 	if (--thread->kevent_overrides == 0) {
3077 		/*
3078 		 * There are no more overrides for this thread, so we should
3079 		 * clear out the saturated override value
3080 		 */
3081 
3082 		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3083 		    TASK_POLICY_QOS_KEVENT_OVERRIDE, THREAD_QOS_UNSPECIFIED,
3084 		    0, &pend_token);
3085 	}
3086 
3087 	thread_unlock(thread);
3088 	splx(s);
3089 
3090 	thread_policy_update_complete_unlocked(thread, &pend_token);
3091 }
3092 
3093 /*
3094  * Set the thread's QoS Workloop Servicer override
3095  * Owned by the Kevent subsystem
3096  *
3097  * May be called with spinlocks held, but not spinlocks
3098  * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
3099  *
3100  * One 'add' must be balanced by one 'drop'.
3101  * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
3102  * Before the thread is deallocated, there must be 0 remaining overrides.
3103  */
3104 static void
thread_servicer_override(thread_t thread,uint32_t qos_override,boolean_t is_new_override)3105 thread_servicer_override(thread_t    thread,
3106     uint32_t    qos_override,
3107     boolean_t   is_new_override)
3108 {
3109 	struct task_pend_token pend_token = {};
3110 
3111 	spl_t s = splsched();
3112 	thread_lock(thread);
3113 
3114 	if (is_new_override) {
3115 		assert(!thread->requested_policy.thrp_qos_wlsvc_override);
3116 	} else {
3117 		assert(thread->requested_policy.thrp_qos_wlsvc_override);
3118 	}
3119 
3120 	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3121 	    TASK_POLICY_QOS_SERVICER_OVERRIDE,
3122 	    qos_override, 0, &pend_token);
3123 
3124 	thread_unlock(thread);
3125 	splx(s);
3126 
3127 	assert(pend_token.tpt_update_sockets == 0);
3128 	thread_policy_update_complete_unlocked(thread, &pend_token);
3129 }
3130 
/* First 'add' of a servicer override; balanced by thread_drop_servicer_override() */
void
thread_add_servicer_override(thread_t thread, uint32_t qos_override)
{
	assert(qos_override > THREAD_QOS_UNSPECIFIED);
	assert(qos_override < THREAD_QOS_LAST);

	thread_servicer_override(thread, qos_override, TRUE);
}
3139 
/* Update the QoS value of an already-added servicer override */
void
thread_update_servicer_override(thread_t thread, uint32_t qos_override)
{
	assert(qos_override > THREAD_QOS_UNSPECIFIED);
	assert(qos_override < THREAD_QOS_LAST);

	thread_servicer_override(thread, qos_override, FALSE);
}
3148 
/* Drop the servicer override by resetting it to THREAD_QOS_UNSPECIFIED */
void
thread_drop_servicer_override(thread_t thread)
{
	thread_servicer_override(thread, THREAD_QOS_UNSPECIFIED, FALSE);
}
3154 
3155 
3156 /* Get current requested qos / relpri, may be called from spinlock context */
3157 thread_qos_t
thread_get_requested_qos(thread_t thread,int * relpri)3158 thread_get_requested_qos(thread_t thread, int *relpri)
3159 {
3160 	int relprio_value = 0;
3161 	thread_qos_t qos;
3162 
3163 	qos = (thread_qos_t)proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
3164 	    TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
3165 	if (relpri) {
3166 		*relpri = -relprio_value;
3167 	}
3168 	return qos;
3169 }
3170 
3171 /*
3172  * This function will promote the thread priority
3173  * since exec could block other threads calling
3174  * proc_find on the proc. This boost must be removed
3175  * via call to thread_clear_exec_promotion.
3176  *
3177  * This should be replaced with a generic 'priority inheriting gate' mechanism (24194397)
3178  */
3179 void
thread_set_exec_promotion(thread_t thread)3180 thread_set_exec_promotion(thread_t thread)
3181 {
3182 	spl_t s = splsched();
3183 	thread_lock(thread);
3184 
3185 	sched_thread_promote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3186 
3187 	thread_unlock(thread);
3188 	splx(s);
3189 }
3190 
3191 /*
3192  * This function will clear the exec thread
3193  * promotion set on the thread by thread_set_exec_promotion.
3194  */
3195 void
thread_clear_exec_promotion(thread_t thread)3196 thread_clear_exec_promotion(thread_t thread)
3197 {
3198 	spl_t s = splsched();
3199 	thread_lock(thread);
3200 
3201 	sched_thread_unpromote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3202 
3203 	thread_unlock(thread);
3204 	splx(s);
3205 }
3206