1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach/mach_types.h>
30 #include <mach/thread_act_server.h>
31
32 #include <kern/kern_types.h>
33 #include <kern/processor.h>
34 #include <kern/thread.h>
35 #include <kern/affinity.h>
36 #include <mach/task_policy.h>
37 #include <kern/sfi.h>
38 #include <kern/policy_internal.h>
39 #include <sys/errno.h>
40 #include <sys/ulock.h>
41
42 #include <mach/machine/sdt.h>
43
/* Typed kalloc zone backing the per-thread QoS override tracking structures. */
static KALLOC_TYPE_DEFINE(thread_qos_override_zone,
    struct thread_qos_override, KT_DEFAULT);
46
47 #ifdef MACH_BSD
48 extern int proc_selfpid(void);
49 extern char * proc_name_address(void *p);
50 extern void rethrottle_thread(void * uthread);
51 #endif /* MACH_BSD */
52
/* Strip the tier number out of a packed QoS tier value. */
#define QOS_EXTRACT(q) ((q) & 0xff)

/*
 * Selectable strategies for combining multiple concurrent QoS overrides on a
 * thread; the active one is chosen at boot via the "qos_override_mode" boot-arg.
 */
#define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
#define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 3

TUNABLE(uint32_t, qos_override_mode, "qos_override_mode",
    QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE);
62
static void
proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset);

/*
 * Default kevent I/O tier override is THROTTLE_LEVEL_END — presumably the
 * "no override in effect" sentinel; confirm against the throttle level enum.
 */
const int thread_default_iotier_override = THROTTLE_LEVEL_END;

/*
 * Template requested policy for new threads: every field zero except the
 * kevent I/O tier override, which starts at its non-zero default above.
 */
const struct thread_requested_policy default_thread_requested_policy = {
	.thrp_iotier_kevent_override = thread_default_iotier_override
};
71
72 /*
73 * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
74 * to threads that don't have a QoS class set.
75 */
76 const qos_policy_params_t thread_qos_policy_params = {
77 /*
78 * This table defines the starting base priority of the thread,
79 * which will be modified by the thread importance and the task max priority
80 * before being applied.
81 */
82 .qos_pri[THREAD_QOS_UNSPECIFIED] = 0, /* not consulted */
83 .qos_pri[THREAD_QOS_USER_INTERACTIVE] = BASEPRI_BACKGROUND, /* i.e. 46 */
84 .qos_pri[THREAD_QOS_USER_INITIATED] = BASEPRI_USER_INITIATED,
85 .qos_pri[THREAD_QOS_LEGACY] = BASEPRI_DEFAULT,
86 .qos_pri[THREAD_QOS_UTILITY] = BASEPRI_UTILITY,
87 .qos_pri[THREAD_QOS_BACKGROUND] = MAXPRI_THROTTLE,
88 .qos_pri[THREAD_QOS_MAINTENANCE] = MAXPRI_THROTTLE,
89
90 /*
91 * This table defines the highest IO priority that a thread marked with this
92 * QoS class can have.
93 */
94 .qos_iotier[THREAD_QOS_UNSPECIFIED] = THROTTLE_LEVEL_TIER0,
95 .qos_iotier[THREAD_QOS_USER_INTERACTIVE] = THROTTLE_LEVEL_TIER0,
96 .qos_iotier[THREAD_QOS_USER_INITIATED] = THROTTLE_LEVEL_TIER0,
97 .qos_iotier[THREAD_QOS_LEGACY] = THROTTLE_LEVEL_TIER0,
98 .qos_iotier[THREAD_QOS_UTILITY] = THROTTLE_LEVEL_TIER1,
99 .qos_iotier[THREAD_QOS_BACKGROUND] = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */
100 .qos_iotier[THREAD_QOS_MAINTENANCE] = THROTTLE_LEVEL_TIER3,
101
102 /*
103 * This table defines the highest QoS level that
104 * a thread marked with this QoS class can have.
105 */
106
107 .qos_through_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
108 .qos_through_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
109 .qos_through_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
110 .qos_through_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
111 .qos_through_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
112 .qos_through_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
113 .qos_through_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
114
115 .qos_latency_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
116 .qos_latency_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(LATENCY_QOS_TIER_0),
117 .qos_latency_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(LATENCY_QOS_TIER_1),
118 .qos_latency_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(LATENCY_QOS_TIER_1),
119 .qos_latency_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
120 .qos_latency_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
121 .qos_latency_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
122 };
123
124 static void
125 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);
126
127 static int
128 thread_qos_scaled_relative_priority(int qos, int qos_relprio);
129
130 static void
131 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);
132
133 static void
134 proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
135
136 static void
137 proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
138
139 static void
140 thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
141
142 static int
143 thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);
144
145 static int
146 proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);
147
148 static void
149 thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
150
151 static void
152 thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
153
154 boolean_t
thread_has_qos_policy(thread_t thread)155 thread_has_qos_policy(thread_t thread)
156 {
157 return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
158 }
159
160
161 static void
thread_remove_qos_policy_locked(thread_t thread,task_pend_token_t pend_token)162 thread_remove_qos_policy_locked(thread_t thread,
163 task_pend_token_t pend_token)
164 {
165 __unused int prev_qos = thread->requested_policy.thrp_qos;
166
167 DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);
168
169 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
170 THREAD_QOS_UNSPECIFIED, 0, pend_token);
171 }
172
173 kern_return_t
thread_remove_qos_policy(thread_t thread)174 thread_remove_qos_policy(thread_t thread)
175 {
176 struct task_pend_token pend_token = {};
177
178 thread_mtx_lock(thread);
179 if (!thread->active) {
180 thread_mtx_unlock(thread);
181 return KERN_TERMINATED;
182 }
183
184 thread_remove_qos_policy_locked(thread, &pend_token);
185
186 thread_mtx_unlock(thread);
187
188 thread_policy_update_complete_unlocked(thread, &pend_token);
189
190 return KERN_SUCCESS;
191 }
192
193
194 boolean_t
thread_is_static_param(thread_t thread)195 thread_is_static_param(thread_t thread)
196 {
197 if (thread->static_param) {
198 DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
199 return TRUE;
200 }
201 return FALSE;
202 }
203
204 /*
205 * Relative priorities can range between 0REL and -15REL. These
206 * map to QoS-specific ranges, to create non-overlapping priority
207 * ranges.
208 */
209 static int
thread_qos_scaled_relative_priority(int qos,int qos_relprio)210 thread_qos_scaled_relative_priority(int qos, int qos_relprio)
211 {
212 int next_lower_qos;
213
214 /* Fast path, since no validation or scaling is needed */
215 if (qos_relprio == 0) {
216 return 0;
217 }
218
219 switch (qos) {
220 case THREAD_QOS_USER_INTERACTIVE:
221 next_lower_qos = THREAD_QOS_USER_INITIATED;
222 break;
223 case THREAD_QOS_USER_INITIATED:
224 next_lower_qos = THREAD_QOS_LEGACY;
225 break;
226 case THREAD_QOS_LEGACY:
227 next_lower_qos = THREAD_QOS_UTILITY;
228 break;
229 case THREAD_QOS_UTILITY:
230 next_lower_qos = THREAD_QOS_BACKGROUND;
231 break;
232 case THREAD_QOS_MAINTENANCE:
233 case THREAD_QOS_BACKGROUND:
234 next_lower_qos = 0;
235 break;
236 default:
237 panic("Unrecognized QoS %d", qos);
238 return 0;
239 }
240
241 int prio_range_max = thread_qos_policy_params.qos_pri[qos];
242 int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0;
243
244 /*
245 * We now have the valid range that the scaled relative priority can map to. Note
246 * that the lower bound is exclusive, but the upper bound is inclusive. If the
247 * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
248 * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
249 * remainder.
250 */
251 int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4);
252
253 return scaled_relprio;
254 }
255
256 /*
257 * flag set by -qos-policy-allow boot-arg to allow
258 * testing thread qos policy from userspace
259 */
260 static TUNABLE(bool, allow_qos_policy_set, "-qos-policy-allow", false);
261
/*
 * Apply a scheduling policy flavor to a thread (MIG entry point).
 *
 * Unless the "-qos-policy-allow" boot-arg is set, this path rejects:
 *  - threads whose parameters are pinned (static_param),
 *  - direct THREAD_QOS_POLICY requests,
 *  - time-constraint-with-priority requests at any priority other than
 *    BASEPRI_RTQUEUES.
 *
 * A pre-existing QoS is removed before the new policy is applied and restored
 * if applying the new policy fails.
 */
kern_return_t
thread_policy_set(
	thread_t thread,
	thread_policy_flavor_t flavor,
	thread_policy_t policy_info,
	mach_msg_type_number_t count)
{
	thread_qos_policy_data_t req_qos;
	kern_return_t kr;

	req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;

	if (thread == THREAD_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (!allow_qos_policy_set) {
		if (thread_is_static_param(thread)) {
			return KERN_POLICY_STATIC;
		}

		if (flavor == THREAD_QOS_POLICY) {
			return KERN_INVALID_ARGUMENT;
		}

		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			if (count < THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT) {
				return KERN_INVALID_ARGUMENT;
			}
			thread_time_constraint_with_priority_policy_t info = (thread_time_constraint_with_priority_policy_t)policy_info;
			/* Only the base realtime priority is accepted from this path. */
			if (info->priority != BASEPRI_RTQUEUES) {
				return KERN_INVALID_ARGUMENT;
			}
		}
	}

	/* Threads without static_param set reset their QoS when other policies are applied. */
	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		/* Store the existing tier, if we fail this call it is used to reset back. */
		req_qos.qos_tier = thread->requested_policy.thrp_qos;
		req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;

		kr = thread_remove_qos_policy(thread);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
	}

	kr = thread_policy_set_internal(thread, flavor, policy_info, count);

	/* If we removed a pre-existing QoS above, restore it when the set failed. */
	if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
		if (kr != KERN_SUCCESS) {
			/* Reset back to our original tier as the set failed. */
			(void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
		}
	}

	return kr;
}
322
/*
 * The "with priority" time constraint policy must stay layout-compatible with
 * the plain one so a single handler below can service both flavors.
 */
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, period) == offsetof(thread_time_constraint_policy_data_t, period));
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, computation) == offsetof(thread_time_constraint_policy_data_t, computation));
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, constraint) == offsetof(thread_time_constraint_policy_data_t, constraint));
static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, preemptible) == offsetof(thread_time_constraint_policy_data_t, preemptible));

/*
 * Apply a scheduling policy flavor to a thread, without the QoS
 * remove/restore bookkeeping performed by thread_policy_set().
 *
 * Takes the thread mutex, and the thread spinlock (at splsched) for the
 * flavors that touch scheduler state directly. Deferred work accumulated in
 * pend_token is flushed after the mutex is dropped. Returns KERN_TERMINATED
 * for inactive threads.
 */
kern_return_t
thread_policy_set_internal(
	thread_t thread,
	thread_policy_flavor_t flavor,
	thread_policy_t policy_info,
	mach_msg_type_number_t count)
{
	kern_return_t result = KERN_SUCCESS;
	struct task_pend_token pend_token = {};

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return KERN_TERMINATED;
	}

	switch (flavor) {
	case THREAD_EXTENDED_POLICY:
	{
		/* Missing/short info defaults to timeshare. */
		boolean_t timeshare = TRUE;

		if (count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t info;

			info = (thread_extended_policy_t)policy_info;
			timeshare = info->timeshare;
		}

		sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;

		spl_t s = splsched();
		thread_lock(thread);

		thread_set_user_sched_mode_and_recompute_pri(thread, mode);

		thread_unlock(thread);
		splx(s);

		/* A mode change may move the thread into a different SFI class. */
		pend_token.tpt_update_thread_sfi = 1;

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
	{
		thread_time_constraint_with_priority_policy_t info;

		mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
		    THREAD_TIME_CONSTRAINT_POLICY_COUNT :
		    THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);

		if (count < min_count) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Layout compatibility asserted above lets both flavors share this cast. */
		info = (thread_time_constraint_with_priority_policy_t)policy_info;


		/* Computation must fit inside the constraint and the RT quantum bounds. */
		if (info->constraint < info->computation ||
		    info->computation > max_rt_quantum ||
		    info->computation < min_rt_quantum) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/*
		 * Raise the computation to at least half of the constraint
		 * (capped at max_rt_quantum). Note this mutates the caller's
		 * policy_info buffer in place.
		 */
		if (info->computation < (info->constraint / 2)) {
			info->computation = (info->constraint / 2);
			if (info->computation > max_rt_quantum) {
				info->computation = max_rt_quantum;
			}
		}

		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			if ((info->priority < BASEPRI_RTQUEUES) || (info->priority > MAXPRI)) {
				result = KERN_INVALID_ARGUMENT;
				break;
			}
		}

		spl_t s = splsched();
		thread_lock(thread);

		thread->realtime.period = info->period;
		thread->realtime.computation = info->computation;
		thread->realtime.constraint = info->constraint;
		thread->realtime.preemptible = info->preemptible;
		if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
			thread->realtime.priority_offset = (uint8_t)(info->priority - BASEPRI_RTQUEUES);
		} else {
			thread->realtime.priority_offset = 0;
			/* Or check for override from allowed thread group? */
		}

		thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);

		thread_unlock(thread);
		splx(s);

		pend_token.tpt_update_thread_sfi = 1;

		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t info;

		if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}
		info = (thread_precedence_policy_t)policy_info;

		spl_t s = splsched();
		thread_lock(thread);

		thread->importance = info->importance;

		thread_recompute_priority(thread);

		thread_unlock(thread);
		splx(s);

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		thread_affinity_policy_t info;

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t) policy_info;
		/*
		 * Unlock the thread mutex here and
		 * return directly after calling thread_affinity_set().
		 * This is necessary for correct lock ordering because
		 * thread_affinity_set() takes the task lock.
		 */
		thread_mtx_unlock(thread);
		return thread_affinity_set(thread, info->affinity_tag);
	}

#if !defined(XNU_TARGET_OS_OSX)
	case THREAD_BACKGROUND_POLICY:
	{
		thread_background_policy_t info;

		if (count < THREAD_BACKGROUND_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Only a thread's own task may background it via this flavor. */
		if (get_threadtask(thread) != current_task()) {
			result = KERN_PROTECTION_FAILURE;
			break;
		}

		info = (thread_background_policy_t) policy_info;

		int enable;

		if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG) {
			enable = TASK_POLICY_ENABLE;
		} else {
			enable = TASK_POLICY_DISABLE;
		}

		/* Self-applied DARWIN_BG counts as internal; other threads' as external. */
		int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;

		proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token);

		break;
	}
#endif /* !defined(XNU_TARGET_OS_OSX) */

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		thread_throughput_qos_t tqos;

		if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS) {
			break;
		}

		tqos = qos_extract(info->thread_throughput_qos_tier);

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);

		break;
	}

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		thread_latency_qos_t lqos;

		if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS) {
			break;
		}

		lqos = qos_extract(info->thread_latency_qos_tier);

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);

		break;
	}

	case THREAD_QOS_POLICY:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* tier_importance is a relative priority: legal range is [THREAD_QOS_MIN_TIER_IMPORTANCE, 0]. */
		if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* A relative priority makes no sense without a QoS tier to scope it. */
		if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
		    info->qos_tier, -info->tier_importance, &pend_token);

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	/* Flush any deferred policy work now that the mutex is dropped. */
	thread_policy_update_complete_unlocked(thread, &pend_token);

	return result;
}
599
600 /*
601 * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
602 * Both result in FIXED mode scheduling.
603 */
604 static sched_mode_t
convert_policy_to_sched_mode(integer_t policy)605 convert_policy_to_sched_mode(integer_t policy)
606 {
607 switch (policy) {
608 case POLICY_TIMESHARE:
609 return TH_MODE_TIMESHARE;
610 case POLICY_RR:
611 case POLICY_FIFO:
612 return TH_MODE_FIXED;
613 default:
614 panic("unexpected sched policy: %d", policy);
615 return TH_MODE_NONE;
616 }
617 }
618
619 /*
620 * Called either with the thread mutex locked
621 * or from the pthread kext in a 'safe place'.
622 */
623 static kern_return_t
thread_set_mode_and_absolute_pri_internal(thread_t thread,sched_mode_t mode,integer_t priority,task_pend_token_t pend_token)624 thread_set_mode_and_absolute_pri_internal(thread_t thread,
625 sched_mode_t mode,
626 integer_t priority,
627 task_pend_token_t pend_token)
628 {
629 kern_return_t kr = KERN_SUCCESS;
630
631 spl_t s = splsched();
632 thread_lock(thread);
633
634 /* This path isn't allowed to change a thread out of realtime. */
635 if ((thread->sched_mode == TH_MODE_REALTIME) ||
636 (thread->saved_mode == TH_MODE_REALTIME)) {
637 kr = KERN_FAILURE;
638 goto unlock;
639 }
640
641 if (thread->policy_reset) {
642 kr = KERN_SUCCESS;
643 goto unlock;
644 }
645
646 sched_mode_t old_mode = thread->sched_mode;
647
648 /*
649 * Reverse engineer and apply the correct importance value
650 * from the requested absolute priority value.
651 *
652 * TODO: Store the absolute priority value instead
653 */
654
655 if (priority >= thread->max_priority) {
656 priority = thread->max_priority - thread->task_priority;
657 } else if (priority >= MINPRI_KERNEL) {
658 priority -= MINPRI_KERNEL;
659 } else if (priority >= MINPRI_RESERVED) {
660 priority -= MINPRI_RESERVED;
661 } else {
662 priority -= BASEPRI_DEFAULT;
663 }
664
665 priority += thread->task_priority;
666
667 if (priority > thread->max_priority) {
668 priority = thread->max_priority;
669 } else if (priority < MINPRI) {
670 priority = MINPRI;
671 }
672
673 thread->importance = priority - thread->task_priority;
674
675 thread_set_user_sched_mode_and_recompute_pri(thread, mode);
676
677 if (mode != old_mode) {
678 pend_token->tpt_update_thread_sfi = 1;
679 }
680
681 unlock:
682 thread_unlock(thread);
683 splx(s);
684
685 return kr;
686 }
687
688 void
thread_freeze_base_pri(thread_t thread)689 thread_freeze_base_pri(thread_t thread)
690 {
691 assert(thread == current_thread());
692
693 spl_t s = splsched();
694 thread_lock(thread);
695
696 assert((thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN) == 0);
697 thread->sched_flags |= TH_SFLAG_BASE_PRI_FROZEN;
698
699 thread_unlock(thread);
700 splx(s);
701 }
702
703 bool
thread_unfreeze_base_pri(thread_t thread)704 thread_unfreeze_base_pri(thread_t thread)
705 {
706 assert(thread == current_thread());
707 integer_t base_pri;
708 ast_t ast = 0;
709
710 spl_t s = splsched();
711 thread_lock(thread);
712
713 assert(thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN);
714 thread->sched_flags &= ~TH_SFLAG_BASE_PRI_FROZEN;
715
716 base_pri = thread->req_base_pri;
717 if (base_pri != thread->base_pri) {
718 /*
719 * This function returns "true" if the base pri change
720 * is the most likely cause for the preemption.
721 */
722 sched_set_thread_base_priority(thread, base_pri);
723 ast = ast_peek(AST_PREEMPT);
724 }
725
726 thread_unlock(thread);
727 splx(s);
728
729 return ast != 0;
730 }
731
732 uint8_t
thread_workq_pri_for_qos(thread_qos_t qos)733 thread_workq_pri_for_qos(thread_qos_t qos)
734 {
735 assert(qos < THREAD_QOS_LAST);
736 return (uint8_t)thread_qos_policy_params.qos_pri[qos];
737 }
738
739 thread_qos_t
thread_workq_qos_for_pri(int priority)740 thread_workq_qos_for_pri(int priority)
741 {
742 thread_qos_t qos;
743 if (priority > thread_qos_policy_params.qos_pri[THREAD_QOS_USER_INTERACTIVE]) {
744 // indicate that workq should map >UI threads to workq's
745 // internal notation for above-UI work.
746 return THREAD_QOS_UNSPECIFIED;
747 }
748 for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
749 // map a given priority up to the next nearest qos band.
750 if (thread_qos_policy_params.qos_pri[qos - 1] < priority) {
751 return qos;
752 }
753 }
754 return THREAD_QOS_MAINTENANCE;
755 }
756
757 /*
758 * private interface for pthread workqueues
759 *
760 * Set scheduling policy & absolute priority for thread
761 * May be called with spinlocks held
762 * Thread mutex lock is not held
763 */
764 void
thread_reset_workq_qos(thread_t thread,uint32_t qos)765 thread_reset_workq_qos(thread_t thread, uint32_t qos)
766 {
767 struct task_pend_token pend_token = {};
768
769 assert(qos < THREAD_QOS_LAST);
770
771 spl_t s = splsched();
772 thread_lock(thread);
773
774 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
775 TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
776 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
777 TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0,
778 &pend_token);
779
780 assert(pend_token.tpt_update_sockets == 0);
781
782 thread_unlock(thread);
783 splx(s);
784
785 thread_policy_update_complete_unlocked(thread, &pend_token);
786 }
787
788 /*
789 * private interface for pthread workqueues
790 *
791 * Set scheduling policy & absolute priority for thread
792 * May be called with spinlocks held
793 * Thread mutex lock is held
794 */
795 void
thread_set_workq_override(thread_t thread,uint32_t qos)796 thread_set_workq_override(thread_t thread, uint32_t qos)
797 {
798 struct task_pend_token pend_token = {};
799
800 assert(qos < THREAD_QOS_LAST);
801
802 spl_t s = splsched();
803 thread_lock(thread);
804
805 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
806 TASK_POLICY_QOS_WORKQ_OVERRIDE, qos, 0, &pend_token);
807
808 assert(pend_token.tpt_update_sockets == 0);
809
810 thread_unlock(thread);
811 splx(s);
812
813 thread_policy_update_complete_unlocked(thread, &pend_token);
814 }
815
816 /*
817 * private interface for pthread workqueues
818 *
819 * Set scheduling policy & absolute priority for thread
820 * May be called with spinlocks held
821 * Thread mutex lock is not held
822 */
823 void
thread_set_workq_pri(thread_t thread,thread_qos_t qos,integer_t priority,integer_t policy)824 thread_set_workq_pri(thread_t thread,
825 thread_qos_t qos,
826 integer_t priority,
827 integer_t policy)
828 {
829 struct task_pend_token pend_token = {};
830 sched_mode_t mode = convert_policy_to_sched_mode(policy);
831
832 assert(qos < THREAD_QOS_LAST);
833 assert(thread->static_param);
834
835 if (!thread->static_param || !thread->active) {
836 return;
837 }
838
839 spl_t s = splsched();
840 thread_lock(thread);
841
842 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
843 TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
844 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
845 TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED,
846 0, &pend_token);
847
848 thread_unlock(thread);
849 splx(s);
850
851 /* Concern: this doesn't hold the mutex... */
852
853 __assert_only kern_return_t kr;
854 kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority,
855 &pend_token);
856 assert(kr == KERN_SUCCESS);
857
858 if (pend_token.tpt_update_thread_sfi) {
859 sfi_reevaluate(thread);
860 }
861 }
862
863 /*
864 * thread_set_mode_and_absolute_pri:
865 *
866 * Set scheduling policy & absolute priority for thread, for deprecated
867 * thread_set_policy and thread_policy interfaces.
868 *
869 * Called with nothing locked.
870 */
871 kern_return_t
thread_set_mode_and_absolute_pri(thread_t thread,integer_t policy,integer_t priority)872 thread_set_mode_and_absolute_pri(thread_t thread,
873 integer_t policy,
874 integer_t priority)
875 {
876 kern_return_t kr = KERN_SUCCESS;
877 struct task_pend_token pend_token = {};
878
879 sched_mode_t mode = convert_policy_to_sched_mode(policy);
880
881 thread_mtx_lock(thread);
882
883 if (!thread->active) {
884 kr = KERN_TERMINATED;
885 goto unlock;
886 }
887
888 if (thread_is_static_param(thread)) {
889 kr = KERN_POLICY_STATIC;
890 goto unlock;
891 }
892
893 /* Setting legacy policies on threads kills the current QoS */
894 if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
895 thread_remove_qos_policy_locked(thread, &pend_token);
896 }
897
898 kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);
899
900 unlock:
901 thread_mtx_unlock(thread);
902
903 thread_policy_update_complete_unlocked(thread, &pend_token);
904
905 return kr;
906 }
907
908 /*
909 * Set the thread's requested mode and recompute priority
910 * Called with thread mutex and thread locked
911 *
912 * TODO: Mitigate potential problems caused by moving thread to end of runq
913 * whenever its priority is recomputed
914 * Only remove when it actually changes? Attempt to re-insert at appropriate location?
915 */
916 static void
thread_set_user_sched_mode_and_recompute_pri(thread_t thread,sched_mode_t mode)917 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
918 {
919 if (thread->policy_reset) {
920 return;
921 }
922
923 boolean_t removed = thread_run_queue_remove(thread);
924
925 /*
926 * TODO: Instead of having saved mode, have 'user mode' and 'true mode'.
927 * That way there's zero confusion over which the user wants
928 * and which the kernel wants.
929 */
930 if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK) {
931 thread->saved_mode = mode;
932 } else {
933 sched_set_thread_mode(thread, mode);
934 }
935
936 thread_recompute_priority(thread);
937
938 if (removed) {
939 thread_run_queue_reinsert(thread, SCHED_TAILQ);
940 }
941 }
942
943 /* called at splsched with thread lock locked */
944 static void
thread_update_qos_cpu_time_locked(thread_t thread)945 thread_update_qos_cpu_time_locked(thread_t thread)
946 {
947 task_t task = get_threadtask(thread);
948 uint64_t timer_sum, timer_delta;
949
950 /*
951 * This is only as accurate as the distance between
952 * last context switch (embedded) or last user/kernel boundary transition (desktop)
953 * because user_timer and system_timer are only updated then.
954 *
955 * TODO: Consider running a timer_update operation here to update it first.
956 * Maybe doable with interrupts disabled from current thread.
957 * If the thread is on a different core, may not be easy to get right.
958 *
959 * TODO: There should be a function for this in timer.c
960 */
961
962 timer_sum = timer_grab(&thread->user_timer);
963 timer_sum += timer_grab(&thread->system_timer);
964 timer_delta = timer_sum - thread->vtimer_qos_save;
965
966 thread->vtimer_qos_save = timer_sum;
967
968 uint64_t* task_counter = NULL;
969
970 /* Update the task-level effective and requested qos stats atomically, because we don't have the task lock. */
971 switch (thread->effective_policy.thep_qos) {
972 case THREAD_QOS_UNSPECIFIED: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default; break;
973 case THREAD_QOS_MAINTENANCE: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance; break;
974 case THREAD_QOS_BACKGROUND: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background; break;
975 case THREAD_QOS_UTILITY: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility; break;
976 case THREAD_QOS_LEGACY: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy; break;
977 case THREAD_QOS_USER_INITIATED: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated; break;
978 case THREAD_QOS_USER_INTERACTIVE: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break;
979 default:
980 panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
981 }
982
983 OSAddAtomic64(timer_delta, task_counter);
984
985 /* Update the task-level qos stats atomically, because we don't have the task lock. */
986 switch (thread->requested_policy.thrp_qos) {
987 case THREAD_QOS_UNSPECIFIED: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default; break;
988 case THREAD_QOS_MAINTENANCE: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance; break;
989 case THREAD_QOS_BACKGROUND: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background; break;
990 case THREAD_QOS_UTILITY: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility; break;
991 case THREAD_QOS_LEGACY: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy; break;
992 case THREAD_QOS_USER_INITIATED: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated; break;
993 case THREAD_QOS_USER_INTERACTIVE: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break;
994 default:
995 panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos);
996 }
997
998 OSAddAtomic64(timer_delta, task_counter);
999 }
1000
1001 /*
1002 * called with no thread locks held
1003 * may hold task lock
1004 */
void
thread_update_qos_cpu_time(thread_t thread)
{
	/*
	 * Acquire the thread mutex, then the scheduler spinlock with
	 * interrupts disabled, before flushing this thread's per-QoS
	 * CPU time into its task's counters.
	 */
	thread_mtx_lock(thread);

	spl_t s = splsched();
	thread_lock(thread);

	thread_update_qos_cpu_time_locked(thread);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);
}
1020
1021 /*
1022 * Calculate base priority from thread attributes, and set it on the thread
1023 *
1024 * Called with thread_lock and thread mutex held.
1025 */
1026 extern boolean_t vps_dynamic_priority_enabled;
1027
1028 void
thread_recompute_priority(thread_t thread)1029 thread_recompute_priority(
1030 thread_t thread)
1031 {
1032 integer_t priority;
1033
1034 if (thread->policy_reset) {
1035 return;
1036 }
1037
1038 if (thread->sched_mode == TH_MODE_REALTIME) {
1039 uint8_t i = thread->realtime.priority_offset;
1040 assert((i >= 0) && (i < NRTQS));
1041 priority = BASEPRI_RTQUEUES + i;
1042 sched_set_thread_base_priority(thread, priority);
1043 if (thread->realtime.deadline == RT_DEADLINE_NONE) {
1044 /* Make sure the thread has a valid deadline */
1045 uint64_t ctime = mach_absolute_time();
1046 thread->realtime.deadline = thread->realtime.constraint + ctime;
1047 KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SET_RT_DEADLINE) | DBG_FUNC_NONE,
1048 (uintptr_t)thread_tid(thread), thread->realtime.deadline, thread->realtime.computation, 1);
1049 }
1050 return;
1051 } else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
1052 int qos = thread->effective_policy.thep_qos;
1053 int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
1054 int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
1055 int qos_scaled_relprio;
1056
1057 assert(qos >= 0 && qos < THREAD_QOS_LAST);
1058 assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);
1059
1060 priority = thread_qos_policy_params.qos_pri[qos];
1061 qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);
1062
1063 if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) {
1064 /* Bump priority 46 to 47 when in a frontmost app */
1065 qos_scaled_relprio += 1;
1066 }
1067
1068 /* TODO: factor in renice priority here? */
1069
1070 priority += qos_scaled_relprio;
1071 } else {
1072 if (thread->importance > MAXPRI) {
1073 priority = MAXPRI;
1074 } else if (thread->importance < -MAXPRI) {
1075 priority = -MAXPRI;
1076 } else {
1077 priority = thread->importance;
1078 }
1079
1080 priority += thread->task_priority;
1081 }
1082
1083 priority = MAX(priority, thread->user_promotion_basepri);
1084
1085 /*
1086 * Clamp priority back into the allowed range for this task.
1087 * The initial priority value could be out of this range due to:
1088 * Task clamped to BG or Utility (max-pri is 4, or 20)
1089 * Task is user task (max-pri is 63)
1090 * Task is kernel task (max-pri is 95)
1091 * Note that thread->importance is user-settable to any integer
1092 * via THREAD_PRECEDENCE_POLICY.
1093 */
1094 if (priority > thread->max_priority) {
1095 if (thread->effective_policy.thep_promote_above_task) {
1096 priority = MAX(thread->max_priority, thread->user_promotion_basepri);
1097 } else {
1098 priority = thread->max_priority;
1099 }
1100 } else if (priority < MINPRI) {
1101 priority = MINPRI;
1102 }
1103
1104 if (thread->saved_mode == TH_MODE_REALTIME &&
1105 thread->sched_flags & TH_SFLAG_FAILSAFE) {
1106 priority = DEPRESSPRI;
1107 }
1108
1109 if (thread->effective_policy.thep_terminated == TRUE) {
1110 /*
1111 * We temporarily want to override the expected priority to
1112 * ensure that the thread exits in a timely manner.
1113 * Note that this is allowed to exceed thread->max_priority
1114 * so that the thread is no longer clamped to background
1115 * during the final exit phase.
1116 */
1117 if (priority < thread->task_priority) {
1118 priority = thread->task_priority;
1119 }
1120 if (priority < BASEPRI_DEFAULT) {
1121 priority = BASEPRI_DEFAULT;
1122 }
1123 }
1124
1125 #if !defined(XNU_TARGET_OS_OSX)
1126 /* No one can have a base priority less than MAXPRI_THROTTLE */
1127 if (priority < MAXPRI_THROTTLE) {
1128 priority = MAXPRI_THROTTLE;
1129 }
1130 #endif /* !defined(XNU_TARGET_OS_OSX) */
1131
1132 sched_set_thread_base_priority(thread, priority);
1133 }
1134
1135 /* Called with the task lock held, but not the thread mutex or spinlock */
void
thread_policy_update_tasklocked(
	thread_t           thread,
	integer_t          priority,
	integer_t          max_priority,
	task_pend_token_t  pend_token)
{
	thread_mtx_lock(thread);

	/* Nothing to do for an inactive or reset thread */
	if (!thread->active || thread->policy_reset) {
		thread_mtx_unlock(thread);
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	/* Capture the old ceiling before overwriting, for the throttle transition check below */
	__unused
	integer_t old_max_priority = thread->max_priority;

	assert(priority >= INT16_MIN && priority <= INT16_MAX);
	thread->task_priority = (int16_t)priority;

	assert(max_priority >= INT16_MIN && max_priority <= INT16_MAX);
	thread->max_priority = (int16_t)max_priority;

	/*
	 * When backgrounding a thread, realtime and fixed priority threads
	 * should be demoted to timeshare background threads.
	 *
	 * TODO: Do this inside the thread policy update routine in order to avoid double
	 * remove/reinsert for a runnable thread
	 */
	if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
		sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED);
	} else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
		sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
	}

	/* Recompute effective policy and base priority with the new task values */
	thread_policy_update_spinlocked(thread, true, pend_token);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);
}
1182
1183 /*
1184 * Reset thread to default state in preparation for termination
1185 * Called with thread mutex locked
1186 *
1187 * Always called on current thread, so we don't need a run queue remove
1188 */
void
thread_policy_reset(
	thread_t                thread)
{
	spl_t           s;

	assert(thread == current_thread());

	s = splsched();
	thread_lock(thread);

	/* Clear any active failsafe or throttle demotion before resetting */
	if (thread->sched_flags & TH_SFLAG_FAILSAFE) {
		sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
	}

	if (thread->sched_flags & TH_SFLAG_THROTTLED) {
		sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
	}

	/* At this point, the various demotions should be inactive */
	assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
	assert(!(thread->sched_flags & TH_SFLAG_THROTTLED));
	assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK));

	/* Reset thread back to task-default basepri and mode */
	sched_mode_t newmode = SCHED(initial_thread_sched_mode)(get_threadtask(thread));

	sched_set_thread_mode(thread, newmode);

	thread->importance = 0;

	/* Prevent further changes to thread base priority or mode */
	thread->policy_reset = 1;

	sched_set_thread_base_priority(thread, thread->task_priority);

	thread_unlock(thread);
	splx(s);
}
1228
/*
 * Fetch the current (or default, if *get_default) policy of the given
 * flavor for a thread.  On output *get_default may be set TRUE when the
 * thread has no explicit setting for that flavor.
 */
kern_return_t
thread_policy_get(
	thread_t                thread,
	thread_policy_flavor_t  flavor,
	thread_policy_t         policy_info,
	mach_msg_type_number_t  *count,
	boolean_t               *get_default)
{
	kern_return_t           result = KERN_SUCCESS;

	if (thread == THREAD_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	thread_mtx_lock(thread);
	if (!thread->active) {
		thread_mtx_unlock(thread);

		return KERN_TERMINATED;
	}

	switch (flavor) {
	case THREAD_EXTENDED_POLICY:
	{
		boolean_t               timeshare = TRUE;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			/*
			 * A realtime thread (current or saved mode) has no
			 * timeshare setting to report; fall back to default.
			 */
			if ((thread->sched_mode != TH_MODE_REALTIME) &&
			    (thread->saved_mode != TH_MODE_REALTIME)) {
				/* A demotion hides the real mode in saved_mode */
				if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
					timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
				} else {
					timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
				}
			} else {
				*get_default = TRUE;
			}

			thread_unlock(thread);
			splx(s);
		}

		if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
			thread_extended_policy_t        info;

			info = (thread_extended_policy_t)policy_info;
			info->timeshare = timeshare;
		}

		break;
	}

	case THREAD_TIME_CONSTRAINT_POLICY:
	case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
	{
		thread_time_constraint_with_priority_policy_t info;

		/* The WITH_PRIORITY variant requires room for the extra field */
		mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
		    THREAD_TIME_CONSTRAINT_POLICY_COUNT :
		    THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);

		if (*count < min_count) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_time_constraint_with_priority_policy_t)policy_info;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			if ((thread->sched_mode == TH_MODE_REALTIME) ||
			    (thread->saved_mode == TH_MODE_REALTIME)) {
				info->period = thread->realtime.period;
				info->computation = thread->realtime.computation;
				info->constraint = thread->realtime.constraint;
				info->preemptible = thread->realtime.preemptible;
				if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
					info->priority = thread->realtime.priority_offset + BASEPRI_RTQUEUES;
				}
			} else {
				/* Not a realtime thread: report defaults instead */
				*get_default = TRUE;
			}

			thread_unlock(thread);
			splx(s);
		}

		if (*get_default) {
			info->period = 0;
			info->computation = default_timeshare_computation;
			info->constraint = default_timeshare_constraint;
			info->preemptible = TRUE;
			if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
				info->priority = BASEPRI_RTQUEUES;
			}
		}


		break;
	}

	case THREAD_PRECEDENCE_POLICY:
	{
		thread_precedence_policy_t      info;

		if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_precedence_policy_t)policy_info;

		if (!(*get_default)) {
			spl_t s = splsched();
			thread_lock(thread);

			info->importance = thread->importance;

			thread_unlock(thread);
			splx(s);
		} else {
			info->importance = 0;
		}

		break;
	}

	case THREAD_AFFINITY_POLICY:
	{
		thread_affinity_policy_t        info;

		if (!thread_affinity_is_supported()) {
			result = KERN_NOT_SUPPORTED;
			break;
		}
		if (*count < THREAD_AFFINITY_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		info = (thread_affinity_policy_t)policy_info;

		if (!(*get_default)) {
			info->affinity_tag = thread_affinity_get(thread);
		} else {
			info->affinity_tag = THREAD_AFFINITY_TAG_NULL;
		}

		break;
	}

	case THREAD_POLICY_STATE:
	{
		thread_policy_state_t           info;

		if (*count < THREAD_POLICY_STATE_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		/* Only root can get this info */
		if (!task_is_privileged(current_task())) {
			result = KERN_PROTECTION_FAILURE;
			break;
		}

		info = (thread_policy_state_t)(void*)policy_info;

		if (!(*get_default)) {
			info->flags = 0;

			spl_t s = splsched();
			thread_lock(thread);

			info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0);

			/* Export the raw policy bitfields as 64-bit snapshots */
			info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy);
			info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy);

			info->thps_user_promotions          = 0;
			info->thps_user_promotion_basepri   = thread->user_promotion_basepri;
			info->thps_ipc_overrides            = thread->kevent_overrides;

			proc_get_thread_policy_bitfield(thread, info);

			thread_unlock(thread);
			splx(s);
		} else {
			info->requested = 0;
			info->effective = 0;
			info->pending = 0;
		}

		break;
	}

	case THREAD_LATENCY_QOS_POLICY:
	{
		thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
		thread_latency_qos_t plqos;

		if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			plqos = 0;
		} else {
			plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL);
		}

		info->thread_latency_qos_tier = qos_latency_policy_package(plqos);
	}
	break;

	case THREAD_THROUGHPUT_QOS_POLICY:
	{
		thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
		thread_throughput_qos_t ptqos;

		if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (*get_default) {
			ptqos = 0;
		} else {
			ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL);
		}

		info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);
	}
	break;

	case THREAD_QOS_POLICY:
	{
		thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

		if (*count < THREAD_QOS_POLICY_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		if (!(*get_default)) {
			int relprio_value = 0;
			info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
			    TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);

			/* Relative priority is stored inverted; undo for the caller */
			info->tier_importance = -relprio_value;
		} else {
			info->qos_tier = THREAD_QOS_UNSPECIFIED;
			info->tier_importance = 0;
		}

		break;
	}

	default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	thread_mtx_unlock(thread);

	return result;
}
1502
/* Compute a newly-created thread's initial effective policy, with tracing */
void
thread_policy_create(thread_t thread)
{
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);

	/* We pass a pend token but ignore it */
	struct task_pend_token pend_token = {};

	thread_policy_update_internal_spinlocked(thread, true, &pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);
}
1521
/* Tracing wrapper around the effective-policy recomputation */
static void
thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token)
{
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);

	thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
	    thread_tid(thread), theffective_0(thread),
	    theffective_1(thread), thread->base_pri, 0);
}
1537
1538
1539
1540 /*
1541 * One thread state update function TO RULE THEM ALL
1542 *
1543 * This function updates the thread effective policy fields
1544 * and pushes the results to the relevant subsystems.
1545 *
1546 * Returns TRUE if a pended action needs to be run.
1547 *
1548 * Called with thread spinlock locked, task may be locked, thread mutex may be locked
1549 */
static void
thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority,
    task_pend_token_t pend_token)
{
	/*
	 * Step 1:
	 *  Gather requested policy and effective task state
	 */

	struct thread_requested_policy requested = thread->requested_policy;
	struct task_effective_policy task_effective = get_threadtask(thread)->effective_policy;

	/*
	 * Step 2:
	 *  Calculate new effective policies from requested policy, task and thread state
	 *  Rules:
	 *      Don't change requested, it won't take effect
	 */

	struct thread_effective_policy next = {};

	next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;

	uint32_t next_qos = requested.thrp_qos;

	/* Overrides and promotions can only raise a specified QoS, never set one */
	if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		next_qos = MAX(requested.thrp_qos_override, next_qos);
		next_qos = MAX(requested.thrp_qos_promote, next_qos);
		next_qos = MAX(requested.thrp_qos_kevent_override, next_qos);
		next_qos = MAX(requested.thrp_qos_wlsvc_override, next_qos);
		next_qos = MAX(requested.thrp_qos_workq_override, next_qos);
	}

	if (task_effective.tep_darwinbg && task_effective.tep_adaptive_bg &&
	    requested.thrp_qos_promote > THREAD_QOS_BACKGROUND) {
		/*
		 * This thread is turnstile-boosted higher than the adaptive clamp
		 * by a synchronous waiter. Allow that to override the adaptive
		 * clamp temporarily for this thread only.
		 */
		next.thep_promote_above_task = true;
		next_qos = requested.thrp_qos_promote;
	}

	next.thep_qos = next_qos;

	/* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
	if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
		if (next.thep_qos != THREAD_QOS_UNSPECIFIED) {
			next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
		} else {
			next.thep_qos = task_effective.tep_qos_clamp;
		}
	}

	/*
	 * Extract outbound-promotion QoS before applying task ceiling or BG clamp
	 * This allows QoS promotions to work properly even after the process is unclamped.
	 */
	next.thep_qos_promote = next.thep_qos;

	/* The ceiling only applies to threads that are in the QoS world */
	/* TODO: is it appropriate for this to limit a turnstile-boosted thread's QoS? */
	if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
	    next.thep_qos != THREAD_QOS_UNSPECIFIED) {
		next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
	}

	/*
	 * The QoS relative priority is only applicable when the original programmer's
	 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
	 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
	 * since otherwise it would be lower than unclamped threads. Similarly, in the
	 * presence of boosting, the programmer doesn't know what other actors
	 * are boosting the thread.
	 */
	if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
	    (requested.thrp_qos == next.thep_qos) &&
	    (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
		next.thep_qos_relprio = requested.thrp_qos_relprio;
	} else {
		next.thep_qos_relprio = 0;
	}

	/* Calculate DARWIN_BG */
	bool wants_darwinbg        = false;
	bool wants_all_sockets_bg  = false; /* Do I want my existing sockets to be bg */

	/* Task-level BG applies unless this thread is promoted above the task */
	if (task_effective.tep_darwinbg && !next.thep_promote_above_task) {
		wants_darwinbg = true;
	}

	/*
	 * If DARWIN_BG has been requested at either level, it's engaged.
	 * darwinbg threads always create bg sockets,
	 * but only some types of darwinbg change the sockets
	 * after they're created
	 */
	if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg) {
		wants_all_sockets_bg = wants_darwinbg = true;
	}

	if (requested.thrp_pidbind_bg) {
		wants_all_sockets_bg = wants_darwinbg = true;
	}

	/* BG and MAINTENANCE QoS imply darwinbg */
	if (next.thep_qos == THREAD_QOS_BACKGROUND ||
	    next.thep_qos == THREAD_QOS_MAINTENANCE) {
		wants_darwinbg = true;
	}

	/* Calculate side effects of DARWIN_BG */

	if (wants_darwinbg) {
		next.thep_darwinbg = 1;
	}

	if (next.thep_darwinbg || task_effective.tep_new_sockets_bg) {
		next.thep_new_sockets_bg = 1;
	}

	/* Don't use task_effective.tep_all_sockets_bg here */
	if (wants_all_sockets_bg) {
		next.thep_all_sockets_bg = 1;
	}

	/* darwinbg implies background QOS (or lower) */
	if (next.thep_darwinbg &&
	    (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
		next.thep_qos = THREAD_QOS_BACKGROUND;
		next.thep_qos_relprio = 0;
	}

	/* Calculate IO policy */

	int iopol = THROTTLE_LEVEL_TIER0;

	/* Factor in the task's IO policy */
	if (next.thep_darwinbg) {
		iopol = MAX(iopol, task_effective.tep_bg_iotier);
	}

	if (!next.thep_promote_above_task) {
		iopol = MAX(iopol, task_effective.tep_io_tier);
	}

	/* Look up the associated IO tier value for the QoS class */
	iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);

	iopol = MAX(iopol, requested.thrp_int_iotier);
	iopol = MAX(iopol, requested.thrp_ext_iotier);

	/* Apply the kevent iotier override */
	iopol = MIN(iopol, requested.thrp_iotier_kevent_override);

	next.thep_io_tier = iopol;

	/*
	 * If a QoS override is causing IO to go into a lower tier, we also set
	 * the passive bit so that a thread doesn't end up stuck in its own throttle
	 * window when the override goes away.
	 */

	int next_qos_iotier = thread_qos_policy_params.qos_iotier[next.thep_qos];
	int req_qos_iotier  = thread_qos_policy_params.qos_iotier[requested.thrp_qos];
	bool qos_io_override_active = (next_qos_iotier < req_qos_iotier);

	/* Calculate Passive IO policy */
	if (requested.thrp_ext_iopassive ||
	    requested.thrp_int_iopassive ||
	    qos_io_override_active ||
	    task_effective.tep_io_passive) {
		next.thep_io_passive = 1;
	}

	/* Calculate timer QOS */
	uint32_t latency_qos = requested.thrp_latency_qos;

	if (!next.thep_promote_above_task) {
		latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
	}

	latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);

	next.thep_latency_qos = latency_qos;

	/* Calculate throughput QOS */
	uint32_t through_qos = requested.thrp_through_qos;

	if (!next.thep_promote_above_task) {
		through_qos = MAX(through_qos, task_effective.tep_through_qos);
	}

	through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);

	next.thep_through_qos = through_qos;

	if (task_effective.tep_terminated || requested.thrp_terminated) {
		/* Shoot down the throttles that slow down exit or response to SIGTERM */
		next.thep_terminated    = 1;
		next.thep_darwinbg      = 0;
		next.thep_io_tier       = THROTTLE_LEVEL_TIER0;
		next.thep_qos           = THREAD_QOS_UNSPECIFIED;
		next.thep_latency_qos   = LATENCY_QOS_TIER_UNSPECIFIED;
		next.thep_through_qos   = THROUGHPUT_QOS_TIER_UNSPECIFIED;
	}

	/*
	 * Step 3:
	 *  Swap out old policy for new policy
	 */

	struct thread_effective_policy prev = thread->effective_policy;

	/* Flush CPU-time accounting under the outgoing QoS before it changes */
	thread_update_qos_cpu_time_locked(thread);

	/* This is the point where the new values become visible to other threads */
	thread->effective_policy = next;

	/*
	 * Step 4:
	 *  Pend updates that can't be done while holding the thread lock
	 */

	if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg) {
		pend_token->tpt_update_sockets = 1;
	}

	/* TODO: Doesn't this only need to be done if the throttle went up? */
	if (prev.thep_io_tier != next.thep_io_tier) {
		pend_token->tpt_update_throttle = 1;
	}

	/*
	 * Check for the attributes that sfi_thread_classify() consults,
	 * and trigger SFI re-evaluation.
	 */
	if (prev.thep_qos != next.thep_qos ||
	    prev.thep_darwinbg != next.thep_darwinbg) {
		pend_token->tpt_update_thread_sfi = 1;
	}

	integer_t old_base_pri = thread->base_pri;

	/*
	 * Step 5:
	 *  Update other subsystems as necessary if something has changed
	 */

	/* Check for the attributes that thread_recompute_priority() consults */
	if (prev.thep_qos != next.thep_qos ||
	    prev.thep_qos_relprio != next.thep_qos_relprio ||
	    prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent ||
	    prev.thep_promote_above_task != next.thep_promote_above_task ||
	    prev.thep_terminated != next.thep_terminated ||
	    pend_token->tpt_force_recompute_pri == 1 ||
	    recompute_priority) {
		thread_recompute_priority(thread);
	}

	/*
	 * Check if the thread is waiting on a turnstile and needs priority propagation.
	 */
	if (pend_token->tpt_update_turnstile &&
	    ((old_base_pri == thread->base_pri) ||
	    !thread_get_waiting_turnstile(thread))) {
		/*
		 * Reset update turnstile pend token since either
		 * the thread priority did not change or thread is
		 * not blocked on a turnstile.
		 */
		pend_token->tpt_update_turnstile = 0;
	}
}
1824
1825
1826 /*
1827 * Initiate a thread policy state transition on a thread with its TID
1828 * Useful if you cannot guarantee the thread won't get terminated
1829 * Precondition: No locks are held
1830 * Will take task lock - using the non-tid variant is faster
1831 * if you already have a thread ref.
1832 */
1833 void
proc_set_thread_policy_with_tid(task_t task,uint64_t tid,int category,int flavor,int value)1834 proc_set_thread_policy_with_tid(task_t task,
1835 uint64_t tid,
1836 int category,
1837 int flavor,
1838 int value)
1839 {
1840 /* takes task lock, returns ref'ed thread or NULL */
1841 thread_t thread = task_findtid(task, tid);
1842
1843 if (thread == THREAD_NULL) {
1844 return;
1845 }
1846
1847 proc_set_thread_policy(thread, category, flavor, value);
1848
1849 thread_deallocate(thread);
1850 }
1851
1852 /*
1853 * Initiate a thread policy transition on a thread
1854 * This path supports networking transitions (i.e. darwinbg transitions)
1855 * Precondition: No locks are held
1856 */
1857 void
proc_set_thread_policy(thread_t thread,int category,int flavor,int value)1858 proc_set_thread_policy(thread_t thread,
1859 int category,
1860 int flavor,
1861 int value)
1862 {
1863 struct task_pend_token pend_token = {};
1864
1865 thread_mtx_lock(thread);
1866
1867 proc_set_thread_policy_locked(thread, category, flavor, value, 0, &pend_token);
1868
1869 thread_mtx_unlock(thread);
1870
1871 thread_policy_update_complete_unlocked(thread, &pend_token);
1872 }
1873
1874 /*
1875 * Do the things that can't be done while holding a thread mutex.
1876 * These are set up to call back into thread policy to get the latest value,
1877 * so they don't have to be synchronized with the update.
1878 * The only required semantic is 'call this sometime after updating effective policy'
1879 *
1880 * Precondition: Thread mutex is not held
1881 *
1882 * This may be called with the task lock held, but in that case it won't be
1883 * called with tpt_update_sockets set.
1884 */
void
thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
{
#ifdef MACH_BSD
	/* Push the darwinbg state down onto the thread's existing sockets */
	if (pend_token->tpt_update_sockets) {
		proc_apply_task_networkbg(task_pid(get_threadtask(thread)), thread);
	}
#endif /* MACH_BSD */

	/* Re-evaluate any IO throttle window the thread may be in */
	if (pend_token->tpt_update_throttle) {
		rethrottle_thread(get_bsdthread_info(thread));
	}

	/* Re-run SFI classification for the thread */
	if (pend_token->tpt_update_thread_sfi) {
		sfi_reevaluate(thread);
	}

	/* Propagate the new priority through the turnstile chain */
	if (pend_token->tpt_update_turnstile) {
		turnstile_update_thread_priority_chain(thread);
	}
}
1906
1907 /*
1908 * Set and update thread policy
1909 * Thread mutex might be held
1910 */
static void
proc_set_thread_policy_locked(thread_t thread,
    int      category,
    int      flavor,
    int      value,
    int      value2,
    task_pend_token_t pend_token)
{
	/* Take the scheduler spinlock (interrupts disabled) around the update */
	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);

	thread_unlock(thread);
	splx(s);
}
1927
1928 /*
1929 * Set and update thread policy
1930 * Thread spinlock is held
1931 */
static void
proc_set_thread_policy_spinlocked(thread_t thread,
    int      category,
    int      flavor,
    int      value,
    int      value2,
    task_pend_token_t pend_token)
{
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
	    thread_tid(thread), threquested_0(thread),
	    threquested_1(thread), value, 0);

	/* Record the new requested value, then recompute the effective policy */
	thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2, pend_token);

	thread_policy_update_spinlocked(thread, false, pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
	    thread_tid(thread), threquested_0(thread),
	    threquested_1(thread), tpending(pend_token), 0);
}
1954
1955 /*
1956 * Set the requested state for a specific flavor to a specific value.
1957 */
static void
thread_set_requested_policy_spinlocked(thread_t thread,
	int category,
	int flavor,
	int value,
	int value2,
	task_pend_token_t pend_token)
{
	/*
	 * Write one flavor's requested value into a local copy of the
	 * thread's requested policy struct, then publish the whole struct
	 * at once.  Caller holds the thread's scheduling spinlock.
	 * value2 is consulted only for TASK_POLICY_QOS_AND_RELPRIO
	 * (it carries the relative priority).
	 * Flavors that affect turnstile inheritance set
	 * pend_token->tpt_update_turnstile so the caller can propagate.
	 */
	int tier, passive;

	struct thread_requested_policy requested = thread->requested_policy;

	switch (flavor) {
	/* Category: EXTERNAL and INTERNAL, thread and task */

	case TASK_POLICY_DARWIN_BG:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_darwinbg = value;
		} else {
			requested.thrp_int_darwinbg = value;
		}
		break;

	case TASK_POLICY_IOPOL:
		/* Translate an IOPOL_* value into a throttle tier + passive bit */
		proc_iopol_to_tier(value, &tier, &passive);
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iotier = tier;
			requested.thrp_ext_iopassive = passive;
		} else {
			requested.thrp_int_iotier = tier;
			requested.thrp_int_iopassive = passive;
		}
		break;

	case TASK_POLICY_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iotier = value;
		} else {
			requested.thrp_int_iotier = value;
		}
		break;

	case TASK_POLICY_PASSIVE_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			requested.thrp_ext_iopassive = value;
		} else {
			requested.thrp_int_iopassive = value;
		}
		break;

	/* Category: ATTRIBUTE, thread only */

	case TASK_POLICY_PIDBIND_BG:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_pidbind_bg = value;
		break;

	case TASK_POLICY_LATENCY_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_latency_qos = value;
		break;

	case TASK_POLICY_THROUGH_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_through_qos = value;
		break;

	case TASK_POLICY_QOS_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_AND_RELPRIO:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos = value;
		requested.thrp_qos_relprio = value2;
		pend_token->tpt_update_turnstile = 1;
		DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
		break;

	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_workq_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_PROMOTE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_promote = value;
		break;

	case TASK_POLICY_QOS_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_kevent_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_QOS_SERVICER_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_qos_wlsvc_override = value;
		pend_token->tpt_update_turnstile = 1;
		break;

	case TASK_POLICY_TERMINATED:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_terminated = value;
		break;

	case TASK_POLICY_IOTIER_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		requested.thrp_iotier_kevent_override = value;
		break;

	default:
		panic("unknown task policy: %d %d %d", category, flavor, value);
		break;
	}

	/* Publish the updated struct in one store */
	thread->requested_policy = requested;
}
2079
2080 /*
2081 * Gets what you set. Effective values may be different.
2082 * Precondition: No locks are held
2083 */
2084 int
proc_get_thread_policy(thread_t thread,int category,int flavor)2085 proc_get_thread_policy(thread_t thread,
2086 int category,
2087 int flavor)
2088 {
2089 int value = 0;
2090 thread_mtx_lock(thread);
2091 value = proc_get_thread_policy_locked(thread, category, flavor, NULL);
2092 thread_mtx_unlock(thread);
2093 return value;
2094 }
2095
2096 static int
proc_get_thread_policy_locked(thread_t thread,int category,int flavor,int * value2)2097 proc_get_thread_policy_locked(thread_t thread,
2098 int category,
2099 int flavor,
2100 int* value2)
2101 {
2102 int value = 0;
2103
2104 spl_t s = splsched();
2105 thread_lock(thread);
2106
2107 value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2);
2108
2109 thread_unlock(thread);
2110 splx(s);
2111
2112 return value;
2113 }
2114
2115 /*
2116 * Gets what you set. Effective values may be different.
2117 */
static int
thread_get_requested_policy_spinlocked(thread_t thread,
	int category,
	int flavor,
	int* value2)
{
	/*
	 * Read back one flavor's requested (not effective) value.
	 * Caller holds the thread's scheduling spinlock.
	 * value2 is an optional out-parameter, written only for
	 * TASK_POLICY_QOS_AND_RELPRIO (receives the relative priority).
	 */
	int value = 0;

	struct thread_requested_policy requested = thread->requested_policy;

	switch (flavor) {
	case TASK_POLICY_DARWIN_BG:
		if (category == TASK_POLICY_EXTERNAL) {
			value = requested.thrp_ext_darwinbg;
		} else {
			value = requested.thrp_int_darwinbg;
		}
		break;
	case TASK_POLICY_IOPOL:
		/* Reverse of proc_iopol_to_tier: report tier+passive as IOPOL_* */
		if (category == TASK_POLICY_EXTERNAL) {
			value = proc_tier_to_iopol(requested.thrp_ext_iotier,
			    requested.thrp_ext_iopassive);
		} else {
			value = proc_tier_to_iopol(requested.thrp_int_iotier,
			    requested.thrp_int_iopassive);
		}
		break;
	case TASK_POLICY_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			value = requested.thrp_ext_iotier;
		} else {
			value = requested.thrp_int_iotier;
		}
		break;
	case TASK_POLICY_PASSIVE_IO:
		if (category == TASK_POLICY_EXTERNAL) {
			value = requested.thrp_ext_iopassive;
		} else {
			value = requested.thrp_int_iopassive;
		}
		break;
	case TASK_POLICY_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos;
		break;
	case TASK_POLICY_QOS_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_override;
		break;
	case TASK_POLICY_LATENCY_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_latency_qos;
		break;
	case TASK_POLICY_THROUGH_QOS:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_through_qos;
		break;
	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_workq_override;
		break;
	case TASK_POLICY_QOS_AND_RELPRIO:
		assert(category == TASK_POLICY_ATTRIBUTE);
		assert(value2 != NULL);
		value = requested.thrp_qos;
		*value2 = requested.thrp_qos_relprio;
		break;
	case TASK_POLICY_QOS_PROMOTE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_promote;
		break;
	case TASK_POLICY_QOS_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_kevent_override;
		break;
	case TASK_POLICY_QOS_SERVICER_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_qos_wlsvc_override;
		break;
	case TASK_POLICY_TERMINATED:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_terminated;
		break;
	case TASK_POLICY_IOTIER_KEVENT_OVERRIDE:
		assert(category == TASK_POLICY_ATTRIBUTE);
		value = requested.thrp_iotier_kevent_override;
		break;

	default:
		panic("unknown policy_flavor %d", flavor);
		break;
	}

	return value;
}
2213
2214 /*
2215 * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
2216 *
2217 * NOTE: This accessor does not take the task or thread lock.
2218 * Notifications of state updates need to be externally synchronized with state queries.
2219 * This routine *MUST* remain interrupt safe, as it is potentially invoked
2220 * within the context of a timer interrupt.
2221 *
2222 * TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
2223 * Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
2224 * I don't think that cost is worth not having the right answer.
2225 */
int
proc_get_effective_thread_policy(thread_t thread,
	int flavor)
{
	/*
	 * Lock-free read of the thread's effective policy (see the comment
	 * above: must remain interrupt safe, callers synchronize externally).
	 */
	int value = 0;

	switch (flavor) {
	case TASK_POLICY_DARWIN_BG:
		/*
		 * This call is used within the timer layer, as well as
		 * prioritizing requests to the graphics system.
		 * It also informs SFI and originator-bg-state.
		 * Returns 1 for background mode, 0 for normal mode
		 */

		value = thread->effective_policy.thep_darwinbg ? 1 : 0;
		break;
	case TASK_POLICY_IO:
		/*
		 * The I/O system calls here to find out what throttling tier to apply to an operation.
		 * Returns THROTTLE_LEVEL_* values
		 */
		value = thread->effective_policy.thep_io_tier;
		/* A temporary iotier override can only make the tier stronger (lower) */
		if (thread->iotier_override != THROTTLE_LEVEL_NONE) {
			value = MIN(value, thread->iotier_override);
		}
		break;
	case TASK_POLICY_PASSIVE_IO:
		/*
		 * The I/O system calls here to find out whether an operation should be passive.
		 * (i.e. not cause operations with lower throttle tiers to be throttled)
		 * Returns 1 for passive mode, 0 for normal mode
		 *
		 * If an override is causing IO to go into a lower tier, we also set
		 * the passive bit so that a thread doesn't end up stuck in its own throttle
		 * window when the override goes away.
		 */
		value = thread->effective_policy.thep_io_passive ? 1 : 0;
		if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
		    thread->iotier_override < thread->effective_policy.thep_io_tier) {
			value = 1;
		}
		break;
	case TASK_POLICY_ALL_SOCKETS_BG:
		/*
		 * do_background_socket() calls this to determine whether
		 * it should change the thread's sockets
		 * Returns 1 for background mode, 0 for normal mode
		 * This consults both thread and task so un-DBGing a thread while the task is BG
		 * doesn't get you out of the network throttle.
		 */
		value = (thread->effective_policy.thep_all_sockets_bg ||
		    get_threadtask(thread)->effective_policy.tep_all_sockets_bg) ? 1 : 0;
		break;
	case TASK_POLICY_NEW_SOCKETS_BG:
		/*
		 * socreate() calls this to determine if it should mark a new socket as background
		 * Returns 1 for background mode, 0 for normal mode
		 */
		value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
		break;
	case TASK_POLICY_LATENCY_QOS:
		/*
		 * timer arming calls into here to find out the timer coalescing level
		 * Returns a latency QoS tier (0-6)
		 */
		value = thread->effective_policy.thep_latency_qos;
		break;
	case TASK_POLICY_THROUGH_QOS:
		/*
		 * This value is passed into the urgency callout from the scheduler
		 * to the performance management subsystem.
		 *
		 * Returns a throughput QoS tier (0-6)
		 */
		value = thread->effective_policy.thep_through_qos;
		break;
	case TASK_POLICY_QOS:
		/*
		 * This is communicated to the performance management layer and SFI.
		 *
		 * Returns a QoS policy tier
		 */
		value = thread->effective_policy.thep_qos;
		break;
	default:
		panic("unknown thread policy flavor %d", flavor);
		break;
	}

	return value;
}
2318
2319
/*
 * The (integer_t) casts limit the number of bits we can fit here;
 * this interface is deprecated and replaced by the _EXT struct.
 */
static void
proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
{
	/*
	 * Pack the thread's requested and effective policy into the legacy
	 * POLICY_REQ_* / POLICY_EFF_* bitfields of thread_policy_state.
	 * Each field is only OR'd in when non-zero; the (integer_t) casts
	 * truncate the packed 64-bit value (see the deprecation note above).
	 */
	uint64_t bits = 0;
	struct thread_requested_policy requested = thread->requested_policy;

	bits |= (requested.thrp_int_darwinbg ? POLICY_REQ_INT_DARWIN_BG : 0);
	bits |= (requested.thrp_ext_darwinbg ? POLICY_REQ_EXT_DARWIN_BG : 0);
	bits |= (requested.thrp_int_iotier ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
	bits |= (requested.thrp_ext_iotier ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
	bits |= (requested.thrp_int_iopassive ? POLICY_REQ_INT_PASSIVE_IO : 0);
	bits |= (requested.thrp_ext_iopassive ? POLICY_REQ_EXT_PASSIVE_IO : 0);

	bits |= (requested.thrp_qos ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
	bits |= (requested.thrp_qos_override ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT) : 0);

	bits |= (requested.thrp_pidbind_bg ? POLICY_REQ_PIDBIND_BG : 0);

	bits |= (requested.thrp_latency_qos ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
	bits |= (requested.thrp_through_qos ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);

	info->requested = (integer_t) bits;
	bits = 0;

	struct thread_effective_policy effective = thread->effective_policy;

	bits |= (effective.thep_darwinbg ? POLICY_EFF_DARWIN_BG : 0);

	bits |= (effective.thep_io_tier ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
	bits |= (effective.thep_io_passive ? POLICY_EFF_IO_PASSIVE : 0);
	bits |= (effective.thep_all_sockets_bg ? POLICY_EFF_ALL_SOCKETS_BG : 0);
	bits |= (effective.thep_new_sockets_bg ? POLICY_EFF_NEW_SOCKETS_BG : 0);

	bits |= (effective.thep_qos ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);

	bits |= (effective.thep_latency_qos ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
	bits |= (effective.thep_through_qos ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);

	info->effective = (integer_t)bits;
	bits = 0;

	/* No pending-policy bits are reported through this legacy interface */
	info->pending = 0;
}
2367
2368 /*
2369 * Sneakily trace either the task and thread requested
2370 * or just the thread requested, depending on if we have enough room.
2371 * We do have room on LP64. On LP32, we have to split it between two uintptr_t's.
2372 *
2373 * LP32 LP64
2374 * threquested_0(thread) thread[0] task[0]
2375 * threquested_1(thread) thread[1] thread[0]
2376 *
2377 */
2378
uintptr_t
threquested_0(thread_t thread)
{
	/*
	 * First pointer-sized word of the thread's requested policy, for
	 * tracepoint packing (see the LP32/LP64 table above).  The
	 * static_assert guarantees the struct is exactly one uint64_t.
	 */
	static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");

	uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;

	return raw[0];
}
2388
uintptr_t
threquested_1(thread_t thread)
{
	/*
	 * On LP64 this returns the first word of the *task's* requested
	 * policy; on LP32 it returns the second half of the thread's
	 * requested policy (see the packing table above).
	 */
#if defined __LP64__
	return *(uintptr_t*)&get_threadtask(thread)->requested_policy;
#else
	uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
	return raw[1];
#endif
}
2399
uintptr_t
theffective_0(thread_t thread)
{
	/*
	 * First pointer-sized word of the thread's effective policy, for
	 * tracepoint packing (mirrors threquested_0).
	 */
	static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");

	uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
	return raw[0];
}
2408
uintptr_t
theffective_1(thread_t thread)
{
	/*
	 * On LP64 this returns the first word of the *task's* effective
	 * policy; on LP32, the second half of the thread's effective policy
	 * (mirrors threquested_1).
	 */
#if defined __LP64__
	return *(uintptr_t*)&get_threadtask(thread)->effective_policy;
#else
	uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
	return raw[1];
#endif
}
2419
2420
2421 /*
2422 * Set an override on the thread which is consulted with a
2423 * higher priority than the task/thread policy. This should
2424 * only be set for temporary grants until the thread
2425 * returns to the userspace boundary
2426 *
2427 * We use atomic operations to swap in the override, with
2428 * the assumption that the thread itself can
2429 * read the override and clear it on return to userspace.
2430 *
2431 * No locking is performed, since it is acceptable to see
2432 * a stale override for one loop through throttle_lowpri_io().
2433 * However a thread reference must be held on the thread.
2434 */
2435
void
set_thread_iotier_override(thread_t thread, int policy)
{
	/*
	 * Lock-free CAS install of a temporary I/O tier override (see the
	 * block comment above).  Caller must hold a thread reference.
	 */
	int current_override;

	/* Let most aggressive I/O policy win until user boundary */
	do {
		current_override = thread->iotier_override;

		if (current_override != THROTTLE_LEVEL_NONE) {
			/* Keep the stronger of the existing and new override */
			policy = MIN(current_override, policy);
		}

		if (current_override == policy) {
			/* no effective change */
			return;
		}
	} while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));

	/*
	 * Since the thread may be currently throttled,
	 * re-evaluate tiers and potentially break out
	 * of an msleep
	 */
	rethrottle_thread(get_bsdthread_info(thread));
}
2462
2463 /*
2464 * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
2465 * semaphores, dispatch_sync) may result in priority inversions where a higher priority
2466 * (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower
2467 * priority thread. In these cases, we attempt to propagate the priority token, as long
2468 * as the subsystem informs us of the relationships between the threads. The userspace
2469 * synchronization subsystem should maintain the information of owner->resource and
2470 * resource->waiters itself.
2471 */
2472
2473 /*
2474 * This helper canonicalizes the resource/resource_type given the current qos_override_mode
2475 * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
2476 * to be handled specially in the future, but for now it's fine to slam
2477 * *resource to USER_ADDR_NULL even if it was previously a wildcard.
2478 */
2479 static void
canonicalize_resource_and_type(user_addr_t * resource,int * resource_type)2480 canonicalize_resource_and_type(user_addr_t *resource, int *resource_type)
2481 {
2482 if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2483 /* Map all input resource/type to a single one */
2484 *resource = USER_ADDR_NULL;
2485 *resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
2486 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
2487 /* no transform */
2488 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
2489 /* Map all mutex overrides to a single one, to avoid memory overhead */
2490 if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
2491 *resource = USER_ADDR_NULL;
2492 }
2493 }
2494 }
2495
2496 /* This helper routine finds an existing override if known. Locking should be done by caller */
2497 static struct thread_qos_override *
find_qos_override(thread_t thread,user_addr_t resource,int resource_type)2498 find_qos_override(thread_t thread,
2499 user_addr_t resource,
2500 int resource_type)
2501 {
2502 struct thread_qos_override *override;
2503
2504 override = thread->overrides;
2505 while (override) {
2506 if (override->override_resource == resource &&
2507 override->override_resource_type == resource_type) {
2508 return override;
2509 }
2510
2511 override = override->override_next;
2512 }
2513
2514 return NULL;
2515 }
2516
static void
find_and_decrement_qos_override(thread_t thread,
	user_addr_t resource,
	int resource_type,
	boolean_t reset,
	struct thread_qos_override **free_override_list)
{
	/*
	 * Decrement (or, with reset, zero) the contended-resource count on
	 * every override matching resource/resource_type; the WILDCARD
	 * constants match any value.  Overrides whose count reaches zero are
	 * unlinked from the thread and pushed onto *free_override_list so
	 * the caller can zfree them after dropping locks.
	 */
	struct thread_qos_override *override, *override_prev;

	override_prev = NULL;
	override = thread->overrides;
	while (override) {
		struct thread_qos_override *override_next = override->override_next;

		if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
		    (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) {
			if (reset) {
				override->override_contended_resource_count = 0;
			} else {
				override->override_contended_resource_count--;
			}

			if (override->override_contended_resource_count == 0) {
				/* Unlink the dead override from the thread's list */
				if (override_prev == NULL) {
					thread->overrides = override_next;
				} else {
					override_prev->override_next = override_next;
				}

				/* Add to out-param for later zfree */
				override->override_next = *free_override_list;
				*free_override_list = override;
			} else {
				override_prev = override;
			}

			/* An exact (non-wildcard) resource matches at most one entry */
			if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
				return;
			}
		} else {
			override_prev = override;
		}

		override = override_next;
	}
}
2563
2564 /* This helper recalculates the current requested override using the policy selected at boot */
2565 static int
calculate_requested_qos_override(thread_t thread)2566 calculate_requested_qos_override(thread_t thread)
2567 {
2568 if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2569 return THREAD_QOS_UNSPECIFIED;
2570 }
2571
2572 /* iterate over all overrides and calculate MAX */
2573 struct thread_qos_override *override;
2574 int qos_override = THREAD_QOS_UNSPECIFIED;
2575
2576 override = thread->overrides;
2577 while (override) {
2578 qos_override = MAX(qos_override, override->override_qos);
2579 override = override->override_next;
2580 }
2581
2582 return qos_override;
2583 }
2584
2585 /*
2586 * Returns:
2587 * - 0 on success
2588 * - EINVAL if some invalid input was passed
2589 */
static int
proc_thread_qos_add_override_internal(thread_t thread,
	int override_qos,
	boolean_t first_override_for_resource,
	user_addr_t resource,
	int resource_type)
{
	/*
	 * Add (or bump) a QoS override on the thread for the given resource,
	 * then recompute and apply the combined requested override.
	 * Returns 0 on success (rc is currently always 0; the EINVAL path
	 * documented above is not taken in this routine).
	 */
	struct task_pend_token pend_token = {};
	int rc = 0;

	thread_mtx_lock(thread);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
	    thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);

	DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
	    uint64_t, thread->requested_policy.thrp_qos,
	    uint64_t, thread->effective_policy.thep_qos,
	    int, override_qos, boolean_t, first_override_for_resource);

	struct thread_qos_override *override;
	struct thread_qos_override *override_new = NULL;
	int new_qos_override, prev_qos_override;
	int new_effective_qos;

	canonicalize_resource_and_type(&resource, &resource_type);

	override = find_qos_override(thread, resource, resource_type);
	if (first_override_for_resource && !override) {
		/* We need to allocate a new object. Drop the thread lock and
		 * recheck afterwards in case someone else added the override
		 */
		thread_mtx_unlock(thread);
		override_new = zalloc(thread_qos_override_zone);
		thread_mtx_lock(thread);
		override = find_qos_override(thread, resource, resource_type);
	}
	if (first_override_for_resource && override) {
		/* Someone else already allocated while the thread lock was dropped */
		override->override_contended_resource_count++;
	} else if (!override && override_new) {
		/* Install the freshly allocated override at the list head */
		override = override_new;
		override_new = NULL;
		override->override_next = thread->overrides;
		/* since first_override_for_resource was TRUE */
		override->override_contended_resource_count = 1;
		override->override_resource = resource;
		override->override_resource_type = (int16_t)resource_type;
		override->override_qos = THREAD_QOS_UNSPECIFIED;
		thread->overrides = override;
	}

	if (override) {
		/* Overrides only ratchet upward for a given resource */
		if (override->override_qos == THREAD_QOS_UNSPECIFIED) {
			override->override_qos = (int16_t)override_qos;
		} else {
			override->override_qos = MAX(override->override_qos, (int16_t)override_qos);
		}
	}

	/* Determine how to combine the various overrides into a single current
	 * requested override
	 */
	new_qos_override = calculate_requested_qos_override(thread);

	prev_qos_override = proc_get_thread_policy_locked(thread,
	    TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);

	if (new_qos_override != prev_qos_override) {
		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_QOS_OVERRIDE,
		    new_qos_override, 0, &pend_token);
	}

	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

	thread_mtx_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	/* Free the unused allocation if the recheck found an existing override */
	if (override_new) {
		zfree(thread_qos_override_zone, override_new);
	}

	DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
	    int, new_qos_override, int, new_effective_qos, int, rc);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
	    new_qos_override, resource, resource_type, 0, 0);

	return rc;
}
2682
2683 int
proc_thread_qos_add_override(task_t task,thread_t thread,uint64_t tid,int override_qos,boolean_t first_override_for_resource,user_addr_t resource,int resource_type)2684 proc_thread_qos_add_override(task_t task,
2685 thread_t thread,
2686 uint64_t tid,
2687 int override_qos,
2688 boolean_t first_override_for_resource,
2689 user_addr_t resource,
2690 int resource_type)
2691 {
2692 boolean_t has_thread_reference = FALSE;
2693 int rc = 0;
2694
2695 if (thread == THREAD_NULL) {
2696 thread = task_findtid(task, tid);
2697 /* returns referenced thread */
2698
2699 if (thread == THREAD_NULL) {
2700 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
2701 tid, 0, 0xdead, 0, 0);
2702 return ESRCH;
2703 }
2704 has_thread_reference = TRUE;
2705 } else {
2706 assert(get_threadtask(thread) == task);
2707 }
2708 rc = proc_thread_qos_add_override_internal(thread, override_qos,
2709 first_override_for_resource, resource, resource_type);
2710 if (has_thread_reference) {
2711 thread_deallocate(thread);
2712 }
2713
2714 return rc;
2715 }
2716
static void
proc_thread_qos_remove_override_internal(thread_t thread,
	user_addr_t resource,
	int resource_type,
	boolean_t reset)
{
	/*
	 * Drop (or, with reset, clear) the override(s) matching
	 * resource/resource_type, recompute the combined requested override,
	 * and apply it if it changed.  Unlinked override objects are freed
	 * only after all locks are dropped.
	 */
	struct task_pend_token pend_token = {};

	struct thread_qos_override *deferred_free_override_list = NULL;
	int new_qos_override, prev_qos_override, new_effective_qos;

	thread_mtx_lock(thread);

	canonicalize_resource_and_type(&resource, &resource_type);

	find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
	    thread_tid(thread), resource, reset, 0, 0);

	DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
	    uint64_t, thread->requested_policy.thrp_qos,
	    uint64_t, thread->effective_policy.thep_qos);

	/* Determine how to combine the various overrides into a single current requested override */
	new_qos_override = calculate_requested_qos_override(thread);

	spl_t s = splsched();
	thread_lock(thread);

	/*
	 * The override chain and therefore the value of the current override is locked with thread mutex,
	 * so we can do a get/set without races. However, the rest of thread policy is locked under the spinlock.
	 * This means you can't change the current override from a spinlock-only setter.
	 */
	prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);

	if (new_qos_override != prev_qos_override) {
		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token);
	}

	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	/* Now that no locks are held, free the unlinked override objects */
	while (deferred_free_override_list) {
		struct thread_qos_override *override_next = deferred_free_override_list->override_next;

		zfree(thread_qos_override_zone, deferred_free_override_list);
		deferred_free_override_list = override_next;
	}

	DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
	    int, new_qos_override, int, new_effective_qos);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
	    thread_tid(thread), 0, 0, 0, 0);
}
2780
2781 int
proc_thread_qos_remove_override(task_t task,thread_t thread,uint64_t tid,user_addr_t resource,int resource_type)2782 proc_thread_qos_remove_override(task_t task,
2783 thread_t thread,
2784 uint64_t tid,
2785 user_addr_t resource,
2786 int resource_type)
2787 {
2788 boolean_t has_thread_reference = FALSE;
2789
2790 if (thread == THREAD_NULL) {
2791 thread = task_findtid(task, tid);
2792 /* returns referenced thread */
2793
2794 if (thread == THREAD_NULL) {
2795 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2796 tid, 0, 0xdead, 0, 0);
2797 return ESRCH;
2798 }
2799 has_thread_reference = TRUE;
2800 } else {
2801 assert(task == get_threadtask(thread));
2802 }
2803
2804 proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE);
2805
2806 if (has_thread_reference) {
2807 thread_deallocate(thread);
2808 }
2809
2810 return 0;
2811 }
2812
2813 /* Deallocate before thread termination */
2814 void
proc_thread_qos_deallocate(thread_t thread)2815 proc_thread_qos_deallocate(thread_t thread)
2816 {
2817 /* This thread must have no more IPC overrides. */
2818 assert(thread->kevent_overrides == 0);
2819 assert(thread->requested_policy.thrp_qos_kevent_override == THREAD_QOS_UNSPECIFIED);
2820 assert(thread->requested_policy.thrp_qos_wlsvc_override == THREAD_QOS_UNSPECIFIED);
2821
2822 /*
2823 * Clear out any lingering override objects.
2824 */
2825 struct thread_qos_override *override;
2826
2827 thread_mtx_lock(thread);
2828 override = thread->overrides;
2829 thread->overrides = NULL;
2830 thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2831 /* We don't need to re-evaluate thread policy here because the thread has already exited */
2832 thread_mtx_unlock(thread);
2833
2834 while (override) {
2835 struct thread_qos_override *override_next = override->override_next;
2836
2837 zfree(thread_qos_override_zone, override);
2838 override = override_next;
2839 }
2840 }
2841
2842 /*
2843 * Set up the primordial thread's QoS
2844 */
void
task_set_main_thread_qos(task_t task, thread_t thread)
{
	/*
	 * Compute the task's main-thread QoS and apply it (with zero
	 * relative priority) to the given thread, which must belong to task.
	 */
	struct task_pend_token pend_token = {};

	assert(get_threadtask(thread) == task);

	thread_mtx_lock(thread);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
	    thread_tid(thread), threquested_0(thread), threquested_1(thread),
	    thread->requested_policy.thrp_qos, 0);

	thread_qos_t primordial_qos = task_compute_main_thread_qos(task);

	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
	    primordial_qos, 0, &pend_token);

	thread_mtx_unlock(thread);

	/* Perform any deferred policy work flagged during the locked update */
	thread_policy_update_complete_unlocked(thread, &pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
	    thread_tid(thread), threquested_0(thread), threquested_1(thread),
	    primordial_qos, 0);
}
2873
2874 /*
2875 * KPI for pthread kext
2876 *
2877 * Return a good guess at what the initial manager QoS will be
2878 * Dispatch can override this in userspace if it so chooses
2879 */
2880 thread_qos_t
task_get_default_manager_qos(task_t task)2881 task_get_default_manager_qos(task_t task)
2882 {
2883 thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
2884
2885 if (primordial_qos == THREAD_QOS_LEGACY) {
2886 primordial_qos = THREAD_QOS_USER_INITIATED;
2887 }
2888
2889 return primordial_qos;
2890 }
2891
/*
 * Check if the kernel promotion on thread has changed
 * and apply it.
 *
 * Returns TRUE if the promotion value changed (and the thread's
 * scheduled priority was recomputed), FALSE otherwise.
 *
 * thread locked on entry and exit
 */
boolean_t
thread_recompute_kernel_promotion_locked(thread_t thread)
{
	boolean_t needs_update = FALSE;
	/* Current kernel promotion as propagated through the turnstile chain */
	uint8_t kern_promotion_schedpri = (uint8_t)thread_get_inheritor_turnstile_sched_priority(thread);

	/*
	 * For now just assert that kern_promotion_schedpri <= MAXPRI_PROMOTE.
	 * TURNSTILE_KERNEL_PROMOTE adds threads on the waitq already capped to MAXPRI_PROMOTE
	 * and propagates the priority through the chain with the same cap, because as of now it does
	 * not differentiate on the kernel primitive.
	 *
	 * If this assumption will change with the adoption of a kernel primitive that does not
	 * cap the priority when adding/propagating,
	 * then here is the place to put the generic cap for all kernel primitives
	 * (converts the assert to kern_promotion_schedpri = MIN(priority, MAXPRI_PROMOTE))
	 */
	assert(kern_promotion_schedpri <= MAXPRI_PROMOTE);

	if (kern_promotion_schedpri != thread->kern_promotion_schedpri) {
		/* Trace the old/new promotion values before applying the change */
		KDBG(MACHDBG_CODE(
			    DBG_MACH_SCHED, MACH_TURNSTILE_KERNEL_CHANGE) | DBG_FUNC_NONE,
		    thread_tid(thread),
		    kern_promotion_schedpri,
		    thread->kern_promotion_schedpri);

		needs_update = TRUE;
		thread->kern_promotion_schedpri = kern_promotion_schedpri;
		thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
	}

	return needs_update;
}
2931
/*
 * Check if the user promotion on thread has changed
 * and apply it.
 *
 * Returns TRUE if the thread is waiting on a turnstile and its base
 * priority changed as a result of the new promotion, meaning the
 * caller should propagate the change; FALSE otherwise.
 *
 * thread locked on entry, might drop the thread lock
 * and reacquire it.
 */
boolean_t
thread_recompute_user_promotion_locked(thread_t thread)
{
	boolean_t needs_update = FALSE;
	struct task_pend_token pend_token = {};
	/* User promotion is capped at MAXPRI_USER regardless of the inheritor chain */
	uint8_t user_promotion_basepri = MIN((uint8_t)thread_get_inheritor_turnstile_base_priority(thread), MAXPRI_USER);
	int old_base_pri = thread->base_pri;
	thread_qos_t qos_promotion;

	/* Check if user promotion has changed */
	if (thread->user_promotion_basepri == user_promotion_basepri) {
		return needs_update;
	} else {
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    (TURNSTILE_CODE(TURNSTILE_PRIORITY_OPERATIONS, (THREAD_USER_PROMOTION_CHANGE))) | DBG_FUNC_NONE,
		    thread_tid(thread),
		    user_promotion_basepri,
		    thread->user_promotion_basepri,
		    0, 0);
		KDBG(MACHDBG_CODE(
			    DBG_MACH_SCHED, MACH_TURNSTILE_USER_CHANGE) | DBG_FUNC_NONE,
		    thread_tid(thread),
		    user_promotion_basepri,
		    thread->user_promotion_basepri);
	}

	/* Update the user promotion base pri */
	thread->user_promotion_basepri = user_promotion_basepri;
	pend_token.tpt_force_recompute_pri = 1;

	/* Map the promotion base priority to a QoS promotion level */
	if (user_promotion_basepri <= MAXPRI_THROTTLE) {
		qos_promotion = THREAD_QOS_UNSPECIFIED;
	} else {
		qos_promotion = thread_user_promotion_qos_for_pri(user_promotion_basepri);
	}

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_PROMOTE, qos_promotion, 0, &pend_token);

	/*
	 * Only report a needed propagation if the base pri actually moved
	 * and the thread is blocked on a turnstile that would carry it.
	 */
	if (thread_get_waiting_turnstile(thread) &&
	    thread->base_pri != old_base_pri) {
		needs_update = TRUE;
	}

	/* Must drop the thread lock to complete the deferred policy work */
	thread_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	thread_lock(thread);

	return needs_update;
}
2991
2992 /*
2993 * Convert the thread user promotion base pri to qos for threads in qos world.
2994 * For priority above UI qos, the qos would be set to UI.
2995 */
2996 thread_qos_t
thread_user_promotion_qos_for_pri(int priority)2997 thread_user_promotion_qos_for_pri(int priority)
2998 {
2999 thread_qos_t qos;
3000 for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
3001 if (thread_qos_policy_params.qos_pri[qos] <= priority) {
3002 return qos;
3003 }
3004 }
3005 return THREAD_QOS_MAINTENANCE;
3006 }
3007
/*
 * Set the thread's QoS Kevent override
 * Owned by the Kevent subsystem
 *
 * May be called with spinlocks held, but not spinlocks
 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
 *
 * One 'add' must be balanced by one 'drop'.
 * Between 'add' and 'drop', the override QoS value may be updated with an 'update'.
 * Before the thread is deallocated, there must be 0 remaining overrides.
 */
static void
thread_kevent_override(thread_t thread,
    uint32_t qos_override,
    boolean_t is_new_override)
{
	struct task_pend_token pend_token = {};
	boolean_t needs_update;

	spl_t s = splsched();
	thread_lock(thread);

	uint32_t old_override = thread->requested_policy.thrp_qos_kevent_override;

	/* An override must carry a real QoS value */
	assert(qos_override > THREAD_QOS_UNSPECIFIED);
	assert(qos_override < THREAD_QOS_LAST);

	if (is_new_override) {
		if (thread->kevent_overrides++ == 0) {
			/* This add is the first override for this thread */
			assert(old_override == THREAD_QOS_UNSPECIFIED);
		} else {
			/* There are already other overrides in effect for this thread */
			assert(old_override > THREAD_QOS_UNSPECIFIED);
		}
	} else {
		/* There must be at least one override (the previous add call) in effect */
		assert(thread->kevent_overrides > 0);
		assert(old_override > THREAD_QOS_UNSPECIFIED);
	}

	/*
	 * We can't allow lowering if there are several IPC overrides because
	 * the caller can't possibly know the whole truth
	 */
	if (thread->kevent_overrides == 1) {
		/* Sole override: the stored value may move in either direction */
		needs_update = qos_override != old_override;
	} else {
		/* Multiple overrides: the stored value is saturated, only raise it */
		needs_update = qos_override > old_override;
	}

	if (needs_update) {
		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_QOS_KEVENT_OVERRIDE,
		    qos_override, 0, &pend_token);
		/* A QoS override change should never require socket rethrottling */
		assert(pend_token.tpt_update_sockets == 0);
	}

	thread_unlock(thread);
	splx(s);

	thread_policy_update_complete_unlocked(thread, &pend_token);
}
3071
/* Add a new kevent QoS override to the thread (balance with a 'drop') */
void
thread_add_kevent_override(thread_t thread, uint32_t qos_override)
{
	thread_kevent_override(thread, qos_override, TRUE);
}
3077
/* Update the QoS value of an already-added kevent override */
void
thread_update_kevent_override(thread_t thread, uint32_t qos_override)
{
	thread_kevent_override(thread, qos_override, FALSE);
}
3083
/*
 * Drop one kevent QoS override previously added with
 * thread_add_kevent_override(); clears the saturated override
 * value once the last override is released.
 */
void
thread_drop_kevent_override(thread_t thread)
{
	struct task_pend_token pend_token = {};

	spl_t s = splsched();
	thread_lock(thread);

	/* A drop must be balanced by a preceding add */
	assert(thread->kevent_overrides > 0);

	if (--thread->kevent_overrides == 0) {
		/*
		 * There are no more overrides for this thread, so we should
		 * clear out the saturated override value
		 */

		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_QOS_KEVENT_OVERRIDE, THREAD_QOS_UNSPECIFIED,
		    0, &pend_token);
	}

	thread_unlock(thread);
	splx(s);

	thread_policy_update_complete_unlocked(thread, &pend_token);
}
3110
/*
 * Set the thread's QoS Workloop Servicer override
 * Owned by the Kevent subsystem
 *
 * May be called with spinlocks held, but not spinlocks
 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
 *
 * One 'add' must be balanced by one 'drop'.
 * Between 'add' and 'drop', the override QoS value may be updated with an 'update'.
 * Before the thread is deallocated, there must be 0 remaining overrides.
 */
static void
thread_servicer_override(thread_t thread,
    uint32_t qos_override,
    boolean_t is_new_override)
{
	struct task_pend_token pend_token = {};

	spl_t s = splsched();
	thread_lock(thread);

	/*
	 * Unlike the kevent override, the servicer override is not counted:
	 * at most one may be in effect, tracked by the requested-policy bit.
	 */
	if (is_new_override) {
		assert(!thread->requested_policy.thrp_qos_wlsvc_override);
	} else {
		assert(thread->requested_policy.thrp_qos_wlsvc_override);
	}

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_SERVICER_OVERRIDE,
	    qos_override, 0, &pend_token);

	thread_unlock(thread);
	splx(s);

	/* A QoS override change should never require socket rethrottling */
	assert(pend_token.tpt_update_sockets == 0);
	thread_policy_update_complete_unlocked(thread, &pend_token);
}
3148
/* Add the servicer QoS override to the thread (balance with a 'drop') */
void
thread_add_servicer_override(thread_t thread, uint32_t qos_override)
{
	/* An override must carry a real QoS value */
	assert(qos_override > THREAD_QOS_UNSPECIFIED);
	assert(qos_override < THREAD_QOS_LAST);

	thread_servicer_override(thread, qos_override, TRUE);
}
3157
/* Update the QoS value of the already-added servicer override */
void
thread_update_servicer_override(thread_t thread, uint32_t qos_override)
{
	/* An override must carry a real QoS value */
	assert(qos_override > THREAD_QOS_UNSPECIFIED);
	assert(qos_override < THREAD_QOS_LAST);

	thread_servicer_override(thread, qos_override, FALSE);
}
3166
/* Drop the servicer override by resetting it to THREAD_QOS_UNSPECIFIED */
void
thread_drop_servicer_override(thread_t thread)
{
	thread_servicer_override(thread, THREAD_QOS_UNSPECIFIED, FALSE);
}
3172
/*
 * Set the thread's kevent IO tier override, skipping the policy
 * machinery entirely when the requested tier is already in effect.
 */
void
thread_update_servicer_iotier_override(thread_t thread, uint8_t iotier_override)
{
	struct task_pend_token pend_token = {};
	uint8_t current_iotier;

	/*
	 * Check if the update is needed.
	 * NOTE(review): this pre-check runs before splsched()/thread_lock below;
	 * presumably a stale read here is benign (worst case an extra or skipped
	 * redundant update by a racing caller) — confirm against the policy engine's
	 * locking requirements for thread_get_requested_policy_spinlocked.
	 */
	current_iotier = (uint8_t)thread_get_requested_policy_spinlocked(thread,
	    TASK_POLICY_ATTRIBUTE, TASK_POLICY_IOTIER_KEVENT_OVERRIDE, NULL);

	if (iotier_override == current_iotier) {
		return;
	}

	spl_t s = splsched();
	thread_lock(thread);

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_IOTIER_KEVENT_OVERRIDE,
	    iotier_override, 0, &pend_token);

	thread_unlock(thread);
	splx(s);

	/* An IO tier override change should never require socket rethrottling */
	assert(pend_token.tpt_update_sockets == 0);
	thread_policy_update_complete_unlocked(thread, &pend_token);
}
3200
3201 /* Get current requested qos / relpri, may be called from spinlock context */
3202 thread_qos_t
thread_get_requested_qos(thread_t thread,int * relpri)3203 thread_get_requested_qos(thread_t thread, int *relpri)
3204 {
3205 int relprio_value = 0;
3206 thread_qos_t qos;
3207
3208 qos = (thread_qos_t)proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
3209 TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
3210 if (relpri) {
3211 *relpri = -relprio_value;
3212 }
3213 return qos;
3214 }
3215
/*
 * This function will promote the thread priority
 * since exec could block other threads calling
 * proc_find on the proc. This boost must be removed
 * via call to thread_clear_exec_promotion.
 *
 * This should be replaced with a generic 'priority inheriting gate' mechanism (24194397)
 */
void
thread_set_exec_promotion(thread_t thread)
{
	spl_t s = splsched();
	thread_lock(thread);

	sched_thread_promote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);

	thread_unlock(thread);
	splx(s);
}
3235
/*
 * This function will clear the exec thread
 * promotion set on the thread by thread_set_exec_promotion.
 */
void
thread_clear_exec_promotion(thread_t thread)
{
	spl_t s = splsched();
	thread_lock(thread);

	sched_thread_unpromote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);

	thread_unlock(thread);
	splx(s);
}
3251